NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/data.py

Revision 67 (checked in by cbc, 17 years ago)

Fulfill ticket #7: Add deep copy functionality to Data class and ticket #8: Add string to float and NaN conversion to Data class

Line 
1 #!/usr/bin/python
2 """
3 Classes to handle sodar data samples.
4
5 Sodar data samples are collected into daily files. Each sample consists of a
6 header followed by an observation for each height.
7
8 The daily file is split into a list (modeled by the class Data) of samples
9 (modeled by the class Sample) in chronological order. A Data object is
10 initialized with a string representing the daily file data:
11
12      dataHandle = open('20070601.dat')
13      dataString = dataHandle.read()
14      dataObject = Data(dataString)
15
16 Each Sample object has attributes for a Header and Body object. The Samples
17 within a Data object may also be accessed by time using a string of the format
18 YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching
19 Sample in the Data object:
20
21     dataObject[0] # the first Sample object of the day
22     dataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23     dataObject[15].header # the Header object of the 16th Sample
24     dataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26 Header objects act as dictionaries. Access each sample-wide parameter of
27 interest using the header parameter name as a keyword on the Header object:
28
29     dataObject[15].header['VAL2'] # the number of validations for beam 2
30     dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31                                                   # probability on beam 3
32     dataObject[0].header['SNR1'] # signal to noise on beam 1
33
34 Consult your Sodar documentation for a complete list of header parameters.
35
36 Body objects act as lists of dictionaries. The dictionaries access
37 altitude-specific parameters by name as keywords. The dictionaries are in
38 altitude-ascending order. Each dictionary may also be accessed by indexing with
39 an altitude string:
40
41     dataObject[15].body[0] # the data for the lowest altitude, 16th sample
42     dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
43     dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
44     dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
45                                                      # at 70 meters
46
47 The body attribute of a Sample object may also be indexed directly on a Sample
48 object for the most convenient semantics:
49
50     dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
51     dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
52                                                 # 70 meters, 9:15am
53 """
54
55 __author__ = 'Chris Calloway'
56 __email__ = 'cbc@unc.edu'
57 __copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
58 __license__ = 'GPL2'
59
60 import re
61 import numpy as n
62
class Data(list):

    """Daily sodar file data.

    A chronologically ordered list of Sample objects, one for each
    '$'-separated chunk of the daily file text. Items may be retrieved by
    integer position (or slice) or by a 'YYYY-MM-DD-HH-MM' time string.

    After construction the ``samples`` attribute holds a separate copy of
    the data as (header, body) tuples in which every body value has been
    converted to a float, with invalid or unparsable values stored as NaN.
    The Sample objects held in the list itself keep their original strings.
    """

    # Header keys that make up a sample time, paired with the zero-padded
    # width each one has in a 'YYYY-MM-DD-HH-MM' index string.
    _TIME_FIELDS = (('YEAR', 4), ('MONTH', 2), ('DAY', 2),
                    ('HOUR', 2), ('MIN', 2))

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

        data -- the complete text of one daily file. Chunks that are empty
                after stripping whitespace are ignored.
        """
        super(Data, self).__init__()
        chunks = (chunk.strip() for chunk in data.split('$'))
        self.extend([Sample(chunk) for chunk in chunks if chunk])
        self._normalize()

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

        Integer and slice indices behave as for a plain list. Any index the
        list rejects with TypeError is treated as a time string and handed
        to _find().
        """
        try:
            return super(Data, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in Data

           where sample time of form YYYY-MM-DD-HH-MM.

           Returns the first Sample whose zero-padded header time fields
           equal the corresponding parts of index.

           Raises ValueError if index does not split into exactly five
           '-'-separated parts, AttributeError if index has no split()
           method, and IndexError if no Sample matches.
        """

        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        wanted = (year, month, day, hour, minute)
        for sample in self:
            try:
                header = sample.header
                stamp = tuple(header[key].rjust(width, '0')
                              for key, width in self._TIME_FIELDS)
            except (TypeError, KeyError):
                # TypeError: sample.header may not exist (it is None).
                # KeyError: the header lacks one of the time fields.
                # Either way the sample cannot match, so skip it.
                continue
            if stamp == wanted:
                return sample
        raise IndexError('Data index out of range')

    def _normalize(self):
        """Clean up data for analysis.

        Currently builds the ``samples`` copy and converts its body values
        to numbers; the remaining steps listed below are not implemented.
        """
        self._copy()
        self._convert()
        # compute time interval
        # correct for missing times
        # compute minimum altitude
        # compute maximum overall altitude
        # compute number of altitudes
        # compute altitude interval
        # correct for missing altitudes
        # mark maximum altitude with good values for each sample
        # mark minimum altitude with invalid values for each sample
        # convert direction to radians
        # compute u,v,c components
        # compute colorspecs
        # compute plotting parameters

    def _copy(self):
        """Create a deep copy as a list of Sample copies.

        Stores the result of each Sample's _copy() in self.samples.
        """
        self.samples = [sample._copy() for sample in self]

    def _convert(self):
        """Convert to numbers and correct for invalid values.

        Works in place on the body part of each entry in self.samples.
        Every value is replaced by float(value); the "-9999" marker and
        anything float() rejects become NaN. Header parts are not touched.
        """
        INVALID = "-9999"
        for sample in self.samples:
            body = sample[1]
            if not body:
                # A copied sample may have no body (None) or an empty one.
                continue
            for altitude in body:
                # Snapshot the items so the dictionary can be reassigned
                # while looping.
                for key, value in list(altitude.items()):
                    if value == INVALID:
                        altitude[key] = n.nan
                        continue
                    try:
                        altitude[key] = float(value)
                    except (ValueError, TypeError):
                        # n.nan rather than n.NaN: the capitalized alias
                        # was removed in NumPy 2.0.
                        altitude[key] = n.nan
145
146
class Sample(object):

    """A single sample from daily sodar file data.

    Attributes:
    header -- a Header built from the first three blank-line-terminated
              groups of lines, or None if the text has no such structure.
    body   -- a Body built from the remaining text, or None likewise.
    """

    # first three groups of lines are the header; rest is body
    _samplePattern = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                    (?P<body>.*$)
                                 ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

        sample -- the text of one sample. Text that does not contain three
                  header groups is treated as an invalid Sample: header and
                  body are both set to None instead of raising.
        """
        super(Sample, self).__init__()
        match = self._samplePattern.match(sample.strip())
        # match() returns None for an invalid Sample; fall back to no parts.
        parts = match.groupdict() if match is not None else {}
        header = parts.get('header')
        body = parts.get('body')
        self.header = Header(header) if header is not None else None
        self.body = Body(body) if body is not None else None

    def __getitem__(self, index):
        """Index Sample by body attribute.

        Raises IndexError when the Sample has no body.
        """
        try:
            return self.body[index]
        except TypeError:   # sample.body may not exist
            raise IndexError('Sample index out of range')

    def _copy(self):
        """Create a deep copy as a tuple of header and body copies.

        A missing header or body is represented by None in the tuple.
        """
        header = self.header._copy() if self.header is not None else None
        body = self.body._copy() if self.body is not None else None
        return (header, body)
180
181
class Header(dict):

    """A sodar data sample header.

    A dictionary mapping each sample-wide parameter name to its value;
    both names and values are kept as strings.
    """

    def __init__(self, header):
        """Pair up parameter names and values found in the header text.

        Blank lines are discarded. Of the lines that remain, the 1st, 3rd,
        5th, ... hold whitespace-separated parameter names and the 2nd,
        4th, 6th, ... hold the corresponding whitespace-separated values.
        """
        super(Header, self).__init__()
        rows = []
        for rawLine in header.split('\n'):
            text = rawLine.strip()
            if text:
                rows.append(text)
        names = " ".join(rows[::2]).split()
        values = " ".join(rows[1::2]).split()
        # TODO: fix for a bad match between names and values; zip stops at
        # the shorter of the two sequences, so any surplus is dropped.
        for name, value in zip(names, values):
            self[name] = value

    def _copy(self):
        """Return the parameters as a new plain dictionary (shallow copy)."""
        return dict(self)
208
209
class Body(list):

    """A sodar data sample body.

    A list of dictionaries, one per data line of the body text, stored in
    the reverse of the order in which the lines appear. Each dictionary
    maps the column names from the first line to that line's string values.
    Items may be retrieved by integer position or by 'ALT' value.
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

        The first non-blank line contains parameter keys;
        the remaining lines contain parameter values,
        one set of parameters for a single altitude per line.

        Body text with no non-blank lines produces an empty Body.
        """
        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            # Nothing to parse: there is no key line to read.
            return
        bodyKeys = bodyLines[0].split()
        # TODO: fix for a bad match between names and values; zip stops at
        # the shorter of the two sequences, so any surplus is dropped.
        self.extend([dict(zip(bodyKeys, bodyLine.split()))
                     for bodyLine in bodyLines[1:]])
        # The data lines are stored in reverse of their order in the text.
        self.reverse()

    def __getitem__(self, index):
        """Return altitude data by position or by altitude string.

        Any index the list rejects with TypeError is handed to _find().
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find altitude data in Body.

        Returns the first dictionary whose 'ALT' value equals index;
        dictionaries without an 'ALT' key are skipped. Raises IndexError
        if none matches.
        """
        for altitude in self:
            try:
                if altitude['ALT'] != index:
                    continue
            except KeyError:
                continue
            return altitude
        raise IndexError('Body index, out of range')

    def _copy(self):
        """Create a deep copy as a list of dictionaries."""
        return [altitude.copy() for altitude in self]
256
257
def _main():
    """Process as script from command line.

    Downloads one daily test file, parses it into a Data object and prints
    the 'SPEED' value for the '70' altitude of the 2007-06-01 09:15 sample.

    Raises IOError if the test data cannot be read.
    """
    # Imported here so the module itself does not depend on either name.
    try:
        from urllib2 import urlopen             # Python 2
    except ImportError:
        from urllib.request import urlopen      # Python 3
    url = ('http://nemo.isis.unc.edu/data/nccoos/level0/'
           'dukeforest/sodar/store/2007-06/20070601.dat')
    try:
        dataHandle = urlopen(url)
        try:
            dataString = dataHandle.read()
        finally:
            # Release the connection whether or not the read succeeded.
            dataHandle.close()
    except Exception:
        # Exception, not a bare except, so Ctrl-C and SystemExit still
        # propagate instead of being reported as a read failure.
        raise IOError("Failure to read test data")
    if not isinstance(dataString, str):
        # read() returns bytes on Python 3; Data splits on text.
        dataString = dataString.decode('ascii', 'replace')
    dataObject = Data(dataString)
    print(dataObject['2007-06-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.