NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/data.py

Revision 69 (checked in by cbc, 17 years ago)

Fulfill ticket #11: Compute minimum altitude

Line 
1 #!/usr/bin/python
2 """
3 Classes to handle sodar data samples.
4
5 Sodar data samples are collected into daily files. Each sample consists of a
6 header followed by an observation for each height.
7
8 The daily file is split into a list (modeled by the class Data) of samples
9 (modeled by the class Sample) in chronological order. A Data object is
10 initialized with a string representing the daily file data:
11
12      dataHandle = open('20070601.dat')
13      dataString = dataHandle.read()
14      dataObject = Data(dataString)
15
16 Each Sample object has attributes for a Header and Body object. The Samples
17 within a Data object may also be accessed by time using a string of the format
18 YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching
19 Sample in the Data object:
20
21     dataObject[0] # the first Sample object of the day
22     dataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23     dataObject[15].header # the Header object of the 16th Sample
24     dataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26 Header objects act as dictionaries. Access each sample-wide parameter of
27 interest using the header parameter name as a keyword on the Header object:
28
29     dataObject[15].header['VAL2'] # the number of validations for beam 2
30     dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31                                                   # probability on beam 3
32     dataObject[0].header['SNR1'] # signal to noise on beam 1
33
34 Consult your Sodar documentation for a complete list of header parameters.
35
36 Body objects act as lists of dictionaries. The dictionaries access
37 altitude-specific parameters by name as keywords. The dictionaries are in
38 altitude-ascending order. Each dictionary may also be accessed by indexing with
39 an altitude string:
40
41     dataObject[15].body[0] # the data for the lowest altitude, 16th sample
42     dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
43     dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
44     dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
45                                                      # at 70 meters
46
47 The body attribute of a Sample object may also be indexed directly on a Sample
48 object for the most convenient semantics:
49
50     dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
51     dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
52                                                 # 70 meters, 9:15am
53 """
54
55 __author__ = 'Chris Calloway'
56 __email__ = 'cbc@unc.edu'
57 __copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
58 __license__ = 'GPL2'
59
60 import re
61 import numpy as n
62 import datetime
63
class Data(list):

    """Daily sodar file data.

       (A chronologically ordered list of samples.)

       The list items are the Sample objects parsed from the raw text.
       Normalization additionally sets these attributes:

       samples         -- list of {'header': dict, 'body': list of dict}
                          copies with values converted to numbers and a
                          'stamp' datetime added to each copy
       sampleInterval  -- most frequent timedelta between adjacent stamps
       minimumAltitude -- most frequent lowest altitude across samples
    """

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $."""
        super(Data, self).__init__()
        chunks = (chunk.strip() for chunk in data.split('$'))
        # skip the empty chunks produced by leading/trailing/doubled '$'
        self.extend(Sample(chunk) for chunk in chunks if chunk)
        self._normalize()

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

           Integer (and slice) indices behave as for a list; any index the
           list rejects with TypeError is treated as a time string and
           passed to _find.
        """
        try:
            return super(Data, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in Data

           where sample time of form YYYY-MM-DD-HH-MM.

           Returns the first Sample whose header date fields match.
           Raises ValueError if index does not have five '-' separated
           parts, AttributeError if index is not a string, and IndexError
           if no Sample matches.
        """

        try:
            parts = index.split('-')
            year, month, day, hour, minute = parts
        except ValueError:
            raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        # Pad both sides so '2007-6-1-9-15' and '2007-06-01-09-15' agree.
        wanted = (('YEAR', year.rjust(4, '0')),
                  ('MONTH', month.rjust(2, '0')),
                  ('DAY', day.rjust(2, '0')),
                  ('HOUR', hour.rjust(2, '0')),
                  ('MIN', minute.rjust(2, '0')))
        for sample in self:
            try:
                for name, value in wanted:
                    if sample.header[name].rjust(len(value), '0') != value:
                        break
                else:
                    return sample
            except (TypeError, KeyError):
                # sample.header may be None or may lack a date field
                continue
        raise IndexError('Data index out of range')

    def _normalize(self):
        """Clean up data for analysis."""
        self._copy()
        self._convert()
        self._stamp()
        self._sampleInterval()
        # correct for missing times
        self._minimumAltitude()
        # compute maximum overall altitude
        # compute number of altitudes
        # compute altitude interval
        # correct for missing altitudes
        # mark maximum altitude with good values for each sample
        # mark minimum altitude with invalid values for each sample
        # convert direction to radians
        # compute u,v,c components
        # compute colorspecs
        # compute plotting parameters

    def _copy(self):
        """Create a deep copy as a list of Sample copies."""
        self.samples = [sample._copy() for sample in self]

    @staticmethod
    def _toNumber(value, convert, invalid="-9999"):
        """Return convert(value), or NaN if value is invalid/unconvertible."""
        if value == invalid:
            return n.nan
        try:
            return convert(value)
        except (ValueError, TypeError):
            return n.nan

    @staticmethod
    def _mostCommon(votes, default):
        """Return the key of votes with the highest count, else default.

           Ties go to the first such key in dictionary iteration order.
        """
        if not votes:
            return default
        return max(votes, key=votes.get)

    def _convert(self):
        """Convert to numbers and correct for invalid values.

           Body values become floats and header values become ints; the
           marker "-9999" and anything unconvertible become NaN.
        """
        for sample in self.samples:
            for altitude in sample['body']:
                # list() so the dict is not iterated while being rewritten
                for key, value in list(altitude.items()):
                    altitude[key] = self._toNumber(value, float)
            header = sample['header']
            for key, value in list(header.items()):
                header[key] = self._toNumber(value, int)

    def _stamp(self):
        """Add a datetime stamp to each sample.

           Samples whose header date fields are missing, NaN or out of
           range are stamped with datetime.datetime.min.
        """
        for sample in self.samples:
            try:
                header = sample['header']
                sample['stamp'] = datetime.datetime(header['YEAR'],
                                                    header['MONTH'],
                                                    header['DAY'],
                                                    header['HOUR'],
                                                    header['MIN'])
            except (KeyError, TypeError, ValueError):
                sample['stamp'] = datetime.datetime.min

    def _sampleInterval(self):
        """Add a sample interval attribute.

           The interval is the most frequent difference between the stamps
           of adjacent samples; with fewer than two samples it falls back
           to datetime.timedelta.resolution.
        """
        stamps = [sample['stamp'] for sample in self.samples]
        votes = {}
        for earlier, later in zip(stamps[:-1], stamps[1:]):
            interval = later - earlier
            votes[interval] = votes.get(interval, 0) + 1
        self.sampleInterval = self._mostCommon(votes,
                                               datetime.timedelta.resolution)

    def _minimumAltitude(self):
        """Add a minimum altitude attribute.

           Each sample's header gains a 'minalt' entry taken from the 'ALT'
           value of its first body row (NaN when the body is empty or has
           no 'ALT'). The attribute is the most frequent non-NaN 'minalt',
           or 0.0 when there is none.
        """
        votes = {}
        for sample in self.samples:
            try:
                minalt = sample['body'][0]['ALT']
            except (IndexError, KeyError):
                minalt = n.nan
            sample['header']['minalt'] = minalt
            # NaN is the only value unequal to itself; an identity test
            # would miss NaNs that are not the numpy singleton.
            if minalt == minalt:
                votes[minalt] = votes.get(minalt, 0) + 1
        self.minimumAltitude = self._mostCommon(votes, 0.0)
206
207
class Sample(object):

    """A single sample from daily sodar file data.

       (A header and a body attribute.)

       header -- a Header object, or None if the sample text is malformed
       body   -- a Body object, or None if the sample text is malformed
    """

    # first three groups of lines are the header; rest is body
    _PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                              (?P<body>.*$)
                           ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

           Text that does not contain three blank-line separated header
           groups is treated as an invalid Sample: header and body are
           both left as None instead of raising.
        """
        super(Sample, self).__init__()
        self.header = None
        self.body = None
        match = self._PATTERN.match(sample.strip())
        if match is not None:
            self.header = Header(match.group('header'))
            self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

           Raises IndexError when the Sample has no body.
        """
        try:
            return self.body[index]
        except TypeError:   # self.body is None for an invalid Sample
            raise IndexError('Sample index out of range')

    def _copy(self):
        """Create a deep copy as a dictionary of header and body copies.

           An invalid Sample (no header/body) copies to an empty header
           dictionary and an empty body list.
        """
        header = {} if self.header is None else self.header._copy()
        body = [] if self.body is None else self.body._copy()
        return {'header': header,
                'body': body}
242
243
class Header(dict):

    """The sample-wide parameters of one sodar data sample.

       (A dictionary mapping parameter names to their string values.)
    """

    def __init__(self, header):

        """Parse raw header text into parameter name/value pairs.

           Blank lines are discarded. Of the lines that remain, the first,
           third, ... hold whitespace-separated parameter names and the
           second, fourth, ... hold the values; names and values are
           paired up in order of appearance.
        """

        super(Header, self).__init__()
        stripped = [line.strip() for line in header.split('\n')]
        lines = [line for line in stripped if line]
        names = " ".join(lines[::2]).split()
        values = " ".join(lines[1::2]).split()
        #fix for bad match between names and values
        for name, value in zip(names, values):
            self[name] = value

    def _copy(self):
        """Return the parameters as a plain dictionary (shallow copy)."""
        return dict(self)
270
271
class Body(list):

    """A sodar data sample body.

       (A list of dictionaries, one per altitude, stored in the reverse of
       the order in which the rows appear in the text.)
    """

    def __init__(self, body):

        """Identify discrete body parameter names and values.

           The first non-blank line contains parameter keys;
           the remaining lines contain parameter values,
           one set of parameters for a single altitude per line.
           Text with no non-blank lines yields an empty Body.
        """

        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            # nothing to parse; avoid indexing the missing key line
            return
        bodyKeys = bodyLines[0].split()
        #fix for bad match between names and values
        self.extend(dict(zip(bodyKeys, bodyLine.split()))
                    for bodyLine in bodyLines[1:])
        # rows are stored in the opposite order to the text
        self.reverse()

    def __getitem__(self, index):
        """Return altitude data by position or by altitude string.

           Any index the list rejects with TypeError is looked up as an
           'ALT' value via _find.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find altitude data in Body.

           Returns the first dictionary whose 'ALT' entry equals index;
           raises IndexError if there is none.
        """
        for altitude in self:
            try:
                if altitude['ALT'] == index:
                    return altitude
            except KeyError:
                # row without an 'ALT' column cannot match
                continue
        raise IndexError('Body index, out of range')

    def _copy(self):
        """Create a deep copy as a list of dictionaries."""
        return [altitude.copy() for altitude in self]
318
319
def _main():
    """Process as script from command line.

       Downloads one day of test data, parses it into a Data object and
       prints the wind speed at 70 meters for the 9:15am sample.
       Raises IOError if the test data cannot be read.
    """
    try:
        import urllib2 as urlrequest            # Python 2
    except ImportError:
        import urllib.request as urlrequest     # Python 3
    try:
        dataHandle = urlrequest.urlopen('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat')
        try:
            dataString = dataHandle.read()
        finally:
            # always release the connection, even if the read fails
            dataHandle.close()
    except Exception:
        # Exception (not a bare except) lets Ctrl-C and SystemExit through
        raise IOError("Failure to read test data")
    if not isinstance(dataString, str):
        # Python 3 returns bytes; Data splits on the text character '$'
        dataString = dataString.decode('ascii', 'replace')
    dataObject = Data(dataString)
    print(dataObject['2007-06-01-09-15']['70']['SPEED'])

if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.