NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/data.py

Revision 66 (checked in by cbc, 17 years ago)

Daily check-in of code. Started _normalize.

Line 
1 #!/usr/bin/python
2 """
3 Classes to handle sodar data samples.
4
5 Sodar data samples are collected into daily files. Each sample consists of a
6 header followed by an observation for each height.
7
8 The daily file is split into a list (modeled by the class Data) of samples
9 (modeled by the class Sample) in chronological order. A Data object is
10 initialized with a string representing the daily file data:
11
12      dataHandle = open('20070601.dat')
13      dataString = dataHandle.read()
14      dataObject = Data(dataString)
15
16 Each Sample object has attributes for a Header and Body object. The Samples
17 within a Data object may also be accessed by time using a string of the format
18 YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching
19 Sample in the Data object:
20
21     dataObject[0] # the first Sample object of the day
22     dataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23     dataObject[15].header # the Header object of the 16th Sample
24     dataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26 Header objects act as dictionaries. Access each sample-wide parameter of
27 interest using the header parameter name as a keyword on the Header object:
28
29     dataObject[15].header['VAL2'] # the number of validations for beam 2
30     dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31                                                   # probability on beam 3
32     dataObject[0].header['SNR1'] # signal to noise on beam 1
33
34 Consult your Sodar documentation for a complete list of header parameters.
35
36 Body objects act as lists of dictionaries. The dictionaries access
37 altitude-specific parameters by name as keywords. The dictionaries are in
38 altitude-ascending order. Each dictionary may also be accessed by indexing with
39 an altitude string:
40
41     dataObject[15].body[0] # the data for the lowest altitude, 16th sample
42     dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
43     dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
44     dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
45                                                      # at 70 meters
46
47 The body attribute of a Sample object may also be indexed directly on a Sample
48 object for the most convenient semantics:
49
50     dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
51     dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
52                                                 # 70 meters, 9:15am
53 """
54
55 __author__ = 'Chris Calloway'
56 __email__ = 'cbc@unc.edu'
57 __copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
58 __license__ = 'GPL2'
59
60 import re
61 import numpy as n
62
class Data(list):

    """Daily sodar file data.

       (A chronologically ordered list of samples.)

       Besides the list of Sample objects, construction also builds
       self.samples: a list of (header dict, list of altitude dicts)
       tuples copied from the Samples, which the normalization steps
       work on so the parsed Samples themselves are left untouched.
    """

    # Header keys that make up a sample time, with the zero-padded width
    # each one has in a "YYYY-MM-DD-HH-MM" index string.
    _TIME_FIELDS = (('YEAR', 4), ('MONTH', 2), ('DAY', 2),
                    ('HOUR', 2), ('MIN', 2))

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

           data -- the contents of a daily file as one string. Chunks
                   that are empty after stripping whitespace are skipped.
        """
        super(Data, self).__init__()
        chunks = (chunk.strip() for chunk in data.split('$'))
        self.extend([Sample(chunk) for chunk in chunks if chunk])
        self._normalize()

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

           Integer and slice indices behave as for any list. Any index
           the list rejects with TypeError is treated as a time string
           of the form YYYY-MM-DD-HH-MM.
        """
        try:
            return super(Data, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in Data

           where sample time of form YYYY-MM-DD-HH-MM.

           Returns the first Sample whose header time matches.
           Raises ValueError if index does not have five '-' separated
           parts, AttributeError if index is not a string, and
           IndexError if no Sample matches.
        """
        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        wanted = (year, month, day, hour, minute)
        for sample in self:
            try:
                header = sample.header
                stamp = tuple([header[key].rjust(width, '0')
                               for key, width in self._TIME_FIELDS])
            except (TypeError, KeyError):
                # sample.header may not exist (TypeError on None) or may
                # be missing a time field (KeyError); such a sample can
                # never match, so skip it rather than abort the search.
                continue
            if stamp == wanted:
                return sample
        raise IndexError('Data index out of range')

    def _normalize(self):
        """Clean up data for analysis.

           Only the copy step is implemented so far; the remaining
           steps are listed below as work still to be done.
        """
        self._copy()
        # TODO: self._convert()
        # TODO: compute time interval
        # TODO: correct for missing times
        # TODO: compute minimum altitude
        # TODO: compute maximum overall altitude
        # TODO: compute number of altitudes
        # TODO: compute altitude interval
        # TODO: correct for missing altitudes
        # TODO: mark maximum altitude with good values for each sample
        # TODO: mark minimum altitude with invalid values for each sample
        # TODO: convert direction to radians
        # TODO: compute u,v,c components
        # TODO: compute colorspecs
        # TODO: compute plotting parameters

    def _copy(self):
        """Create a deep copy of all the samples in this Data instance.

           Sets self.samples to a list with one (header, body) tuple
           per Sample, where header is a new dict and body is a new
           list of new dicts. Every altitude dict is copied
           individually, so changing self.samples never alters the
           Sample objects. A Sample whose header or body is None
           contributes an empty dict or an empty list respectively.
        """
        self.samples = [(dict(sample.header or {}),
                         [dict(altitude)
                          for altitude in (sample.body or [])])
                        for sample in self]

    def _convert(self):
        """Convert to numbers and correct for invalid values.

           Replaces every value in every altitude dict of self.samples
           with a float. The invalid marker "-9999" and any value that
           float() rejects become NaN.
        """
        INVALID = "-9999"
        for sample in self.samples:
            for altitude in sample[1]:
                # list() so the keys are fixed before values are replaced
                for key in list(altitude):
                    value = altitude[key]
                    if value == INVALID:
                        altitude[key] = n.nan
                        continue
                    try:
                        altitude[key] = float(value)
                    except (ValueError, TypeError):
                        altitude[key] = n.nan
149
150
class Sample(object):

    """A single sample from daily sodar file data.

       (A header and a body attribute.)

       header is a Header object and body is a Body object. Both are
       None when the sample text does not have the expected layout.
    """

    # First three groups of lines (each ended by a blank line) are the
    # header; the rest is the body. Compiled once for all Samples.
    _SAMPLE_PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                     (?P<body>.*$)
                                  ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

           sample -- the text of one sample; surrounding whitespace is
                     ignored.
        """
        super(Sample, self).__init__()
        match = self._SAMPLE_PATTERN.match(sample.strip())
        if match is None:
            # Invalid sample: fewer than three blank-line separated
            # header groups. Keep the object usable instead of failing
            # on the missing match.
            self.header = None
            self.body = None
            return
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

           Raises IndexError when the Sample has no body.
        """
        try:
            return self.body[index]
        except TypeError:   # sample.body may not exist
            raise IndexError('Sample index out of range')
180
181
class Header(dict):

    """A sodar data sample header.

       (A dictionary of sample-wide parameters.)
    """

    def __init__(self, header):
        """Identify discrete header parameter names and values.

           Every other line contains parameter keys;
           every other line contains parameter values.

           header -- the header text; blank lines are ignored.

           Names and values are paired line by line, so a value line
           that is shorter (or longer) than its name line only affects
           the parameters of that one line instead of shifting every
           pairing that follows. Names without a value and values
           without a name are dropped. All values are kept as strings.
        """
        super(Header, self).__init__()
        lines = [line.strip()
                 for line in header.split('\n')
                 if line.strip()]
        # even positions hold names, odd positions the matching values
        for names, values in zip(lines[::2], lines[1::2]):
            self.update(zip(names.split(), values.split()))
204        
205
class Body(list):

    """A sodar data sample body.

       (A list of dictionaries at each altitude.)
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

           The first line contains parameter keys;
           the remaining lines contain parameter values,
           one set of parameters for a single altitude per line.

           body -- the body text; blank lines are ignored. The rows are
                   stored in the reverse of their order in the text.
                   An empty body gives an empty list.
        """
        super(Body, self).__init__()
        rows = [line.split()
                for line in body.split('\n')
                if line.strip()]
        if not rows:
            # nothing to parse, not even a line of keys
            return
        keys = rows[0]
        self.extend([dict(zip(keys, row)) for row in reversed(rows[1:])])

    def __getitem__(self, index):
        """Return altitude data by altitude string.

           Integer and slice indices behave as for any list. Any index
           the list rejects with TypeError is looked up as an altitude.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find altitude data in Body.

           Returns the first dictionary whose 'ALT' value equals index.
           Rows without an 'ALT' value are skipped.
           Raises IndexError when no row matches.
        """
        for altitudeData in self:
            if 'ALT' in altitudeData and altitudeData['ALT'] == index:
                return altitudeData
        raise IndexError('Body index out of range')
245
246
def _main():
    """Process as script from command line.

       Downloads one daily test file, parses it into a Data object and
       prints the wind speed at 70 meters for the 9:15am sample.

       Raises IOError if the test data cannot be read.
    """
    try:
        from urllib2 import urlopen             # Python 2
    except ImportError:
        from urllib.request import urlopen      # Python 3
    url = ('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/'
           'sodar/store/2007-06/20070601.dat')
    try:
        dataHandle = urlopen(url)
        try:
            dataString = dataHandle.read()
        finally:
            # always release the connection, even if the read fails
            dataHandle.close()
    except Exception:
        # Exception rather than a bare except, so that Ctrl-C and
        # SystemExit are not reported as a read failure.
        raise IOError("Failure to read test data")
    if not isinstance(dataString, str):
        # Python 3 returns bytes; the parser works on text
        dataString = dataString.decode('ascii', 'replace')
    dataObject = Data(dataString)
    print(dataObject['2007-06-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.