NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/rawData.py

Revision 74 (checked in by cbc, 17 years ago)

Saving tested refactors.

Line 
1 #!/usr/bin/python
2 """
3 Classes to handle raw sodar data samples.
4
5 Raw sodar data samples are collected into daily files. Each sample consists of a
6 header followed by an observation for each height.
7
8 The daily file is split into a list (modeled by the class RawData) of samples
9 (modeled by the class Sample) in chronological order. A RawData object is
10 initialized with a string representing the daily file data:
11
12      rawDataHandle = open('20070601.dat')
13      rawDataString = rawDataHandle.read()
14      rawDataObject = RawData(rawDataString)
15
16 Each Sample object has attributes for a Header and Body object. The Samples
17 within a RawData object may also be accessed by time using a string of the
18 format YYYY-MM-DD-HH-MM as an index on the RawData object to return the first
19 matching Sample in the RawData object:
20
21     rawDataObject[0] # the first Sample object of the day
22     rawDataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23     rawDataObject[15].header # the Header object of the 16th Sample
24     rawDataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26 Header objects act as dictionaries. Access each sample-wide parameter of
27 interest using the header parameter name as a keyword on the Header object:
28
29     rawDataObject[15].header['VAL2'] # the number of validations for beam 2
30     rawDataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31                                                      # probability on beam 3
32     rawDataObject[0].header['SNR1'] # signal to noise on beam 1
33
34 Consult your Sodar documentation for a complete list of header parameters.
35 Different sodar models have different sets of header parameters. This model
36 seeks to be model agnostic, and parses the header parameter names from the
37 raw data itself.
38
39 Body objects act as lists of dictionaries. The dictionaries access
40 altitude-specific parameters by name as keywords. The dictionaries are in
41 altitude-ascending order. Each dictionary may also be accessed by indexing with
42 an altitude string:
43
44     rawDataObject[15].body[0] # the data for the lowest altitude, 16th sample
45     rawDataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
46     rawDataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
47     rawDataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
48                                                         # at 70 meters
49
50 The body attribute of a Sample object may also be indexed directly on a Sample
51 object for the most convenient semantics:
52
53     rawDataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
54     rawDataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
55                                                    # 70 meters, 9:15am
56 """
57
58 __author__ = 'Chris Calloway'
59 __email__ = 'cbc@unc.edu'
60 __copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
61 __license__ = 'GPL2'
62
63 import re
64
class RawData(list):

    """Raw daily sodar file data.

       (A list of Samples, kept in the order they appear in the daily file.)

       Index with an integer or slice for ordinary list access, or with a
       string of the form YYYY-MM-DD-HH-MM to get the first Sample whose
       header carries that time.
    """

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

           data -- the contents of a daily file as a single string.

           Chunks between separators that are empty or contain only
           whitespace are discarded.
        """
        super(RawData, self).__init__()
        # strip every chunk exactly once, then drop the empty ones
        chunks = [chunk.strip() for chunk in data.split('$')]
        self.extend([Sample(chunk) for chunk in chunks if chunk])

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

           Integers and slices behave as for any list. Any index the
           list rejects with a TypeError is handed to _find() and treated
           as a time string.
        """
        try:
            return super(RawData, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in RawData.

           Where sample time of form YYYY-MM-DD-HH-MM.

           Returns the first Sample whose header fields YEAR, MONTH, DAY,
           HOUR and MIN (zero-padded on the left) equal the corresponding
           parts of index.

           Raises ValueError if index does not split into five parts,
           AttributeError if index has no split method, and IndexError if
           no Sample matches.
        """

        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('RawData index by date must be '\
                             '"YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('RawData index by date must be '\
                                 '"YYYY-MM-DD-HH-MM"')
        # (header key, zero-padded width, wanted value), compared in order
        wanted = (('YEAR', 4, year),
                  ('MONTH', 2, month),
                  ('DAY', 2, day),
                  ('HOUR', 2, hour),
                  ('MIN', 2, minute))
        for sample in self:
            try:
                for key, width, value in wanted:
                    if sample.header[key].rjust(width, '0') != value:
                        break
                else:
                    return sample
            except (TypeError, KeyError):
                # TypeError: sample.header is None (unparsable sample);
                # KeyError: header lacks a date/time field.
                # Either way this sample cannot match, so skip it.
                continue
        raise IndexError('RawData index out of range')
112
113
class Sample(object):

    """A single sample from raw daily sodar file data.

       (A header and a body attribute.)

       For sample text that does not have the expected layout, both
       attributes are None.
    """

    # First three groups of lines (each ended by a blank line) are the
    # header; everything after them is the body. Compiled once for all
    # Samples.
    _samplePattern = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                    (?P<body>.*$)
                                 ''',re.DOTALL | re.VERBOSE)

    def __init__(self,sample):
        """Separate Sample into Header and Body objects.

           sample -- the text of one sample.

           When the text does not match the expected layout, header and
           body are both set to None rather than raising.
        """
        super(Sample, self).__init__()
        self.header = None
        self.body = None
        match = self._samplePattern.match(sample.strip())
        # match is None for an invalid Sample; leave the None defaults
        if match is not None:
            self.header = Header(match.group('header'))
            self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

           Raises IndexError when the Sample has no body or the body
           rejects the index.
        """
        try:
            return self.body[index]
        except TypeError:   # sample.body may not exist
            raise IndexError('Sample index out of range')

    def data(self):
        """Create a deep copy as a dictionary of header and body data.

           Returns a dictionary with the keys 'header' and 'body'. A part
           that is missing (invalid Sample) is reported as None.
        """
        headerData = None
        if self.header is not None:
            headerData = self.header.data()
        bodyData = None
        if self.body is not None:
            bodyData = self.body.data()
        return {'header':headerData,
                'body':bodyData}
148
149
class Header(dict):

    """The header of one raw sodar data sample.

       (Maps each sample-wide parameter name to its value string.)
    """

    def __init__(self, header):

        """Parse header text into discrete parameter names and values.

           Blank lines are ignored. Of the remaining lines, the 1st, 3rd,
           5th, ... hold parameter names and the 2nd, 4th, 6th, ... hold
           the values.
        """

        super(Header, self).__init__()
        rows = []
        for row in header.split('\n'):
            row = row.strip()
            if row:
                rows.append(row)
        names = " ".join(rows[0::2]).split()
        values = " ".join(rows[1::2]).split()
        # Names and values are paired by position across the whole header
        # rather than line by line; zip drops any unpaired surplus.
        for name, value in zip(names, values):
            self[name] = value

    def data(self):
        """Return the parameters as a plain dictionary (shallow copy)."""
        return dict(self)
176
177
class Body(list):

    """A raw sodar data sample body.

       (A list of dictionaries, one for each altitude.)

       Index with an integer or slice for ordinary list access, or with an
       altitude string to get the first dictionary whose 'ALT' value
       equals that string.
    """

    def __init__(self, body):

        """Identify discrete body parameter names and values.

           The first line contains parameter keys;
           the remaining lines contain parameter values,
           one set of parameters for a single altitude per line.

           Blank lines are ignored. Body text with no non-blank lines
           yields an empty Body.
        """

        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            # nothing to parse: no key line, so no altitude rows either
            return
        bodyKeys = bodyLines[0].split()
        # Keys and values are paired by position; zip drops any surplus
        # on either side of a badly matched line.
        self.extend([dict(zip(bodyKeys, bodyLine.split()))
                     for bodyLine in bodyLines[1:]])
        # Store the rows in the opposite order to the text (presumably the
        # raw file lists the highest altitude first -- confirm against
        # real data).
        self.reverse()

    def __getitem__(self, index):
        """Return raw altitude data by altitude string.

           Integers and slices behave as for any list. Any index the
           list rejects with a TypeError is handed to _find().
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find raw altitude data in Body.

           Returns the first dictionary whose 'ALT' value equals index.
           Rows without an 'ALT' key are skipped.
           Raises IndexError when no row matches.
        """
        for altitude in self:
            try:
                if altitude['ALT'] != index: continue
            except KeyError:
                continue
            return altitude
        raise IndexError('Body index out of range')

    def data(self):
        """Create a copy of the data as a list of dictionaries.

           Each altitude dictionary is itself copied, so changes to the
           result do not affect this Body.
        """
        return [altitude.copy() for altitude in self]
224
225
def _main():
    """Process as script from command line.

       Downloads one daily raw data file, parses it into a RawData object
       and prints the SPEED value at altitude '70' for the sample at
       2007-06-01-09-15.

       Raises IOError if the test data cannot be downloaded or read.
    """
    # imported locally so that importing this module never needs them
    try:
        from urllib2 import urlopen             # Python 2
    except ImportError:
        from urllib.request import urlopen      # Python 3
    try:
        rawDataHandle = urlopen('http://nemo.isis.unc.edu/'\
                                'data/nccoos/level0/dukeforest/sodar/'\
                                'store/2007-06/20070601.dat')
        try:
            rawDataString = rawDataHandle.read()
        finally:
            # release the connection whether or not the read succeeded
            rawDataHandle.close()
    except Exception:
        # not a bare except: KeyboardInterrupt and SystemExit pass through
        raise IOError("Failure to read raw test data")
    if not isinstance(rawDataString, str):
        # Python 3 hands back bytes, but RawData splits on a str separator
        rawDataString = rawDataString.decode('ascii', 'replace')
    rawDataObject = RawData(rawDataString)
    # parenthesised single argument prints the same under Python 2 and 3
    print(rawDataObject['2007-06-01-09-15']['70']['SPEED'])
238
# Run the demonstration in _main() only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.