NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/rawData.py

Revision 272 (checked in by cbc, 14 years ago)

Merge raw2proc-dev branch changes.

Line 
1 #!/usr/bin/python
2 """
3 Module to handle raw sodar data samples.
4
5 Raw sodar data samples are collected into daily files. Each sample consists of a
6 header followed by an observation for each height.
7
8 The daily file is split into a list (modeled by the class RawData) of samples
9 (modeled by the class Sample) in chronological order. A RawData object is
10 initialized with a string representing the daily file data:
11
12      rawDataHandle = open('20070601.dat')
13      rawDataString = rawDataHandle.read()
14      rawDataObject = RawData(rawDataString)
15
16 Each Sample object has attributes for a Header and Body object. The Samples
17 within a RawData object may also be accessed by time using a string of the
18 format YYYY-MM-DD-HH-MM as an index on the RawData object to return the first
19 matching Sample in the RawData object:
20
21     rawDataObject[0] # the first Sample object of the day
22     rawDataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23     rawDataObject[15].header # the Header object of the 16th Sample
24     rawDataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26 Header objects act as dictionaries. Access each sample-wide parameter of
27 interest using the header parameter name as a keyword on the Header object:
28
29     rawDataObject[15].header['VAL2'] # the number of validations for beam 2
30     rawDataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31                                                      # probability on beam 3
32     rawDataObject[0].header['SNR1'] # signal to noise on beam 1
33
34 Consult your Sodar documentation for a complete list of header parameters.
35 Different sodar models have different sets of header parameters. This model
36 seeks to be model agnostic, and parses the header parameter names from the
37 raw data itself.
38
39 Body objects act as lists of dictionaries. The dictionaries access
40 altitude-specific parameters by name as keywords. The dictionaries are in
41 altitude-ascending order. Each dictionary may also be accessed by indexing with
42 an altitude string:
43
44     rawDataObject[15].body[0] # the data for the lowest altitude, 16th sample
45     rawDataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
46     rawDataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
47     rawDataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
48                                                         # at 70 meters
49
50 The body attribute of a Sample object may also be indexed directly on a Sample
51 object for the most convenient semantics:
52
53     rawDataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
54     rawDataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
55                                                    # 70 meters, 9:15am
56 """
57
58 __author__ = 'Chris Calloway'
59 __email__ = 'cbc@unc.edu'
60 __copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
61 __license__ = 'GPL2'
62
63 import re
64
class RawData(list):

    """Class to handle raw daily sodar file data.

       A chronologically ordered list of samples. Samples may be retrieved
       by ordinary list index or by a 'YYYY-MM-DD-HH-MM' time string.
    """

    # Header keys holding the sample time, paired with the zero-padded
    # width each one is compared at.
    _TIME_FIELDS = (('YEAR', 4), ('MONTH', 2), ('DAY', 2),
                    ('HOUR', 2), ('MIN', 2))

    _BAD_INDEX = 'RawData index by date must be "YYYY-MM-DD-HH-MM"'

    def __init__(self, data):
        """Parse raw daily sodar file data.

           data is the whole daily file as one string. Samples are
           separated by '$'; empty or whitespace-only pieces are dropped.
        """
        super(RawData, self).__init__()
        for piece in data.split('$'):
            piece = piece.strip()
            if piece:
                self.append(Sample(piece))

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

           Integer and slice indices behave as for a list. Any index the
           list rejects with TypeError is treated as a time string.
        """
        try:
            return super(RawData, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find the first Sample in RawData matching a sample time.

           Where sample time of form YYYY-MM-DD-HH-MM. Fields of the index
           are zero-padded before comparison, so '2007-6-1-9-15' and
           '2007-06-01-09-15' select the same Sample.

           Raises ValueError if index does not have five '-' separated
           fields, AttributeError if index is not string-like, and
           IndexError if no Sample matches.
        """
        try:
            parts = index.split('-')
        except AttributeError:
            raise AttributeError(self._BAD_INDEX)
        if len(parts) != len(self._TIME_FIELDS):
            raise ValueError(self._BAD_INDEX)
        # Pad the requested fields the same way the header fields are padded.
        wanted = [part.rjust(width, '0')
                  for part, (_, width) in zip(parts, self._TIME_FIELDS)]
        for sample in self:
            header = getattr(sample, 'header', None)
            if header is None:      # invalid Sample without a header
                continue
            try:
                stamp = [header[key].rjust(width, '0')
                         for key, width in self._TIME_FIELDS]
            except (KeyError, TypeError, AttributeError):
                # Header lacks a usable time field; skip it rather than
                # aborting the search over the remaining Samples.
                continue
            if stamp == wanted:
                return sample
        raise IndexError('RawData index out of range')
113
114
class Sample(object):

    """A single sample from raw daily sodar file data.

       (A header and a body attribute.) Both attributes are None when the
       sample text cannot be split into a header and a body.
    """

    # First three blank-line-terminated groups of lines are the header;
    # the rest is the body. Compiled once for all Samples.
    _PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                              (?P<body>.*$)
                           ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

           sample is the text of one sample. If it does not contain the
           three header groups, header and body are both set to None
           instead of raising.
        """
        super(Sample, self).__init__()
        match = self._PATTERN.match(sample.strip())
        if match is None:
            # Invalid Sample: keep it in the list but mark it as empty so
            # that lookups can skip it.
            self.header = None
            self.body = None
            return
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body or header attribute.

           The body is tried first; if it reports the index as out of
           range, the header is tried with the same index.

           Raises IndexError if neither has the index, or if the Sample
           has no body or header.
        """
        try:
            return self.body[index]
        except TypeError:   # sample.body may not exist
            raise IndexError('Sample index out of range')
        except IndexError:  # sample.body out of range
            try:
                return self.header[index]
            except (KeyError, TypeError):
                # KeyError: no such header parameter.
                # TypeError: sample.header may not exist (None), or the
                # index is unhashable.
                raise IndexError('Sample index out of range')

    def data(self):
        """Create a deep copy as a dictionary of header and body data.

           A missing header or body is reported as None.
        """
        header = self.header.data() if self.header is not None else None
        body = self.body.data() if self.body is not None else None
        return {'header': header, 'body': body}
154
155
class Header(dict):

    """A raw sodar data sample header.

       (A dictionary of sample-wide parameters, keyed by parameter name.)
    """

    def __init__(self, header):
        """Identify discrete header parameter names and values.

           Blank lines are ignored. Of the remaining lines, the 1st, 3rd,
           5th, ... hold parameter names and the 2nd, 4th, 6th, ... hold
           parameter values.
        """
        super(Header, self).__init__()
        rows = []
        for line in header.split('\n'):
            line = line.strip()
            if line:
                rows.append(line)
        # Pool all names and all values before pairing them, so that a
        # names line and its values line need not have matching lengths.
        names = [token for row in rows[::2] for token in row.split()]
        values = [token for row in rows[1::2] for token in row.split()]
        for name, value in zip(names, values):
            self[name] = value

    def data(self):
        """Return a copy of the parameters as a plain dictionary."""
        return dict(self)
182
183
class Body(list):

    """A raw sodar data sample body.

       (A list of dictionaries, one per altitude.) The rows are stored in
       the reverse of their order in the raw text.
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

           The first non-blank line contains parameter keys;
           the remaining lines contain parameter values,
           one set of parameters for a single altitude per line.
           An empty or whitespace-only body yields an empty list.
        """
        super(Body, self).__init__()
        rows = [line.split() for line in body.split('\n') if line.strip()]
        if not rows:
            # Nothing to parse; previously this raised IndexError.
            return
        keys = rows[0]
        # zip() stops at the shorter of keys and values, which tolerates
        # a bad match between names and values on a line.
        self.extend(dict(zip(keys, values))
                    for values in reversed(rows[1:]))

    def __getitem__(self, index):
        """Return raw altitude data by list index or altitude string.

           Any index the list rejects with TypeError is looked up as an
           altitude.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find raw altitude data in Body.

           Returns the first row whose 'ALT' value equals index. Rows
           without an 'ALT' key are skipped.

           Raises IndexError if no row matches.
        """
        for altitude in self:
            try:
                if altitude['ALT'] != index:
                    continue
            except KeyError:
                continue
            return altitude
        raise IndexError('Body index out of range')

    def data(self):
        """Create a copy of the data as a list of plain dictionaries."""
        return [altitude.copy() for altitude in self]
230
231
def _main():
    """Process as script from command line.

       Fetch one day of raw sodar test data over HTTP, parse it, and print
       the wind speed at 70 meters for the 9:15am sample.

       Raises IOError if the test data cannot be read.
    """
    try:
        from urllib2 import urlopen             # Python 2
    except ImportError:
        from urllib.request import urlopen      # Python 3
    try:
        rawDataHandle = urlopen('http://nemo.isis.unc.edu/'
                                'data/nccoos/level0/ims/sodar/'
                                '2008_01/20080101.dat')
        try:
            rawDataString = rawDataHandle.read()
        finally:
            # Release the connection even when the read fails.
            rawDataHandle.close()
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit still propagate.
        raise IOError("Failure to read raw test data")
    if not isinstance(rawDataString, str):
        # Python 3 returns bytes, but RawData splits on str separators.
        # latin-1 maps every byte to a character, so decoding cannot fail.
        # NOTE(review): the actual encoding of the data files is assumed.
        rawDataString = rawDataString.decode('latin-1')
    rawDataObject = RawData(rawDataString)
    print(rawDataObject['2008-01-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.