NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/rawData.py

Revision 80 (checked in by cbc, 17 years ago)

Fulfill ticket #20: Compute u,v arrays.

Line 
1 #!/usr/bin/python
2 """
3 Module to handle raw sodar data samples.
4
5 Raw sodar data samples are collected into daily files. Each sample consists of a
6 header followed by an observation for each height.
7
8 The daily file is split into a list (modeled by the class RawData) of samples
9 (modeled by the class Sample) in chronological order. A RawData object is
10 initialized with a string representing the daily file data:
11
12      rawDataHandle = open('20070601.dat')
13      rawDataString = rawDataHandle.read()
14      rawDataObject = RawData(rawDataString)
15
16 Each Sample object has attributes for a Header and Body object. The Samples
17 within a RawData object may also be accessed by time using a string of the
18 format YYYY-MM-DD-HH-MM as an index on the RawData object to return the first
19 matching Sample in the RawData object:
20
21     rawDataObject[0] # the first Sample object of the day
22     rawDataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23     rawDataObject[15].header # the Header object of the 16th Sample
24     rawDataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26 Header objects act as dictionaries. Access each sample-wide parameter of
27 interest using the header parameter name as a keyword on the Header object:
28
29     rawDataObject[15].header['VAL2'] # the number of validations for beam 2
30     rawDataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31                                                      # probability on beam 3
32     rawDataObject[0].header['SNR1'] # signal to noise on beam 1
33
34 Consult your Sodar documentation for a complete list of header parameters.
35 Different sodar models have different sets of header parameters. This model
36 seeks to be model agnostic, and parses the header parameter names from the
37 raw data itself.
38
39 Body objects act as lists of dictionaries. The dictionaries access
40 altitude-specific parameters by name as keywords. The dictionaries are in
41 altitude-ascending order. Each dictionary may also be accessed by indexing with
42 an altitude string:
43
44     rawDataObject[15].body[0] # the data for the lowest altitude, 16th sample
45     rawDataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
46     rawDataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
47     rawDataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
48                                                         # at 70 meters
49
50 The body attribute of a Sample object may also be indexed directly on a Sample
51 object for the most convenient semantics:
52
53     rawDataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
54     rawDataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
55                                                    # 70 meters, 9:15am
56 """
57
58 __author__ = 'Chris Calloway'
59 __email__ = 'cbc@unc.edu'
60 __copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
61 __license__ = 'GPL2'
62
63 import re
64
class RawData(list):

    """Class to handle raw daily sodar file data.

       A list of Sample objects in the order they appear in the daily
       file. Besides ordinary list indexing, a RawData object may be
       indexed with a 'YYYY-MM-DD-HH-MM' string to get the first Sample
       whose header carries that time.
    """

    def __init__(self, data):
        """Parse raw daily sodar file data.

           data -- string holding the whole daily file; samples are
                   delimited by '$'.
        """
        super(RawData, self).__init__()
        # Divide daily string into Samples separated by $, dropping
        # chunks that are empty or contain only whitespace.
        for chunk in data.split('$'):
            chunk = chunk.strip()
            if chunk:
                self.append(Sample(chunk))

    def __getitem__(self, index):
        """Allow sample retrieval by position or by Sample time in header.

           Anything the underlying list rejects with a TypeError (such
           as a string) is treated as a 'YYYY-MM-DD-HH-MM' time index.
        """
        try:
            return super(RawData, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find the first Sample in RawData matching a sample time.

           index -- sample time of form YYYY-MM-DD-HH-MM.

           Raises ValueError if index does not split into five fields,
           AttributeError if index has no split method, and IndexError
           if no Sample matches.
        """
        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('RawData index by date must be '
                             '"YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('RawData index by date must be '
                                 '"YYYY-MM-DD-HH-MM"')
        wanted = (year, month, day, hour, minute)
        for sample in self:
            try:
                header = sample.header
                # Header values are zero-padded on the left so they
                # compare equal to the fixed-width fields of the index.
                stamp = (header['YEAR'].rjust(4, '0'),
                         header['MONTH'].rjust(2, '0'),
                         header['DAY'].rjust(2, '0'),
                         header['HOUR'].rjust(2, '0'),
                         header['MIN'].rjust(2, '0'))
            except (TypeError, KeyError):
                # TypeError: sample.header may not exist (is None).
                # KeyError: header lacks a time field; such a sample can
                # never match, so skip it rather than abort the search.
                continue
            if stamp == wanted:
                return sample
        raise IndexError('RawData index out of range')
113
114
class Sample(object):

    """A single sample from raw daily sodar file data.

       (A header and a body attribute.)

       For text that cannot be split into a header and a body, both
       attributes are None.
    """

    # First three groups of lines (each ended by a blank line) are the
    # header; everything after them is the body. Compiled once for all
    # Samples.
    _samplePattern = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                    (?P<body>.*$)
                                 ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

           sample -- string holding the text of one sample.
        """
        super(Sample, self).__init__()
        match = self._samplePattern.match(sample.strip())
        if match is None:
            # Invalid sample: match() returns None, so there is nothing
            # to parse. Leave both parts as None instead of failing.
            self.header = None
            self.body = None
            return
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

           Raises IndexError when the Sample has no body.
        """
        try:
            return self.body[index]
        except TypeError:   # sample.body may not exist
            raise IndexError('Sample index out of range')

    def data(self):
        """Create a copy as a dictionary of header and body data.

           Returns {'header': ..., 'body': ...}; a part that does not
           exist (invalid Sample) is reported as None.
        """
        header = self.header
        body = self.body
        return {'header': header.data() if header is not None else None,
                'body': body.data() if body is not None else None}
149
150
class Header(dict):

    """A raw sodar data sample header.

       (A dictionary mapping sample-wide parameter names to their
       values, both kept as strings.)
    """

    def __init__(self, header):
        """Identify discrete header parameter names and values.

           header -- string holding the header text of one sample.

           Ignoring blank lines, the lines alternate: a line of
           parameter names, then a line of the matching values.
        """
        super(Header, self).__init__()
        rows = [row.strip() for row in header.split('\n')]
        rows = [row for row in rows if row]
        # Pool all names and all values before pairing them, so that a
        # bad match between names and values on one line pair does not
        # depend on line boundaries.
        names = " ".join(rows[0::2]).split()
        values = " ".join(rows[1::2]).split()
        for name, value in zip(names, values):
            self[name] = value

    def data(self):
        """Return the parameters as a new plain dictionary."""
        return dict(self)
177
178
class Body(list):

    """A raw sodar data sample body.

       (A list of dictionaries, one per altitude, stored in the reverse
       of the order in which the lines appear in the raw text.)
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

           body -- string holding the body text of one sample.

           The first non-blank line contains parameter keys;
           the remaining lines contain parameter values,
           one set of parameters for a single altitude per line.
           A body with no non-blank lines gives an empty Body.
        """
        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            # No key line at all: nothing to parse.
            return
        bodyKeys = bodyLines[0].split()
        # zip() pairs keys with values positionally and drops any
        # surplus on either side (bad match between names and values).
        self.extend([dict(zip(bodyKeys, bodyLine.split()))
                     for bodyLine in bodyLines[1:]])
        # Flip the file order; the module documentation describes the
        # result as altitude-ascending.
        self.reverse()

    def __getitem__(self, index):
        """Return raw altitude data by position or by altitude string.

           Anything the underlying list rejects with a TypeError (such
           as a string) is looked up against each row's 'ALT' value.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find raw altitude data in Body.

           index -- altitude as it appears in the 'ALT' column.

           Returns the first row whose 'ALT' value equals index; rows
           without an 'ALT' key are skipped. Raises IndexError if no
           row matches.
        """
        for altitude in self:
            try:
                if altitude['ALT'] != index: continue
            except KeyError:
                continue
            return altitude
        raise IndexError('Body index out of range')

    def data(self):
        """Create a copy of the data as a list of dictionaries.

           Each row dictionary is copied, so changes to the result do
           not affect this Body.
        """
        return [altitude.copy() for altitude in self]
225
226
def _main():
    """Process as script from command line.

       Downloads one daily raw data file, parses it with RawData and
       prints the SPEED value of the '70' altitude row of the
       2007-06-01 09:15 sample.

       Raises IOError if the raw test data cannot be read.
    """
    import urllib2
    try:
        rawDataHandle = urllib2.urlopen('http://nemo.isis.unc.edu/'
                                        'data/nccoos/level0/dukeforest/sodar/'
                                        'store/2007-06/20070601.dat')
        try:
            rawDataString = rawDataHandle.read()
        finally:
            # Release the connection whether or not the read succeeded.
            rawDataHandle.close()
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must
        # still be able to stop the script.
        raise IOError("Failure to read raw test data")
    rawDataObject = RawData(rawDataString)
    print(rawDataObject['2007-06-01-09-15']['70']['SPEED'])

# Run the demonstration only when executed as a script, not on import.
if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.