NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/rawData.py

Revision 72 (checked in by cbc, 17 years ago)

Fulfill ticket #21: Move array normalization methods to new ProcessedData class

Line 
1 #!/usr/bin/python
2 """
3 Classes to handle raw sodar data samples.
4
5 Raw sodar data samples are collected into daily files. Each sample consists of a
6 header followed by an observation for each height.
7
8 The daily file is split into a list (modeled by the class RawData) of samples
9 (modeled by the class Sample) in chronological order. A RawData object is
10 initialized with a string representing the daily file data:
11
12      rawDataHandle = open('20070601.dat')
13      rawDataString = rawDataHandle.read()
14      rawDataObject = RawData(rawDataString)
15
16 Each Sample object has attributes for a Header and Body object. The Samples
17 within a RawData object may also be accessed by time using a string of the format
18 YYYY-MM-DD-HH-MM as an index on the RawData object to return the first matching
19 Sample in the RawData object:
20
21     rawDataObject[0] # the first Sample object of the day
22     rawDataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23     rawDataObject[15].header # the Header object of the 16th Sample
24     rawDataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26 Header objects act as dictionaries. Access each sample-wide parameter of
27 interest using the header parameter name as a keyword on the Header object:
28
29     rawDataObject[15].header['VAL2'] # the number of validations for beam 2
30     rawDataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31                                                      # probability on beam 3
32     rawDataObject[0].header['SNR1'] # signal to noise on beam 1
33
34 Consult your Sodar documentation for a complete list of header parameters.
35 Different sodar models have different sets of header parameters. This model
36 seeks to be model agnostic, and parses the header parameter names from the
37 raw data itself.
38
39 Body objects act as lists of dictionaries. The dictionaries access
40 altitude-specific parameters by name as keywords. The dictionaries are in
41 altitude-ascending order. Each dictionary may also be accessed by indexing with
42 an altitude string:
43
44     rawDataObject[15].body[0] # the data for the lowest altitude, 16th sample
45     rawDataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
46     rawDataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
47     rawDataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
48                                                         # at 70 meters
49
50 The body attribute of a Sample object may also be indexed directly on a Sample
51 object for the most convenient semantics:
52
53     rawDataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
54     rawDataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
55                                                    # 70 meters, 9:15am
56 """
57
58 __author__ = 'Chris Calloway'
59 __email__ = 'cbc@unc.edu'
60 __copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
61 __license__ = 'GPL2'
62
63 import re
64
class RawData(list):

    """Raw daily sodar file data.

       (A list of Sample objects, in the order the samples appear in the
       daily string.)
    """

    # Header keys that make up a sample time, paired with the width each
    # value is zero-padded to before it is compared with the index parts.
    _timeFields = (('YEAR', 4), ('MONTH', 2), ('DAY', 2),
                   ('HOUR', 2), ('MIN', 2))

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

           Chunks that are empty or only whitespace are dropped.
        """
        super(RawData, self).__init__()
        # Strip each chunk once, then keep only the non-empty ones.
        chunks = [chunk.strip() for chunk in data.split('$')]
        self.extend([Sample(chunk) for chunk in chunks if chunk])

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

           Indexes the list type accepts are handled by list itself.
           Any index that makes list raise TypeError (e.g. a string)
           is treated as a sample time and passed to _find.
        """
        try:
            return super(RawData, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in RawData.

           Where sample time of form YYYY-MM-DD-HH-MM.

           Returns the first Sample whose zero-padded YEAR, MONTH, DAY,
           HOUR and MIN header values equal the five parts of index.
           Samples without a header, or whose header lacks one of those
           keys, are skipped rather than aborting the search.

           Raises ValueError if index does not split into exactly five
           parts, AttributeError if index has no split method, and
           IndexError if no Sample matches.
        """
        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('RawData index by date must be "YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('RawData index by date must be "YYYY-MM-DD-HH-MM"')
        wanted = [year, month, day, hour, minute]
        for sample in self:
            try:
                stamp = [sample.header[key].rjust(width, '0')
                         for key, width in self._timeFields]
            except (TypeError, KeyError):
                # TypeError: sample.header may not exist (is None).
                # KeyError: header is missing a time field.
                continue
            if stamp == wanted:
                return sample
        raise IndexError('RawData index out of range')
110
111
class Sample(object):

    """A single sample from raw daily sodar file data.

       (A header and a body attribute.)

       For text that cannot be split into a header and a body, both
       attributes are None.
    """

    # First three groups of lines are the header; rest is body.
    # Compiled once for the class instead of once per Sample.
    _samplePattern = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                    (?P<body>.*$)
                                 ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

           sample is the raw text of one sample; surrounding whitespace
           is stripped before matching. If the text does not contain the
           three blank-line separated header groups, the Sample is
           invalid and header and body are both left as None.
        """
        super(Sample, self).__init__()
        self.header = None
        self.body = None
        match = self._samplePattern.match(sample.strip())
        if match is None:
            # Invalid Sample: keep the None defaults instead of failing
            # on match.groupdict().
            return
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

           Raises IndexError if indexing the body raises TypeError,
           which is what happens when the body is None.
        """
        try:
            return self.body[index]
        except TypeError:   # sample.body may not exist
            raise IndexError('Sample index out of range')

    def _copy(self):
        """Create a deep copy as a dictionary of header and body copies.

           A header or body that is None is copied as None.
        """
        headerCopy = None
        if self.header is not None:
            headerCopy = self.header._copy()
        bodyCopy = None
        if self.body is not None:
            bodyCopy = self.body._copy()
        return {'header': headerCopy,
                'body': bodyCopy}
146
147
class Header(dict):

    """A raw sodar data sample header.

       (A dictionary mapping sample-wide parameter names to their
       values, both kept as strings.)
    """

    def __init__(self, header):
        """Identify discrete header parameter names and values.

           Blank lines are ignored. Of the remaining lines, the 1st,
           3rd, 5th, ... hold parameter names and the 2nd, 4th, 6th, ...
           hold parameter values, all whitespace separated.
        """
        super(Header, self).__init__()
        stripped = [line.strip() for line in header.split('\n')]
        lines = [line for line in stripped if line]
        # All names and all values are each pooled into one sequence and
        # paired by overall position, not line by line; surplus names or
        # values are dropped by zip.
        # NOTE(review): the original comment here read "fix for bad match
        # between names and values" -- presumably this pooling is that
        # fix; confirm.
        names = " ".join(lines[0::2]).split()
        values = " ".join(lines[1::2]).split()
        self.update(dict(zip(names, values)))

    def _copy(self):
        """Return a shallow copy of the parameters as a dictionary."""
        duplicate = self.copy()
        return duplicate
174
175
class Body(list):

    """A raw sodar data sample body.

       (A list of dictionaries, one per data line, stored in the reverse
       of the order the lines appear in the raw text.)
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

           The first non-blank line contains parameter keys;
           the remaining lines contain parameter values,
           one set of parameters for a single altitude per line.

           A body with no non-blank lines produces an empty Body.
        """
        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            # Nothing to parse; avoid IndexError on bodyLines[0].
            return
        bodyKeys = bodyLines[0].split()
        # Keys and values are paired by position; zip drops any surplus
        # keys or values on a line.
        self.extend([dict(zip(bodyKeys, bodyLine.split()))
                     for bodyLine in bodyLines[1:]])
        # Store the data lines in the opposite order to the raw text.
        self.reverse()

    def __getitem__(self, index):
        """Return raw altitude data by altitude string.

           Indexes the list type accepts are handled by list itself.
           Any index that makes list raise TypeError (e.g. a string)
           is looked up by altitude with _find.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find raw altitude data in Body.

           Returns the first dictionary whose 'ALT' value equals index.
           Dictionaries without an 'ALT' key are skipped.
           Raises IndexError if nothing matches.
        """
        for altitude in self:
            try:
                if altitude['ALT'] != index: continue
            except KeyError:
                continue
            return altitude
        raise IndexError('Body index, out of range')

    def _copy(self):
        """Create a deep copy as a list of dictionaries."""
        return [altitude.copy() for altitude in self]
222
223
def _main():
    """Process as script from command line.

       Downloads one daily raw data file, parses it with RawData and
       prints the 'SPEED' value stored for altitude '70' in the sample
       timed 2007-06-01-09-15.

       Raises IOError if the data cannot be downloaded.
    """
    # Imported locally so the module itself needs no network library.
    try:
        from urllib2 import urlopen             # Python 2
    except ImportError:
        from urllib.request import urlopen      # Python 3
    try:
        rawDataHandle = urlopen('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat')
        try:
            rawDataString = rawDataHandle.read()
        finally:
            # Release the connection even if the read fails.
            rawDataHandle.close()
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must not be
        # reported as a read failure.
        raise IOError("Failure to read raw test data")
    if not isinstance(rawDataString, str):
        # Python 3 returns bytes; RawData expects text.
        # NOTE(review): latin-1 is used because it can decode any byte;
        # the real encoding of the data file is not known here -- confirm.
        rawDataString = rawDataString.decode('latin-1')
    rawDataObject = RawData(rawDataString)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(rawDataObject['2007-06-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.