1 |
#!/usr/bin/python |
---|
2 |
""" |
---|
3 |
Classes to handle sodar data samples. |
---|
4 |
|
---|
5 |
Sodar data samples are collected into daily files. Each sample consists of a |
---|
6 |
header followed by an observation for each height. |
---|
7 |
|
---|
8 |
The daily file is split into a list (modeled by the class Data) of samples |
---|
9 |
(modeled by the class Sample) in chronological order. A Data object is |
---|
10 |
initialized with a string representing the daily file data: |
---|
11 |
|
---|
12 |
dataHandle = open('20070601.dat') |
---|
13 |
dataString = dataHandle.read() |
---|
14 |
dataObject = Data(dataString) |
---|
15 |
|
---|
16 |
Each Sample object has attributes for a Header and Body object. The Samples |
---|
17 |
within a Data object may also be accessed by time using a string of the format |
---|
18 |
YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching |
---|
19 |
Sample in the Data object: |
---|
20 |
|
---|
21 |
dataObject[0] # the first Sample object of the day |
---|
22 |
dataObject['2007-06-01-09-15'] # the Sample object for 9:15am |
---|
23 |
dataObject[15].header # the Header object of the 16th Sample |
---|
24 |
dataObject['2007-06-01-09-15'].body # the Body object for 9:15am |
---|
25 |
|
---|
26 |
Header objects act as dictionaries. Access each sample-wide parameter of |
---|
27 |
interest using the header parameter name as a keyword on the Header object: |
---|
28 |
|
---|
29 |
dataObject[15].header['VAL2'] # the number of validations for beam 2 |
---|
30 |
dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal |
---|
31 |
# probability on beam 3 |
---|
32 |
dataObject[0].header['SNR1'] # signal to noise on beam 1 |
---|
33 |
|
---|
34 |
Consult your Sodar documentation for a complete list of header parameters. |
---|
35 |
|
---|
36 |
Body objects act as lists of dictionaries. The dictionaries access |
---|
37 |
altitude-specific parameters by name as keywords. The dictionaries are in |
---|
38 |
altitude-ascending order. Each dictionary may also be accessed by indexing with |
---|
39 |
an altitude string: |
---|
40 |
|
---|
41 |
dataObject[15].body[0] # the data for the lowest altitude, 16th sample |
---|
42 |
dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters |
---|
43 |
dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude |
---|
44 |
dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction |
---|
45 |
# at 70 meters |
---|
46 |
|
---|
47 |
The body attribute of a Sample object may also be indexed directly on a Sample |
---|
48 |
object for the most convenient semantics: |
---|
49 |
|
---|
50 |
dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample |
---|
51 |
dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction, |
---|
52 |
# 70 meters, 9:15am |
---|
53 |
""" |
---|
54 |
|
---|
55 |
__author__ = 'Chris Calloway' |
---|
56 |
__email__ = 'cbc@unc.edu' |
---|
57 |
__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science' |
---|
58 |
__license__ = 'GPL2' |
---|
59 |
|
---|
60 |
import re |
---|
61 |
import numpy as n |
---|
62 |
import datetime |
---|
63 |
|
---|
64 |
class Data(list):

    """Daily sodar file data.

    (A chronologically ordered list of samples.)

    Besides behaving as a list of Sample objects, a Data object carries
    the results of normalization:

    samples -- list of {'header': dict, 'body': list of dict} copies of
        each Sample, with values converted to numbers (NaN when invalid)
        and a 'stamp' datetime added
    sampleInterval -- most common timedelta between consecutive stamps
    minimumAltitude -- most common lowest altitude among the samples
    """

    # marker used in the data files for a missing/invalid value
    _INVALID = "-9999"

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $."""
        super(Data, self).__init__()
        for chunk in data.split('$'):
            chunk = chunk.strip()
            if chunk:
                self.append(Sample(chunk))
        self._normalize()

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

        Integer (and slice) indices behave as for a list; anything the
        list rejects with TypeError is treated as a time string.
        """
        try:
            return super(Data, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in Data

        where sample time of form YYYY-MM-DD-HH-MM.

        Returns the first Sample whose header date fields match.
        Raises ValueError or AttributeError when index is not a string
        with five dash-separated parts, and IndexError when no Sample
        matches.
        """
        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        # zero-pad both the requested parts and the header values so that
        # '2007-6-1-9-15' and '2007-06-01-09-15' find the same sample
        wanted = [(key, width, part.rjust(width, '0'))
                  for key, width, part in (('YEAR', 4, year),
                                           ('MONTH', 2, month),
                                           ('DAY', 2, day),
                                           ('HOUR', 2, hour),
                                           ('MIN', 2, minute))]
        for sample in self:
            try:
                for key, width, part in wanted:
                    if sample.header[key].rjust(width, '0') != part:
                        break
                else:
                    return sample
            except (TypeError, KeyError):
                # sample.header may not exist (TypeError on None) or may
                # lack a date field (KeyError); such a sample cannot match
                continue
        raise IndexError('Data index out of range')

    def _normalize(self):
        """Clean up data for analysis."""
        self._copy()
        self._convert()
        self._stamp()
        self._sampleInterval()
        # correct for missing times
        self._minimumAltitude()
        # compute maximum overall altitude
        # compute number of altitudes
        # compute altitude interval
        # correct for missing altitudes
        # mark maximum altitude with good values for each sample
        # mark minimum altitude with invalid values for each sample
        # convert direction to radians
        # compute u,v,c components
        # compute colorspecs
        # compute plotting parameters

    def _copy(self):
        """Create a deep copy as a list of Sample copies."""
        self.samples = [sample._copy() for sample in self]

    @staticmethod
    def _toNumber(value, convert):
        """Return convert(value), or NaN when value is invalid.

        A value is invalid when it equals the -9999 marker or when
        convert rejects it with ValueError or TypeError.
        """
        if value == Data._INVALID:
            return n.nan
        try:
            return convert(value)
        except (ValueError, TypeError):
            # n.nan rather than n.NaN: the capitalized alias is not
            # available in NumPy 2.x
            return n.nan

    def _convert(self):
        """Convert to numbers and correct for invalid values.

        Body values become floats and header values become ints;
        anything invalid becomes NaN.
        """
        for sample in self.samples:
            for altitude in sample['body']:
                # list() so the dict is not iterated while being assigned
                for key in list(altitude):
                    altitude[key] = self._toNumber(altitude[key], float)
            header = sample['header']
            for key in list(header):
                header[key] = self._toNumber(header[key], int)

    def _stamp(self):
        """Add a datetime stamp to each sample.

        Samples whose header date fields are missing, NaN or out of
        range are stamped with datetime.datetime.min.
        """
        for sample in self.samples:
            try:
                header = sample['header']
                sample['stamp'] = datetime.datetime(header['YEAR'],
                                                    header['MONTH'],
                                                    header['DAY'],
                                                    header['HOUR'],
                                                    header['MIN'])
            except (KeyError, TypeError, ValueError):
                # KeyError: field missing; TypeError: field is NaN (a float);
                # ValueError: field outside the calendar range
                sample['stamp'] = datetime.datetime.min

    def _sampleInterval(self):
        """Add a sample interval attribute.

        The interval is the most frequent difference between the stamps
        of consecutive samples, or datetime.timedelta.resolution when
        there are fewer than two samples.
        """
        stamps = [sample['stamp'] for sample in self.samples]
        accumulator = {}
        for earlier, later in zip(stamps[:-1], stamps[1:]):
            interval = later - earlier
            accumulator[interval] = accumulator.get(interval, 0) + 1
        if accumulator:
            self.sampleInterval = max(accumulator, key=accumulator.get)
        else:
            self.sampleInterval = datetime.timedelta.resolution

    def _minimumAltitude(self):
        """Add a minimum altitude attribute.

        Each sample header gains a 'minalt' entry holding the ALT value
        of the first body entry (NaN when there is none).  The attribute
        minimumAltitude is the most frequent non-NaN 'minalt', or 0.0
        when no sample provides one.
        """
        accumulator = {}
        for sample in self.samples:
            try:
                minalt = sample['body'][0]['ALT']
            except (IndexError, KeyError):
                minalt = n.nan
            sample['header']['minalt'] = minalt
            if minalt != minalt:
                # NaN is the only value unequal to itself
                continue
            accumulator[minalt] = accumulator.get(minalt, 0) + 1
        if accumulator:
            self.minimumAltitude = max(accumulator, key=accumulator.get)
        else:
            self.minimumAltitude = 0.0
---|
206 |
|
---|
207 |
|
---|
208 |
class Sample(object):

    """A single sample from daily sodar file data.

    (A header and a body attribute.)

    header -- a Header object, or None when the sample text is malformed
    body -- a Body object, or None when the sample text is malformed
    """

    # first three groups of lines are the header; rest is body
    # (compiled once for the class instead of once per sample)
    _samplePattern = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                    (?P<body>.*$)
                                 ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

        sample -- the text of one sample.  Text that does not contain
        the three blank-line-terminated header groups is accepted, but
        leaves both header and body set to None.
        """
        super(Sample, self).__init__()
        # defaults cover parsing invalid Samples
        self.header = None
        self.body = None
        match = self._samplePattern.match(sample.strip())
        if match is not None:
            self.header = Header(match.group('header'))
            self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

        Raises IndexError when the Sample has no body.
        """
        try:
            return self.body[index]
        except TypeError:  # sample.body may not exist
            raise IndexError('Sample index out of range')

    def _copy(self):
        """Create a deep copy as a dictionary of header and body copies.

        A missing header is copied as an empty dictionary and a missing
        body as an empty list.
        """
        header = {} if self.header is None else self.header._copy()
        body = [] if self.body is None else self.body._copy()
        return {'header': header,
                'body': body}
---|
242 |
|
---|
243 |
|
---|
244 |
class Header(dict):

    """A sodar data sample header.

    (A dictionary of sample-wide parameters.)
    """

    def __init__(self, header):

        """Identify discrete header parameter names and values.

        Ignoring blank lines, the header text alternates between a line
        of parameter keys and a line of the corresponding parameter
        values.
        """

        super(Header, self).__init__()
        rows = [row
                for row in (line.strip() for line in header.split('\n'))
                if row]
        names = " ".join(rows[0::2]).split()
        values = " ".join(rows[1::2]).split()
        # TODO: names and values are paired purely by position, so a
        # mismatch in their counts silently drops the unpaired extras
        for name, value in zip(names, values):
            self[name] = value

    def _copy(self):
        """Create a shallow copy as a plain dictionary."""
        return dict(self)
---|
270 |
|
---|
271 |
|
---|
272 |
class Body(list):

    """A sodar data sample body.

    (A list of dictionaries, one per altitude.)
    """

    def __init__(self, body):

        """Identify discrete body parameter names and values.

        The first line contains parameter keys;
        the remaining lines contain parameter values,
        one set of parameters for a single altitude per line.

        The order of the value lines is reversed when stored.  A body
        with no non-blank lines produces an empty Body.
        """

        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            # nothing to parse: there is not even a line of keys
            return
        bodyKeys = bodyLines[0].split()
        # TODO: keys and values are paired purely by position, so a
        # mismatch in their counts silently drops the unpaired extras
        self.extend([dict(zip(bodyKeys, bodyLine.split()))
                     for bodyLine in bodyLines[1:]])
        self.reverse()

    def __getitem__(self, index):
        """Return altitude data by position or by altitude string.

        Whatever the list rejects with TypeError is looked up as an
        altitude instead.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find altitude data in Body.

        Returns the first dictionary whose 'ALT' value equals index;
        raises IndexError when there is none.
        """
        for altitude in self:
            if 'ALT' in altitude and altitude['ALT'] == index:
                return altitude
        raise IndexError('Body index, out of range')

    def _copy(self):
        """Create a deep copy as a list of dictionaries."""
        return [altitude.copy() for altitude in self]
---|
318 |
|
---|
319 |
|
---|
320 |
def _main():
    """Process as script from command line.

    Downloads one daily test file, parses it into a Data object and
    prints the wind speed at 70 meters for 9:15am.

    Raises IOError when the test data cannot be read.
    """
    # urllib2 only exists on Python 2; urllib.request is its Python 3 home
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen
    try:
        dataHandle = urlopen('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat')
        try:
            dataString = dataHandle.read()
        finally:
            dataHandle.close()
    except Exception as error:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not converted; keep the cause in the message
        raise IOError("Failure to read test data: %s" % error)
    if not isinstance(dataString, str):
        # Python 3 returns bytes, which Data cannot split on '$'
        dataString = dataString.decode('ascii', 'replace')
    dataObject = Data(dataString)
    # parenthesized single argument prints the same on Python 2 and 3
    print(dataObject['2007-06-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()
---|