Revision Log

data.py

Revision 69 (checked in by cbc, 17 years ago)	Fulfill ticket #11: Compute minimum altitude

Line
1	#!/usr/bin/python
2	"""
3	Classes to handle sodar data samples.
4
5	Sodar data samples are collected into daily files. Each sample consists of a
6	header followed by an observation for each height.
7
8	The daily file is split into a list (modeled by the class Data) of samples
9	(modeled by the class Sample) in chronological order. A Data object is
10	initialized with a string representing the daily file data:
11
12	dataHandle = open('20070601.dat')
13	dataString = dataHandle.read()
14	dataObject = Data(dataString)
15
16	Each Sample object has attributes for a Header and Body object. The Samples
17	within a Data object may also be accessed by time using a string of the format
18	YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching
19	Sample in the Data object:
20
21	dataObject[0] # the first Sample object of the day
22	dataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23	dataObject[15].header # the Header object of the 16th Sample
24	dataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26	Header objects act as dictionaries. Access each sample-wide parameter of
27	interest using the header parameter name as a keyword on the Header object:
28
29	dataObject[15].header['VAL2'] # the number of validations for beam 2
30	dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31	# probability on beam 3
32	dataObject[0].header['SNR1'] # signal to noise on beam 1
33
34	Consult your Sodar documentation for a complete list of header parameters.
35
36	Body objects act as lists of dictionaries. The dictionaries access
37	altitude-specific parameters by name as keywords. The dictionaries are in
38	altitude-ascending order. Each dictionary may also be accessed by indexing with
39	an altitude string:
40
41	dataObject[15].body[0] # the data for the lowest altitude, 16th sample
42	dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
43	dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
44	dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
45	# at 70 meters
46
47	The body attribute of a Sample object may also be indexed directly on a Sample
48	object for the most convenient semantics:
49
50	dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
51	dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
52	# 70 meters, 9:15am
53	"""
54
55	__author__ = 'Chris Calloway'
56	__email__ = 'cbc@unc.edu'
57	__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
58	__license__ = 'GPL2'
59
60	import re
61	import numpy as n
62	import datetime
63
class Data(list):

    """Daily sodar file data.

    (A chronologically ordered list of samples.)

    Besides behaving as a list of Sample objects, a Data object carries
    attributes computed while normalizing the data:

    samples -- list of {'header': dict, 'body': list of dict} copies of the
               Samples with values converted to numbers (NaN when invalid);
               each copy also gets a 'stamp' datetime entry.
    sampleInterval -- the most common timedelta between consecutive samples.
    minimumAltitude -- the most common lowest altitude among the samples.
    """

    # Marker used in the raw data for a missing or invalid value.
    _INVALID = "-9999"

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

        data -- the complete text of one daily sodar file.
        """
        super(Data, self).__init__()
        chunks = (chunk.strip() for chunk in data.split('$'))
        self.extend([Sample(chunk) for chunk in chunks if chunk])
        self._normalize()

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

        Integer (and slice) indexes behave as for a list; any index the
        list rejects with TypeError is treated as a time string.
        """
        try:
            return super(Data, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in Data

        where sample time of form YYYY-MM-DD-HH-MM.

        Returns the first Sample whose header date fields match.
        Raises ValueError or AttributeError if index is not of that form
        and IndexError if no Sample matches.
        """
        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        # (header key, zero-padded width, wanted value)
        wanted = (('YEAR', 4, year),
                  ('MONTH', 2, month),
                  ('DAY', 2, day),
                  ('HOUR', 2, hour),
                  ('MIN', 2, minute))
        for sample in self:
            try:
                header = sample.header
                if all(header[key].rjust(width, '0') == value
                       for key, width, value in wanted):
                    return sample
            except (TypeError, KeyError):
                # sample.header may not exist (None) or may lack a date
                # field; either way this sample cannot match
                continue
        raise IndexError('Data index out of range')

    def _normalize(self):
        """Clean up data for analysis."""
        self._copy()
        self._convert()
        self._stamp()
        self._sampleInterval()
        # correct for missing times
        self._minimumAltitude()
        # compute maximum overall altitude
        # compute number of altitudes
        # compute altitude interval
        # correct for missing altitudes
        # mark maximum altitude with good values for each sample
        # mark minimum altitude with invalid values for each sample
        # convert direction to radians
        # compute u,v,c components
        # compute colorspecs
        # compute plotting parameters

    def _copy(self):
        """Create a deep copy as a list of Sample copies."""
        self.samples = [sample._copy() for sample in self]

    @staticmethod
    def _toNumber(value, cast):
        """Return cast(value), or NaN if value is invalid or unparsable."""
        if value == Data._INVALID:
            return n.nan
        try:
            return cast(value)
        except (ValueError, TypeError):
            return n.nan

    @staticmethod
    def _mostCommon(values, default):
        """Return the most frequent of values, or default if there are none.

        Ties are broken in favor of the value encountered first, so the
        result does not depend on dictionary ordering.
        """
        votes = {}
        order = []
        for value in values:
            if value in votes:
                votes[value] += 1
            else:
                votes[value] = 1
                order.append(value)
        if not votes:
            return default
        maxVotes = max(votes.values())
        for value in order:
            if votes[value] == maxVotes:
                return value

    def _convert(self):
        """Convert to numbers and correct for invalid values.

        Body values become floats and header values become ints; anything
        invalid or unparsable becomes NaN. Operates on self.samples in
        place.
        """
        for sample in self.samples:
            for altitude in sample['body']:
                # list() so the dict is not iterated while being updated
                for key, value in list(altitude.items()):
                    altitude[key] = self._toNumber(value, float)
            header = sample['header']
            for key, value in list(header.items()):
                header[key] = self._toNumber(value, int)

    def _stamp(self):
        """Add a datetime stamp to each sample.

        Samples whose header date fields are missing, NaN, or out of range
        are stamped with datetime.datetime.min.
        """
        for sample in self.samples:
            try:
                header = sample['header']
                sample['stamp'] = datetime.datetime(header['YEAR'],
                                                    header['MONTH'],
                                                    header['DAY'],
                                                    header['HOUR'],
                                                    header['MIN'])
            except (KeyError, TypeError, ValueError):
                sample['stamp'] = datetime.datetime.min

    def _sampleInterval(self):
        """Add a sample interval attribute.

        The interval is the most common difference between the stamps of
        consecutive samples. With fewer than two samples there is nothing
        to vote on and datetime.timedelta.resolution is used.
        """
        stamps = [sample['stamp'] for sample in self.samples]
        intervals = [later - earlier
                     for earlier, later in zip(stamps[:-1], stamps[1:])]
        self.sampleInterval = self._mostCommon(intervals,
                                               datetime.timedelta.resolution)

    def _minimumAltitude(self):
        """Add a minimum altitude attribute.

        Each sample's header gets a 'minalt' entry holding the altitude of
        its first body row (NaN if there is none). The overall minimum
        altitude is the most common non-NaN 'minalt', or 0.0 if there is
        none.
        """
        candidates = []
        for sample in self.samples:
            try:
                minalt = sample['body'][0]['ALT']
            except (IndexError, KeyError):
                # empty body or a row without an altitude column
                minalt = n.nan
            sample['header']['minalt'] = minalt
            # NaN is the only value that does not equal itself
            if minalt == minalt:
                candidates.append(minalt)
        self.minimumAltitude = self._mostCommon(candidates, 0.0)
206
207
class Sample(object):

    """A single sample from daily sodar file data.

    (A header and a body attribute.)

    header -- a Header object, or None if the sample could not be parsed.
    body -- a Body object, or None if the sample could not be parsed.
    """

    # first three groups of lines are the header; rest is body
    # (each group is terminated by a blank line)
    _PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                              (?P<body>.*$)
                           ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

        sample -- the text of one sample. If it does not have the expected
        three header groups, header and body are both left as None.
        """
        super(Sample, self).__init__()
        self.header = None
        self.body = None
        match = self._PATTERN.match(sample.strip())
        if match is not None:
            self.header = Header(match.group('header'))
            self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

        Raises IndexError if the Sample has no body.
        """
        try:
            return self.body[index]
        except TypeError:  # sample.body may not exist
            raise IndexError('Sample index out of range')

    def _copy(self):
        """Create a deep copy as a dictionary of header and body copies.

        An unparsed Sample (header and body None) copies as an empty
        header dictionary and an empty body list.
        """
        header = {} if self.header is None else self.header._copy()
        body = [] if self.body is None else self.body._copy()
        return {'header': header,
                'body': body}
242
243
class Header(dict):

    """Sample-wide parameters of one sodar data sample.

    (A dictionary mapping header parameter names to their string values.)
    """

    def __init__(self, header):
        """Pair up discrete header parameter names with their values.

        Blank lines are ignored. The remaining lines alternate: a line of
        parameter names, then a line of values. All names are paired with
        all values in order of appearance.
        """
        super(Header, self).__init__()
        rows = []
        for raw in header.split('\n'):
            row = raw.strip()
            if row:
                rows.append(row)
        names = []
        values = []
        for position, row in enumerate(rows):
            tokens = row.split()
            if position % 2:
                values.extend(tokens)
            else:
                names.extend(tokens)
        # TODO: fix for bad match between names and values
        for name, value in zip(names, values):
            self[name] = value

    def _copy(self):
        """Return a shallow copy as a plain dictionary."""
        return dict(self)
270
271
class Body(list):

    """A sodar data sample body.

    (A list of dictionaries, one at each altitude.)
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

        The first line contains parameter keys;
        the remaining lines contain parameter values,
        one set of parameters for a single altitude per line.
        The lines are stored in the reverse of their order in the file.
        A body with no non-blank lines yields an empty list.
        """
        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            # a sample with a header but no observations
            return
        bodyKeys = bodyLines[0].split()
        # TODO: fix for bad match between names and values
        self.extend([dict(zip(bodyKeys, bodyLine.split()))
                     for bodyLine in bodyLines[1:]])
        self.reverse()

    def __getitem__(self, index):
        """Return altitude data by position or by altitude string.

        Any index the list rejects with TypeError is looked up as an
        altitude.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find altitude data in Body.

        Returns the first dictionary whose 'ALT' value equals index.
        Raises IndexError if there is none.
        """
        for altitude in self:
            try:
                if altitude['ALT'] != index:
                    continue
            except KeyError:
                continue
            return altitude
        raise IndexError('Body index, out of range')

    def _copy(self):
        """Create a deep copy as a list of dictionaries."""
        return [altitude.copy() for altitude in self]
318
319
def _main():
    """Process as script from command line.

    Downloads one day of test data, parses it, and prints the wind speed
    at 70 meters for the 9:15am sample.
    Raises IOError if the test data cannot be read.
    """
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3
    url = 'http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat'
    try:
        dataHandle = urlopen(url)
        try:
            dataString = dataHandle.read()
        finally:
            dataHandle.close()
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not turned into an IOError
        raise IOError("Failure to read test data")
    if not isinstance(dataString, str):
        # Python 3 returns bytes; Data expects text
        dataString = dataString.decode('ascii', 'replace')
    dataObject = Data(dataString)
    print(dataObject['2007-06-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()

Note: See TracBrowser for help on using the browser.

root/sodar/trunk/sodar/data.py

Download in other formats: