Revision Log

data.py

Revision 67 (checked in by cbc, 17 years ago)	Fulfill ticket #7: Add deep copy funtionality to Data class and ticket #8: Add string to float and !NaN conversion to Data class

Line
1	#!/usr/bin/python
2	"""
3	Classes to handle sodar data samples.
4
5	Sodar data samples are collected into daily files. Each sample consists of a
6	header followed by an observation for each height.
7
8	The daily file is split into a list (modeled by the class Data) of samples
9	(modeled by the class Sample) in chronological order. A Data object is
10	initialized with a string representing the daily file data:
11
12	dataHandle = open('20070601.dat')
13	dataString = dataHandle.read()
14	dataObject = Data(dataString)
15
16	Each Sample object has attributes for a Header and Body object. The Samples
17	within a Data object may also be accessed by time using a string of the format
18	YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching
19	Sample in the Data object:
20
21	dataObject[0] # the first Sample object of the day
22	dataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23	dataObject[15].header # the Header object of the 16th Sample
24	dataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26	Header objects act as dictionaries. Access each sample-wide parameter of
27	interest using the header parameter name as a keyword on the Header object:
28
29	dataObject[15].header['VAL2'] # the number of validations for beam 2
30	dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31	# probability on beam 3
32	dataObject[0].header['SNR1'] # signal to noise on beam 1
33
34	Consult your Sodar documentation for a complete list of header parameters.
35
36	Body objects act as lists of dictionaries. The dictionaries access
37	altitude-specific parameters by name as keywords. The dictionaries are in
38	altitude-ascending order. Each dictionary may also be accessed by indexing with
39	an altitude string:
40
41	dataObject[15].body[0] # the data for the lowest altitude, 16th sample
42	dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
43	dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
44	dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
45	# at 70 meters
46
47	The body attribute of a Sample object may also be indexed directly on a Sample
48	object for the most convenient semantics:
49
50	dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
51	dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
52	# 70 meters, 9:15am
53	"""
54
55	__author__ = 'Chris Calloway'
56	__email__ = 'cbc@unc.edu'
57	__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
58	__license__ = 'GPL2'
59
60	import re
61	import numpy as n
62
63	class Data(list):
64
65	"""Daily sodar file data.
66
67	(A chronologically ordered list of samples.)
68	"""
69
70	def __init__(self, data):
71	"""Divide daily string into list of Samples separated by $."""
72	super(Data, self).__init__()
73	self.extend([Sample(sample)
74	for sample in
75	[sample.strip() for sample in data.split('$')]
76	if sample.strip()])
77	self._normalize()
78
79	def __getitem__(self, index):
80	"""Allow sample retrieval by Sample time in header."""
81	try:
82	return super(Data,self).__getitem__(index)
83	except TypeError:
84	return self._find(index)
85
86	def _find(self, index):
87	"""Find Sample in Data
88
89	where sample time of form YYYY-MM-DD-HH-MM.
90	"""
91
92	try:
93	year,month,day,hour,minute = index.split('-')
94	except ValueError:
95	raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
96	except AttributeError:
97	raise AttributeError('Data index by date must be "YYYY-MM-DD-HH-MM"')
98	for sample in self:
99	try:
100	if sample.header['YEAR'].rjust(4,'0') != year: continue
101	if sample.header['MONTH'].rjust(2,'0') != month: continue
102	if sample.header['DAY'].rjust(2,'0') != day: continue
103	if sample.header['HOUR'].rjust(2,'0') != hour: continue
104	if sample.header['MIN'].rjust(2,'0') != minute: continue
105	return sample
106	except TypeError: # sample.header may not exist
107	continue
108	raise IndexError('Data index out of range')
109
110	def _normalize(self):
111	"""Clean up data for analysis."""
112	self._copy()
113	self._convert()
114	# compute time interval
115	# correct for missing times
116	# compute minium altitude
117	# compute maximum overall altitude
118	# compute number of altitudes
119	# compute altitude interval
120	# correct for missing altitudes
121	# mark maximum altitude with good values for each sample
122	# mark minimum altitude with invalid values for each sample
123	# convert direction to radians
124	# compute u,v,c components
125	# compute colorspecs
126	# compute plotting parameters
127
128	def _copy(self):
129	"""Create a deep copy as a list of Sample copies."""
130	self.samples = [sample._copy() for sample in self]
131
132	def _convert(self):
133	"""Convert to numbers and correct for invalid values."""
134	INVALID = "-9999"
135	for sample in self.samples:
136	body = sample[1]
137	for altitude in body:
138	for key,value in altitude.items():
139	try:
140	if value == INVALID:
141	raise ValueError
142	altitude[key] = float(value)
143	except (ValueError, TypeError, KeyError):
144	altitude[key] = n.NaN
145
146
class Sample(object):

    """A single sample from daily sodar file data.

    (A header and a body attribute.)

    For text that cannot be split into a header and a body, both
    attributes are None.
    """

    # first three groups of lines are the header; rest is body
    # (compiled once for the class instead of once per Sample)
    _PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                              (?P<body>.*$)
                           ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

        The sample text is stripped of surrounding whitespace before it is
        matched.  If it does not contain three blank-line separated header
        groups followed by a body, header and body are left as None.
        """
        super(Sample, self).__init__()
        # defaults cover parsing invalid Samples
        self.header = None
        self.body = None
        match = self._PATTERN.match(sample.strip())
        if match is None:
            return
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

        Raises IndexError when the Sample has no body.
        """
        try:
            return self.body[index]
        except TypeError:  # sample.body may not exist
            raise IndexError('Sample index out of range')

    def _copy(self):
        """Create a deep copy as a tuple of header and body copies.

        A missing header is copied as an empty dictionary and a missing
        body as an empty list, so the tuple always has the same shape.
        """
        header = {} if self.header is None else self.header._copy()
        body = [] if self.body is None else self.body._copy()
        return (header, body)
180
181
class Header(dict):

    """A sodar data sample header.

    (A dictionary of sample-wide parameters.)
    """

    def __init__(self, header):

        """Identify discrete header parameter names and values.

        Blank lines are ignored.  Of the remaining lines, the first, third,
        fifth, ... hold parameter keys and the second, fourth, sixth, ...
        hold parameter values.  All keys are paired with all values in
        order; surplus keys or values are dropped.
        """

        super(Header, self).__init__()
        stripped = (line.strip() for line in header.split('\n'))
        rows = [line for line in stripped if line]
        names = []
        values = []
        for position, row in enumerate(rows):
            # even positions carry names, odd positions carry values
            target = values if position % 2 else names
            target.extend(row.split())
        #fix for bad match between names and values
        for name, value in zip(names, values):
            self[name] = value

    def _copy(self):
        """Create a shallow copy as a dictionary."""
        return dict(self)
208
209
class Body(list):

    """A sodar data sample body.

    (A list of dictionaries, one for each altitude.)
    """

    def __init__(self, body):

        """Identify discrete body parameter names and values.

        The first line contains parameter keys;
        the remaining lines contain parameter values,
        one set of parameters for a single altitude per line.

        Blank lines are ignored.  The rows are stored in the reverse of
        their order in the text.  Text without any non-blank line gives an
        empty Body.
        """

        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            # nothing to parse: no key line, so no altitudes
            return
        bodyKeys = bodyLines[0].split()
        #fix for bad match between names and values
        self.extend(dict(zip(bodyKeys, bodyLine.split()))
                    for bodyLine in bodyLines[1:])
        self.reverse()

    def __getitem__(self, index):
        """Return altitude data by altitude string.

        Anything the underlying list accepts is handled by the list; an
        index it rejects with TypeError (such as a string) is looked up as
        an altitude with _find().
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find altitude data in Body.

        Returns the first dictionary whose 'ALT' value equals index;
        dictionaries without an 'ALT' key are skipped.  Raises IndexError
        if there is no match.
        """
        for altitude in self:
            if 'ALT' in altitude and altitude['ALT'] == index:
                return altitude
        raise IndexError('Body index out of range')

    def _copy(self):
        """Create a deep copy as a list of dictionaries."""
        return [altitude.copy() for altitude in self]
256
257
def _main():
    """Process as script from command line.

    Downloads one daily test file, parses it into a Data object and prints
    the 'SPEED' value at altitude '70' for the 2007-06-01 09:15 sample.

    Raises IOError if the test data cannot be read.
    """
    # imported locally so the module itself has no network dependency and
    # the script runs under both Python 2 and Python 3
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen
    try:
        dataHandle = urlopen('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat')
        try:
            dataString = dataHandle.read()
        finally:
            # always release the connection, even if the read fails
            dataHandle.close()
    except Exception:
        # Exception, not a bare except, so Ctrl-C still interrupts the script
        raise IOError("Failure to read test data")
    if not isinstance(dataString, str):
        # Python 3 returns bytes; Data splits on str
        dataString = dataString.decode('latin-1')
    dataObject = Data(dataString)
    print(dataObject['2007-06-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()

Note: See TracBrowser for help on using the browser.

root/sodar/trunk/sodar/data.py

Download in other formats: