Revision Log

data.py

Revision 66 (checked in by cbc, 17 years ago)	Daily check-in of code. Started _normalize.

Line
1	#!/usr/bin/python
2	"""
3	Classes to handle sodar data samples.
4
5	Sodar data samples are collected into daily files. Each sample consists of a
6	header followed by an observation for each height.
7
8	The daily file is split into a list (modeled by the class Data) of samples
9	(modeled by the class Sample) in chronological order. A Data object is
10	initialized with a string representing the daily file data:
11
12	dataHandle = open('20070601.dat')
13	dataString = dataHandle.read()
14	dataObject = Data(dataString)
15
16	Each Sample object has attributes for a Header and Body object. The Samples
17	within a Data object may also be accessed by time using a string of the format
18	YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching
19	Sample in the Data object:
20
21	dataObject[0] # the first Sample object of the day
22	dataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23	dataObject[15].header # the Header object of the 16th Sample
24	dataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26	Header objects act as dictionaries. Access each sample-wide parameter of
27	interest using the header parameter name as a keyword on the Header object:
28
29	dataObject[15].header['VAL2'] # the number of validations for beam 2
30	dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31	# probability on beam 3
32	dataObject[0].header['SNR1'] # signal to noise on beam 1
33
34	Consult your Sodar documentation for a complete list of header parameters.
35
36	Body objects act as lists of dictionaries. The dictionaries access
37	altitude-specific parameters by name as keywords. The dictionaries are in
38	altitude-ascending order. Each dictionary may also be accessed by indexing with
39	an altitude string:
40
41	dataObject[15].body[0] # the data for the lowest altitude, 16th sample
42	dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
43	dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
44	dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
45	# at 70 meters
46
47	The body attribute of a Sample object may also be indexed directly on a Sample
48	object for the most convenient semantics:
49
50	dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
51	dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
52	# 70 meters, 9:15am
53	"""
54
55	__author__ = 'Chris Calloway'
56	__email__ = 'cbc@unc.edu'
57	__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
58	__license__ = 'GPL2'
59
60	import re
61	import numpy as n
62
class Data(list):

    """Daily sodar file data.

    (A chronologically ordered list of samples.)

    Besides the list contents, an instance carries a ``samples`` attribute
    (set by ``_copy``): a list of ``(header_dict, [altitude_dict, ...])``
    tuples holding plain-dict copies of every Sample, meant for analysis
    without touching the parsed Sample objects.
    """

    # Header keys that make up a sample time, with their zero-padded widths,
    # in the order they appear in a 'YYYY-MM-DD-HH-MM' index string.
    _TIME_FIELDS = (('YEAR', 4), ('MONTH', 2), ('DAY', 2),
                    ('HOUR', 2), ('MIN', 2))

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

        data -- the complete text of one daily sodar file.
        """
        super(Data, self).__init__()
        chunks = (chunk.strip() for chunk in data.split('$'))
        # Chunks that are empty after stripping carry no sample; skip them.
        self.extend(Sample(chunk) for chunk in chunks if chunk)
        self._normalize()

    def __getitem__(self, index):
        """Allow sample retrieval by position or by Sample time in header.

        index -- an integer or slice (ordinary list indexing), or a string
        of the form YYYY-MM-DD-HH-MM.
        """
        try:
            return super(Data, self).__getitem__(index)
        except TypeError:
            # Not a valid list index; treat it as a time string.
            return self._find(index)

    def _find(self, index):
        """Find the first Sample in Data whose header time matches index.

        index -- sample time of the form YYYY-MM-DD-HH-MM. Each field is
        zero-padded before comparison, so '2007-6-1-9-15' matches too.

        Raises AttributeError if index has no split method (not a string),
        ValueError if index does not have exactly five '-' separated
        fields, and IndexError if no Sample matches.
        """
        try:
            fields = index.split('-')
        except AttributeError:
            raise AttributeError(
                'Data index by date must be "YYYY-MM-DD-HH-MM"')
        if len(fields) != len(self._TIME_FIELDS):
            raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        wanted = tuple(field.rjust(width, '0')
                       for field, (_, width) in zip(fields, self._TIME_FIELDS))
        for sample in self:
            try:
                found = tuple(sample.header[key].rjust(width, '0')
                              for key, width in self._TIME_FIELDS)
            except (TypeError, KeyError):
                # sample.header may not exist (TypeError) or may lack a
                # time field (KeyError); neither should abort the search.
                continue
            if found == wanted:
                return sample
        raise IndexError('Data index out of range')

    def _normalize(self):
        """Clean up data for analysis.

        Currently this only builds the ``samples`` copy; the remaining
        steps are still to be written.
        """
        self._copy()
        # TODO: self._convert()
        # TODO: compute time interval
        # TODO: correct for missing times
        # TODO: compute minimum altitude
        # TODO: compute maximum overall altitude
        # TODO: compute number of altitudes
        # TODO: compute altitude interval
        # TODO: correct for missing altitudes
        # TODO: mark maximum altitude with good values for each sample
        # TODO: mark minimum altitude with invalid values for each sample
        # TODO: convert direction to radians
        # TODO: compute u,v,c components
        # TODO: compute colorspecs
        # TODO: compute plotting parameters

    def _copy(self):
        """Create an independent copy of all the samples in this instance.

        Sets ``self.samples`` to a list of ``(header, altitudes)`` tuples,
        where header is a plain dict and altitudes is a list of plain
        dicts. Every dict is a new object, so changing ``self.samples``
        never changes the Sample objects held in the list itself. A Sample
        whose header or body is None contributes an empty dict or list.
        """
        self.samples = [
            (dict(sample.header or {}),
             # Copy each altitude dict; copying only the list would leave
             # the dicts shared with the original Body.
             [dict(altitude) for altitude in (sample.body or [])])
            for sample in self]

    def _convert(self):
        """Convert to numbers and correct for invalid values.

        Replaces every altitude value in ``self.samples`` with a float.
        The sentinel string "-9999" and anything float() cannot parse
        become NaN. Header values are left untouched.
        """
        INVALID = "-9999"
        for _, altitudes in self.samples:
            for altitude in altitudes:
                # Snapshot the items so the dict can be updated in the loop.
                for key, raw in list(altitude.items()):
                    if raw == INVALID:
                        altitude[key] = n.nan
                        continue
                    try:
                        altitude[key] = float(raw)
                    except (ValueError, TypeError):
                        # n.nan, not n.NaN: the capitalised alias was
                        # removed in NumPy 2.0.
                        altitude[key] = n.nan
149
150
class Sample(object):

    """A single sample from daily sodar file data.

    (A header and a body attribute.)

    header -- a Header object, or None if the sample text did not parse.
    body -- a Body object, or None if the sample text did not parse.
    """

    # The first three groups of lines, each ended by a blank line, are the
    # header; everything after them is the body. Compiled once and shared
    # by all Samples instead of being rebuilt for every instance.
    _PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                              (?P<body>.*$)
                           ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

        sample -- the text of one sample (surrounding whitespace ignored).
        Text that does not have the expected layout yields a Sample whose
        header and body are both None rather than raising.
        """
        super(Sample, self).__init__()
        match = self._PATTERN.match(sample.strip())
        if match is None:
            # Invalid sample: keep the object so list positions stay
            # stable, but mark both parts as missing.
            self.header = None
            self.body = None
        else:
            self.header = Header(match.group('header'))
            self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

        Raises IndexError when the Sample has no body.
        """
        try:
            return self.body[index]
        except TypeError:  # self.body may be None
            raise IndexError('Sample index out of range')
180
181
class Header(dict):

    """A sodar data sample header.

    (A dictionary of sample-wide parameters.)
    """

    def __init__(self, header):
        """Identify discrete header parameter names and values.

        header -- the header text of one sample.

        Blank lines are ignored. Of the remaining lines, every other line
        contains parameter keys and the line after it contains the matching
        parameter values. All keys and values are kept as strings.
        """
        super(Header, self).__init__()
        stripped = (line.strip() for line in header.split('\n'))
        headerLines = [line for line in stripped if line]
        # Pair each key line with its own value line. Joining all key
        # lines and all value lines first would let one short value line
        # shift every later parameter onto the wrong key. Within a pair,
        # zip() drops whatever has no partner.
        for keyLine, valueLine in zip(headerLines[::2], headerLines[1::2]):
            self.update(zip(keyLine.split(), valueLine.split()))
204
205
class Body(list):

    """A sodar data sample body.

    (A list of dictionaries, one for each altitude.)
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

        body -- the body text of one sample.

        Blank lines are ignored. The first remaining line contains
        parameter keys; the other lines contain parameter values, one set
        of parameters for a single altitude per line. A body without any
        non-blank line produces an empty list.
        """
        super(Body, self).__init__()
        stripped = (line.strip() for line in body.split('\n'))
        bodyLines = [line for line in stripped if line]
        if not bodyLines:
            return
        bodyKeys = bodyLines[0].split()
        # zip() silently drops keys or values that have no partner.
        # TODO: fix for bad match between names and values
        self.extend(dict(zip(bodyKeys, line.split()))
                    for line in bodyLines[1:])
        # Rows are stored in the reverse of file order.
        # NOTE(review): presumably the file lists the highest altitude
        # first, so this gives altitude-ascending order -- confirm.
        self.reverse()

    def __getitem__(self, index):
        """Return altitude data by list position or by altitude string."""
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            # Not a valid list index; treat it as an altitude string.
            return self._find(index)

    def _find(self, index):
        """Find altitude data in Body.

        index -- the altitude as it appears in the 'ALT' column (a string).

        Returns the first row whose 'ALT' value equals index. Rows without
        an 'ALT' value are skipped. Raises IndexError if nothing matches.
        """
        for altitudeData in self:
            if 'ALT' in altitudeData and altitudeData['ALT'] == index:
                return altitudeData
        raise IndexError('Body index, out of range')
245
246
def _main(url='http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat'):
    """Process as script from command line.

    url -- location of a daily sodar file; defaults to the test data file.

    Downloads the file, parses it into a Data object and prints the wind
    speed at 70 meters for the 9:15am sample of 2007-06-01.

    Raises IOError if the file cannot be read.
    """
    from contextlib import closing
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3
    try:
        # closing() releases the connection even if read() fails.
        with closing(urlopen(url)) as dataHandle:
            dataString = dataHandle.read()
    except Exception as err:
        # Not a bare except: Ctrl-C and SystemExit must still propagate.
        raise IOError("Failure to read test data: %s" % err)
    if not isinstance(dataString, str):
        # Python 3 returns bytes, but the parser works on text.
        dataString = dataString.decode('ascii', 'replace')
    dataObject = Data(dataString)
    print(dataObject['2007-06-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()

Note: See TracBrowser for help on using the browser.

root/sodar/trunk/sodar/data.py

Download in other formats: