Revision Log

rawData.py

Revision 74 (checked in by cbc, 17 years ago)	Saving tested refactors.

Line
1	#!/usr/bin/python
2	"""
3	Classes to handle raw sodar data samples.
4
5	Raw sodar data samples are collected into daily files. Each sample consists of a
6	header followed by an observation for each height.
7
8	The daily file is split into a list (modeled by the class RawData) of samples
9	(modeled by the class Sample) in chronological order. A RawData object is
10	initialized with a string representing the daily file data:
11
12	rawDataHandle = open('20070601.dat')
13	rawDataString = rawDataHandle.read()
14	rawDataObject = RawData(rawDataString)
15
16	Each Sample object has attributes for a Header and Body object. The Samples
17	within a RawData object may also be accessed by time using a string of the
18	format YYYY-MM-DD-HH-MM as an index on the RawData object to return the first
19	matching Sample in the RawData object:
20
21	rawDataObject[0] # the first Sample object of the day
22	rawDataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23	rawDataObject[15].header # the Header object of the 16th Sample
24	rawDataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26	Header objects act as dictionaries. Access each sample-wide parameter of
27	interest using the header parameter name as a keyword on the Header object:
28
29	rawDataObject[15].header['VAL2'] # the number of validations for beam 2
30	rawDataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31	# probability on beam 3
32	rawDataObject[0].header['SNR1'] # signal to noise on beam 1
33
34	Consult your Sodar documentation for a complete list of header parameters.
35	Different sodar models have different sets of header parameters. This model
36	seeks to be model agnostic, and parses the header parameter names from the
37	raw data itself.
38
39	Body objects act as lists of dictionaries. The dictionaries access
40	altitude-specific parameters by name as keywords. The dictionaries are in
41	altitude-ascending order. Each dictionary may also be accessed by indexing with
42	an altitude string:
43
44	rawDataObject[15].body[0] # the data for the lowest altitude, 16th sample
45	rawDataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
46	rawDataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
47	rawDataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
48	# at 70 meters
49
50	The body attribute of a Sample object may also be indexed directly on a Sample
51	object for the most convenient semantics:
52
53	rawDataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
54	rawDataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
55	# 70 meters, 9:15am
56	"""
57
58	__author__ = 'Chris Calloway'
59	__email__ = 'cbc@unc.edu'
60	__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
61	__license__ = 'GPL2'
62
63	import re
64
class RawData(list):

    """Raw daily sodar file data.

    A list of Sample objects in the order they appear in the daily file
    (chronological, according to the module documentation). Besides the
    usual integer and slice indexing of a list, a RawData object may be
    indexed with a time string of the form "YYYY-MM-DD-HH-MM", which
    returns the first Sample whose header time fields match.
    """

    # Header keys that make up a sample time, with the width each value is
    # zero-padded to before comparison.
    _timeFields = (('YEAR', 4), ('MONTH', 2), ('DAY', 2),
                   ('HOUR', 2), ('MIN', 2))

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

        data -- the complete contents of a daily file as a single string.

        Chunks that are empty after stripping whitespace are skipped.
        """
        super(RawData, self).__init__()
        chunks = [chunk.strip() for chunk in data.split('$')]
        self.extend([Sample(chunk) for chunk in chunks if chunk])

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

        Integer and slice indices behave as for any list. An index the
        list rejects with TypeError (such as a string) is treated as a
        sample time and handed to _find.
        """
        try:
            return super(RawData, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in RawData.

        Where sample time of form YYYY-MM-DD-HH-MM.

        Returns the first Sample whose YEAR, MONTH, DAY, HOUR and MIN
        header values equal the requested ones. Both sides are zero-padded
        before comparison, so '2007-6-1-9-15' and '2007-06-01-09-15' are
        equivalent.

        Raises ValueError if the index does not split into five fields,
        AttributeError if the index has no split method, and IndexError if
        no Sample matches.
        """
        try:
            parts = index.split('-')
            year, month, day, hour, minute = parts
        except ValueError:
            raise ValueError('RawData index by date must be '
                             '"YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('RawData index by date must be '
                                 '"YYYY-MM-DD-HH-MM"')
        wanted = tuple([part.rjust(width, '0')
                        for part, (name, width)
                        in zip(parts, self._timeFields)])
        for sample in self:
            try:
                header = sample.header
                found = tuple([header[name].rjust(width, '0')
                               for name, width in self._timeFields])
            except (TypeError, KeyError):
                # TypeError: sample.header is None (unparseable sample).
                # KeyError: the header lacks one of the time fields.
                # Either way this sample cannot match; keep looking.
                continue
            if found == wanted:
                return sample
        raise IndexError('RawData index out of range')
112
113
class Sample(object):

    """A single sample from raw daily sodar file data.

    Attributes:
    header -- a Header built from the first three groups of lines (each
              group ended by an empty line), or None if the sample text
              cannot be split that way.
    body   -- a Body built from the text after the header, or None if the
              sample text cannot be split.
    """

    # First three groups of lines are the header; rest is body.
    # Compiled once at class level instead of once per Sample.
    _samplePattern = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                    (?P<body>.*$)
                                 ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

        sample -- the text of one sample (surrounding whitespace is
                  stripped before parsing).

        A sample that does not match the header/body layout is kept as an
        invalid Sample whose header and body are both None, rather than
        aborting the parse of the whole daily file.
        """
        super(Sample, self).__init__()
        match = self._samplePattern.match(sample.strip())
        if match is None:
            parts = {}
        else:
            parts = match.groupdict()
        headerText = parts.get('header')
        bodyText = parts.get('body')
        self.header = None
        if headerText is not None:
            self.header = Header(headerText)
        self.body = None
        if bodyText is not None:
            self.body = Body(bodyText)

    def __getitem__(self, index):
        """Index Sample by body attribute.

        Delegates to the Body, so both positions and altitude strings are
        accepted. Raises IndexError when the Sample has no body.
        """
        if self.body is None:
            raise IndexError('Sample index out of range')
        return self.body[index]

    def data(self):
        """Create a deep copy as a dictionary of header and body data.

        Returns a dictionary with the keys 'header' and 'body'. A part
        that is missing from an invalid Sample is reported as None.
        """
        headerData = None
        if self.header is not None:
            headerData = self.header.data()
        bodyData = None
        if self.body is not None:
            bodyData = self.body.data()
        return {'header': headerData,
                'body': bodyData}
148
149
class Header(dict):

    """A raw sodar data sample header.

    (A dictionary mapping sample-wide parameter names to their values,
    both kept as strings.)
    """

    def __init__(self, header):
        """Identify discrete header parameter names and values.

        header -- the header text of one sample.

        After blank lines are dropped, lines alternate: the first, third,
        ... lines hold parameter names and the second, fourth, ... lines
        hold the corresponding values.
        """
        super(Header, self).__init__()
        lines = []
        for rawLine in header.split('\n'):
            stripped = rawLine.strip()
            if stripped:
                lines.append(stripped)
        # Pool all names and all values before pairing them, so that a
        # line whose name count differs from its value count does not
        # stop the remaining tokens from being paired in sequence.
        names = " ".join(lines[::2]).split()
        values = " ".join(lines[1::2]).split()
        self.update(zip(names, values))

    def data(self):
        """Create a shallow copy of the data as a plain dictionary."""
        return dict(self)
176
177
class Body(list):

    """A raw sodar data sample body.

    (A list of dictionaries, one for each altitude.)

    Besides the usual integer and slice indexing of a list, a Body may be
    indexed with an altitude string, which returns the first dictionary
    whose 'ALT' value equals that string.
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

        body -- the body text of one sample.

        The first line contains parameter keys;
        the remaining lines contain parameter values,
        one set of parameters for a single altitude per line.
        Blank lines are ignored. Body text with no content at all yields
        an empty Body.
        """
        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            # Nothing to parse: no key line, hence no altitude rows.
            return
        bodyKeys = bodyLines[0].split()
        # zip() pairs names with values only as far as both last, so a row
        # with too few or too many values does not raise.
        self.extend([dict(zip(bodyKeys, bodyLine.split()))
                     for bodyLine in bodyLines[1:]])
        # Store rows in the reverse of file order; the module documentation
        # describes the resulting order as altitude-ascending.
        self.reverse()

    def __getitem__(self, index):
        """Return raw altitude data by position or by altitude string.

        An index the list rejects with TypeError (such as a string) is
        treated as an altitude and handed to _find.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find raw altitude data in Body.

        Returns the first dictionary whose 'ALT' value equals index.
        Dictionaries without an 'ALT' key are skipped. Raises IndexError
        if nothing matches.
        """
        for altitude in self:
            try:
                if altitude['ALT'] != index:
                    continue
            except KeyError:
                continue
            return altitude
        raise IndexError('Body index, out of range')

    def data(self):
        """Create a copy of the data as a list of dictionaries.

        Each altitude dictionary is copied, so changes to the result do
        not affect this Body.
        """
        return [altitude.copy() for altitude in self]
224
225
def _main():
    """Process as script from command line.

    Downloads one day of raw test data, parses it into a RawData object
    and prints the wind speed recorded at 70 meters for 9:15am.

    Raises IOError if the test data cannot be downloaded or read.
    """
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3
    url = ('http://nemo.isis.unc.edu/'
           'data/nccoos/level0/dukeforest/sodar/'
           'store/2007-06/20070601.dat')
    try:
        rawDataHandle = urlopen(url)
        try:
            rawDataString = rawDataHandle.read()
        finally:
            # Release the connection even if the read fails.
            rawDataHandle.close()
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not converted into an IOError.
        raise IOError("Failure to read raw test data")
    if not isinstance(rawDataString, str):
        # Python 3 returns bytes, but RawData splits on a text '$'.
        # latin-1 maps every byte to a character, so decoding cannot fail.
        # NOTE(review): the file's real encoding is not shown here - confirm.
        rawDataString = rawDataString.decode('latin-1')
    rawDataObject = RawData(rawDataString)
    print(rawDataObject['2007-06-01-09-15']['70']['SPEED'])


# Run the demonstration only when executed as a script, not on import.
if __name__ == "__main__":
    _main()

Note: See TracBrowser for help on using the browser.

root/sodar/trunk/sodar/rawData.py

Download in other formats: