Revision Log

rawData.py

Revision 72 (checked in by cbc, 17 years ago)	Fulfill ticket #21: Move array normalization methods to new ProcessedData class

Line
1	#!/usr/bin/python
2	"""
3	Classes to handle raw sodar data samples.
4
5	Raw sodar data samples are collected into daily files. Each sample consists of a
6	header followed by an observation for each height.
7
8	The daily file is split into a list (modeled by the class RawData) of samples
9	(modeled by the class Sample) in chronological order. A RawData object is
10	initialized with a string representing the daily file data:
11
12	rawDataHandle = open('20070601.dat')
13	rawDataString = rawDataHandle.read()
14	rawDataObject = RawData(rawDataString)
15
16	Each Sample object has attributes for a Header and Body object. The Samples
17	within a RawData object may also be accessed by time using a string of the format
18	YYYY-MM-DD-HH-MM as an index on the RawData object to return the first matching
19	Sample in the RawData object:
20
21	rawDataObject[0] # the first Sample object of the day
22	rawDataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23	rawDataObject[15].header # the Header object of the 16th Sample
24	rawDataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26	Header objects act as dictionaries. Access each sample-wide parameter of
27	interest using the header parameter name as a keyword on the Header object:
28
29	rawDataObject[15].header['VAL2'] # the number of validations for beam 2
30	rawDataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31	# probability on beam 3
32	rawDataObject[0].header['SNR1'] # signal to noise on beam 1
33
34	Consult your Sodar documentation for a complete list of header parameters.
35	Different sodar models have different sets of header parameters. This model
36	seeks to be model agnostic, and parses the header parameter names from the
37	raw data itself.
38
39	Body objects act as lists of dictionaries. The dictionaries access
40	altitude-specific parameters by name as keywords. The dictionaries are in
41	altitude-ascending order. Each dictionary may also be accessed by indexing with
42	an altitude string:
43
44	rawDataObject[15].body[0] # the data for the lowest altitude, 16th sample
45	rawDataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
46	rawDataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
47	rawDataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
48	# at 70 meters
49
50	The body attribute of a Sample object may also be indexed directly on a Sample
51	object for the most convenient semantics:
52
53	rawDataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
54	rawDataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
55	# 70 meters, 9:15am
56	"""
57
58	__author__ = 'Chris Calloway'
59	__email__ = 'cbc@unc.edu'
60	__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
61	__license__ = 'GPL2'
62
63	import re
64
class RawData(list):

    """Raw daily sodar file data.

    (A chronologically ordered list of Samples.)

    Items may be retrieved by ordinary list index/slice, or by a time
    string of the form YYYY-MM-DD-HH-MM, which returns the first Sample
    whose header carries that time.
    """

    # Header keys that hold the sample time, each paired with the width it
    # is zero-padded to. The order matches the fields of a
    # 'YYYY-MM-DD-HH-MM' index string.
    _TIME_FIELDS = (('YEAR', 4), ('MONTH', 2), ('DAY', 2),
                    ('HOUR', 2), ('MIN', 2))

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

        data -- the complete text of one daily file. Chunks that are
                empty after stripping whitespace are discarded.
        """
        super(RawData, self).__init__()
        chunks = (chunk.strip() for chunk in data.split('$'))
        self.extend([Sample(chunk) for chunk in chunks if chunk])

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

        Integer and slice indexes behave as for any list. Any index the
        list itself rejects with TypeError is treated as a time string
        and handed to _find().
        """
        try:
            return super(RawData, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in RawData.

        Where sample time of form YYYY-MM-DD-HH-MM.

        Both the index fields and the header values are zero-padded before
        comparison, so '2007-6-1-9-15' and '2007-06-01-09-15' are
        equivalent. Samples without a usable header (no header at all, or
        a header lacking one of the time fields) are skipped.

        Raises ValueError if index does not have exactly five
        '-'-separated fields, AttributeError if index is not string-like,
        and IndexError if no Sample matches.
        """
        try:
            parts = index.split('-')
        except AttributeError:
            raise AttributeError(
                'RawData index by date must be "YYYY-MM-DD-HH-MM"')
        if len(parts) != len(self._TIME_FIELDS):
            raise ValueError(
                'RawData index by date must be "YYYY-MM-DD-HH-MM"')
        wanted = [part.rjust(width, '0')
                  for part, (key, width) in zip(parts, self._TIME_FIELDS)]
        for sample in self:
            try:
                found = [sample.header[key].rjust(width, '0')
                         for key, width in self._TIME_FIELDS]
            except (TypeError, KeyError):
                # TypeError: sample.header is None (unparseable sample).
                # KeyError: header is present but lacks a time field.
                continue
            if found == wanted:
                return sample
        raise IndexError('RawData index out of range')
110
111
class Sample(object):

    """A single sample from raw daily sodar file data.

    (A header and a body attribute.)

    If the sample text cannot be parsed, both attributes are None.
    """

    # The first three groups of lines (each terminated by a blank line)
    # are the header; the rest is the body. Compiled once for all Samples.
    _PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                              (?P<body>.*$)
                           ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

        sample -- the text of one sample; surrounding whitespace is
                  stripped before matching.

        When the text does not contain three blank-line-terminated header
        groups, header and body are left as None instead of raising.
        """
        super(Sample, self).__init__()
        self.header = None
        self.body = None
        match = self._PATTERN.match(sample.strip())
        if match is None:
            # Invalid sample: keep the None placeholders.
            return
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

        Raises IndexError when the Sample has no body (indexing None
        raises TypeError, which is translated here).
        """
        try:
            return self.body[index]
        except TypeError:  # sample.body may not exist
            raise IndexError('Sample index out of range')

    def _copy(self):
        """Create a deep copy as a dictionary of header and body copies.

        Raises AttributeError if the Sample failed to parse, since header
        and body are then None and have no _copy method.
        """
        return {'header': self.header._copy(),
                'body': self.body._copy()}
146
147
class Header(dict):

    """A raw sodar data sample header.

    (A dictionary of sample-wide parameters.)
    """

    def __init__(self, header):
        """Identify discrete header parameter names and values.

        Blank lines are ignored. Of the remaining lines, the 1st, 3rd,
        5th, ... hold whitespace-separated parameter names and the 2nd,
        4th, 6th, ... hold the corresponding values.
        """
        super(Header, self).__init__()
        rows = [row
                for row in (raw.strip() for raw in header.split('\n'))
                if row]
        # Pool all names and all values before pairing them, so pairing is
        # by overall position rather than line by line.
        names = " ".join(rows[0::2]).split()
        values = " ".join(rows[1::2]).split()
        for name, value in zip(names, values):
            self[name] = value

    def _copy(self):
        """Create a shallow copy as a plain dictionary."""
        return dict(self)
174
175
class Body(list):

    """A raw sodar data sample body.

    (A list of dictionaries, one per altitude.)
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

        The first non-blank line contains parameter keys;
        the remaining lines contain parameter values,
        one set of parameters for a single altitude per line.

        The rows are stored in the reverse of their order in the text.
        An empty or whitespace-only body yields an empty list.
        """
        super(Body, self).__init__()
        rows = [row
                for row in (raw.strip() for raw in body.split('\n'))
                if row]
        if not rows:
            # Nothing to parse: there is not even a line of keys.
            return
        keys = rows[0].split()
        # zip() drops surplus names or values when a row is short or long.
        self.extend([dict(zip(keys, row.split())) for row in rows[1:]])
        self.reverse()

    def __getitem__(self, index):
        """Return raw altitude data by list index or altitude string.

        Any index the list itself rejects with TypeError is looked up as
        an altitude via _find().
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find raw altitude data in Body.

        Returns the first dictionary whose 'ALT' value equals index.
        Dictionaries without an 'ALT' key are skipped. Raises IndexError
        when nothing matches.
        """
        for altitude in self:
            if 'ALT' in altitude and altitude['ALT'] == index:
                return altitude
        raise IndexError('Body index, out of range')

    def _copy(self):
        """Create a deep copy as a list of dictionaries."""
        return [altitude.copy() for altitude in self]
222
223
def _main():
    """Process as script from command line.

    Downloads one day of raw test data, parses it into a RawData object
    and prints the wind speed at 70 meters for 9:15am.

    Raises IOError if the test data cannot be read.
    """
    # Imported here so the module itself has no network dependency, and
    # so the script runs on both Python 2 and Python 3.
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen
    url = ('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/'
           'sodar/store/2007-06/20070601.dat')
    try:
        rawDataHandle = urlopen(url)
        try:
            rawDataString = rawDataHandle.read()
        finally:
            # Always release the connection, even if the read fails.
            rawDataHandle.close()
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit still
        # propagate untouched.
        raise IOError("Failure to read raw test data")
    if not isinstance(rawDataString, str):
        # Python 3 returns bytes; the parser works on text.
        rawDataString = rawDataString.decode('latin-1')
    rawDataObject = RawData(rawDataString)
    print(rawDataObject['2007-06-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()

Note: See TracBrowser for help on using the browser.

root/sodar/trunk/sodar/rawData.py

Download in other formats: