Revision Log

rawData.py

Revision 120 (checked in by cbc, 17 years ago)	Change test data location to match new automated download to nemo.

Line
1	#!/usr/bin/python
2	"""
3	Module to handle raw sodar data samples.
4
5	Raw sodar data samples are collected into daily files. Each sample consists of a
6	header followed by an observation for each height.
7
8	The daily file is split into a list (modeled by the class RawData) of samples
9	(modeled by the class Sample) in chronological order. A RawData object is
10	initialized with a string representing the daily file data:
11
12	rawDataHandle = open('20070601.dat')
13	rawDataString = rawDataHandle.read()
14	rawDataObject = RawData(rawDataString)
15
16	Each Sample object has attributes for a Header and Body object. The Samples
17	within a RawData object may also be accessed by time using a string of the
18	format YYYY-MM-DD-HH-MM as an index on the RawData object to return the first
19	matching Sample in the RawData object:
20
21	rawDataObject[0] # the first Sample object of the day
22	rawDataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23	rawDataObject[15].header # the Header object of the 16th Sample
24	rawDataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26	Header objects act as dictionaries. Access each sample-wide parameter of
27	interest using the header parameter name as a keyword on the Header object:
28
29	rawDataObject[15].header['VAL2'] # the number of validations for beam 2
30	rawDataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31	# probability on beam 3
32	rawDataObject[0].header['SNR1'] # signal to noise on beam 1
33
34	Consult your Sodar documentation for a complete list of header parameters.
35	Different sodar models have different sets of header parameters. This model
36	seeks to be model agnostic, and parses the header parameter names from the
37	raw data itself.
38
39	Body objects act as lists of dictionaries. The dictionaries access
40	altitude-specific parameters by name as keywords. The dictionaries are in
41	altitude-ascending order. Each dictionary may also be accessed by indexing with
42	an altitude string:
43
44	rawDataObject[15].body[0] # the data for the lowest altitude, 16th sample
45	rawDataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
46	rawDataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
47	rawDataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
48	# at 70 meters
49
50	The body attribute of a Sample object may also be indexed directly on a Sample
51	object for the most convenient semantics:
52
53	rawDataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
54	rawDataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
55	# 70 meters, 9:15am
56	"""
57
58	__author__ = 'Chris Calloway'
59	__email__ = 'cbc@unc.edu'
60	__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
61	__license__ = 'GPL2'
62
63	import re
64
class RawData(list):

    """Class to handle raw daily sodar file data.

    A list of Sample objects in the order they appear in the daily file.
    Besides ordinary list indexing, a Sample may be retrieved by the time
    in its header using a string of the form "YYYY-MM-DD-HH-MM".
    """

    # Header parameters that hold the sample time, each paired with the
    # width its value is zero-padded to before being compared with the
    # corresponding component of the index string.
    _TIME_FIELDS = (('YEAR', 4), ('MONTH', 2), ('DAY', 2),
                    ('HOUR', 2), ('MIN', 2))

    def __init__(self, data):
        """Parse raw daily sodar file data.

        data -- string holding the daily file; samples are separated by '$'.
        """
        super(RawData, self).__init__()
        # Strip each chunk once and drop the chunks that end up empty.
        chunks = [chunk.strip() for chunk in data.split('$')]
        self.extend([Sample(chunk) for chunk in chunks if chunk])

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

        Integer and slice indices behave as on a plain list; any index the
        list rejects with TypeError is treated as a time string.
        """
        try:
            return super(RawData, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find the first Sample in RawData matching a sample time.

        index -- sample time of the form "YYYY-MM-DD-HH-MM".

        Raises ValueError if index does not split into five '-' separated
        parts, AttributeError if index has no split method, and IndexError
        if no Sample matches.
        """
        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('RawData index by date must be '
                             '"YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('RawData index by date must be '
                                 '"YYYY-MM-DD-HH-MM"')
        wanted = (year, month, day, hour, minute)
        for sample in self:
            try:
                stamp = tuple([sample.header[name].rjust(width, '0')
                               for name, width in self._TIME_FIELDS])
            except (TypeError, KeyError):
                # TypeError: sample.header may not exist (None).
                # KeyError: the header lacks a time parameter; skip the
                # sample the same way Body._find skips rows without 'ALT'.
                continue
            if stamp == wanted:
                return sample
        raise IndexError('RawData index out of range')
113
114
class Sample(object):

    """A single sample from raw daily sodar file data.

    (A header and a body attribute.)

    Both attributes are None when the sample text does not have the
    expected layout.
    """

    # The first three groups of lines, each terminated by a blank line, are
    # the header; everything after them is the body.  Under re.VERBOSE the
    # literal whitespace inside the pattern is ignored.
    _samplePattern = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                    (?P<body>.*$)
                                 ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

        sample -- string holding the text of one sample.
        """
        super(Sample, self).__init__()
        # Defaults cover invalid samples that the pattern does not match.
        self.header = None
        self.body = None
        match = self._samplePattern.match(sample.strip())
        if match is None:
            return
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

        Raises IndexError when the sample has no body.
        """
        try:
            return self.body[index]
        except TypeError:  # sample.body may not exist
            raise IndexError('Sample index out of range')

    def data(self):
        """Create a copy as a dictionary of header and body data.

        A missing header or body is reported as None.
        """
        header = None
        if self.header is not None:
            header = self.header.data()
        body = None
        if self.body is not None:
            body = self.body.data()
        return {'header': header,
                'body': body}
149
150
class Header(dict):

    """The header of one raw sodar data sample.

    (A dictionary mapping sample-wide parameter names to string values.)
    """

    def __init__(self, header):

        """Identify discrete header parameter names and values.

        Non-blank header lines alternate: a line of parameter names is
        followed by a line with the values for those names.
        """

        super(Header, self).__init__()
        stripped = (rawLine.strip() for rawLine in header.split('\n'))
        lines = [line for line in stripped if line]
        # Pool all names and all values before pairing them, so pairing
        # does not rely on each name line having as many tokens as its
        # value line.
        names = " ".join(lines[::2]).split()
        values = " ".join(lines[1::2]).split()
        for name, value in zip(names, values):
            self[name] = value

    def data(self):
        """Return a copy of the parameters as a plain dictionary."""
        return dict(self)
177
178
class Body(list):

    """A raw sodar data sample body.

    (A list of dictionaries, one for each altitude.)
    """

    def __init__(self, body):

        """Identify discrete body parameter names and values.

        The first non-blank line contains parameter keys;
        the remaining lines contain parameter values,
        one set of parameters for a single altitude per line.
        The rows are stored in the reverse of their order in the text.
        An empty or blank body yields an empty list.
        """

        super(Body, self).__init__()
        stripped = [rawLine.strip() for rawLine in body.split('\n')]
        bodyLines = [line for line in stripped if line]
        if not bodyLines:
            # Nothing to parse (e.g. a sample with a header only); without
            # this guard the key-line lookup below raises IndexError.
            return
        bodyKeys = bodyLines[0].split()
        # zip tolerates a mismatch between the number of names and values.
        self.extend([dict(zip(bodyKeys, line.split()))
                     for line in bodyLines[1:]])
        self.reverse()

    def __getitem__(self, index):
        """Return raw altitude data by position or by altitude string.

        Any index the list rejects with TypeError is looked up as an
        altitude string.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find raw altitude data in Body.

        Returns the first row whose 'ALT' value equals index; rows without
        an 'ALT' key are skipped.  Raises IndexError if no row matches.
        """
        for altitude in self:
            try:
                if altitude['ALT'] == index:
                    return altitude
            except KeyError:
                continue
        raise IndexError('Body index, out of range')

    def data(self):
        """Create a copy of the data as a list of plain dictionaries."""
        return [altitude.copy() for altitude in self]
225
226
def _main():
    """Process as script from command line.

    Downloads one daily raw data file, parses it, and prints the 'SPEED'
    value at altitude '70' for the sample timed 2008-01-01 09:15.

    Raises IOError if the test data cannot be read.
    """
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3
    url = ('http://nemo.isis.unc.edu/'
           'data/nccoos/level0/ims/sodar/'
           '2008_01/20080101.dat')
    try:
        rawDataHandle = urlopen(url)
        try:
            rawDataString = rawDataHandle.read()
        finally:
            # Close the connection even when the read fails.
            rawDataHandle.close()
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into IOError.
        raise IOError("Failure to read raw test data")
    if not isinstance(rawDataString, str):
        # Python 3 returns bytes; RawData splits on str separators.
        rawDataString = rawDataString.decode('ascii', 'replace')
    rawDataObject = RawData(rawDataString)
    print(rawDataObject['2008-01-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()

Note: See TracBrowser for help on using the browser.

root/sodar/trunk/sodar/rawData.py

Download in other formats: