Revision Log

data.py

Revision 65 (checked in by cbc, 17 years ago)	Slight refactor in Data class to use getattr.

Line
1	#!/usr/bin/python
2	"""
3	Classes to handle sodar data samples.
4
5	Sodar data samples are collected into daily files. Each sample consists of a
6	header followed by an observation for each height.
7
8	The daily file is split into a list (modeled by the class Data) of samples
9	(modeled by the class Sample) in chronological order. A Data object is
10	initialized with a string representing the daily file data:
11
12	dataHandle = open('20070601.dat')
13	dataString = dataHandle.read()
14	dataObject = Data(dataString)
15
16	Each Sample object has attributes for a Header and Body object. The Samples
17	within a Data object may also be accessed by time using a string of the format
18	YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching
19	Sample in the Data object:
20
21	dataObject[0] # the first Sample object of the day
22	dataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23	dataObject[15].header # the Header object of the 16th Sample
24	dataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26	Header objects act as dictionaries. Access each sample-wide parameter of
27	interest using the header parameter name as a keyword on the Header object:
28
29	dataObject[15].header['VAL2'] # the number of validations for beam 2
30	dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31	# probability on beam 3
32	dataObject[0].header['SNR1'] # signal to noise on beam 1
33
34	Consult your Sodar documentation for a complete list of header parameters.
35
36	Body objects act as lists of dictionaries. The dictionaries access
37	altitude-specific parameters by name as keywords. The dictionaries are in
38	altitude-ascending order. Each dictionary may also be accessed by indexing with
39	an altitude string:
40
41	dataObject[15].body[0] # the data for the lowest altitude, 16th sample
42	dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
43	dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
44	dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
45	# at 70 meters
46
47	The body attribute of a Sample object may also be indexed directly on a Sample
48	object for the most convenient semantics:
49
50	dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
51	dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
52	# 70 meters, 9:15am
53	"""
54
55	__author__ = 'Chris Calloway'
56	__email__ = 'cbc@unc.edu'
57	__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
58	__license__ = 'GPL2'
59
60	import re
61
62
class Data(list):

    """Daily sodar file data.

    (A chronologically ordered list of samples.)

    Besides normal list indexing, a Data object may be indexed with a
    time string of the form YYYY-MM-DD-HH-MM, which returns the first
    Sample whose header carries that time stamp.
    """

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

        data -- the complete contents of a daily file as one string.
        Chunks that are empty after stripping whitespace are discarded.
        """
        super(Data, self).__init__()
        # Strip each chunk only once, then drop the empty ones.
        chunks = (chunk.strip() for chunk in data.split('$'))
        self.extend(Sample(chunk) for chunk in chunks if chunk)

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

        Anything the underlying list accepts is handled by the list;
        an index the list rejects with TypeError (such as a string) is
        treated as a time stamp and looked up with _find.
        """
        try:
            return super(Data, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in Data

        where sample time of form YYYY-MM-DD-HH-MM.

        Raises ValueError when index does not split into exactly five
        '-' separated fields, AttributeError when index has no split
        method, and IndexError when no Sample matches.
        """
        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        # (header key, zero-padded width, requested value)
        wanted = (('YEAR', 4, year),
                  ('MONTH', 2, month),
                  ('DAY', 2, day),
                  ('HOUR', 2, hour),
                  ('MIN', 2, minute))
        for sample in self:
            try:
                header = sample.header
                if all(header[key].rjust(width, '0') == value
                       for key, width, value in wanted):
                    return sample
            except (TypeError, KeyError):
                # TypeError: sample.header is None (unparsable sample).
                # KeyError: header lacks one of the date fields.
                # Either way this sample cannot match; keep looking.
                continue
        raise IndexError('Data index out of range')
108
109
class Sample(object):

    """A single sample from daily sodar file data.

    (A header and a body attribute.)

    header is a Header object and body is a Body object. Both are None
    when the sample text does not have the expected layout.
    """

    # The first three blank-line-terminated groups of lines are the
    # header; the rest is the body. Compiled once for all instances.
    _SAMPLE_PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                     (?P<body>.*$)
                                  ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

        sample -- the text of one sample (surrounding whitespace is
        stripped before parsing).
        """
        super(Sample, self).__init__()
        match = self._SAMPLE_PATTERN.match(sample.strip())
        if match is None:
            # Invalid sample: keep the attributes present but empty so
            # callers can test for None instead of catching a crash.
            self.header = None
            self.body = None
            return
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

        Raises IndexError when the sample has no body or the body has
        no entry for index.
        """
        if self.body is None:  # sample could not be parsed
            raise IndexError('Sample index out of range')
        return self.body[index]
139
140
class Header(dict):

    """A sodar data sample header.

    (A dictionary of sample-wide parameters.)
    """

    def __init__(self, header):
        """Identify discrete header parameter names and values.

        Every other line contains parameter keys;
        every other line contains parameter values.

        header -- the header text of one sample. Blank lines are
        ignored. Keys and values are kept as strings.
        """
        super(Header, self).__init__()
        headerLines = [headerLine.strip()
                       for headerLine in header.split('\n')
                       if headerLine.strip()]
        # Join all key lines and all value lines before splitting so
        # names and values are paired by overall position rather than
        # line by line (fix for bad match between names and values).
        names = " ".join(headerLines[::2]).split()
        values = " ".join(headerLines[1::2]).split()
        self.update(zip(names, values))
163
164
class Body(list):

    """A sodar data sample body.

    (A list of dictionaries at each altitude.)

    Besides normal list indexing, a Body may be indexed with an
    altitude string, which returns the first dictionary whose 'ALT'
    value equals that string.
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

        The first line contains parameter keys;
        the remaining lines contain parameter values,
        one set of parameters for a single altitude per line.

        The rows are stored in the reverse of their order in the text.
        A body with no non-blank lines produces an empty list.
        """
        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            # Nothing to parse; stay an empty list rather than fail
            # on the missing key line.
            return
        bodyKeys = bodyLines[0].split()
        # zip stops at the shorter sequence, so a short row simply
        # lacks its trailing parameters
        # (fix for bad match between names and values).
        self.extend(dict(zip(bodyKeys, bodyLine.split()))
                    for bodyLine in bodyLines[1:])
        self.reverse()

    def __getitem__(self, index):
        """Return altitude data by altitude string.

        Anything the underlying list accepts is handled by the list;
        an index the list rejects with TypeError is looked up by
        altitude with _find.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find altitude data in Body.

        Raises IndexError when no row has 'ALT' equal to index.
        """
        for altitudeData in self:
            # .get: a row without an 'ALT' entry can never match.
            if altitudeData.get('ALT') == index:
                return altitudeData
        raise IndexError('Body index, out of range')
204
205
def _main():
    """Process as script from command line.

    Downloads one daily test file, parses it into a Data object and
    prints the 'SPEED' value at altitude '70' for the 09:15 sample.

    Raises IOError when the test data cannot be downloaded or read.
    """
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3
    try:
        dataHandle = urlopen('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat')
        try:
            dataString = dataHandle.read()
        finally:
            # Release the connection even when the read fails.
            dataHandle.close()
    except Exception:
        # Report any download problem as one IOError, but let
        # KeyboardInterrupt and SystemExit pass through untouched.
        raise IOError("Failure to read test data")
    if not isinstance(dataString, str):
        # Python 3 returns bytes; the parser works on text.
        dataString = dataString.decode('ascii', 'replace')
    dataObject = Data(dataString)
    print(dataObject['2007-06-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()

Note: See TracBrowser for help on using the browser.

root/sodar/trunk/sodar/data.py

Download in other formats: