NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/data.py

Revision 64 (checked in by cbc, 17 years ago)

Miscellaneous clean up and documentation.

Line 
1 #!/usr/bin/python
2 """
3 Classes to handle sodar data samples.
4
5 Sodar data samples are collected into daily files. Each sample consists of a
6 header followed by an observation for each height.
7
8 The daily file is split into a list (modeled by the class Data) of samples
9 (modeled by the class Sample) in chronological order. A Data object is
10 initialized with a string representing the daily file data:
11
12      dataHandle = open('20070601.dat')
13      dataString = dataHandle.read()
14      dataObject = Data(dataString)
15
16 Each Sample object has attributes for a Header and Body object. The Samples
17 within a Data object may also be accessed by time using a string of the format
18 YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching
19 Sample in the Data object:
20
21     dataObject[0] # the first Sample object of the day
22     dataObject['2007-06-01-09-15'] # the Sample object for 9:15am
23     dataObject[15].header # the Header object of the 16th Sample
24     dataObject['2007-06-01-09-15'].body # the Body object for 9:15am
25
26 Header objects act as dictionaries. Access each sample-wide parameter of
27 interest using the header parameter name as a keyword on the Header object:
28
29     dataObject[15].header['VAL2'] # the number of validations for beam 2
30     dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
31                                                   # probability on beam 3
32     dataObject[0].header['SNR1'] # signal to noise on beam 1
33
34 Consult your Sodar documentation for a complete list of header parameters.
35
36 Body objects act as lists of dictionaries. The dictionaries access
37 altitude-specific parameters by name as keywords. The dictionaries are in
38 altitude-ascending order. Each dictionary may also be accessed by indexing with
39 an altitude string:
40
41     dataObject[15].body[0] # the data for the lowest altitude, 16th sample
42     dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
43     dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
44     dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
45                                                      # at 70 meters
46
47 The body attribute of a Sample object may also be indexed directly on a Sample
48 object for the most convenient semantics:
49
50     dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
51     dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
52                                                 # 70 meters, 9:15am
53 """
54
55 __author__ = 'Chris Calloway'
56 __email__ = 'cbc@unc.edu'
57 __copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
58 __license__ = 'GPL2'
59
60 import re
61
62
class Data(list):

    """Daily sodar file data.

       (A chronologically ordered list of samples.)

       Samples may be retrieved by integer position or slice, like any
       list, or by a time string of the form YYYY-MM-DD-HH-MM, which
       returns the first Sample whose header carries that time.
    """

    # Header parameter compared against each field of a YYYY-MM-DD-HH-MM
    # index, paired with the width that field is zero-padded to.
    _TIME_FIELDS = (('YEAR', 4), ('MONTH', 2), ('DAY', 2),
                    ('HOUR', 2), ('MIN', 2))

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

           data -- the complete text of one daily file. Chunks that are
                   empty after stripping whitespace are discarded.
        """
        super(Data, self).__init__()
        chunks = (chunk.strip() for chunk in data.split('$'))
        self.extend([Sample(chunk) for chunk in chunks if chunk])

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

           Integer and slice indices behave as for a list. Any index the
           list rejects with TypeError is treated as a time string and
           passed to _find.
        """
        try:
            return super(Data, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in Data

           where sample time of form YYYY-MM-DD-HH-MM.

           Both the index fields and the header values are zero-padded
           before comparison, so '2007-6-1-9-15' and '2007-06-01-09-15'
           select the same Sample.

           Raises ValueError if index does not have exactly five
           '-'-separated fields, AttributeError if index has no split
           method, and IndexError if no Sample matches.
        """
        message = 'Data index by date must be "YYYY-MM-DD-HH-MM"'
        try:
            fields = index.split('-')
        except AttributeError:
            raise AttributeError(message)
        if len(fields) != len(self._TIME_FIELDS):
            raise ValueError(message)
        wanted = [field.rjust(width, '0')
                  for field, (_, width) in zip(fields, self._TIME_FIELDS)]
        for sample in self:
            try:
                stamp = [sample.header[name].rjust(width, '0')
                         for name, width in self._TIME_FIELDS]
            except (TypeError, KeyError):
                # TypeError: sample.header is None (unparsable sample).
                # KeyError: the header lacks one of the time parameters.
                # Either way the sample cannot match; keep searching.
                continue
            if stamp == wanted:
                return sample
        raise IndexError('Data index out of range')
108
109
class Sample(object):

    """A single sample from daily sodar file data.

       (A header and a body attribute.)

       header is a Header object and body is a Body object. Both are
       None when the sample text does not have the expected layout.
    """

    # First three blank-line-terminated groups of lines are the header;
    # everything after them is the body. Compiled once for all Samples.
    _PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                              (?P<body>.*$)
                           ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

           sample -- the text of one sample; surrounding whitespace is
                     ignored.
        """
        super(Sample, self).__init__()
        match = self._PATTERN.match(sample.strip())
        if match is None:
            # Invalid sample: match() returned None, so there is nothing
            # to parse. Record the absence rather than failing.
            self.header = None
            self.body = None
            return
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

           Raises IndexError when the Sample has no body.
        """
        try:
            return self.body[index]
        except TypeError:   # self.body is None for an invalid sample
            raise IndexError('Sample index out of range')
139
140
class Header(dict):

    """A sodar data sample header.

       (A dictionary of sample-wide parameters.)
    """

    def __init__(self, header):

        """Identify discrete header parameter names and values.

           After blank lines are dropped, the lines alternate:
           a line of parameter names, then a line of their values.
           Names and values are paired in order across all lines.
        """

        super(Header, self).__init__()
        stripped = (rawLine.strip() for rawLine in header.split('\n'))
        lines = [line for line in stripped if line]
        names = []
        values = []
        for position, line in enumerate(lines):
            # even positions hold names, odd positions hold values
            target = values if position % 2 else names
            target.extend(line.split())
        # zip stops at the shorter list, which tolerates a mismatch
        # between the number of names and the number of values
        for name, value in zip(names, values):
            self[name] = value
163        
164
class Body(list):

    """A sodar data sample body.

       (A list of dictionaries at each altitude.)

       Rows may be retrieved by integer position or slice, like any
       list, or by an altitude string matched against each row's 'ALT'
       value.
    """

    def __init__(self, body):

        """Identify discrete body parameter names and values.

           The first line contains parameter keys;
           the remaining lines contain parameter values,
           one set of parameters for a single altitude per line.

           The rows are stored in the reverse of their order in the
           text. A body with no non-blank lines yields an empty list.
        """

        super(Body, self).__init__()
        rows = [line.split() for line in body.split('\n') if line.strip()]
        if not rows:
            # nothing but whitespace: there is no key line to read
            return
        keys = rows[0]
        # zip stops at the shorter sequence, which tolerates a mismatch
        # between the number of names and the number of values
        self.extend([dict(zip(keys, row)) for row in rows[1:]])
        self.reverse()

    def __getitem__(self, index):
        """Return altitude data by altitude string.

           Integer and slice indices behave as for a list. Any index the
           list rejects with TypeError is looked up as an altitude.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find altitude data in Body.

           Returns the first row whose 'ALT' value equals index. Rows
           without an 'ALT' value are skipped. Raises IndexError when
           no row matches.
        """
        for altitudeData in self:
            if 'ALT' in altitudeData and altitudeData['ALT'] == index:
                return altitudeData
        raise IndexError('Body index out of range')
204
205
def _main():
    """Process as script from command line.

       Downloads one daily sodar file, parses it into a Data object and
       prints the SPEED value at 70 meters for the 9:15am sample.

       Raises IOError if the test data cannot be read.
    """
    # Imported locally so the module itself has no network dependency;
    # the fallback lets the script run on both Python 2 and Python 3.
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen
    url = ('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/'
           'store/2007-06/20070601.dat')
    try:
        dataHandle = urlopen(url)
        try:
            dataString = dataHandle.read()
        finally:
            # release the connection even if the read fails
            dataHandle.close()
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not converted into an IOError
        raise IOError("Failure to read test data")
    if not isinstance(dataString, str):
        # read() returned bytes rather than str; Data splits on text
        dataString = dataString.decode('ascii', 'replace')
    dataObject = Data(dataString)
    print(dataObject['2007-06-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.