NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/data.py

Revision 62 (checked in by cbc, 17 years ago)

Refactor __getitem__ in Data and Header

Line 
1 #!/usr/bin/python
2 """Classes to handle sodar data samples
3
4 Sodar data samples are collected into daily files. Each sample consists of a
5 header followed by an observation for each height.
6 """
7
8 import re
9
class Data(list):
    """Daily sodar file data

       (a collection of samples)

       Behaves as a list of Sample objects, and additionally allows lookup
       by a sample-time string of the form "YYYY-MM-DD-HH-MM".
    """

    # Header keys that make up a sample time, in index order, together with
    # the width each value is zero-padded to before comparison.
    _TIME_FIELDS = (('YEAR', 4), ('MONTH', 2), ('DAY', 2),
                    ('HOUR', 2), ('MIN', 2))

    def __init__(self, data):
        """Split the text of a daily file into samples

           data -- full text of the file; samples are terminated by '$'.
                   Chunks that are empty after stripping whitespace are
                   skipped.
        """
        super(Data, self).__init__()
        # samples in file are terminated by $
        chunks = (chunk.strip() for chunk in data.split('$'))
        self.extend(Sample(chunk) for chunk in chunks if chunk)

    def __getitem__(self, index):
        """allow sample retrieval by sample time in header

           Integer and slice indices behave as for a plain list; any index
           the list rejects with TypeError is handed to find().
        """
        try:
            return super(Data, self).__getitem__(index)
        except TypeError:
            return self.find(index)

    def find(self, index):
        """find Sample in Data

            where sample time of form YYYY-MM-DD-HH-MM

            Returns the first sample whose header time matches.  Both the
            header values and the index fields are left-padded with zeros,
            so "2007-6-1-0-5" and "2007-06-01-00-05" are equivalent.

            Raises ValueError if index does not have exactly five
            '-'-separated fields, AttributeError if index has no split()
            method, and IndexError if no sample matches.
        """
        try:
            parts = index.split('-')
        except AttributeError:
            raise AttributeError(
                'Data index by date must be "YYYY-MM-DD-HH-MM"')
        if len(parts) != len(self._TIME_FIELDS):
            raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        wanted = [part.rjust(width, '0')
                  for part, (_, width) in zip(parts, self._TIME_FIELDS)]
        for sample in self:
            header = sample.header
            # all() over a generator stops at the first mismatching field,
            # so later header keys are only read when earlier ones match.
            if all(header[name].rjust(width, '0') == want
                   for (name, width), want in zip(self._TIME_FIELDS, wanted)):
                return sample
        raise IndexError('Data index out of range')
50
class Sample(object):
    """A single sample from daily sodar file data

       (a header and a body)

       Attributes:
         header -- Header built from the leading part of the sample text
         body   -- Body built from the remainder of the sample text
    """

    # first three groups of lines are the header; rest is body
    # (each header group ends at a blank line).  Compiled once for the
    # class rather than once per sample.
    _SAMPLE_PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                     (?P<body>.*$)
                                  ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Split sample text into header and body

           sample -- text of one sample; surrounding whitespace is ignored.

           Raises ValueError if the text does not contain three
           blank-line-terminated groups of lines to use as the header.
        """
        super(Sample, self).__init__()
        match = self._SAMPLE_PATTERN.match(sample.strip())
        if match is None:
            raise ValueError('Sample must start with three header groups, '
                             'each terminated by a blank line')
        # NOTE(review): the original carried a "fix for missing keys"
        # remark here without further detail -- intent not clear from code.
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index into the sample body (delegates to self.body)
        """
        return self.body[index]
69
class Header(dict):
    """A sodar data sample header

      (a collection of sample-wide parameters)

      Maps parameter name -> parameter value; values are kept as strings.
    """
    def __init__(self, header):
        """Parse header text into name/value pairs

           header -- header text made of alternating lines: a line of
                     whitespace-separated parameter names followed by a
                     line of the corresponding values.  Blank and
                     whitespace-only lines are ignored.

           Raises ValueError if a name line has no value line after it or
           if a value line has fewer values than its name line has names.
        """
        super(Header, self).__init__()
        stripped = (line.strip() for line in header.split('\n'))
        lines = [line for line in stripped if line]
        if len(lines) % 2:
            raise ValueError('Header parameter names without values: %r'
                             % lines[-1])
        # every other line contains parameter names;
        # every other line contains parameter values
        for namesLine, valuesLine in zip(lines[::2], lines[1::2]):
            # parameter names must be valid Python identifiers
            # for named groups matching
            names = [name.strip('#') for name in namesLine.split()]
            # One named group per parameter: all but the last capture a
            # single token, the last captures the rest of the line.
            pattern = re.compile(r'(?P<' +
                                 r'>\S+)\s+(?P<'.join(names) +
                                 r'>.*$)')
            match = pattern.match(valuesLine)
            if match is None:
                raise ValueError('Header values %r do not match names %r'
                                 % (valuesLine, namesLine))
            self.update(match.groupdict())
96
class Body(list):
    """A sodar data sample body

       (a collection of collections at each altitude)

       Each element is a dict mapping column name -> value (as strings),
       one per data row.  Rows are stored in the reverse of file order.
    """
    def __init__(self, body):
        """Parse body text into one dict per data row

           body -- body text: the first non-blank line holds the
                   whitespace-separated column names, each following
                   non-blank line holds one row of values.  An empty body
                   yields an empty Body.

           Raises ValueError if a row has fewer values than there are
           column names.
        """
        super(Body, self).__init__()
        stripped = (line.strip() for line in body.split('\n'))
        lines = [line for line in stripped if line]
        if not lines:
            return
        names = lines[0].split()
        # One named group per column: all but the last capture a single
        # token, the last captures the rest of the line.
        pattern = re.compile(r'(?P<' +
                             r'>\S+)\s+(?P<'.join(names) +
                             r'>.*$)')
        for line in lines[1:]:
            match = pattern.match(line)
            if match is None:
                raise ValueError('Body row %r does not match columns %r'
                                 % (line, lines[0]))
            self.append(match.groupdict())
        # NOTE(review): presumably the file lists the highest altitude
        # first and this puts the lowest at index 0 -- confirm.
        self.reverse()

    def __getitem__(self, index):
        """allow retrieval by altitude string

           Integer and slice indices behave as for a plain list; any index
           the list rejects with TypeError is handed to find().
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self.find(index)

    def find(self, index):
        """find altitude data in Body

           Returns the first row whose 'ALT' value equals index.
           Raises IndexError if no row matches.
        """
        for altitudeData in self:
            if altitudeData['ALT'] == index:
                return altitudeData
        raise IndexError('Body index, out of range')
131
def __main():
    """Process as script from command line

       Downloads one daily sodar file, parses it with Data and prints the
       header and body of every sample.  If the download fails a message
       is printed and nothing else is done.
    """
    # urllib2 only exists on Python 2; fall back to its Python 3 home.
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen
    url = 'http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat'
    try:
        response = urlopen(url)
        try:
            raw = response.read()
        finally:
            response.close()
    except Exception:
        # Top-level script boundary: report and stop instead of carrying
        # on with no data to parse.
        print("Failure to read test data")
        return
    if not isinstance(raw, str):
        # Python 3 returns bytes.  NOTE(review): the file encoding is not
        # visible here; latin-1 is used because it cannot fail -- confirm.
        raw = raw.decode('latin-1')
    data = Data(raw)
    # Data is itself the list of samples; render each one as text so the
    # separator can be joined between them.
    print('   --- Sample ---\n'.join('%s\n%s\n' % (sample.header, sample.body)
                                     for sample in data))

if __name__ == "__main__":
    __main()
Note: See TracBrowser for help on using the browser.