NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/data.py

Revision 60 (checked in by cbc, 17 years ago)

Tested daily progress on #6 (header class done)

Line 
1 #!/usr/bin/python
2 """Classes to handle sodar data samples
3
4 Sodar data samples are collected into daily files. Each sample consists of a
5 header followed by an observation for each height.
6 """
7
8 import re
9
class Data(object):
    """One day's sodar file, parsed into its individual samples"""

    def __init__(self, data):
        """Split the raw file text on '$' and wrap each non-empty piece.

        data -- full text of a daily sodar file

        The resulting Sample objects are stored, in file order, in
        self.samples.
        """
        object.__init__(self)
        parsed = []
        # every sample in the file ends with a '$' terminator
        for chunk in data.split('$'):
            chunk = chunk.strip()
            # pieces that are empty once trimmed carry no sample
            if not chunk:
                continue
            parsed.append(Sample(chunk))
        self.samples = parsed
16
class Sample(object):
    """A single sample from daily sodar file data (a header and a body)

    Attributes set by the constructor:
    header -- Header instance built from the first three groups of lines
    body   -- remaining text of the sample, as a string
    """

    # First three groups of lines (each ended by a blank line) are the
    # header; the rest is the body. Compiled once for all instances.
    _samplePattern = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                    (?P<body>.*$)
                                 ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Split sample text into header and body.

        sample -- text of one sample (surrounding whitespace is ignored)

        Raises ValueError when the text does not contain three
        blank-line-terminated header groups.
        """
        object.__init__(self)
        match = self._samplePattern.match(sample.strip())
        if match is None:
            # report malformed input explicitly instead of failing on None
            raise ValueError('sample does not contain three header groups '
                             'separated by blank lines')
        self.body = match.group('body')
        # TODO (note carried over from the original code): fix for missing keys
        self.header = Header(match.group('header'))
28
class Header(object):
    """A sodar data sample header (a collection of sample-wide parameters)

    The header text alternates between a line of whitespace-separated
    parameter names and a line holding the corresponding values. Every
    parameter becomes an instance attribute whose value is a string.
    """

    def __init__(self, header):
        """Parse header text into instance attributes.

        header -- header text; blank and whitespace-only lines are ignored

        Leading and trailing '#' characters are removed from parameter
        names. When a values line holds more tokens than there are names,
        the last parameter receives the whole remainder of the line.

        Raises ValueError when a names line has no values line after it or
        when a values line holds fewer values than there are names.
        """
        object.__init__(self)
        # strip first, then filter, so whitespace-only lines cannot shift
        # the names/values pairing
        headerLines = [headerLine.strip() for headerLine in header.split('\n')]
        headerLines = [headerLine for headerLine in headerLines if headerLine]
        if len(headerLines) % 2:
            raise ValueError('header has a parameter names line without a '
                             'matching values line')
        # every other line contains parameter names; every other line
        # contains parameter values -- pair them by position, not by content
        for parameterNames, parameterValues in zip(headerLines[::2],
                                                   headerLines[1::2]):
            names = [name.strip('#') for name in parameterNames.split()]
            # limit the split so the final value keeps embedded whitespace
            values = parameterValues.split(None, len(names) - 1)
            if len(values) != len(names):
                raise ValueError('expected %d values for parameters %r but '
                                 'got %r' % (len(names), names,
                                             parameterValues))
            self.__dict__.update(zip(names, values))
41
def __main():
    """Process as script from command line

    Download one daily sodar file from a fixed test URL, parse it and print
    every sample (header parameters followed by the body). When the
    download fails, report the failure and return without parsing.
    """
    import urllib2
    url = 'http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat'
    try:
        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            # release the connection whether or not the read succeeded
            response.close()
    except IOError:
        # urllib2.URLError (and its subclass HTTPError) derive from IOError
        print("Failure to read test data")
        # nothing was downloaded, so there is nothing to parse
        return
    data = Data(data)
    # samples are objects, not strings: render each one before joining
    rendered = []
    for sample in data.samples:
        parameters = sorted(vars(sample.header).items())
        headerText = '\n'.join(['%s = %s' % pair for pair in parameters])
        rendered.append('%s\n\n%s\n' % (headerText, sample.body))
    print('   --- Sample ---\n'.join(rendered))
52
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __main()
Note: See TracBrowser for help on using the browser.