Index: sodar/trunk/sodar/data.py =================================================================== --- sodar/trunk/sodar/data.py (revision 63) +++ sodar/trunk/sodar/data.py (revision 64) @@ -1,59 +1,121 @@ #!/usr/bin/python -"""Classes to handle sodar data samples +""" +Classes to handle sodar data samples. Sodar data samples are collected into daily files. Each sample consists of a header followed by an observation for each height. + +The daily file is split into a list (modeled by the class Data) of samples +(modeled by the class Sample) in chronological order. A Data object is +initialized with a string representing the daily file data: + + dataHandle = open('20070601.dat') + dataString = dataHandle.read() + dataObject = Data(dataString) + +Each Sample object has attributes for a Header and Body object. The Samples +within a Data object may also be accessed by time using a string of the format +YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching +Sample in the Data object: + + dataObject[0] # the first Sample object of the day + dataObject['2007-06-01-09-15'] # the Sample object for 9:15am + dataObject[15].header # the Header object of the 16th Sample + dataObject['2007-06-01-09-15'].body # the Body object for 9:15am + +Header objects act as dictionaries. Access each sample-wide parameter of +interest using the header parameter name as a keyword on the Header object: + + dataObject[15].header['VAL2'] # the number of validations for beam 2 + dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal + # probability on beam 3 + dataObject[0].header['SNR1'] # signal to noise on beam 1 + +Consult your Sodar documentation for a complete list of header parameters. + +Body objects act as lists of dictionaries. The dictionaries access +altitude-specific parameters by name as keywords. The dictionaries are in +altitude-ascending order. 
Each dictionary may also be accessed by indexing with +an altitude string: + + dataObject[15].body[0] # the data for the lowest altitude, 16th sample + dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters + dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude + dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction + # at 70 meters + +The body attribute of a Sample object may also be indexed directly on a Sample +object for the most convenient semantics: + + dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample + dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction, + # 70 meters, 9:15am """ +__author__ = 'Chris Calloway' +__email__ = 'cbc@unc.edu' +__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science' +__license__ = 'GPL2' + import re + class Data(list): - """Daily sodar file data - - (a collection of samples) - """ - def __init__(self,data): - super(Data,self).__init__() - # samples in file are terminated by $ + + """Daily sodar file data. + + (A chronologically ordered list of samples.) + """ + + def __init__(self, data): + """Divide daily string into list of Samples separated by $.""" + super(Data, self).__init__() self.extend([Sample(sample) for sample in [sample.strip() for sample in data.split('$')] - if sample]) - - def __getitem__(self,index): - """allow sample retrieval by sample time in header - """ + if sample.strip()]) + + def __getitem__(self, index): + """Allow sample retrieval by Sample time in header.""" try: return super(Data,self).__getitem__(index) except TypeError: - return self.find(index) - - def find(self,index): - """find Sample in Data - - where sample time of form YYYY-MM-DD-HH-MM + return self._find(index) + + def _find(self, index): + """Find Sample in Data + + where sample time of form YYYY-MM-DD-HH-MM. 
""" - try: - year,month,day,hour,min = index.split('-') + + try: + year,month,day,hour,minute = index.split('-') except ValueError: - raise ValueError,'Data index by date must be "YYYY-MM-DD-HH-MM"' + raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"') except AttributeError: - raise AttributeError,'Data index by date must be "YYYY-MM-DD-HH-MM"' + raise AttributeError('Data index by date must be "YYYY-MM-DD-HH-MM"') for sample in self: - if sample.header['YEAR'].rjust(4,'0') != year: continue - if sample.header['MONTH'].rjust(2,'0') != month: continue - if sample.header['DAY'].rjust(2,'0') != day: continue - if sample.header['HOUR'].rjust(2,'0') != hour: continue - if sample.header['MIN'].rjust(2,'0') != min: continue - return sample - raise IndexError,'Data index out of range' + try: + if sample.header['YEAR'].rjust(4,'0') != year: continue + if sample.header['MONTH'].rjust(2,'0') != month: continue + if sample.header['DAY'].rjust(2,'0') != day: continue + if sample.header['HOUR'].rjust(2,'0') != hour: continue + if sample.header['MIN'].rjust(2,'0') != minute: continue + return sample + except TypeError: # sample.header may not exist + continue + raise IndexError('Data index out of range') + class Sample(object): - """A single sample from daily sodar file data - - (a header and a body) - """ + + """A single sample from daily sodar file data. + + (A header and a body attribute.) + """ + def __init__(self,sample): - super(Sample,self).__init__() + """Separate Sample into Header and Body objects.""" + super(Sample, self).__init__() # first three groups of lines are the header; rest is body samplePattern = re.compile(r'''(?P
<header>.*?\n\n.*?\n\n.*?\n\n) @@ -61,65 +123,96 @@ ''',re.DOTALL | re.VERBOSE) self.__dict__.update(samplePattern.match(sample.strip()).groupdict()) - # fix for missing keys - self.header = Header(self.header) - self.body = Body(self.body) - - def __getitem__(self,index): - return self.body[index] + # self.__dict__.get covers parsing invalid Samples + self.header = self.__dict__.get('header', None) + if self.header is not None: + self.header = Header(self.header) + self.body = self.__dict__.get('body', None) + if self.body is not None: + self.body = Body(self.body) + + def __getitem__(self, index): + """Index Sample by body attribute.""" + try: + return self.body[index] + except TypeError: # sample.body may not exist + raise IndexError('Sample index out of range') + class Header(dict): - """A sodar data sample header - - (a collection of sample-wide parameters) - """ - def __init__(self,header): - super(Header,self).__init__() - headerLines = header.split('\n') - # every other line contains parameter keys; - # every other line contains parameter values + + """A sodar data sample header. + + (A dictionary of sample-wide parameters.) + """ + + def __init__(self, header): + + """Identify discrete header parameter names and values. + + Every other line contains parameter keys; + every other line contains parameter values. + """ + + super(Header, self).__init__() + headerLines = [headerLine.strip() + for headerLine in header.split('\n') + if headerLine.strip()] + #fix for bad match between names and values self.update(dict(zip(" ".join(headerLines[::2]).split(), " ".join(headerLines[1::2]).split()))) + class Body(list): - """A sodar data sample body - - (a collection of collections at each altitude) - """ - def __init__(self,body): - super(Body,self).__init__() - bodyLines = body.split('\n') + + """A sodar data sample body. + + (A list of dictionaries at each altitude.) + """ + + def __init__(self, body): + + """Identify discrete body parameter names and values. 
+ + The first line contains parameter keys; + the remaining lines contain parameter values, + one set of parameters for a single altitude per line. + """ + + super(Body, self).__init__() + bodyLines = [bodyLine.strip() + for bodyLine in body.split('\n') + if bodyLine.strip()] bodyKeys = bodyLines[0].split() + #fix for bad match between names and values self.extend([dict(zip(bodyKeys, bodyLine.split())) for bodyLine in bodyLines[1:]]) self.reverse() - def __getitem__(self,index): - """allow retrieval by altitude string - """ - try: - return super(Body,self).__getitem__(index) + def __getitem__(self, index): + """Return altitude data by altitude string.""" + try: + return super(Body, self).__getitem__(index) except TypeError: - return self.find(index) - - def find(self,index): - """find altitude data in Body - """ + return self._find(index) + + def _find(self, index): + """Find altitude data in Body.""" for altitudeData in self: if altitudeData['ALT'] != index: continue return altitudeData - raise IndexError,'Body index, out of range' - -def __main(): - """Process as script from command line - """ + raise IndexError('Body index, out of range') + + +def _main(): + """Process as script from command line.""" import urllib2 try: - data = urllib2.urlopen('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat') - data = data.read() + dataHandle = urllib2.urlopen('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat') + dataString = dataHandle.read() except: - print "Failure to read test data" - data = Data(data) - print data['2007-06-01-09-15']['70']['SPEED'] + raise IOError("Failure to read test data") + dataObject = Data(dataString) + print dataObject['2007-06-01-09-15']['70']['SPEED'] if __name__ == "__main__": - __main() + _main()