#!/usr/bin/python
"""
Classes to handle sodar data samples.

Sodar data samples are collected into daily files. Each sample consists of a
header followed by an observation for each height.

The daily file is split into a list (modeled by the class Data) of samples
(modeled by the class Sample) in chronological order. A Data object is
initialized with a string representing the daily file data:

    dataHandle = open('20070601.dat')
    dataString = dataHandle.read()
    dataObject = Data(dataString)

Each Sample object has attributes for a Header and Body object. The Samples
within a Data object may also be accessed by time using a string of the
format YYYY-MM-DD-HH-MM as an index on the Data object to return the first
matching Sample in the Data object:

    dataObject[0]                         # the first Sample object of the day
    dataObject['2007-06-01-09-15']        # the Sample object for 9:15am
    dataObject[15].header                 # the Header object of the 16th Sample
    dataObject['2007-06-01-09-15'].body   # the Body object for 9:15am

Header objects act as dictionaries. Access each sample-wide parameter of
interest using the header parameter name as a keyword on the Header object:

    dataObject[15].header['VAL2']         # the number of validations for beam 2
    dataObject['2007-06-01-09-15'].header['SPU3']  # normalized false signal
                                                   # probability on beam 3
    dataObject[0].header['SNR1']          # signal to noise on beam 1

Consult your Sodar documentation for a complete list of header parameters.

Body objects act as lists of dictionaries. The dictionaries access
altitude-specific parameters by name as keywords. The dictionaries are in
altitude-ascending order. Each dictionary may also be accessed by indexing
with an altitude string:

    dataObject[15].body[0]                # the data for the lowest altitude,
                                          # 16th sample
    dataObject['2007-06-01-09-15'].body['70']  # the data for 70 meters
    dataObject[15].body[0]['SPEED']       # wind speed at lowest altitude
    dataObject['2007-06-01-09-15'].body['70']['DIR']  # wind direction
                                                      # at 70 meters

The body attribute of a Sample object may also be indexed directly on a
Sample object for the most convenient semantics:

    dataObject[15][0]['SPEED']            # wind speed at lowest altitude,
                                          # 16th sample
    dataObject['2007-06-01-09-15']['70']['DIR']  # wind direction,
                                                 # 70 meters, 9:15am
"""

__author__ = 'Chris Calloway'
__email__ = 'cbc@unc.edu'
__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
__license__ = 'GPL2'

import re

import numpy as n


class Data(list):

    """Daily sodar file data.

    (A chronologically ordered list of samples.)

    After construction, the ``samples`` attribute holds an independent copy
    of the parsed data as a list of ``(header_dict, [altitude_dict, ...])``
    tuples, intended for numeric clean-up that must not disturb the parsed
    Sample objects.
    """

    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

        Chunks that are empty after stripping whitespace are skipped.
        """
        super(Data, self).__init__()
        chunks = (chunk.strip() for chunk in data.split('$'))
        self.extend(Sample(chunk) for chunk in chunks if chunk)
        self._normalize()

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

        Integer and slice indices behave as for a list; any index the list
        rejects with TypeError is treated as a 'YYYY-MM-DD-HH-MM' string.
        """
        try:
            return super(Data, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in Data where sample time of form YYYY-MM-DD-HH-MM.

        Returns the first Sample whose header date/time fields match.
        Raises ValueError if the string does not have exactly five
        dash-separated fields, AttributeError if the index is not a string,
        and IndexError if no Sample matches.
        """
        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError(
                'Data index by date must be "YYYY-MM-DD-HH-MM"')
        # header field name, zero-padded width, wanted value
        wanted = (('YEAR', 4, year),
                  ('MONTH', 2, month),
                  ('DAY', 2, day),
                  ('HOUR', 2, hour),
                  ('MIN', 2, minute))
        for sample in self:
            try:
                # Header values are unpadded strings; pad before comparing.
                if all(sample.header[name].rjust(width, '0') == value
                       for name, width, value in wanted):
                    return sample
            except TypeError:
                # sample.header is None for a sample that failed to parse
                continue
        raise IndexError('Data index out of range')

    def _normalize(self):
        """Clean up data for analysis."""
        self._copy()
        # TODO: the remaining steps are not implemented yet:
        # self._convert()
        # compute time interval
        # correct for missing times
        # compute minimum altitude
        # compute maximum overall altitude
        # compute number of altitudes
        # compute altitude interval
        # correct for missing altitudes
        # mark maximum altitude with good values for each sample
        # mark minimum altitude with invalid values for each sample
        # convert direction to radians
        # compute u,v,c components
        # compute colorspecs
        # compute plotting parameters

    def _copy(self):
        """Create a deep copy of all the samples in this Data instance.

        The result is stored in ``self.samples``. Every altitude dictionary
        is copied individually so later in-place conversion cannot alter the
        parsed Body objects. A sample that failed to parse (header and body
        are None) is copied as an empty dict and an empty list.
        """
        self.samples = [
            (dict(sample.header or {}),
             [dict(altitude) for altitude in (sample.body or [])])
            for sample in self
        ]

    def _convert(self):
        """Convert to numbers and correct for invalid values.

        Every value in every altitude dictionary of ``self.samples`` is
        replaced by its float value; the invalid marker and anything that
        cannot be converted become NaN.
        """
        INVALID = "-9999"
        for _header, altitudes in self.samples:
            for altitude in altitudes:
                # snapshot the items: values are reassigned while looping
                for key, value in list(altitude.items()):
                    if value == INVALID:
                        altitude[key] = n.nan
                        continue
                    try:
                        altitude[key] = float(value)
                    except (ValueError, TypeError):
                        altitude[key] = n.nan


class Sample(object):

    """A single sample from daily sodar file data.

    (A header and a body attribute.)

    Both attributes are None when the sample text does not have the
    expected layout.
    """

    # first three groups of lines are the header; rest is body
    _PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                              (?P<body>.*$)
                           ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects."""
        super(Sample, self).__init__()
        # defaults cover parsing invalid Samples
        self.header = None
        self.body = None
        match = self._PATTERN.match(sample.strip())
        if match is None:
            return
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute."""
        try:
            return self.body[index]
        except TypeError:
            # self.body is None for a sample that failed to parse
            raise IndexError('Sample index out of range')


class Header(dict):

    """A sodar data sample header.

    (A dictionary of sample-wide parameters.)
    """

    def __init__(self, header):
        """Identify discrete header parameter names and values.

        Every other line contains parameter keys; every other line
        contains parameter values. Blank lines are ignored.
        """
        super(Header, self).__init__()
        headerLines = [headerLine.strip()
                       for headerLine in header.split('\n')
                       if headerLine.strip()]
        names = " ".join(headerLines[::2]).split()
        values = " ".join(headerLines[1::2]).split()
        # TODO: fix for bad match between names and values (zip silently
        # drops the surplus when the counts differ)
        self.update(zip(names, values))


class Body(list):

    """A sodar data sample body.

    (A list of dictionaries at each altitude.)
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

        The first line contains parameter keys; the remaining lines
        contain parameter values, one set of parameters for a single
        altitude per line. The rows are stored in the reverse of file
        order. An empty body yields an empty list.
        """
        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            return
        bodyKeys = bodyLines[0].split()
        # TODO: fix for bad match between names and values
        self.extend(dict(zip(bodyKeys, bodyLine.split()))
                    for bodyLine in bodyLines[1:])
        self.reverse()

    def __getitem__(self, index):
        """Return altitude data by altitude string."""
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find altitude data in Body.

        Returns the first dictionary whose 'ALT' value equals index;
        raises IndexError when there is none.
        """
        for altitudeData in self:
            if altitudeData['ALT'] == index:
                return altitudeData
        raise IndexError('Body index, out of range')


def _main():
    """Process as script from command line.

    Downloads one day of test data, parses it and prints the wind speed at
    70 meters for 9:15am. Raises IOError when the data cannot be read.
    """
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3
    try:
        dataHandle = urlopen('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat')
        try:
            dataString = dataHandle.read()
        finally:
            dataHandle.close()
    except Exception:
        raise IOError("Failure to read test data")
    # Python 3 returns bytes from read(); Data expects text.
    if isinstance(dataString, bytes) and not isinstance(dataString, str):
        dataString = dataString.decode('latin-1')
    dataObject = Data(dataString)
    print(dataObject['2007-06-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()