#!/usr/bin/python
"""
Module to handle raw sodar data samples.

Raw sodar data samples are collected into daily files. Each sample consists
of a header followed by an observation for each height.

The daily file is split into a list (modeled by the class RawData) of
samples (modeled by the class Sample) in chronological order. A RawData
object is initialized with a string representing the daily file data:

    rawDataHandle = open('20070601.dat')
    rawDataString = rawDataHandle.read()
    rawDataObject = RawData(rawDataString)

Each Sample object has attributes for a Header and Body object. The Samples
within a RawData object may also be accessed by time using a string of the
format YYYY-MM-DD-HH-MM as an index on the RawData object to return the
first matching Sample in the RawData object:

    rawDataObject[0]  # the first Sample object of the day
    rawDataObject['2007-06-01-09-15']  # the Sample object for 9:15am
    rawDataObject[15].header  # the Header object of the 16th Sample
    rawDataObject['2007-06-01-09-15'].body  # the Body object for 9:15am

Header objects act as dictionaries. Access each sample-wide parameter of
interest using the header parameter name as a keyword on the Header object:

    rawDataObject[15].header['VAL2']  # the number of validations for beam 2
    rawDataObject['2007-06-01-09-15'].header['SPU3']  # normalized false
                                                      # signal probability
                                                      # on beam 3
    rawDataObject[0].header['SNR1']  # signal to noise on beam 1

Consult your Sodar documentation for a complete list of header parameters.
Different sodar models have different sets of header parameters. This model
seeks to be model agnostic, and parses the header parameter names from the
raw data itself.

Body objects act as lists of dictionaries. The dictionaries access
altitude-specific parameters by name as keywords. The dictionaries are in
altitude-ascending order. Each dictionary may also be accessed by indexing
with an altitude string:

    rawDataObject[15].body[0]  # the data for the lowest altitude,
                               # 16th sample
    rawDataObject['2007-06-01-09-15'].body['70']  # the data for 70 meters
    rawDataObject[15].body[0]['SPEED']  # wind speed at lowest altitude
    rawDataObject['2007-06-01-09-15'].body['70']['DIR']  # wind direction
                                                         # at 70 meters

The body attribute of a Sample object may also be indexed directly on a
Sample object for the most convenient semantics:

    rawDataObject[15][0]['SPEED']  # wind speed at lowest altitude,
                                   # 16th sample
    rawDataObject['2007-06-01-09-15']['70']['DIR']  # wind direction,
                                                    # 70 meters, 9:15am
"""

__author__ = 'Chris Calloway'
__email__ = 'cbc@unc.edu'
__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
__license__ = 'GPL2'

import re


class RawData(list):
    """Class to handle raw daily sodar file data.

    A chronologically ordered list of samples.
    """

    # Header keys holding the sample time, with the zero-padded width each
    # one has in a 'YYYY-MM-DD-HH-MM' index string.
    _DATE_FIELDS = (('YEAR', 4),
                    ('MONTH', 2),
                    ('DAY', 2),
                    ('HOUR', 2),
                    ('MIN', 2))

    def __init__(self, data):
        """Parse raw daily sodar file data.

        data is the text of a daily file. It is divided on '$' into
        chunks; every chunk that is not blank becomes one Sample, in
        file order.
        """
        super(RawData, self).__init__()
        for chunk in data.split('$'):
            chunk = chunk.strip()
            if chunk:
                self.append(Sample(chunk))

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

        Ordinary list indices are tried first; anything the list rejects
        with a TypeError (such as a string) is treated as a
        'YYYY-MM-DD-HH-MM' time and looked up with _find.
        """
        try:
            return super(RawData, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in RawData.

        Where sample time of form YYYY-MM-DD-HH-MM.

        Returns the first Sample whose header time fields, zero-padded,
        equal the corresponding parts of index.

        Raises ValueError if index does not split into five '-' separated
        parts, AttributeError if index has no split method, and
        IndexError if no Sample matches.
        """
        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('RawData index by date must be '
                             '"YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('RawData index by date must be '
                                 '"YYYY-MM-DD-HH-MM"')
        wanted = (year, month, day, hour, minute)
        for sample in self:
            try:
                stamp = tuple([sample.header[key].rjust(width, '0')
                               for key, width in self._DATE_FIELDS])
            except (TypeError, KeyError):
                # TypeError: sample.header is None (unparseable sample).
                # KeyError: the header carries no such time field.
                # Neither kind of sample can match, so keep searching.
                continue
            if stamp == wanted:
                return sample
        raise IndexError('RawData index out of range')


class Sample(object):
    """A single sample from raw daily sodar file data.

    (A header and a body attribute.)

    Both attributes are None when the sample text cannot be separated
    into a header and a body.
    """

    # First three groups of lines (each ended by a blank line) are the
    # header; the rest is the body. Compiled once for all Samples.
    _SAMPLE_PATTERN = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                     (?P<body>.*$)
                                  ''', re.DOTALL | re.VERBOSE)

    def __init__(self, sample):
        """Separate Sample into Header and Body objects.

        sample is the text of one sample. If it does not contain three
        blank-line-terminated header groups followed by a body, header
        and body are both left as None instead of raising.
        """
        super(Sample, self).__init__()
        self.header = None
        self.body = None
        match = self._SAMPLE_PATTERN.match(sample.strip())
        if match is not None:
            self.header = Header(match.group('header'))
            self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body or header attribute.

        The body is consulted first (by position or altitude string);
        when the body has no such entry, index is tried as a header
        parameter name. Raises IndexError when neither has it.
        """
        try:
            return self.body[index]
        except TypeError:  # sample.body may not exist
            raise IndexError('Sample index out of range')
        except IndexError:  # sample.body out of range
            try:
                return self.header[index]
            except (KeyError, TypeError):
                # KeyError: no such header parameter.
                # TypeError: sample.header may not exist.
                raise IndexError('Sample index out of range')

    def data(self):
        """Create a deep copy as a dictionary of header and body data.

        A header or body that could not be parsed is reported as None.
        """
        header = self.header
        body = self.body
        return {'header': None if header is None else header.data(),
                'body': None if body is None else body.data()}


class Header(dict):
    """A raw sodar data sample header.

    (A dictionary of sample-wide parameters.)
    """

    def __init__(self, header):
        """Identify discrete header parameter names and values.

        Every other line contains parameter keys; every other line
        contains parameter values.
        """
        super(Header, self).__init__()
        headerLines = [headerLine.strip()
                       for headerLine in header.split('\n')
                       if headerLine.strip()]
        # Pool all names and all values before pairing them, so pairing
        # is by overall position rather than line by line. zip() stops at
        # the shorter sequence, so surplus names or values are dropped.
        names = " ".join(headerLines[::2]).split()
        values = " ".join(headerLines[1::2]).split()
        self.update(zip(names, values))

    def data(self):
        """Create a copy of the data as a plain dictionary."""
        return self.copy()


class Body(list):
    """A raw sodar data sample body.

    (A list of dictionaries at each altitude.)
    """

    def __init__(self, body):
        """Identify discrete body parameter names and values.

        The first line contains parameter keys; the remaining lines
        contain parameter values, one set of parameters for a single
        altitude per line. A body with no non-blank lines gives an empty
        list.
        """
        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            return
        bodyKeys = bodyLines[0].split()
        # zip() stops at the shorter sequence, so a row with missing or
        # surplus columns is truncated rather than rejected.
        self.extend([dict(zip(bodyKeys, bodyLine.split()))
                     for bodyLine in bodyLines[1:]])
        # Rows are stored in the reverse of file order.
        self.reverse()

    def __getitem__(self, index):
        """Return raw altitude data by altitude string.

        Ordinary list indices are tried first; anything the list rejects
        with a TypeError (such as a string) is looked up with _find.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find raw altitude data in Body.

        Returns the first row whose 'ALT' value equals index; rows
        without an 'ALT' key are skipped. Raises IndexError when no row
        matches.
        """
        for altitude in self:
            try:
                if altitude['ALT'] != index:
                    continue
            except KeyError:
                continue
            return altitude
        raise IndexError('Body index out of range')

    def data(self):
        """Create a deep copy of the data as a list of dictionaries."""
        return [altitude.copy() for altitude in self]


def _main():
    """Process as script from command line.

    Downloads one day of test data, parses it and prints one wind speed.
    Raises IOError if the data cannot be read.
    """
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3
    try:
        rawDataHandle = urlopen('http://nemo.isis.unc.edu/'
                                'data/nccoos/level0/ims/sodar/'
                                '2008_01/20080101.dat')
        try:
            rawDataString = rawDataHandle.read()
        finally:
            # Release the connection even when the read fails.
            rawDataHandle.close()
    except Exception:
        raise IOError("Failure to read raw test data")
    if not isinstance(rawDataString, str):
        # Python 3 hands back bytes; RawData parses text.
        rawDataString = rawDataString.decode('ascii', 'replace')
    rawDataObject = RawData(rawDataString)
    print(rawDataObject['2008-01-01-09-15']['70']['SPEED'])


if __name__ == "__main__":
    _main()