#!/usr/bin/python
"""
Module to handle raw sodar data samples.

Raw sodar data samples are collected into daily files. Each sample consists of a
header followed by an observation for each height.

The daily file is split into a list (modeled by the class RawData) of samples
(modeled by the class Sample) in chronological order. A RawData object is
initialized with a string representing the daily file data:

     rawDataHandle = open('20070601.dat')
     rawDataString = rawDataHandle.read()
     rawDataObject = RawData(rawDataString)

Each Sample object has attributes for a Header and Body object. The Samples
within a RawData object may also be accessed by time using a string of the
format YYYY-MM-DD-HH-MM as an index on the RawData object to return the first
matching Sample in the RawData object:

    rawDataObject[0] # the first Sample object of the day
    rawDataObject['2007-06-01-09-15'] # the Sample object for 9:15am
    rawDataObject[15].header # the Header object of the 16th Sample
    rawDataObject['2007-06-01-09-15'].body # the Body object for 9:15am

Header objects act as dictionaries. Access each sample-wide parameter of
interest using the header parameter name as a keyword on the Header object:

    rawDataObject[15].header['VAL2'] # the number of validations for beam 2
    rawDataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
                                                     # probability on beam 3
    rawDataObject[0].header['SNR1'] # signal to noise on beam 1

Consult your Sodar documentation for a complete list of header parameters.
Different sodar models have different sets of header parameters. This model
seeks to be model agnostic, and parses the header parameter names from the
raw data itself.

Body objects act as lists of dictionaries. The dictionaries access
altitude-specific parameters by name as keywords. The dictionaries are in
altitude-ascending order. Each dictionary may also be accessed by indexing with
an altitude string:

    rawDataObject[15].body[0] # the data for the lowest altitude, 16th sample
    rawDataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
    rawDataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
    rawDataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
                                                        # at 70 meters

The body attribute of a Sample object may also be indexed directly on a Sample
object for the most convenient semantics:

    rawDataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
    rawDataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
                                                   # 70 meters, 9:15am
"""

__author__ = 'Chris Calloway'
__email__ = 'cbc@unc.edu'
__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
__license__ = 'GPL2'

import re

class RawData(list):
    
    """Class to handle raw daily sodar file data.
       
       A chronologically ordered list of Sample objects, one for each
       non-empty '$'-separated record of the daily file. In addition to
       ordinary list indexing, a Sample may be retrieved with a time string
       of the form "YYYY-MM-DD-HH-MM".
    """
    
    def __init__(self, data):
        """Parse raw daily sodar file data.
           
           data -- the text of a daily file as one string. Records are
                   separated by '$'; records that are empty after
                   stripping surrounding whitespace are ignored.
        """
        super(RawData, self).__init__()
        # Divide daily string into list of Samples separated by $.
        # Strip each record once, then drop the blank ones.
        records = [record.strip() for record in data.split('$')]
        self.extend([Sample(record) for record in records if record])

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.
           
           Anything the underlying list accepts is handled by the list.
           If the list rejects the index with a TypeError, the index is
           treated as a "YYYY-MM-DD-HH-MM" time string.
        """
        try:
            return super(RawData, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in RawData.
           
           Where sample time of form YYYY-MM-DD-HH-MM.
           
           Returns the first Sample whose zero-padded header YEAR, MONTH,
           DAY, HOUR and MIN values equal the corresponding fields of
           index.
           
           Raises ValueError if index does not split into five
           '-'-separated fields, AttributeError if index has no split
           method, and IndexError if no Sample matches.
        """
        
        try:
            year, month, day, hour, minute = index.split('-')
        except ValueError:
            raise ValueError('RawData index by date must be '\
                             '"YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('RawData index by date must be '\
                                 '"YYYY-MM-DD-HH-MM"')
        wanted = (year, month, day, hour, minute)
        for sample in self:
            try:
                header = sample.header
                found = (header['YEAR'].rjust(4, '0'),
                         header['MONTH'].rjust(2, '0'),
                         header['DAY'].rjust(2, '0'),
                         header['HOUR'].rjust(2, '0'),
                         header['MIN'].rjust(2, '0'))
            except (TypeError, KeyError):
                # TypeError: sample.header is None (unparseable sample).
                # KeyError: header lacks a time field. Either way this
                # sample cannot match, so skip it rather than abort the
                # search.
                continue
            if found == wanted:
                return sample
        raise IndexError('RawData index out of range')


class Sample(object):
    
    """A single sample from raw daily sodar file data.
       
       (A header and a body attribute.)
    """
    
    def __init__(self,sample):
        """Separate Sample into Header and Body objects."""
        super(Sample, self).__init__()
        # first three groups of lines are the header; rest is body
        samplePattern = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                       (?P<body>.*$)
                                    ''',re.DOTALL | re.VERBOSE)
        self.__dict__.update(samplePattern.match(sample.strip()).groupdict())
        # getattr with default covers parsing invalid Samples
        self.header = getattr(self, 'header', None)
        if self.header is not None:
            self.header = Header(self.header)
        self.body = getattr(self, 'body', None)
        if self.body is not None:
            self.body = Body(self.body)
    
    def __getitem__(self, index):
        """Index Sample by body or header attribute."""
        try:
            return self.body[index]
        except TypeError:   # sample.body may not exist
            raise IndexError('Sample index out of range')
        except IndexError:  # sample.body out of range
            try:
                return self.header[index]
            except KeyError: # sample.header may not exist
                raise IndexError('Sample index out of range')
    
    def data(self):
        """Create a deep copy as a dictionary of header and body data."""
        return {'header':self.header.data(),
                'body':self.body.data()}


class Header(dict):
    
    """A raw sodar data sample header.

      (A dictionary of sample-wide parameters.)
    """
    
    def __init__(self, header):
        
        """Identify discreet header parameter names and values.
           
           Every other line contains parameter keys;
           every other line contains parameter values.
        """
        
        super(Header, self).__init__()
        headerLines = [headerLine.strip()
                       for headerLine in header.split('\n')
                       if headerLine.strip()]
        #fix for bad match between names and values
        self.update(dict(zip(" ".join(headerLines[::2]).split(),
                             " ".join(headerLines[1::2]).split())))
    
    def data(self):
        """Create a deep/shallow copy of the data as a dictionary."""
        return self.copy()


class Body(list):
    
    """A raw sodar data sample body.

       (A list of dictionaries, one for each altitude.)
       
       The data lines are stored in the reverse of their order in the raw
       text. Each dictionary maps parameter names to string values and
       may also be retrieved by its 'ALT' value.
    """
    
    def __init__(self, body):
        
        """Identify discrete body parameter names and values.
           
           The first line contains parameter keys;
           the remaining lines contain parameter values,
           one set of parameters for a single altitude per line.
           
           body -- the body text of one sample. Blank lines are ignored.
                   Text with no non-blank lines yields an empty Body.
        """
        
        super(Body, self).__init__()
        bodyLines = [bodyLine.strip() for bodyLine in body.split('\n')]
        bodyLines = [bodyLine for bodyLine in bodyLines if bodyLine]
        if not bodyLines:
            # Nothing to parse; without this guard bodyLines[0] below
            # would raise IndexError.
            return
        bodyKeys = bodyLines[0].split()
        # zip pairs keys with values positionally and drops any surplus
        # on either side when a line is short or long.
        self.extend([dict(zip(bodyKeys, bodyLine.split()))
                     for bodyLine in bodyLines[1:]])
        # Stored in the reverse of file order (the module documentation
        # describes the result as altitude-ascending).
        self.reverse()

    def __getitem__(self, index):
        """Return raw altitude data by altitude string.
           
           Anything the underlying list accepts is handled by the list.
           If the list rejects the index with a TypeError, the index is
           compared against each entry's 'ALT' value.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find raw altitude data in Body.
           
           Returns the first dictionary whose 'ALT' value equals index.
           Dictionaries without an 'ALT' key are skipped.
           
           Raises IndexError if no dictionary matches.
        """
        for altitude in self:
            try:
                if altitude['ALT'] != index:
                    continue
            except KeyError:
                continue
            return altitude
        raise IndexError('Body index out of range')

    def data(self):
        """Create a deep copy of the data as a list of dictionaries."""
        return [altitude.copy() for altitude in self]


def _main():
    """Process as script from command line.
       
       Downloads one daily raw data file, parses it with RawData, and
       prints the 'SPEED' value at altitude '70' for the sample stamped
       2008-01-01 09:15.
       
       Raises IOError if the file cannot be downloaded or read.
    """
    # urllib2 exists only on Python 2; fall back to its Python 3 home.
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen
    try:
        rawDataHandle = urlopen('http://nemo.isis.unc.edu/'\
                                'data/nccoos/level0/ims/sodar/'\
                                '2008_01/20080101.dat')
        try:
            rawDataString = rawDataHandle.read()
        finally:
            # Close the connection even when read() fails.
            rawDataHandle.close()
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must still
        # propagate.
        raise IOError("Failure to read raw test data")
    if not isinstance(rawDataString, str):
        # On Python 3 read() returns bytes; the parser works on text.
        # latin-1 maps every byte to a character, so this cannot fail.
        rawDataString = rawDataString.decode('latin-1')
    rawDataObject = RawData(rawDataString)
    # Parenthesised single argument prints identically on Python 2 and 3.
    print(rawDataObject['2008-01-01-09-15']['70']['SPEED'])

if __name__ == "__main__":
    _main()