#!/usr/bin/python
"""
Classes to handle sodar data samples.

Sodar data samples are collected into daily files. Each sample consists of a
header followed by an observation for each height.

The daily file is split into a list (modeled by the class Data) of samples
(modeled by the class Sample) in chronological order. A Data object is
initialized with a string representing the daily file data:

     dataHandle = open('20070601.dat')
     dataString = dataHandle.read()
     dataObject = Data(dataString)

Each Sample object has attributes for a Header and Body object. The Samples
within a Data object may also be accessed by time using a string of the format
YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching
Sample in the Data object:

    dataObject[0] # the first Sample object of the day
    dataObject['2007-06-01-09-15'] # the Sample object for 9:15am
    dataObject[15].header # the Header object of the 16th Sample
    dataObject['2007-06-01-09-15'].body # the Body object for 9:15am

Header objects act as dictionaries. Access each sample-wide parameter of
interest using the header parameter name as a keyword on the Header object:

    dataObject[15].header['VAL2'] # the number of validations for beam 2
    dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
                                                  # probability on beam 3
    dataObject[0].header['SNR1'] # signal to noise on beam 1

Consult your Sodar documentation for a complete list of header parameters.

Body objects act as lists of dictionaries. The dictionaries access
altitude-specific parameters by name as keywords. The dictionaries are in
altitude-ascending order. Each dictionary may also be accessed by indexing with
an altitude string:

    dataObject[15].body[0] # the data for the lowest altitude, 16th sample
    dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
    dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
    dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
                                                     # at 70 meters

The body attribute of a Sample object may also be indexed directly on a Sample
object for the most convenient semantics:

    dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
    dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
                                                # 70 meters, 9:15am
"""

__author__ = 'Chris Calloway'
__email__ = 'cbc@unc.edu'
__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
__license__ = 'GPL2'

import re
import numpy as n

class Data(list):
    
    """Daily sodar file data.
       
       (A chronologically ordered list of samples.)

       Besides the list contents, each instance carries a ``samples``
       attribute: a list of ``(header dict, list of altitude dicts)`` tuples
       that is an independent copy of the parsed samples, so that clean-up
       work never alters the parsed Sample objects themselves.
    """
    
    def __init__(self, data):
        """Divide daily string into list of Samples separated by $.

           Chunks that are empty once surrounding whitespace is removed
           are skipped.
        """
        super(Data, self).__init__()
        chunks = [chunk.strip() for chunk in data.split('$')]
        self.extend([Sample(chunk) for chunk in chunks if chunk])
        self._normalize()

    def __getitem__(self, index):
        """Allow sample retrieval by Sample time in header.

           Integer and slice indexes behave as on a plain list; any index
           the list rejects with TypeError is treated as a time string and
           handed to _find.
        """
        try:
            return super(Data,self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find Sample in Data
           
           where sample time of form YYYY-MM-DD-HH-MM.

           Returns the first Sample whose zero-padded header time fields
           equal the requested ones. Raises ValueError when the index does
           not split into five parts, AttributeError when it cannot be split
           at all, and IndexError when no Sample matches.
        """
        
        try:
            year,month,day,hour,minute = index.split('-')
        except ValueError:
            raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        except AttributeError:
            raise AttributeError('Data index by date must be "YYYY-MM-DD-HH-MM"')
        # (header key, zero-padded width, requested value)
        wanted = (('YEAR', 4, year),
                  ('MONTH', 2, month),
                  ('DAY', 2, day),
                  ('HOUR', 2, hour),
                  ('MIN', 2, minute))
        for sample in self:
            header = sample.header
            try:
                matched = True
                for key, width, value in wanted:
                    if header[key].rjust(width,'0') != value:
                        matched = False
                        break
            except (TypeError, KeyError):
                # TypeError: sample.header may not exist (None);
                # KeyError: header lacks one of the time fields.
                continue
            if matched:
                return sample
        raise IndexError('Data index out of range')

    def _normalize(self):
        """Clean up data for analysis."""
        self._copy()
        # self._convert()
        # compute time interval
        # correct for missing times
        # compute minimum altitude
        # compute maximum overall altitude
        # compute number of altitudes
        # compute altitude interval
        # correct for missing altitudes
        # mark maximum altitude with good values for each sample
        # mark minimum altitude with invalid values for each sample
        # convert direction to radians
        # compute u,v,c components
        # compute colorspecs
        # compute plotting parameters
    
    def _copy(self):
        """Create a deep copy of all the samples in this Data instance.

           Stores the result in self.samples as a list of
           (header dict, list of altitude dicts) tuples. Every dictionary is
           a new object, so later modification of self.samples leaves the
           Sample objects untouched. A Sample whose header or body is None
           is copied as an empty dict or an empty list respectively.
        """
        self.samples = []
        for sample in self:
            headerCopy = dict(sample.header or {})
            # Copy each altitude dictionary itself; copying only the list
            # would leave the copies sharing dictionaries with the Body.
            bodyCopy = [dict(altitude) for altitude in (sample.body or [])]
            self.samples.append((headerCopy, bodyCopy))
    
    def _convert(self):
        """Convert to numbers and correct for invalid values.

           Works in place on the altitude dictionaries of self.samples:
           every value becomes a float, and the invalid marker or any value
           that cannot be converted becomes NaN.
        """
        INVALID = "-9999"
        for sample in self.samples:
            for altitude in sample[1]:
                # Iterate over a snapshot of the keys while assigning.
                for key in list(altitude):
                    value = altitude[key]
                    if value == INVALID:
                        altitude[key] = n.nan
                        continue
                    try:
                        altitude[key] = float(value)
                    except (ValueError, TypeError):
                        # n.nan rather than n.NaN: the capitalized alias
                        # is gone from newer NumPy releases.
                        altitude[key] = n.nan


class Sample(object):
    
    """A single sample from daily sodar file data.
       
       (A header and a body attribute.)

       For text that does not have the expected layout, both the header
       and the body attribute are None.
    """
    
    # first three groups of lines are the header; rest is body
    # (compiled once for the class rather than once per Sample)
    _samplePattern = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                    (?P<body>.*$)
                                 ''',re.DOTALL | re.VERBOSE)
    
    def __init__(self,sample):
        """Separate Sample into Header and Body objects.

           sample is the text of one sample. Surrounding whitespace is
           ignored. When the text does not contain three blank-line
           terminated header groups followed by a body, header and body
           are left as None instead of raising.
        """
        super(Sample, self).__init__()
        # defaults cover parsing invalid Samples
        self.header = None
        self.body = None
        match = self._samplePattern.match(sample.strip())
        if match is None:
            # match() returns None when the layout is not recognized
            return
        self.header = Header(match.group('header'))
        self.body = Body(match.group('body'))

    def __getitem__(self, index):
        """Index Sample by body attribute.

           Raises IndexError when the Sample has no body.
        """
        try:
            return self.body[index]
        except TypeError:   # sample.body may not exist
            raise IndexError('Sample index out of range')


class Header(dict):
    
    """The header of one sodar data sample.

       (Maps each sample-wide parameter name to its value string.)
    """
    
    def __init__(self, header):
        
        """Pair up header parameter names with their values.
           
           Blank lines are discarded. Of the lines that remain, the
           first, third, ... hold whitespace-separated parameter names and
           the second, fourth, ... hold the corresponding values. Names and
           values are matched by position across the whole header.
        """
        
        super(Header, self).__init__()
        stripped = (rawLine.strip() for rawLine in header.split('\n'))
        usedLines = [line for line in stripped if line]
        names = " ".join(usedLines[0::2]).split()
        values = " ".join(usedLines[1::2]).split()
        #fix for bad match between names and values
        for name, value in zip(names, values):
            self[name] = value
        

class Body(list):
    
    """A sodar data sample body.

       (A list of dictionaries, one for each altitude.)
    """
    
    def __init__(self, body):
        
        """Identify discrete body parameter names and values.
           
           The first line contains parameter keys;
           the remaining lines contain parameter values,
           one set of parameters for a single altitude per line.
           Blank lines are ignored, and the order of the value lines is
           reversed in the resulting list. Text with no non-blank lines
           produces an empty Body.
        """
        
        super(Body, self).__init__()
        bodyLines = [bodyLine.strip()
                     for bodyLine in body.split('\n')
                     if bodyLine.strip()]
        if not bodyLines:
            # nothing to parse: no key line, so no altitude rows either
            return
        bodyKeys = bodyLines[0].split()
        #fix for bad match between names and values
        self.extend([dict(zip(bodyKeys, bodyLine.split()))
                     for bodyLine in bodyLines[1:]])
        self.reverse()            

    def __getitem__(self, index):
        """Return altitude data by altitude string.

           Integer and slice indexes behave as on a plain list; any index
           the list rejects with TypeError is looked up as an altitude.
        """
        try:
            return super(Body, self).__getitem__(index)
        except TypeError:
            return self._find(index)

    def _find(self, index):
        """Find altitude data in Body.

           Returns the first dictionary whose 'ALT' value equals index.
           Rows without an 'ALT' entry (for instance a line with fewer
           values than keys) are skipped. Raises IndexError when no row
           matches.
        """
        for altitudeData in self:
            if 'ALT' in altitudeData and altitudeData['ALT'] == index:
                return altitudeData
        raise IndexError('Body index, out of range')


def _main():
    """Process as script from command line.

       Downloads one daily test file, parses it into a Data object and
       prints the wind speed at 70 meters for the 9:15am sample. Raises
       IOError when the test data cannot be read.
    """
    import urllib2
    url = 'http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat'
    try:
        dataHandle = urllib2.urlopen(url)
        try:
            dataString = dataHandle.read()
        finally:
            # release the connection whether or not the read succeeded
            dataHandle.close()
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not turned into an IOError.
        raise IOError("Failure to read test data")
    dataObject = Data(dataString)
    # parenthesized single-argument form prints the same text whether
    # print is a statement or a function
    print(dataObject['2007-06-01-09-15']['70']['SPEED'])

if __name__ == "__main__":
    _main()