Index: sodar/trunk/sodar/data.py
===================================================================
--- sodar/trunk/sodar/data.py (revision 63)
+++ sodar/trunk/sodar/data.py (revision 64)
@@ -1,59 +1,121 @@
 #!/usr/bin/python
-"""Classes to handle sodar data samples
+"""
+Classes to handle sodar data samples.
 
 Sodar data samples are collected into daily files. Each sample consists of a
 header followed by an observation for each height.
+
+The daily file is split into a list (modeled by the class Data) of samples
+(modeled by the class Sample) in chronological order. A Data object is
+initialized with a string representing the daily file data:
+
+     dataHandle = open('20070601.dat')
+     dataString = dataHandle.read()
+     dataObject = Data(dataString)
+
+Each Sample object has attributes for a Header and Body object. The Samples
+within a Data object may also be accessed by time using a string of the format
+YYYY-MM-DD-HH-MM as an index on the Data object to return the first matching
+Sample in the Data object:
+
+    dataObject[0] # the first Sample object of the day
+    dataObject['2007-06-01-09-15'] # the Sample object for 9:15am
+    dataObject[15].header # the Header object of the 16th Sample
+    dataObject['2007-06-01-09-15'].body # the Body object for 9:15am
+
+Header objects act as dictionaries. Access each sample-wide parameter of
+interest using the header parameter name as a keyword on the Header object:
+
+    dataObject[15].header['VAL2'] # the number of validations for beam 2
+    dataObject['2007-06-01-09-15'].header['SPU3'] # normalized false signal
+                                                  # probability on beam 3
+    dataObject[0].header['SNR1'] # signal to noise on beam 1
+
+Consult your Sodar documentation for a complete list of header parameters.
+
+Body objects act as lists of dictionaries. The dictionaries access
+altitude-specific parameters by name as keywords. The dictionaries are in
+altitude-ascending order. Each dictionary may also be accessed by indexing with
+an altitude string:
+
+    dataObject[15].body[0] # the data for the lowest altitude, 16th sample
+    dataObject['2007-06-01-09-15'].body['70'] # the data for 70 meters
+    dataObject[15].body[0]['SPEED'] # wind speed at lowest altitude
+    dataObject['2007-06-01-09-15'].body['70']['DIR'] # wind direction
+                                                     # at 70 meters
+
+The body attribute of a Sample object may also be indexed directly on a Sample
+object for the most convenient semantics:
+
+    dataObject[15][0]['SPEED'] # wind speed at lowest altitude, 16th sample
+    dataObject['2007-06-01-09-15']['70']['DIR'] # wind direction,
+                                                # 70 meters, 9:15am
 """
 
+__author__ = 'Chris Calloway'
+__email__ = 'cbc@unc.edu'
+__copyright__ = 'Copyright 2007 UNC-CH Department of Marine Science'
+__license__ = 'GPL2'
+
 import re
 
+
 class Data(list):
-    """Daily sodar file data
-
-       (a collection of samples)
-    """
-    def __init__(self,data):
-        super(Data,self).__init__()
-        # samples in file are terminated by $
+    
+    """Daily sodar file data.
+       
+       (A chronologically ordered list of samples.)
+    """
+    
+    def __init__(self, data):
+        """Divide daily string into list of Samples separated by $."""
+        super(Data, self).__init__()
         self.extend([Sample(sample)
                      for sample in
                      [sample.strip() for sample in data.split('$')]
-                     if sample])
-
-    def __getitem__(self,index):
-        """allow sample retrieval by sample time in header
-        """
+                     if sample.strip()])
+
+    def __getitem__(self, index):
+        """Allow sample retrieval by Sample time in header."""
         try:
             return super(Data,self).__getitem__(index)
         except TypeError:
-            return self.find(index)
-
-    def find(self,index):
-        """find Sample in Data
-
-            where sample time of form YYYY-MM-DD-HH-MM
+            return self._find(index)
+
+    def _find(self, index):
+        """Find Sample in Data
+           
+           where sample time of form YYYY-MM-DD-HH-MM.
         """
-        try:
-            year,month,day,hour,min = index.split('-')
+        
+        try:
+            year,month,day,hour,minute = index.split('-')
         except ValueError:
-            raise ValueError,'Data index by date must be "YYYY-MM-DD-HH-MM"'
+            raise ValueError('Data index by date must be "YYYY-MM-DD-HH-MM"')
         except AttributeError:
-            raise AttributeError,'Data index by date must be "YYYY-MM-DD-HH-MM"'
+            raise AttributeError('Data index by date must be "YYYY-MM-DD-HH-MM"')
         for sample in self:
-            if sample.header['YEAR'].rjust(4,'0') != year: continue
-            if sample.header['MONTH'].rjust(2,'0') != month: continue
-            if sample.header['DAY'].rjust(2,'0') != day: continue
-            if sample.header['HOUR'].rjust(2,'0') != hour: continue
-            if sample.header['MIN'].rjust(2,'0') != min: continue
-            return sample
-        raise IndexError,'Data index out of range'
+            try:
+                if sample.header['YEAR'].rjust(4,'0') != year: continue
+                if sample.header['MONTH'].rjust(2,'0') != month: continue
+                if sample.header['DAY'].rjust(2,'0') != day: continue
+                if sample.header['HOUR'].rjust(2,'0') != hour: continue
+                if sample.header['MIN'].rjust(2,'0') != minute: continue
+                return sample
+            except TypeError:   # sample.header may not exist
+                continue
+        raise IndexError('Data index out of range')
+
 
 class Sample(object):
-    """A single sample from daily sodar file data
-
-       (a header and a body)
-    """
+    
+    """A single sample from daily sodar file data.
+       
+       (A header and a body attribute.)
+    """
+    
     def __init__(self,sample):
-        super(Sample,self).__init__()
+        """Separate Sample into Header and Body objects."""
+        super(Sample, self).__init__()
         # first three groups of lines are the header; rest is body
         samplePattern = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
@@ -61,65 +123,96 @@
                                     ''',re.DOTALL | re.VERBOSE)
         self.__dict__.update(samplePattern.match(sample.strip()).groupdict())
-        # fix for missing keys
-        self.header = Header(self.header)
-        self.body = Body(self.body)
-
-    def __getitem__(self,index):
-        return self.body[index]
+        # self.__dict__.get covers parsing invalid Samples
+        self.header = self.__dict__.get('header', None)
+        if self.header is not None:
+            self.header = Header(self.header)
+        self.body = self.__dict__.get('body', None)
+        if self.body is not None:
+            self.body = Body(self.body)
+
+    def __getitem__(self, index):
+        """Index Sample by body attribute."""
+        try:
+            return self.body[index]
+        except TypeError:   # sample.body may not exist
+            raise IndexError('Sample index out of range')
+
 
 class Header(dict):
-    """A sodar data sample header
-
-      (a collection of sample-wide parameters)
-    """
-    def __init__(self,header):
-        super(Header,self).__init__()
-        headerLines = header.split('\n')
-        # every other line contains parameter keys;
-        # every other line contains parameter values
+    
+    """A sodar data sample header.
+
+      (A dictionary of sample-wide parameters.)
+    """
+    
+    def __init__(self, header):
+        
+        """Identify discrete header parameter names and values.
+           
+           Every other line contains parameter keys;
+           every other line contains parameter values.
+        """
+        
+        super(Header, self).__init__()
+        headerLines = [headerLine.strip()
+                       for headerLine in header.split('\n')
+                       if headerLine.strip()]
+        #fix for bad match between names and values
         self.update(dict(zip(" ".join(headerLines[::2]).split(),
                              " ".join(headerLines[1::2]).split())))
+        
 
 class Body(list):
-    """A sodar data sample body
-
-       (a collection of collections at each altitude)
-    """
-    def __init__(self,body):
-        super(Body,self).__init__()
-        bodyLines = body.split('\n')
+    
+    """A sodar data sample body.
+
+       (A list of dictionaries at each altitude.)
+    """
+    
+    def __init__(self, body):
+        
+        """Identify discrete body parameter names and values.
+           
+           The first line contains parameter keys;
+           the remaining lines contain parameter values,
+           one set of parameters for a single altitude per line.
+        """
+        
+        super(Body, self).__init__()
+        bodyLines = [bodyLine.strip()
+                     for bodyLine in body.split('\n')
+                     if bodyLine.strip()]
         bodyKeys = bodyLines[0].split()
+        #fix for bad match between names and values
         self.extend([dict(zip(bodyKeys, bodyLine.split()))
                      for bodyLine in bodyLines[1:]])
         self.reverse()            
 
-    def __getitem__(self,index):
-        """allow retrieval by altitude string
-        """
-        try:
-            return super(Body,self).__getitem__(index)
+    def __getitem__(self, index):
+        """Return altitude data by altitude string."""
+        try:
+            return super(Body, self).__getitem__(index)
         except TypeError:
-            return self.find(index)
-
-    def find(self,index):
-        """find altitude data in Body
-        """
+            return self._find(index)
+
+    def _find(self, index):
+        """Find altitude data in Body."""
         for altitudeData in self:
             if altitudeData['ALT'] != index: continue
             return altitudeData
-        raise IndexError,'Body index, out of range'
-
-def __main():
-    """Process as script from command line
-    """
+        raise IndexError('Body index, out of range')
+
+
+def _main():
+    """Process as script from command line."""
     import urllib2
     try:
-       data = urllib2.urlopen('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat')
-       data = data.read()
+        dataHandle = urllib2.urlopen('http://nemo.isis.unc.edu/data/nccoos/level0/dukeforest/sodar/store/2007-06/20070601.dat')
+        dataString = dataHandle.read()
     except:
-        print "Failure to read test data"
-    data = Data(data)
-    print data['2007-06-01-09-15']['70']['SPEED']
+        raise IOError("Failure to read test data")
+    dataObject = Data(dataString)
+    print dataObject['2007-06-01-09-15']['70']['SPEED']
 
 if __name__ == "__main__":
-    __main()
+    _main()