Index: raw2proc/trunk/raw2proc/bogue_config_20060918.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/bogue_config_20060918.py (revision 101)
@@ -1,0 +1,34 @@
+platform_info = {
+    'id' : 'bogue',
+    'location' : 'Bogue Inlet Pier, Bogue, NC',
+    'lat' : 34.661568,   # degrees true (-) south, (+) north
+    'lon' : -77.034131,  # degrees true (-) west, (+) east
+    'mvar' : -9.7,       # degrees (-) west, (+) east
+    'water_depth' : 8.,  # meters
+    'institution' : 'nccoos',
+    #
+    'config_start_date' : '2006-09-18 14:29:00',
+    'config_end_date' : '2006-10-06 17:00:00',  # None or yyyy-mm-dd HH:MM:SS
+    'packages' : ('adcp', 'adcpwaves'),
+    }
+sensor_info = {
+    'adcp' : {'id' : 'adcp',
+              'description' : 'Current profile data',
+              'raw_dir' : '/scratch/nccoos/level0/bogue/adcp_bLogData',
+              'raw_file_glob' : '*',
+              'proc_dir' : '/scratch/nccoos/level1/bogue/adcp',
+              'process_module' : 'proc_rdi_logdata_adcp',
+              'nbins' : 50,
+              'bin_size' : 0.5,       # meters
+              'transducer_ht' : 0.5,  # meters above the bottom
+              'blanking_ht' : 1.6,    # meters above transducer
+              },
+    'adcpwaves' : {'id' : 'adcpwaves',
+                   'description' : 'Directional wave data',
+                   'raw_dir' : '/scratch/nccoos/level0/bogue/adcp_bLogData',
+                   'raw_file_glob' : '*',
+                   'proc_dir' : '/scratch/nccoos/level1/bogue/adcpwaves',
+                   'process_module' : 'proc_rdi_logdata_dw',
+                   },
+    }
Index: raw2proc/trunk/raw2proc/bogue_config_20070224.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/bogue_config_20070224.py (revision 101)
@@ -1,0 +1,34 @@
+platform_info = {
+    'id' : 'bogue',
+    'location' : 'Bogue Inlet Pier, Bogue, NC',
+    'lat' : 34.661568,   # degrees true (-) south, (+) north
+    'lon' : -77.034131,  # degrees true (-) west, (+) east
+    'mvar' : -9.7,       # degrees (-) west, (+) east
+    'water_depth' : 8.,  # meters
+    'institution' : 'nccoos',
+    #
+    'config_start_date' : '2007-02-24 00:49:00',
+    'config_end_date' : None,  # None or yyyy-mm-dd HH:MM:SS
+    'packages' : ('adcp', 'adcpwaves'),
+    }
+sensor_info = {
+    'adcp' : {'id' : 'adcp',
+              'description' : 'Current profile data',
+              'raw_dir' : '/scratch/nccoos/level0/bogue/adcp_bLogData',
+              'raw_file_glob' : '*',
+              'proc_dir' : '/scratch/nccoos/level1/bogue/adcp',
+              'process_module' : 'proc_rdi_logdata_adcp',
+              'nbins' : 50,
+              'bin_size' : 0.5,       # meters
+              'transducer_ht' : 0.5,  # meters above the bottom
+              'blanking_ht' : 1.6,    # meters above transducer
+              },
+    'adcpwaves' : {'id' : 'adcpwaves',
+                   'description' : 'Directional wave data',
+                   'raw_dir' : '/scratch/nccoos/level0/bogue/adcp_bLogData',
+                   'raw_file_glob' : '*',
+                   'proc_dir' : '/scratch/nccoos/level1/bogue/adcpwaves',
+                   'process_module' : 'proc_rdi_logdata_dw',
+                   },
+    }
Index: raw2proc/trunk/raw2proc/ncutil.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/ncutil.py (revision 101)
@@ -1,0 +1,172 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2008-01-08 16:10:22 haines>
+"""
+Create, update and load utilities for netcdf files
+"""
+
+from pycdf import *
+import os
+import numpy
+
+def nc_create(ncFile, (global_atts, var_atts, dim_inits, var_inits, var_data)):
+    """
+    Create new netcdf file
+
+    :Parameters:
+        ncFile : string
+            Path and name of file to create
+    """
+    try:
+        # Open new netCDF file, overwrite if it exists, create if it does not
+        nc = CDF(ncFile, NC.WRITE|NC.CREATE|NC.TRUNC)
+        # Automatically set define and data modes.
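+        # (automode lets pycdf switch between define and data mode as
+        # needed, so the dim/var definitions and data writes below can
+        # be freely interleaved)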
+        nc.automode()
+        #
+        # GLOBALS
+        for attrName in global_atts.keys():
+            setattr(nc, attrName, global_atts[attrName])
+
+        # DIMENSIONS
+        for dim in dim_inits:
+            dimName, dimValue = dim
+            # print '%s = %d' % (dimName, dimValue)
+            ncdim = nc.def_dim(dimName, dimValue)
+
+        # VARIABLES
+        for var in var_inits:
+            varName, varType, varDim = var
+            ncvar = nc.def_var(varName, varType, varDim)
+            # add attributes
+            for attrName in var_atts[varName].keys():
+                setattr(ncvar, attrName, var_atts[varName][attrName])
+            # setattr(ncvar, '_FillValue', numpy.nan)
+
+        # add data
+        nrecs = nc.inq_unlimlen()
+        for var in var_data:
+            varName, varData = var
+            # print varName
+            ncvar = nc.var(varName)
+            # e.g. lat = array(var_data['lat'])
+            # if an array
+            if type(varData) == numpy.ndarray:
+                if ncvar.isrecord():
+                    # time, ens, u, v
+                    ncvar[nrecs:nrecs+len(varData)] = varData.tolist()
+                else:
+                    ncvar[:] = varData.tolist()  # z
+            else:
+                # if tuple, sequence or scalar
+                ncvar[:] = varData
+
+        nc.close()
+    except CDFError, msg:
+        print "CDFError:", msg
+        # if nc:
+        #     nc.close()
+        #     del(nc)
+
+def nc_update(ncFile, (global_atts, var_atts, var_data)):
+    """
+    Update existing netcdf file
+
+    :Parameters:
+        ncFile : string
+            Path and name of file to update
+    """
+    try:
+        # Open netCDF in write mode
+        nc = CDF(ncFile, NC.WRITE)
+        # Automatically set define and data modes.
+        nc.automode()
+        #
+        # GLOBALS
+        for attrName in global_atts.keys():
+            setattr(nc, attrName, global_atts[attrName])
+
+        # VARIABLES
+        # update attributes
+        for var in var_atts:
+            varName, atts = var
+            ncvar = nc.var(varName)
+            for attrName in atts.keys():
+                setattr(ncvar, attrName, atts[attrName])
+
+        # update data
+        nrecs = nc.inq_unlimlen()
+        for var in var_data:
+            varName, varData = var
+            ncvar = nc.var(varName)
+            # e.g. lat = array(var_data['lat'])
+            # if an array
+            if type(varData) == numpy.ndarray:
+                if ncvar.isrecord():
+                    # time, ens, u, v (with unlimited dimension)
+                    ncvar[nrecs:nrecs+len(varData)] = varData.tolist()
+                else:
+                    ncvar[:] = varData.tolist()  # z (limited dimension)
+            else:
+                # if tuple, sequence or scalar
+                ncvar[:] = varData
+
+        nc.close()
+    except CDFError, msg:
+        print "CDFError:", msg
+        # if nc:
+        #     nc.close()
+        #     del(nc)
+
+def nc_get_time(ncFile):
+    """get time array from file """
+    try:
+        nc = CDF(ncFile)
+        ncvars = nc.variables()
+        if 'time' in ncvars.keys():
+            es = nc.var('time')[:]
+            units = nc.var('time').units
+            nc.close()
+            return (es, units)
+    except CDFError, msg:
+        print "CDFError:", msg
+
+def nc_load(ncFile, nameType='variable_name',
+            varNames='all', ga_flag=True, va_flag=True):
+    """
+    Load netcdf file
+
+    :Parameters:
+        ncFile : string
+            Path and name of file to load
+
+    :Other Parameters:
+        nameType : string 'variable_name' (default) or 'standard_name'
+            Defines naming convention to use for variable names as data
+            are loaded.  Variable name is the name used to store data
+            in file.  'standard_name' means use variable name based on
+            variable attribute called 'standard_name' of netcdf variable.
+        varNames : string or tuple of strings
+            specific variable names to be loaded into a sequence or scalar
+            in python following specification set in nameType
+            By default, all variables will be loaded.
+        ga_flag : boolean flag
+            By default, load the global file attributes
+        va_flag : boolean flag
+            By default, load the variable file attributes
+
+    """
+    try:
+        nc = CDF(ncFile)
+        attr = nc.attributes(full=1)
+        dims = nc.dimensions(full=1)
+        ncvars = nc.variables()
+        for var in ncvars.keys():
+            # load each variable by name??
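+            # nc_load is left as a stub in this revision; one possible
+            # sketch for the loop body (names are illustrative only):
+            #   ncvar = nc.var(var)
+            #   var_values[var] = ncvar[:]
+            #   if va_flag:
+            #       var_atts[var] = ncvar.attributes()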
+            pass
+
+    except CDFError, msg:
+        print "CDFError:", msg
+
Index: raw2proc/trunk/raw2proc/proc_rdi_logdata_adcp.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/proc_rdi_logdata_adcp.py (revision 101)
@@ -1,0 +1,313 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2008-01-14 12:20:17 haines>
+"""
+how to parse data, and assert what data and info goes into
+creating and updating monthly netcdf files
+
+RDI/Wavesmon processed adcp current profile data
+
+parser : sample date and time, ensemble number, currents
+         and wave summary output from WavesMon software
+creator : lat, lon, z, time, ens, u, v
+updater : time, ens, u, v
+
+Examples
+--------
+
+>> (parse, create, update) = load_processors('proc_rdi_logdata_adcp')
+or
+>> si = get_config(cn+'.sensor_info')
+>> (parse, create, update) = load_processors(si['adcp']['process_module'])
+
+>> lines = load_data(filename)
+>> data = parse(platform_info, sensor_info, lines)
+>> create(platform_info, sensor_info, data) or
+>> update(platform_info, sensor_info, data)
+
+"""
+
+from raw2proc import *
+from procutil import *
+from ncutil import *
+
+now_dt = datetime.utcnow()
+now_dt = now_dt.replace(microsecond=0)
+
+def parser(platform_info, sensor_info, lines):
+    """
+    parse and assign currents data from RDI ADCP Log Data
+
+    """
+
+    i = 0
+
+    for line in lines:
+        # split line and parse float and integers
+        rdi = []
+        sw = re.split(',', line)
+        for s in sw:
+            m = re.search(REAL_RE_STR, s)
+            if m:
+                rdi.append(float(m.groups()[0]))
+
+        # assign specific fields
+        n = len(rdi)
+        burst_num = int(rdi[0])  # Ensemble Number
+
+        # get sample datetime from data
+        sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(rdi[1:7])
+        sample_dt = scanf_datetime(sample_str, fmt='%y-%m-%d %H:%M:%S')
+        # datetime(*strptime(sample_str, "%y-%m-%d %H:%M:%S")[0:6])
+
+        # get sample datetime from filename
+        # compare with datetime from filename
+
+        sig_wave_ht = rdi[8]       # Significant Wave Height (Hs, meters)
+        peak_wave_period = rdi[9]  # Peak Wave Period (Tp, sec)
+        peak_wave_dir = rdi[10]    # Peak Wave Direction (deg N)
+        max_wave_ht = rdi[12]      # Maximum Wave Height (Hmax, meters)
+        max_wave_period = rdi[13]  # Maximum Wave Period (Tmax, sec)
+
+        water_depth = rdi[11]/1000  # Water Depth (meters) (based on ADCP backscatter or input config??)
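+        # (assumption: WavesMon appears to log depth in millimeters,
+        # hence the /1000 above to convert to meters)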
+        nbins = int(rdi[14])  # Number of bins
+
+        current_spd = numpy.array(rdi[15::2])  # starting at idx=15 skip=2 to end
+        current_dir = numpy.array(rdi[16::2])  # starting at idx=16 skip=2 to end
+
+        if nbins!=sensor_info['nbins']:
+            print 'Number of bins reported in data ('+ \
+                  str(nbins)+') does not match config number ('+ \
+                  str(sensor_info['nbins'])+')'
+
+        if len(current_spd)!=nbins or len(current_dir)!=nbins:
+            print 'Data length does not match number of bins in data'
+
+        ibad = (current_spd==-32768) | (current_dir==-32768)
+        current_spd[ibad] = numpy.nan
+        current_dir[ibad] = numpy.nan
+
+        # these items can also be teased out of raw adcp but for now get from config file
+        th = sensor_info['transducer_ht']   # Transducer height above bottom (meters)
+        bh = sensor_info['blanking_ht']     # Blanking height above Transducer (meters)
+        bin_size = sensor_info['bin_size']  # Bin Size (meters)
+
+        # compute height for each bin above the bottom
+        bins = numpy.arange(1,nbins+1)
+        bin_habs = (bins*bin_size+bin_size/2)+th+bh
+
+        # compute water mask
+        # Using George Voulgaris' method based on water depth
+        # minus half of the significant wave height (Hs)
+        # and computed habs
+        # if positive is up, what's less than zero depth?
+        bin_depths = bin_habs-(water_depth-sig_wave_ht/2)
+        iwater = bin_depths+bin_size/2 < 0
+
+        z = bin_habs
+        # check that length of bin_depths is equal to nbins
+        u = numpy.ones(nbins)*numpy.nan
+        v = numpy.ones(nbins)*numpy.nan
+
+        u[iwater] = current_spd[iwater]*numpy.sin(current_dir[iwater]*numpy.pi/180)
+        v[iwater] = current_spd[iwater]*numpy.cos(current_dir[iwater]*numpy.pi/180)
+
+        # set up dict of data if first line
+        if i==0:
+            data = {
+                'en' : numpy.array(numpy.ones((len(lines),), dtype=int)*numpy.nan),
+                'dt' : numpy.array(numpy.ones((len(lines),), dtype=object)*numpy.nan),
+                'time' : numpy.array(numpy.ones((len(lines),), dtype=long)*numpy.nan),
+                'z' : numpy.array(numpy.ones((nbins,), dtype=float)*numpy.nan),
+                'u' : numpy.array(numpy.ones((len(lines),nbins), dtype=float)*numpy.nan),
+                'v' : numpy.array(numpy.ones((len(lines),nbins), dtype=float)*numpy.nan),
+                'water_depth' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                }
+
+        data['en'][i] = burst_num
+        data['dt'][i] = sample_dt           # sample datetime
+        data['time'][i] = dt2es(sample_dt)  # sample time in epoch seconds
+        data['z'] = z
+        data['u'][i] = u
+        data['v'][i] = v
+        data['water_depth'][i] = water_depth
+        i = i+1
+
+    return data
+
+def creator(platform_info, sensor_info, data):
+    #
+    #
+    title_str = sensor_info['description']+' at '+ platform_info['location']
+    global_atts = {
+        'title' : title_str,
+        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
+        'institution_url' : 'http://nccoos.unc.edu',
+        'institution_dods_url' : 'http://nccoos.unc.edu',
+        'metadata_url' : 'http://nccoos.unc.edu',
+        'references' : 'http://nccoos.unc.edu',
+        'contact' : 'Sara Haines (haines@email.unc.edu)',
+        #
+        'source' : 'fixed-profiler (acoustic doppler) observation',
+        'history' : 'raw2proc using ' + sensor_info['process_module'],
+        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
+        # conventions
+        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
+        # SEACOOS CDL codes
+        'format_category_code' : 'fixed-profiler',
+        'institution_code' : platform_info['institution'],
+        'platform_code' : platform_info['id'],
+        'package_code' : sensor_info['id'],
+        # institution specific
+        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
+        'project_url' : 'http://nccoos.unc.edu',
+        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
+        # first date in monthly file
+        'start_date' : data['dt'][0].strftime("%Y-%m-%d %H:%M:%S"),
+        # last date in monthly file
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        'process_level' : 'level1',
+        #
+        # must type match to data (e.g. fillvalue is real if data is real)
+        '_FillValue' : -99999.,
+        }
+
+    var_atts = {
+        # coordinate variables
+        'time' : {'short_name': 'time',
+                  'long_name': 'Time',
+                  'standard_name': 'time',
+                  'units': 'seconds since 1970-1-1 00:00:00 -0',  # UTC
+                  'axis': 'T',
+                  },
+        'lat' : {'short_name': 'lat',
+                 'long_name': 'Latitude',
+                 'standard_name': 'latitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_north',
+                 'valid_range':(-90.,90.),
+                 'axis': 'Y',
+                 },
+        'lon' : {'short_name': 'lon',
+                 'long_name': 'Longitude',
+                 'standard_name': 'longitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_east',
+                 'valid_range':(-180.,180.),
+                 'axis': 'X',
+                 },
+        'z' : {'short_name': 'z',
+               'long_name': 'Height',
+               'standard_name': 'height',
+               'reference':'zero at sea-surface',
+               'units': 'm',
+               'axis': 'Z',
+               },
+        # data variables
+        'en' : {'long_name': 'Ensemble Number',
+                'standard_name': 'ensemble_number',
+                'units': 'None',
+                },
+        'u': {'long_name': 'East/West Component of Current',
+              'standard_name': 'eastward_current',
+              'units': 'm s-1',
+              'reference': 'clockwise from True East',
+              },
+        'v': {'long_name': 'North/South Component of Current',
+              'standard_name': 'northward_current',
+              'units': 'm s-1',
+              'reference': 'clockwise from True North',
+              },
+        'water_depth': {'short_name': '',
+                        'long_name': 'Water Depth',
+                        'standard_name': 'water_depth',
+                        'units': 'm',
+                        },
+        }
+
+    # dimension names use tuple so order of initialization is maintained
+    dim_inits = (
+        ('ntime', NC.UNLIMITED),
+        ('nlat', 1),
+        ('nlon', 1),
+        ('nz', sensor_info['nbins'])
+        )
+
+    # using tuple of tuples so order of initialization is maintained
+    # using dict for attributes order of init not important
+    # use dimension names not values
+    # (varName, varType, (dimName1, [dimName2], ...))
+    var_inits = (
+        # coordinate variables
+        ('time', NC.INT, ('ntime',)),
+        ('lat', NC.FLOAT, ('nlat',)),
+        ('lon', NC.FLOAT, ('nlon',)),
+        ('z', NC.FLOAT, ('nz',)),
+        # data variables
+        ('en', NC.INT, ('ntime', )),
+        ('u', NC.FLOAT, ('ntime', 'nz')),
+        ('v', NC.FLOAT, ('ntime', 'nz')),
+        ('water_depth', NC.FLOAT, ('ntime',)),
+        )
+
+    # subset data only to month being processed (see raw2proc.process())
+    i = data['in']
+
+    # var data
+    var_data = (
+        ('lat', platform_info['lat']),
+        ('lon', platform_info['lon']),
+        ('z', data['z']),
+        #
+        ('time', data['time'][i]),
+        ('en', data['en'][i]),
+        ('u', data['u'][i]),
+        ('v', data['v'][i]),
+        ('water_depth', data['water_depth'][i]),
+        )
+
+    return (global_atts, var_atts, dim_inits, var_inits, var_data)
+
+def updater(platform_info, sensor_info, data):
+    #
+    global_atts = {
+        # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
+        # last date in monthly file
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        }
+
+    # data variables
+    # update any variable attributes like range, min, max
+    var_atts = {}
+    # var_atts = {
+    #     'u': {'max': max(data.u),
+    #           'min': min(data.u),
+    #           },
+    #     'v': {'max': max(data.v),
+    #           'min': min(data.v),
+    #           },
+    #     }
+
+    # subset data only to month being processed (see raw2proc.process())
+    i = data['in']
+
+    # data
+    var_data = (
+        ('time', data['time'][i]),
+        ('en', data['en'][i]),
+        ('u', data['u'][i]),
+        ('v', data['v'][i]),
+        ('water_depth', data['water_depth'][i]),
+        )
+
+    return (global_atts, var_atts, var_data)
+#
Index: raw2proc/trunk/raw2proc/proc_rdi_logdata_dw.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/proc_rdi_logdata_dw.py (revision 101)
@@ -1,0 +1,312 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2008-01-14 12:14:32 haines>
+"""
+how to parse data, and assert what data and info goes into
+creating and updating monthly netcdf files
+
+RDI/Wavesmon processed adcp directional wave data
+
+parser : sample date and time, ensemble number, wave summary output from WavesMon software
+creator : lat, lon, z, time, sig_wave_ht, peak_wave_period, peak_wave_dir,
+          max_wave_ht, mean_wave_period, water_depth
+updater : time, sig_wave_ht, peak_wave_period, peak_wave_dir,
+          max_wave_ht, mean_wave_period, water_depth
+
+Examples
+--------
+
+>> (parse, create, update) = load_processors('proc_rdi_logdata_dw')
+or
+>> si = get_config(cn+'.sensor_info')
+>> (parse, create, update) = load_processors(si['adcpwaves']['process_module'])
+
+>> lines = load_data(filename)
+>> data = parse(platform_info, sensor_info, lines)
+>> create(platform_info, sensor_info, data) or
+>> update(platform_info, sensor_info, data)
+
+"""
+
+from raw2proc import *
+from procutil import *
+from ncutil import *
+
+now_dt = datetime.utcnow()
+now_dt = now_dt.replace(microsecond=0)
+
+def parser(platform_info, sensor_info, lines):
+    """
+    parse and assign wave summary data from RDI ADCP Log Data
+
+    """
+
+    i = 0
+
+    for line in lines:
+        # split line and parse float and integers
+        rdi = []
+        sw = re.split(',', line)
+        for s in sw:
+            m = re.search(REAL_RE_STR, s)
+            if m:
+                rdi.append(float(m.groups()[0]))
+
+        # assign specific fields
+        n = len(rdi)
+        burst_num = int(rdi[0])  # Ensemble Number
+
+        # get sample datetime from data
+        sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(rdi[1:7])
+        sample_dt = scanf_datetime(sample_str, fmt='%y-%m-%d %H:%M:%S')
+        # datetime(*strptime(sample_str, "%y-%m-%d %H:%M:%S")[0:6])
+
+        # get sample datetime from filename
+        # compare with datetime from filename
+
+        sig_wave_ht = rdi[8]        # Significant Wave Height (Hs, meters)
+        peak_wave_period = rdi[9]   # Peak Wave Period (Tp, sec)
+        peak_wave_dir = rdi[10]     # Peak Wave Direction (deg N)
+        max_wave_ht = rdi[12]       # Maximum Wave Height (Hmax, meters)
+        mean_wave_period = rdi[13]  # Mean Wave Period (Tmean, sec)
+
+        water_depth = rdi[11]/1000  # Water Depth (meters) (based on ADCP backscatter or input config??)
+        nbins = int(rdi[14])  # Number of bins
+
+        # set up dict of data if first line
+        if i==0:
+            data = {
+                'en' : numpy.array(numpy.ones((len(lines),), dtype=int)*numpy.nan),
+                'dt' : numpy.array(numpy.ones((len(lines),), dtype=object)*numpy.nan),
+                'time' : numpy.array(numpy.ones((len(lines),), dtype=long)*numpy.nan),
+                'sig_wave_ht' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                'peak_wave_period' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                'peak_wave_dir' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                'max_wave_ht' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                'mean_wave_period' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                'water_depth' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                }
+
+        data['en'][i] = burst_num
+        data['dt'][i] = sample_dt           # sample datetime
+        data['time'][i] = dt2es(sample_dt)  # sample time in epoch seconds
+        data['sig_wave_ht'][i] = sig_wave_ht
+        data['peak_wave_period'][i] = peak_wave_period
+        data['peak_wave_dir'][i] = peak_wave_dir
+        data['max_wave_ht'][i] = max_wave_ht
+        data['mean_wave_period'][i] = mean_wave_period
+        data['water_depth'][i] = water_depth
+        i = i+1
+
+    return data
+
+def creator(platform_info, sensor_info, data):
+    #
+    #
+    title_str = sensor_info['description']+' at '+ platform_info['location']
+    global_atts = {
+        'title' : title_str,
+        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
+        'institution_url' : 'http://nccoos.unc.edu',
+        'institution_dods_url' : 'http://nccoos.unc.edu',
+        'metadata_url' : 'http://nccoos.unc.edu',
+        'references' : 'http://nccoos.unc.edu',
+        'contact' : 'Sara Haines (haines@email.unc.edu)',
+        #
+        'source' : 'directional wave (acoustic doppler) observation',
+        'history' : 'raw2proc using ' + sensor_info['process_module'],
+        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
+        # conventions
+        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
+        # SEACOOS CDL codes
+        'format_category_code' : 'directional waves',
+        'institution_code' : platform_info['institution'],
+        'platform_code' : platform_info['id'],
+        'package_code' : sensor_info['id'],
+        # institution specific
+        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
+        'project_url' : 'http://nccoos.unc.edu',
+        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
+        'start_date' : data['dt'][0].strftime("%Y-%m-%d %H:%M:%S"),
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        'process_level' : 'level1',
+        #
+        # must type match to data (e.g. fillvalue is real if data is real)
+        '_FillValue' : -99999.,
+        }
+
+    var_atts = {
+        # coordinate variables
+        'time' : {'short_name': 'time',
+                  'long_name': 'Time',
+                  'standard_name': 'time',
+                  'units': 'seconds since 1970-1-1 00:00:00 -0',  # UTC
+                  'axis': 'T',
+                  },
+        'lat' : {'short_name': 'lat',
+                 'long_name': 'Latitude',
+                 'standard_name': 'latitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_north',
+                 'valid_range':(-90.,90.),
+                 'axis': 'Y',
+                 },
+        'lon' : {'short_name': 'lon',
+                 'long_name': 'Longitude',
+                 'standard_name': 'longitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_east',
+                 'valid_range':(-180.,180.),
+                 'axis': 'X',
+                 },
+        'z' : {'short_name': 'z',
+               'long_name': 'Height',
+               'standard_name': 'height',
+               'reference':'zero at sea-surface',
+               'units': 'm',
+               'axis': 'Z',
+               },
+        # data variables
+        'en' : {'short_name': 'en',
+                'long_name': 'Ensemble Number',
+                'standard_name': 'ensemble_number',
+                'units': 'None',
+                },
+        'sig_wave_ht' : {'short_name': 'Hs',
+                         'long_name': 'Significant Wave Height',
+                         'definition': 'Four times the square root of the zeroth moment of the wave spectrum (4*sqrt(m0))',
+                         'standard_name': 'significant_wave_height',
+                         'units': 'm',
+                         },
+        'peak_wave_period' : {'short_name': 'Tp',
+                              'long_name': 'Peak Wave Period',
+                              'definition': 'Period of strongest wave (wave energy maximum)',
+                              'standard_name': 'peak_wave_period',
+                              'units': 'sec',
+                              },
+        'peak_wave_dir' : {'short_name': 'Dp',
+                           'long_name': 'Peak Wave Direction',
+                           'definition': 'Direction from which strongest waves (wave energy max) are coming',
+                           'standard_name': 'peak_wave_from_direction',
+                           'units': 'deg from N',
+                           'reference': 'clockwise from True North',
+                           },
+        'max_wave_ht' : {'short_name': 'Hmax',
+                         'long_name': 'Maximum Wave Height',
+                         'standard_name': 'max_wave_height',
+                         'units': 'm',
+                         },
+        'mean_wave_period' : {'short_name': 'Tmean',
+                              'long_name': 'Mean Wave Period',
+                              'definition': 'Zeroth moment of the non-directional spectrum divided by the first moment (m0/m1)',
+                              'standard_name': 'mean_wave_period',
+                              'units': 'sec',
+                              },
+        'water_depth': {'short_name': '',
+                        'long_name': 'Water Depth',
+                        'standard_name': 'water_depth',
+                        'units': 'm',
+                        },
+        }
+
+    # integer values
+    ntime = NC.UNLIMITED
+    nlat = 1
+    nlon = 1
+    nz = 1
+
+    # dimension names use tuple so order of initialization is maintained
+    dim_inits = (
+        ('ntime', NC.UNLIMITED),
+        ('nlat', 1),
+        ('nlon', 1),
+        ('nz', 1)
+        )
+
+    # using tuple of tuples so order of initialization is maintained
+    # using dict for attributes order of init not important
+    # use dimension names not values
+    # (varName, varType, (dimName1, [dimName2], ...))
+    var_inits = (
+        # coordinate variables
+        ('time', NC.INT, ('ntime',)),
+        ('lat', NC.FLOAT, ('nlat',)),
+        ('lon', NC.FLOAT, ('nlon',)),
+        ('z', NC.FLOAT, ('nz',)),
+        # data variables
+        ('en', NC.INT, ('ntime', )),
+        ('sig_wave_ht', NC.FLOAT, ('ntime',)),
+        ('peak_wave_period', NC.FLOAT, ('ntime',)),
+        ('peak_wave_dir', NC.FLOAT, ('ntime',)),
+        ('max_wave_ht', NC.FLOAT, ('ntime',)),
+        ('mean_wave_period', NC.FLOAT, ('ntime',)),
+        ('water_depth', NC.FLOAT, ('ntime',)),
+        )
+
+    # subset data only to month being processed (see raw2proc.process())
+    i = data['in']
+
+    # var data
+    var_data = (
+        ('lat', platform_info['lat']),
+        ('lon', platform_info['lon']),
+        ('z', 0),
+        #
+        ('time', data['time'][i]),
+        ('en', data['en'][i]),
+        ('sig_wave_ht', data['sig_wave_ht'][i]),
+        ('peak_wave_period', data['peak_wave_period'][i]),
+        ('peak_wave_dir', data['peak_wave_dir'][i]),
+        ('max_wave_ht', data['max_wave_ht'][i]),
+        ('mean_wave_period', data['mean_wave_period'][i]),
+        ('water_depth', data['water_depth'][i]),
+        )
+
+    return (global_atts, var_atts, dim_inits, var_inits, var_data)
+
+def updater(platform_info, sensor_info, data):
+    #
+    global_atts = {
+        # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
+        # last date in monthly file
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        }
+
+    # data variables
+    # update any variable attributes like range, min, max
+    var_atts = {}
+    # var_atts = {
+    #     'u': {'max': max(data.u),
+    #           'min': min(data.u),
+    #           },
+    #     'v': {'max': max(data.v),
+    #           'min': min(data.v),
+    #           },
+    #     }
+
+    # subset data only to month being processed (see raw2proc.process())
+    i = data['in']
+
+    # data
+    var_data = (
+        ('time', data['time'][i]),
+        ('en', data['en'][i]),
+        ('sig_wave_ht', data['sig_wave_ht'][i]),
+        ('peak_wave_period', data['peak_wave_period'][i]),
+        ('peak_wave_dir', data['peak_wave_dir'][i]),
+        ('max_wave_ht', data['max_wave_ht'][i]),
+        ('mean_wave_period', data['mean_wave_period'][i]),
+        ('water_depth', data['water_depth'][i]),
+        )
+
+    return (global_atts, var_atts, var_data)
+
+#
Index: raw2proc/trunk/raw2proc/proc_rdi_rawdata_adcp.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/proc_rdi_rawdata_adcp.py (revision 101)
@@ -1,0 +1,278 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2007-12-27 15:16:33 haines>
+"""
+how to parse data, and assert what data and info goes into
+creating and updating monthly netcdf files
+
+RDI/Wavesmon processed adcp current profile data
+
+parser : sample date and time, ensemble number, currents
+         and wave summary output from WavesMon software
+nc_creator :
+nc_updater :
+
+Examples
+--------
+
+>> (parse, create, update) = load_processors('proc_rdi_logdata')
+>> data = parse(platform_info, sensor_info, lines)
+>> create(platform_info, sensor_info, data)
+>> update(platform_info, sensor_info, data)
+
+"""
+
+from raw2proc import *
+from procutil import *
+from ncutil import *
+from time import strptime
+
+now = datetime.utcnow()
+now = now.replace(microsecond=0)
+
+def parser(platform_info, sensor_info, lines):
+    """
+    parse and assign currents data from RDI ADCP Log Data
+
+    """
+    i = 0
+
+    for line in lines:
+        # split line and parse float and integers
+        rdi = []
+        sw = re.split(',', line)
+        for s in sw:
+            m = re.search(REAL_RE_STR, s)
+            if m:
+                rdi.append(float(m.groups()[0]))
+
+        # assign specific fields
+        n = len(rdi)
+        burst_num = int(rdi[0])  # Ensemble Number
+
+        # get sample datetime from data
+        sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(rdi[1:7])
+        sample_dt = datetime(*strptime(sample_str, "%y-%m-%d %H:%M:%S")[0:6])
+
+        # get sample datetime from filename
+        # compare with datetime from filename
+
+        sig_wave_ht = rdi[8]       # Significant Wave Height (Hs, meters)
+        peak_wave_period = rdi[9]  # Peak Wave Period (Tp, sec)
+        peak_wave_dir = rdi[10]    # Peak Wave Direction (deg N)
+        max_wave_ht = rdi[12]      # Maximum Wave Height (Hmax, meters)
+        max_wave_period = rdi[13]  # Maximum Wave Period (Tmax, sec)
+
+        water_depth = rdi[11]/1000  # Water Depth (meters) (based on ADCP backscatter or input config??)
+        nbins = int(rdi[14])  # Number of bins
+
+        current_spd = numpy.array(rdi[15::2])  # starting at idx=15 skip=2 to end
+        current_dir = numpy.array(rdi[16::2])  # starting at idx=16 skip=2 to end
+
+        if nbins!=sensor_info['nbins']:
+            print 'Number of bins reported in data ('+ \
+                  str(nbins)+') does not match config number ('+ \
+                  str(sensor_info['nbins'])+')'
+
+        if len(current_spd)!=nbins or len(current_dir)!=nbins:
+            print 'Data length does not match number of bins in data'
+
+        ibad = (current_spd==-32768) | (current_dir==-32768)
+        current_spd[ibad] = numpy.nan
+        current_dir[ibad] = numpy.nan
+
+        # these items can also be teased out of raw adcp but for now get from config file
+        th = sensor_info['transducer_ht']   # Transducer height above bottom (meters)
+        bh = sensor_info['blanking_ht']     # Blanking height above Transducer (meters)
+        bin_size = sensor_info['bin_size']  # Bin Size (meters)
+
+        # compute height for each bin above the bottom
+        bins = numpy.arange(1,nbins+1)
+        bin_habs = (bins*bin_size+bin_size/2)+th+bh
+
+        # compute water mask
+        # Using George Voulgaris' method based on water depth
+        # minus half of the significant wave height (Hs)
+        # and computed habs
+        # if positive is up, what's less than zero depth?
+        bin_depths = bin_habs-(water_depth-sig_wave_ht/2)
+        iwater = bin_depths+bin_size/2 < 0
+
+        z = bin_habs
+        # check that length of bin_depths is equal to nbins
+        u = numpy.ones(nbins)*numpy.nan
+        v = numpy.ones(nbins)*numpy.nan
+
+        u[iwater] = current_spd[iwater]*numpy.sin(current_dir[iwater]*numpy.pi/180)
+        v[iwater] = current_spd[iwater]*numpy.cos(current_dir[iwater]*numpy.pi/180)
+
+        # set up dict of data if first line
+        if i==0:
+            data = {
+                'en' : numpy.array(numpy.ones((len(lines),), dtype=float)*numpy.nan),
+                'dt' : numpy.array(numpy.ones((len(lines),), dtype=object)*numpy.nan),
+                'nbins' : numpy.array(numpy.zeros((len(lines),), dtype=int)),
+                'z' : numpy.array(numpy.ones((len(lines),nbins), dtype=float)*numpy.nan),
+                'u' : numpy.array(numpy.ones((len(lines),nbins), dtype=float)*numpy.nan),
+                'v' : numpy.array(numpy.ones((len(lines),nbins), dtype=float)*numpy.nan),
+                }
+
+        data['en'][i] = burst_num
+        data['dt'][i] = sample_dt
+        data['nbins'][i] = nbins
+        data['z'][i] = z
+        data['u'][i] = u
+        data['v'][i] = v
+        i = i+1
+
+    return data
+
+def creator(platform_info, sensor_info, data):
+    #
+    #
+    title_str = sensor_info['description']+' at '+ platform_info['location']
+    global_atts = {
+        'title' : title_str,
+        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
+        'institution_url' : 'http://nccoos.unc.edu',
+        'institution_dods_url' : 'http://nccoos.unc.edu',
+        'metadata_url' : 'http://nccoos.unc.edu',
+        'references' : 'http://nccoos.unc.edu',
+        'contact' : 'Sara Haines (haines@email.unc.edu)',
+        #
+        'source' : 'fixed-profiler (acoustic doppler) observation',
+        'history' : 'Data processed by NCCOOS',
+        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
+        # conventions
+        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
+        # SEACOOS CDL codes
+        'format_category_code' : 'fixed-profiler',
+        'institution_code' : platform_info['institution'],
+        'platform_code' : platform_info['id'],
+        'package_code' : sensor_info['id'],
+        # institution specific
+        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
+        'project_url' : 'http://nccoos.unc.edu',
+        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
+        'start_date' : data['dt'][0].strftime("%Y-%m-%d %H:%M:%S"),
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'creation_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        'modification_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        'process_level' : 'level1',
+        #
+        # must type match to data (e.g. fillvalue is real if data is real)
+        '_FillValue' : -99999.,
+        }
+
+    var_atts = {
+        # coordinate variables
+        'time' : {'short_name': 'time',
+                  'long_name': 'Time',
+                  'standard_name': 'time',
+                  'units': 'seconds since 1970-1-1 00:00:00 -0',  # UTC
+                  'axis': 'T',
+                  },
+        'lat' : {'short_name': 'lat',
+                 'long_name': 'Latitude',
+                 'standard_name': 'latitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_north',
+                 'valid_range':(-90.,90.),
+                 'axis': 'Y',
+                 },
+        'lon' : {'short_name': 'lon',
+                 'long_name': 'Longitude',
+                 'standard_name': 'longitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_east',
+                 'valid_range':(-180.,180.),
+                 'axis': 'X',
+                 },
+        'z' : {'short_name': 'z',
+               'long_name': 'Height',
+               'standard_name': 'height',
+               'reference':'zero at sea-surface',
+               'units': 'm',
+               'axis': 'Z',
+               },
+        # data variables
+        'u': {'long_name': 'East/West Component of Current',
+              'standard_name': 'eastward_current',
+              'units': 'm s-1',
+              'reference': 'clockwise from True East',
+              },
+        'v': {'long_name': 'North/South Component of Current',
+              'standard_name': 'northward_current',
+              'units': 'm s-1',
+              'reference': 'clockwise from True North',
+              },
+        'w': {'long_name': 'Upward/Downward Component of Current',
+              'standard_name': 'upward_current',
+              'units': 'm s-1',
+              'positive': 'up',
+              },
+        'back_scatter':{'long_name': 'Backscatter',
+                        'standard_name': 'back_scatter',
+                        'units': 'decibels',
+                        },
+        'wtemp': {'long_name': 'Water Temperature',
+                  'standard_name': 'water_temperature',
+                  'units': 'degrees Celsius',
+                  },
+        }
+
+    # integer values
+    ntime = NC.UNLIMITED
+    nlat = 1
+    nlon = 1
+    nz = sensor_info['nbins']
+
+    # dimension names use tuple so order of initialization is maintained
+    dimensions = ('ntime', 'nlat', 'nlon', 'nz')
+
+    # using tuple of tuples so order of initialization is maintained
+    # using dict for attributes order of init not important
+    # use dimension names not values
+    # (varName, varType, (dimName1, [dimName2], ...))
+    var_inits = (
+        # coordinate variables
+        ('time', NC.INT, ('ntime',)),
+        ('lat', NC.FLOAT, ('nlat',)),
+        ('lon', NC.FLOAT, ('nlon',)),
+        ('z', NC.FLOAT, ('nz',)),
+        # data variables
+        ('u', NC.FLOAT, ('ntime', 'nz')),
+        ('v', NC.FLOAT, ('ntime', 'nz')),
+        ('w', NC.FLOAT, ('ntime', 'nz')),
+        ('back_scatter', NC.FLOAT, ('ntime', 'nz')),
+        ('wtemp', NC.FLOAT, ('ntime',)),
+        )
+
+    # var data
+    var_data = (
+        ('lat', platform_info['lat']),
+        ('lon', platform_info['lon']),
+        ('z', []),
+        ('u', []),
+        ('v', []),
+        ('w', []),
+        ('back_scatter', []),
+        ('wtemp', []),
+        )
+
+    return (global_atts, dimensions, var_inits, var_data)
+
+def updater(platform_info, sensor_info, data):
+    #
+    global_atts = {
+        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'creation_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        'modification_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        }
+    # var data
+    var_data = (
+        ('u', data['u']),
+        ('v', data['v']),
+        ('w', data['w']),
+        ('back_scatter', data['back_scatter']),
+        ('wtemp', data['wtemp']),
+        )
+    return (global_atts, var_data)
+#
Index: raw2proc/trunk/raw2proc/procutil.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/procutil.py (revision 101)
@@ -1,0 +1,279 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2008-01-03 09:50:53 haines>
+"""Utilities to help data processing
+
+   Mostly time functions right now
+
+   TO DO:
+     check_configs()
+"""
+
+__version__ = "v0.1"
+__author__ = "Sara Haines"
+
+from datetime import datetime, timedelta, tzinfo
+from dateutil.tz import tzlocal, tzutc
+import time
+
+def check_configs():
+    """Test config files for conformity
+
+    check either one or all for a platform
+
+    id in filename == platform.id
+    datetime in filename <= platform.config_start_date
+      (close in time, usually the same day)
+    also platform.config_start_date < platform.config_end_date
+      (there needs to be some time that the platform was operational)
+    test existence of specific structural elements (platform info and sensor info)
+      and specific fields for both platform and sensor
+    verify that for each platform_info['packages'] there is sensor_info and same id
+      for pi['packages'][0] in si.keys()
+      pi['packages'][0] == si['adcp']['id']
+    bounds on data in fields
+    show difference between two consecutive configs?
+    pretty print to screen of dictionary info for platform and sensor info
+
+    cn = os.path.splitext(os.path.basename(config))[0]
+    cndt = filt_datetime(os.path.basename(config))
+    pi = get_config(cn+'.platform_info')
+    if pi['config_start_date']:
+        config_start_dt = filt_datetime(pi['config_start_date'])
+    elif pi['config_start_date'] == None:
+        config_start_dt = now_dt
+    if pi['config_end_date']:
+        config_end_dt = filt_datetime(pi['config_end_date'])
+    elif pi['config_end_date'] == None:
+        config_end_dt = now_dt
+
+    print cn + ' -----------------'
+    print cndt
+    print config_start_dt
+    print config_end_dt
+    print now_dt
+    print 'file date ok? ' + str(cndt <= config_start_dt)
+    print 'operation date ok? ' + str(config_start_dt < config_end_dt)
+    """
+
+def dt2es(dt):
+    """Convert datetime object to epoch seconds (es) as seconds since Jan-01-1970 """
+    # microseconds of timedelta object not used
+    delta = dt - datetime(1970,1,1,0,0,0)
+    es = delta.days*24*60*60 + delta.seconds
+    return es
+
+def es2dt(es):
+    """ Convert epoch seconds (es) to datetime object"""
+    dt = datetime(*time.gmtime(es)[0:6])
+    return dt
+
+def find_months(year, month=1):
+    """Find which months to process
+
+    Since data are in subdirectories based on months, determine
+    previous, current, and next month to look in directories for data
+    of the current month or month to process.
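+    For example, processing month '2007_02' means looking in the
+    2007_01, 2007_02, and 2007_03 raw subdirectories.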
+
+    :Parameters:
+        year : int value or str 'yyyy_mm'
+        month : int value
+
+    :Returns:
+        which_months : tuple of 3 datetime objects
+            (prev_month, current_month, next_month)
+
+    Examples
+    --------
+    >>> find_months(2007, 2)
+    >>> find_months('2007_02')
+
+    """
+    if type(year) == int and type(month) == int :
+        dt = datetime(year, month, day=1)
+        this_month = dt
+    elif type(year) == str :
+        dt = filt_datetime(year)
+        this_month = dt
+    #
+    if dt.month == 1:  # if January
+        prev_month = datetime(dt.year-1, month=12, day=1)  # Dec
+        next_month = datetime(dt.year, dt.month+1, day=1)  # Feb
+    elif dt.month == 12:  # if December
+        prev_month = datetime(dt.year, dt.month-1, day=1)  # Nov
+        next_month = datetime(dt.year+1, month=1, day=1)   # Jan
+    else:
+        prev_month = datetime(dt.year, dt.month-1, day=1)
+        next_month = datetime(dt.year, dt.month+1, day=1)
+    #
+    return (prev_month, this_month, next_month)
+
+def this_month():
+    """Return this month (GMT) as formatted string (yyyy_mm) """
+    this_month_str = "%4d_%02d" % time.gmtime()[0:2]
+    return this_month_str
+
+def scanf_datetime(ts, fmt='%Y-%m-%dT%H:%M:%S'):
+    """Convert string representing date and time to datetime object"""
+    # default string format follows convention YYYY-MM-DDThh:mm:ss
+
+    t = time.strptime(ts, fmt)
+    # the '*' operator unpacks the tuple, producing the argument list.
+    dt = datetime(*t[0:6])
+    return dt
+
+def filt_datetime(input_string, remove_ext=True):
+    """
+    Following the template, (YY)YYMMDDhhmmss
+    and versions of this with decreasing time precision,
+    find the most precise, reasonable string match and
+    return its datetime object.
+    """
+
+    # remove any trailing filename extension
+    from os.path import splitext
+    import re
+    if remove_ext:
+        (s, e) = splitext(input_string)
+        input_string = s
+
+    # YYYYMMDDhhmmss and should handle most cases of the stamp
+    # other forms this should pass
+    #   YY_MM_DD_hh:mm:ss
+    #   YYYY_MM_DD_hh:mm:ss
+    #   YYYY,MM,DD,hh,mm,ss
+    #   YY,MM,DD,hh,mm,ss
+
+    case1_regex = r"""
+    # case 1: (YY)YYMMDDhhmmss
+    (\d{4}|\d{2})  # 2- or 4-digit YEAR (e.g. '07' or '2007')
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or '-')
+    (\d{2})        # 2-digit MONTH (e.g. '12')
+    \D?            # optional 1 character non-digit separator
+    (\d{2})        # 2-digit DAY of month (e.g. '10')
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or 'T')
+    (\d{2})        # 2-digit HOUR (e.g. '10')
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or ':')
+    (\d{2})        # 2-digit MINUTE (e.g. '10')
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or ':')
+    (\d{2})        # 2-digit SECOND (e.g. '10')
+    """
+
+    case2_regex = r"""
+    # case 2: (YY)YYMMDDhhmm (no seconds)
+    (\d{4}|\d{2})  # 2- or 4-digit YEAR
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or '-')
+    (\d{2})        # 2-digit MONTH
+    \D?            # optional 1 character non-digit separator
+    (\d{2})        # 2-digit DAY
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or 'T')
+    (\d{2})        # 2-digit HOUR
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or ':')
+    (\d{2})        # 2-digit MINUTE
+    """
+
+    case3_regex = r"""
+    # case 3: (YY)YYMMDDhh (no seconds, no minutes)
+    (\d{4}|\d{2})  # 2- or 4-digit YEAR
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or '-')
+    (\d{2})        # 2-digit MONTH
+    \D?            # optional 1 character non-digit separator
+    (\d{2})        # 2-digit DAY
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or 'T')
+    (\d{2})        # 2-digit HOUR
+    """
+
+    case4_regex = r"""
+    # case 4: (YY)YYMMDD (no time values, just date)
+    (\d{4}|\d{2})  # 2- or 4-digit YEAR
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or '-')
+    (\d{2})        # 2-digit MONTH
+    \D?            # optional 1 character non-digit separator
+    (\d{2})        # 2-digit DAY
+    """
+
+    case5_regex = r"""
+    # case 5: (YY)YYMM (no time values, just month year)
+    (\d{4}|\d{2})  # 2- or 4-digit YEAR
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or '-')
+    (\d{2})        # 2-digit MONTH
+    """
+
+    ## Verbose regular expressions require use of re.VERBOSE flag.
+    ## so we can use multiline regexp
+
+    # cases are ordered from precise to more coarse resolution of time
+    cases = [case1_regex, case2_regex, case3_regex, case4_regex, case5_regex]
+    patterns = [re.compile(c, re.VERBOSE) for c in cases]
+    matches = [p.search(input_string) for p in patterns]
+
+    # for testing, try to compute datetime objects
+    # just because there is a match does not mean it makes sense
+    for ind in range(len(matches)):
+        if bool(matches[ind]):
+            # print matches[ind].groups()
+            bits = matches[ind].groups()
+            values = [int(yi) for yi in bits]
+            # check for 2-digit year
+            if values[0] < 50:
+                values[0] += 2000
+            elif values[0]>=50 and values[0]<100:
+                values[0] += 1900
+            #
+            # we must have at least 3 arg input to datetime
+            if len(values)==1:
+                values.extend([1,1])  # add First of January
+            elif len(values)==2:
+                values.extend([1])    # add first day of month
+
+            #
+            # compute dt
+            try:
+                dt = datetime(*values)
+            except ValueError, e:
+                # value error if something not valid for datetime
+                # e.g. month 1...12, something parsed wrong
+                dt = None
+            else:
+                # absolute difference in days from now (UTC)
+                z = dt - datetime.utcnow()
+                daysdiff = abs(z.days)
+                # if this date unreasonable (>10 years*365), throw it out
+                # something parsed wrong
+                if daysdiff > 3650:
+                    dt = None
+        else:
+            dt = None
+
+        # place datetime object or None within sequence of matches
+        matches[ind] = dt
+
+    # find the first (most precise) date match, since there may be more
+    # than one match from the coarser templates; bad parses have already
+    # been thrown out
+    b = [bool(x) for x in matches]
+    try:
+        ind = b.index(True)
+    except ValueError, e:
+        print 'filt_datetime: No date found in ', input_string
+        dt = None
+    else:
+        dt = matches[ind]
+    return dt
+
+def display_time_diff(diff):
+    """Display time difference as HH:MM, prefixed by number of weeks (W)
+    and days (D) if necessary"""
+    # weeks, days = divmod(diff.days, 7)
+    days = diff.days
+    minutes, seconds = divmod(diff.seconds, 60)
+    hours, minutes = divmod(minutes, 60)
+    # if (weeks>2 and days>0):
+    #     str = "%d Weeks, %d Days %02d:%02d" % (weeks, days, hours, minutes)
+    if (days==1):
+        str = "%02d:%02d" % (24+hours, minutes)
+    elif (days>1):
+        str = "%d Days %02d:%02d" % (days, hours, minutes)
+    else:
+        str = "%02d:%02d" % (hours, minutes)
+    return str
+
+#
Index: raw2proc/trunk/raw2proc/raw2proc.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/raw2proc.py (revision 101)
@@ -1,0 +1,377 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2008-01-14 11:03:16 haines>
+"""Process raw data to monthly netCDF data files
+
+This module processes raw ascii- or binary-data from different NCCOOS
+sensors (ctd, adcp, waves-adcp, met) based on manual or automated
+operation.  If automated processing, add raw data (level0) from all
+active sensors to current month's netcdf data files (level1) with the
+current configuration setting.
+If manual processing, determine which
+configurations to use for requested platform, sensor, and month.
+
+:Processing steps:
+  0. raw2proc auto or manual for platform, sensor, month
+  1. list of files to process
+  2. parse data
+  3. create, update netcdf
+
+  to-do
+  3. qc (measured) data
+  4. process derived data (and regrid?)
+  5. qc (measured and derived) data flags
+
+"""
+
+__version__ = "v0.1"
+__author__ = "Sara Haines"
+
+import sys
+import os
+import re
+
+# define config file location to run under cron
+defconfigs='/afs/isis.unc.edu/depts/marine/workspace/haines/nc-coos/raw2proc'
+
+import numpy
+
+from procutil import *
+from ncutil import *
+
+REAL_RE_STR = '\\s*(-?\\d(\\.\\d+|)[Ee][+\\-]\\d\\d?|-?(\\d+\\.\\d*|\\d*\\.\\d+)|-?\\d+)\\s*'
+
+def load_data(inFile):
+    lines=None
+    if os.path.exists(inFile):
+        f = open(inFile, 'r')
+        lines = f.readlines()
+        f.close()
+        if len(lines)<=0:
+            print 'Empty file: '+ inFile
+    else:
+        print 'File does not exist: '+ inFile
+    return lines
+
+def import_parser(name):
+    mod = __import__('parsers')
+    parser = getattr(mod, name)
+    return parser
+
+def import_processors(mod_name):
+    mod = __import__(mod_name)
+    parser = getattr(mod, 'parser')
+    creator = getattr(mod, 'creator')
+    updater = getattr(mod, 'updater')
+    return (parser, creator, updater)
+
+def get_config(name):
+    """Usage Example >>>sensor_info = get_config('bogue_config_20060918.sensor_info')"""
+    components = name.split('.')
+    mod = __import__(components[0])
+    for comp in components[1:]:
+        attr = getattr(mod, comp)
+    return attr
+
+def find_configs(platform, yyyy_mm, config_dir=''):
+    """Find which configuration files apply for specified platform and month
+
+    :Parameters:
+        platform : string
+            Platform id to process (e.g. 'bogue')
+        yyyy_mm : string
+            Year and month of data to process (e.g. '2007_07')
+
+    :Returns:
+        cns : list of str
+            List of configurations that overlap with desired month
+            If empty [], no configs were found
+    """
+    import glob
+    # list of config files based on platform
+    configs = glob.glob(os.path.join(config_dir, platform + '_config_*.py'))
+    now_dt = datetime.utcnow()
+    now_dt = now_dt.replace(microsecond=0)
+    # determine when month starts and ends
+    (prev_month, this_month, next_month) = find_months(yyyy_mm)
+    month_start_dt = this_month
+    month_end_dt = next_month - timedelta(seconds=1)
+    # print month_start_dt; print month_end_dt
+    #
+    cns = []
+    for config in configs:
+        # datetime from filename
+        cn = os.path.splitext(os.path.basename(config))[0]
+        cndt = filt_datetime(os.path.basename(config))
+        pi = get_config(cn+'.platform_info')
+        if pi['config_start_date']:
+            config_start_dt = filt_datetime(pi['config_start_date'])
+        elif pi['config_start_date'] == None:
+            config_start_dt = now_dt
+        if pi['config_end_date']:
+            config_end_dt = filt_datetime(pi['config_end_date'])
+        elif pi['config_end_date'] == None:
+            config_end_dt = now_dt
+        #
+        if (config_start_dt <= month_start_dt or config_start_dt <= month_end_dt) and \
+           (config_end_dt >= month_start_dt or config_end_dt >= month_end_dt):
+            cns.append(cn)
+    return cns
+
+def find_active_configs(config_dir=''):
+    """Find which configuration files are active
+
+    :Returns:
+        cns : list of str
+            List of currently active configurations (config_end_date is None)
+            If empty [], no configs were found
+    """
+    import glob
+    # list of all config files
+    configs = glob.glob(os.path.join(config_dir, '*_config_*.py'))
+    now_dt = datetime.utcnow()
+    now_dt = now_dt.replace(microsecond=0)
+    #
+    cns = []
+    for config in configs:
+        # datetime from filename
+        cn = os.path.splitext(os.path.basename(config))[0]
+        cndt = filt_datetime(os.path.basename(config))
+        pi = get_config(cn+'.platform_info')
+        if pi['config_end_date'] == None:
+            cns.append(cn)
+    return cns
+
+def find_raw(si, yyyy_mm):
+    """Determine which list of raw files to process for month """
+    import glob
+    # determine when month starts and ends
+    #
+    months = find_months(yyyy_mm)
+    # list all the raw files in prev-month, this-month, and next-month
+    all_raw_files = []
+    for mon in months:
+        mstr = mon.strftime('%Y_%m')
+        gs = os.path.join(si['raw_dir'], mstr, si['raw_file_glob'])
+        all_raw_files.extend(glob.glob(gs))
+
+    all_raw_files.sort()
+
+    # ****** (SMH) NOTE: Will need to override looking in specific
+    # subdirs of months if all data is contained in one file for long
+    # deployment, such as with adcp binary data.
+
+    #
+    dt_start = si['proc_start_dt']-timedelta(days=1)
+    dt_end = si['proc_end_dt']+timedelta(days=1)
+    raw_files = []; raw_dts = []
+    # compute datetime for each file
+    for fn in all_raw_files:
+        fndt = filt_datetime(os.path.basename(fn))
+        if fndt:
+            if dt_start <= fndt <= dt_end:
+                raw_files.append(fn)
+                raw_dts.append(fndt)
+
+    return (raw_files, raw_dts)
+
+def which_raw(pi, raw_files, dts):
+    """Further limit file names based on configuration file timeframe """
+
+    now_dt = datetime.utcnow()
+    now_dt = now_dt.replace(microsecond=0)
+    if pi['config_start_date']:
+        config_start_dt = filt_datetime(pi['config_start_date'])
+    elif pi['config_start_date'] == None:
+        config_start_dt = now_dt
+
+    if pi['config_end_date']:
+        config_end_dt = filt_datetime(pi['config_end_date'])
+    elif pi['config_end_date'] == None:
+        config_end_dt = now_dt
+
+    new_list = [raw_files[i] for i in range(len(raw_files)) \
+                if config_start_dt <= dts[i] <= config_end_dt]
+    return new_list
+
+def raw2proc(proctype, platform=None, package=None, yyyy_mm=None):
+    """
+    Process data either in auto-mode or manual-mode
+
+    If auto-mode, process newest data for all platforms, all
+    sensors.  Otherwise in manual-mode, process data for specified
+    platform, sensor package, and month.
+
+    :Parameters:
+        proctype : string
+            'auto' or 'manual'
+        platform : string
+            Platform id to process (e.g. 'bogue')
+        package : string
+            Sensor package id to process (e.g. 'adcp')
+        yyyy_mm : string
+            Year and month of data to process (e.g. '2007_07')
+
+    Examples
+    --------
+    >>> raw2proc(proctype='manual', platform='bogue', package='adcp', yyyy_mm='2007_06')
+    >>> raw2proc('manual', 'bogue', 'adcp', '2007_06')
+
+    """
+    print '\nStart time for raw2proc: %s\n' % start_dt.strftime("%Y-%b-%d %H:%M:%S UTC")
+
+    if proctype == 'auto':
+        print 'Processing in auto-mode, all platforms, all packages, latest data'
+        auto()
+    elif proctype == 'manual':
+        if platform and package and yyyy_mm:
+            print 'Processing manually ...'
+            print ' ... platform id : %s' % platform
+            print ' ... package name : %s' % package
+            print ' ... month : %s' % yyyy_mm
+            print ' ... starting at : %s' % start_dt.strftime("%Y-%m-%d %H:%M:%S UTC")
+            manual(platform, package, yyyy_mm)
+        else:
+            print 'raw2proc: Manual operation requires platform, package, and month'
+            print "  >>> raw2proc(proctype='manual', platform='bogue', package='adcp', yyyy_mm='2007_07')"
+    else:
+        print 'raw2proc: requires either auto or manual operation'
+
+def auto():
+    """Process all platforms, all packages, latest data
+
+    Notes
+    -----
+
+    1. determine which platforms (all platforms with currently active
+       config files, i.e. config_end_date is None)
+    2. for each platform
+       get latest config
+       for each package
+           (determine process for 'latest' data) copy to new area when grabbed
+           parse recent data
+           yyyy_mm is the current month
+           load this months netcdf, if new month, create this months netcdf
+           update modified date and append new data in netcdf
+
+    """
+    yyyy_mm = this_month()
+    months = find_months(yyyy_mm)
+    month_start_dt = months[1]
+    month_end_dt = months[2] - timedelta(seconds=1)
+
+    configs = find_active_configs(config_dir=defconfigs)
+    if configs:
+        # for each configuration
+        for cn in configs:
+            print ' ... config file : %s' % cn
+            pi = get_config(cn+'.platform_info')
+            asi = get_config(cn+'.sensor_info')
+            platform = pi['id']
+            # for each sensor package
+            for package in asi.keys():
+                print ' ... package name : %s' % package
+                si = asi[package]
+                si['proc_filename'] = '%s_%s_%s.nc' % (platform, package, yyyy_mm)
+                ofn = os.path.join(si['proc_dir'], si['proc_filename'])
+                si['proc_start_dt'] = month_start_dt
+                si['proc_end_dt'] = month_end_dt
+                if os.path.exists(ofn):
+                    # get last dt from current month file
+                    (es, units) = nc_get_time(ofn)
+                    last_dt = es2dt(es[-1])
+                    # if last_dt is newer than month_start_dt, start there
+                    # instead so only the newest data is processed
+                    if last_dt>=month_start_dt:
+                        si['proc_start_dt'] = last_dt
+
+                (raw_files, raw_dts) = find_raw(si, yyyy_mm)
+                raw_files = which_raw(pi, raw_files, raw_dts)
+                process(pi, si, raw_files, yyyy_mm)
+        #
+    else:
+        print ' ... ... ... \nNOTE: No active platforms\n'
+
+def manual(platform, package, yyyy_mm):
+    """Process data for specified platform, sensor package, and month
+
+    Notes
+    -----
+
+    1. determine which configs
+    2. for each config for specific platform
+       if have package in config
+           which raw files
+    """
+    # determine when month starts and ends
+    months = find_months(yyyy_mm)
+    month_start_dt = months[1]
+    month_end_dt = months[2] - timedelta(seconds=1)
+
+    configs = find_configs(platform, yyyy_mm, config_dir=defconfigs)
+
+    if configs:
+        # for each configuration
+        for index in range(len(configs)):
+            cn = configs[index]
+            print ' ... config file : %s' % cn
+            pi = get_config(cn+'.platform_info')
+            # month start and end dt to pi info
+            asi = get_config(cn+'.sensor_info')
+            if package in pi['packages']:
+                si = asi[package]
+                si['proc_start_dt'] = month_start_dt
+                si['proc_end_dt'] = month_end_dt
+                si['proc_filename'] = '%s_%s_%s.nc' % (platform, package, yyyy_mm)
+                ofn = os.path.join(si['proc_dir'], si['proc_filename'])
+                (raw_files, raw_dts) = find_raw(si, yyyy_mm)
+                raw_files = which_raw(pi, raw_files, raw_dts)
+                # remove any previous netcdf file (platform_package_yyyy_mm.nc)
+                if index==0 and os.path.exists(ofn):
+                    os.remove(ofn)
+                #
+                process(pi, si, raw_files, yyyy_mm)
+            else:
+                print ' ... ... \nNOTE: %s not operational on %s for %s\n' % (package, platform, yyyy_mm)
+    else:
+        print ' ... ... ... \nNOTE: %s not operational for %s\n' % (platform, yyyy_mm)
+
+def process(pi, si, raw_files, yyyy_mm):
+    # tailored data processing for different input file formats and control over output
+    (parse, create, update) = import_processors(si['process_module'])
+    for fn in raw_files:
+        # sys.stdout.write('... %s ... ' % fn)
+        lines = load_data(fn)
+        data = parse(pi, si, lines)
+        # determine which index of data is within the specified timeframe (usually the month)
+        data['in'] = (data['dt']>si['proc_start_dt']) & (data['dt']<=si['proc_end_dt'])
+        # if any records are in the month then write to netcdf
+        if data['in'].any():
+            sys.stdout.write('... %s ... ' % fn)
+            sys.stdout.write('%d\n' % len(data['in']))
+            ofn = os.path.join(si['proc_dir'], si['proc_filename'])
+            # update or create netcdf
+            if os.path.exists(ofn):
+                ut = update(pi,si,data)
+                nc_update(ofn, ut)
+            else:
+                ct = create(pi,si,data)
+                nc_create(ofn, ct)
+
+# globals
+start_dt = datetime.utcnow()
+start_dt = start_dt.replace(microsecond=0)
+
+if __name__ == "__main__":
+    import optparse
+    raw2proc('auto')
+
+    # for testing
+    # proctype='manual'; platform='bogue'; package='adcp'; yyyy_mm='2007_07'
+    # raw2proc(proctype='manual', platform='bogue', package='adcp', yyyy_mm='2007_07')
Index: raw2proc/trunk/raw2proc/test_raw2proc.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/test_raw2proc.py (revision 101)
@@ -1,0 +1,21 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2007-12-11 11:37:00 haines>
+"""
+Tests processing raw data to monthly netcdf
+"""
+
+# test interface within python
+raw2proc('auto')
+raw2proc(proctype='auto')
+raw2proc(platform='bogue',package='adcp')  # fails, proctype not defined
+
+raw2proc('manual')  # fails, not enough params
+raw2proc(proctype='manual')  # fails, not enough params
+
+raw2proc('bogue','adcp','2007_07')  # fails, proctype must be 'auto' or 'manual'
+raw2proc(proctype='manual', platform='bogue',package='adcp', yyyy_mm='2007_07')
+raw2proc(platform='bogue',package='adcp',yyyy_mm='2007_07')  # fails, proctype not given
+
+raw2proc(yyyy_mm='2007_07', package='adcp', platform='bogue')  # fails, proctype not given
+
+# test interface from shell
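+# e.g. one way to drive manual mode from the command line (a sketch;
+# raw2proc.py's __main__ only runs auto-mode in this revision, and the
+# optparse wiring is not implemented yet):
+#
+#   python -c "from raw2proc import raw2proc; \
+#              raw2proc('manual', 'bogue', 'adcp', '2007_07')"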