#!/usr/bin/env python
# Last modified: Time-stamp: <2008-09-09 12:56:46 haines>
"""
parse ascii text file of YSI 6600 V2 water quality data (.dat)

load data file
parse data into variables for appending to netCDF data
"""

REAL_RE_STR = '\\s*(-?\\d(\\.\\d+|)[Ee][+\\-]\\d\\d?|-?(\\d+\\.\\d*|\\d*\\.\\d+)|-?\\d+)\\s*'

import sys
import os
import re

def parse_avp_YSI_6600V2(fn, lines):
    """
    parse Automated Vertical Profile Station (AVP) Water Quality Data

    month, day, year, hour, min, sec, temp (deg. C), conductivity (mS/cm),
    salinity (ppt or PSU), depth (meters), pH, turbidity (NTU),
    chlorophyll (micrograms per liter), DO (micrograms per liter)

    Notes
    -----
    1. Column Format

    temp, cond,    salin, depth, pH, turb,  chl,    DO
    (C),  (mS/cm), (ppt), (m),   pH, (NTU), (ug/l), (ug/l)

    Profile Time: 00:30:00
    Profile Date: 08/18/2008
    Profile Depth: 255.0 cm
    Profile Location: Stones Bay
    Serial No: 00016B79, ID: AVP1_SERDP
    08/18/08 00:30:06 26.94  41.87  26.81   0.134  8.00   3.4   4.5   6.60
    08/18/08 00:30:07 26.94  41.87  26.81   0.143  8.00   3.4   4.8   6.59
    08/18/08 00:30:08 26.94  41.87  26.81   0.160  8.00   3.4   4.8   6.62
    08/18/08 00:30:09 26.94  41.87  26.81   0.183  8.00   3.4   4.8   6.66

    2. While each parameter is measured uniquely with time and depth such that
    temp(t) and z(t) match up with time, we want to grid depth every 1 cm and
    make each param as temp(t,z).

    Tony Whipple at IMS says 'The AVPs sample at one second intervals.
    Between the waves and the instrument descending from a spool of line with
    variable radius it works out to about 3-5 cm between observations on
    average.  When I process the data to make the images, I bin the data
    every 10 cm and take the average of however many observations fell within
    that bin.'

    Do we interpolate or average samples in bin?  (This parser averages; see
    the standalone sketch appended at the end of this file.)
    """
    import numpy
    from datetime import datetime
    from time import strptime

    # get sample datetime from filename (not used further below)
    # fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # how many profiles in one file, count number of "Profile Time:" in lines
    nprof = 0
    for line in lines:
        m = re.search("Profile Time:", line)
        if m:
            nprof = nprof + 1

    # remove first occurrence of blank line if within first 10-40 lines
    # and put it on the end to signal end of profile after last profile
    for i in range(len(lines[0:40])):
        if re.search("^ \r\n", lines[i]):
            # print str(i) + " " + lines[i] + " " + lines[i+1]
            blank_line = lines.pop(i)
            lines.append(blank_line)
            break  # only the first occurrence

    bin_size = 0.1  # 10 cm or 0.1 m
    z = numpy.arange(0, 4.0, bin_size, dtype=float)

    N = nprof
    nbins = len(z)

    data = {
        'dt'          : numpy.array(numpy.ones((N,), dtype=object)*numpy.nan),
        'time'        : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'water_depth' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'wtemp'       : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'cond'        : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'salin'       : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'turb'        : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'ph'          : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'chl'         : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'do'          : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        }

    # current profile count
    i = 0

    for line in lines:
        ysi = []
        # split line and parse floats and integers
        sw = re.split(r'[\s/:]+', line)
        for s in sw:
            m = re.search(REAL_RE_STR, s)
            if m:
                ysi.append(float(m.groups()[0]))

        if re.search("Profile Time:", line):
            HH = ysi[0]
            MM = ysi[1]
            SS = ysi[2]
        elif re.search("Profile Date:", line):
            # Profile Date is MM/DD/YYYY (e.g. 08/18/2008)
            mm = ysi[0]
            dd = ysi[1]
            yyyy = ysi[2]
        elif re.search("Profile Depth:", line):
            water_depth = ysi[0]/100.  # cm to meters
            sample_str = '%02d-%02d-%d %02d:%02d:%02d' % (mm, dd, yyyy, HH, MM, SS)
            # if sensor_info['utc_offset']:
            #     sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%Y %H:%M:%S') + \
            #                 timedelta(hours=sensor_info['utc_offset'])
            # else:
            sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%Y %H:%M:%S')

            # initialize new profile at zero for averaging samples within each bin
            wtemp = numpy.zeros(nbins)
            cond = numpy.zeros(nbins)
            salin = numpy.zeros(nbins)
            turb = numpy.zeros(nbins)
            ph = numpy.zeros(nbins)
            chl = numpy.zeros(nbins)
            do = numpy.zeros(nbins)
            Ns = numpy.zeros(nbins)  # count samples per bin for averaging
        elif len(ysi) == 14:
            # get sample datetime from data
            # sample_str = '%02d-%02d-%2d %02d:%02d:%02d' % tuple(ysi[0:6])
            # if sensor_info['utc_offset']:
            #     sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%Y %H:%M:%S') + \
            #                 timedelta(hours=sensor_info['utc_offset'])
            # else:
            #     sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S')

            depth = ysi[9]  # depth (m)
            # boolean mask of the one 10 cm bin this sample falls in
            ibin = (z <= depth)*(depth < z + bin_size)
            Ns[ibin] = Ns[ibin] + 1
            wtemp[ibin] = wtemp[ibin] + ysi[6]   # water temperature (C)
            cond[ibin]  = cond[ibin] + ysi[7]    # conductivity (mS/cm)
            salin[ibin] = salin[ibin] + ysi[8]   # salinity (ppt or PSU??)
            # ph[ibin]  = ph[ibin] + ysi[10]     # pH
            turb[ibin]  = turb[ibin] + ysi[11]   # turbidity (NTU)
            chl[ibin]   = chl[ibin] + ysi[12]    # chlorophyll (ug/l)
            do[ibin]    = do[ibin] + ysi[13]     # dissolved oxygen (ug/l)
        elif len(ysi) == 0:
            # blank line signals end of profile:
            # average summations by sample count per bin;
            # where count is zero make it NaN so average is not divide by zero
            Ns[Ns==0] = numpy.nan*Ns[Ns==0]
            data['dt'][i] = sample_dt                # sample datetime
            data['time'][i] = dt2es(sample_dt)       # sample time in epoch seconds
            data['water_depth'][i] = water_depth
            # divide by counts
            data['wtemp'][i] = wtemp/Ns
            data['cond'][i] = cond/Ns
            data['salin'][i] = salin/Ns
            data['turb'][i] = turb/Ns
            data['ph'][i] = ph/Ns
            data['chl'][i] = chl/Ns
            data['do'][i] = do/Ns
            i = i + 1
        # if-elif
    # for line

    return data


def load_data(inFile):
    lines = None
    if os.path.exists(inFile):
        f = open(inFile, 'r')
        lines = f.readlines()
        f.close()
        if len(lines) <= 0:
            print 'Empty file: ' + inFile
    else:
        print 'File does not exist: ' + inFile
    return lines

# from jpier_config_20080411 import *
from raw2proc import *

def test1(fn):
    lines = load_data(fn)
    return parse_avp_YSI_6600V2(fn, lines)

def test2(logFile):
    # exercise the full raw2proc processing path
    # (config module names here are YYYYMMDD placeholders)
    pi = get_config('stones_config_YYYYMMDD.platform_info')
    asi = get_config('stones_config_YYYYMMDD.sensor_info')
    si = asi['met']
    lines = load_data(logFile)
    si['fn'] = logFile
    (parse, create, update) = import_processors(si['process_module'])
    return parse(pi, si, logFile)

if __name__ == '__main__':
    fn = '/seacoos/data/nccoos/level0/stones/avp/2008_08/AVP1_20080811.dat'
    # dataFile = 'D:/haines/nc-coos/raw2proc/stones/met/2008_08/AVP1_20080811.wnd'
    # logFile = sys.argv[1]
    try:
        data = test1(fn)
    except:
        pass
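
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the raw2proc processing chain): a minimal,
# self-contained example of the 10 cm depth-bin averaging described in the
# parse_avp_YSI_6600V2() docstring, using made-up sample depths and
# temperatures.  Handy for checking the binning logic in isolation.
def _demo_bin_average():
    import numpy
    bin_size = 0.1                                    # 10 cm bins
    z = numpy.arange(0, 4.0, bin_size, dtype=float)   # lower edge of each bin (m)
    # hypothetical profile samples: depth (m) and water temperature (deg C)
    depths = numpy.array([0.05, 0.08, 0.13, 0.16, 0.21])
    temps = numpy.array([26.94, 26.95, 26.90, 26.91, 26.85])
    wtemp = numpy.zeros(len(z))                       # per-bin running sum
    Ns = numpy.zeros(len(z))                          # per-bin sample count
    for depth, t in zip(depths, temps):
        ibin = (z <= depth)*(depth < z + bin_size)    # mask of the one bin hit
        Ns[ibin] = Ns[ibin] + 1
        wtemp[ibin] = wtemp[ibin] + t
    Ns[Ns == 0] = numpy.nan                           # empty bins average to NaN
    return wtemp/Ns                                   # binned mean temperature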
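
# ---------------------------------------------------------------------------
# Hypothetical stand-ins (assumption): filt_datetime(), scanf_datetime() and
# dt2es() come from the raw2proc package ("from raw2proc import *" above);
# their real implementations are not shown in this file.  The commented-out
# versions below are guesses based only on how this module calls them, so the
# parser could be exercised without the full package.
#
# def filt_datetime(fn):
#     """Guess: pull a YYYYMMDD date out of a filename like AVP1_20080811.dat."""
#     import re
#     from datetime import datetime
#     m = re.search(r'(\d{4})(\d{2})(\d{2})', fn)
#     return (datetime(*[int(g) for g in m.groups()]),) if m else (None,)
#
# def scanf_datetime(s, fmt='%m-%d-%Y %H:%M:%S'):
#     """Guess: parse a datetime string with the given format."""
#     from datetime import datetime
#     from time import strptime
#     return datetime(*strptime(s, fmt)[0:6])
#
# def dt2es(dt):
#     """Guess: convert a datetime to epoch seconds."""
#     import calendar
#     return calendar.timegm(dt.timetuple())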