Index: raw2proc/trunk/raw2proc/bogue_config_20060918.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/bogue_config_20060918.py (revision 101)
@@ -1,0 +1,34 @@
+platform_info = {
+    'id' : 'bogue',
+    'location' : 'Bogue Inlet Pier, Bogue, NC',
+    'lat' : 34.661568,   # degrees true (-) south, (+) north
+    'lon' : -77.034131,  # degrees true (-) west, (+) east
+    'mvar' : -9.7,       # degrees (-) west, (+) east
+    'water_depth' : 8.,  # meters
+    'institution' : 'nccoos',
+    #
+    'config_start_date' : '2006-09-18 14:29:00',
+    'config_end_date' : '2006-10-06 17:00:00',  # None or yyyy-mm-dd HH:MM:SS
+    'packages' : ('adcp', 'adcpwaves'),
+    }
+sensor_info = {
+    'adcp' : {'id' : 'adcp',
+              'description' : 'Current profile data',
+              'raw_dir' : '/scratch/nccoos/level0/bogue/adcp_bLogData',
+              'raw_file_glob' : '*',
+              'proc_dir' : '/scratch/nccoos/level1/bogue/adcp',
+              'process_module' : 'proc_rdi_logdata_adcp',
+              'nbins' : 50,
+              'bin_size' : 0.5,       # meters
+              'transducer_ht' : 0.5,  # meters above the bottom
+              'blanking_ht' : 1.6,    # meters above transducer
+              },
+    'adcpwaves' : {'id' : 'adcpwaves',
+                   'description' : 'Directional wave data',
+                   'raw_dir' : '/scratch/nccoos/level0/bogue/adcp_bLogData',
+                   'raw_file_glob' : '*',
+                   'proc_dir' : '/scratch/nccoos/level1/bogue/adcpwaves',
+                   'process_module' : 'proc_rdi_logdata_dw',
+                   },
+    }
Index: raw2proc/trunk/raw2proc/bogue_config_20070224.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/bogue_config_20070224.py (revision 101)
@@ -1,0 +1,34 @@
+platform_info = {
+    'id' : 'bogue',
+    'location' : 'Bogue Inlet Pier, Bogue, NC',
+    'lat' : 34.661568,   # degrees true (-) south, (+) north
+    'lon' : -77.034131,  # degrees true (-) west, (+) east
+    'mvar' : -9.7,       # degrees (-) west, (+) east
+    'water_depth' : 8.,  # meters
+    'institution' : 'nccoos',
+    #
+    'config_start_date' : '2007-02-24 00:49:00',
+    'config_end_date' : None,  # None or yyyy-mm-dd HH:MM:SS
+    'packages' : ('adcp', 'adcpwaves'),
+    }
+sensor_info = {
+    'adcp' : {'id' : 'adcp',
+              'description' : 'Current profile data',
+              'raw_dir' : '/scratch/nccoos/level0/bogue/adcp_bLogData',
+              'raw_file_glob' : '*',
+              'proc_dir' : '/scratch/nccoos/level1/bogue/adcp',
+              'process_module' : 'proc_rdi_logdata_adcp',
+              'nbins' : 50,
+              'bin_size' : 0.5,       # meters
+              'transducer_ht' : 0.5,  # meters above the bottom
+              'blanking_ht' : 1.6,    # meters above transducer
+              },
+    'adcpwaves' : {'id' : 'adcpwaves',
+                   'description' : 'Directional wave data',
+                   'raw_dir' : '/scratch/nccoos/level0/bogue/adcp_bLogData',
+                   'raw_file_glob' : '*',
+                   'proc_dir' : '/scratch/nccoos/level1/bogue/adcpwaves',
+                   'process_module' : 'proc_rdi_logdata_dw',
+                   },
+    }
Index: raw2proc/trunk/raw2proc/ncutil.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/ncutil.py (revision 101)
@@ -1,0 +1,172 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2008-01-08 16:10:22 haines>
+"""
+Create, update and load utilities for netcdf files
+"""
+
+from pycdf import *
+import os
+import numpy
+
+def nc_create(ncFile, (global_atts, var_atts, dim_inits, var_inits, var_data)):
+    """
+    Create new netcdf file
+
+    :Parameters:
+        ncFile : string
+            Path and name of file to create
+    """
+    try:
+        # Open new netCDF file, overwrite if it exists, create if it does not
+        nc = CDF(ncFile, NC.WRITE|NC.CREATE|NC.TRUNC)
+        # Automatically set define and data modes.
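+        # (automode lets pycdf switch between define and data mode as
+        # needed, so the dim/var definitions and data writes below can
+        # be freely interleaved)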
+        nc.automode()
+        #
+        # GLOBALS
+        for attrName in global_atts.keys():
+            setattr(nc, attrName, global_atts[attrName])
+
+        # DIMENSIONS
+        for dim in dim_inits:
+            dimName, dimValue = dim
+            # print '%s = %d' % (dimName, dimValue)
+            ncdim = nc.def_dim(dimName, dimValue)
+
+        # VARIABLES
+        for var in var_inits:
+            varName, varType, varDim = var
+            ncvar = nc.def_var(varName, varType, varDim)
+            # add attributes
+            for attrName in var_atts[varName].keys():
+                setattr(ncvar, attrName, var_atts[varName][attrName])
+            # setattr(ncvar, '_FillValue', numpy.nan)
+
+        # add data
+        nrecs = nc.inq_unlimlen()
+        for var in var_data:
+            varName, varData = var
+            # print varName
+            ncvar = nc.var(varName)
+            # e.g. lat = array(var_data['lat'])
+            # if an array
+            if type(varData) == numpy.ndarray:
+                if ncvar.isrecord():
+                    # time, ens, u, v
+                    ncvar[nrecs:nrecs+len(varData)] = varData.tolist()
+                else:
+                    ncvar[:] = varData.tolist()  # z
+            else:
+                # if tuple, sequence or scalar
+                ncvar[:] = varData
+
+        nc.close()
+    except CDFError, msg:
+        print "CDFError:", msg
+        # if nc:
+        #     nc.close()
+        #     del(nc)
+
+def nc_update(ncFile, (global_atts, var_atts, var_data)):
+    """
+    Update existing netcdf file
+
+    :Parameters:
+        ncFile : string
+            Path and name of file to update
+    """
+    try:
+        # Open netCDF in write mode
+        nc = CDF(ncFile, NC.WRITE)
+        # Automatically set define and data modes.
+        nc.automode()
+        #
+        # GLOBALS
+        for attrName in global_atts.keys():
+            setattr(nc, attrName, global_atts[attrName])
+
+        # VARIABLES
+        # update attributes
+        for var in var_atts:
+            varName, atts = var
+            ncvar = nc.var(varName)
+            for attrName in atts.keys():
+                setattr(ncvar, attrName, atts[attrName])
+
+        # update data
+        nrecs = nc.inq_unlimlen()
+        for var in var_data:
+            varName, varData = var
+            ncvar = nc.var(varName)
+            # e.g. lat = array(var_data['lat'])
+            # if an array
+            if type(varData) == numpy.ndarray:
+                if ncvar.isrecord():
+                    # time, ens, u, v (with unlimited dimension)
+                    ncvar[nrecs:nrecs+len(varData)] = varData.tolist()
+                else:
+                    ncvar[:] = varData.tolist()  # z (limited dimension)
+            else:
+                # if tuple, sequence or scalar
+                ncvar[:] = varData
+
+        nc.close()
+    except CDFError, msg:
+        print "CDFError:", msg
+        # if nc:
+        #     nc.close()
+        #     del(nc)
+
+def nc_get_time(ncFile):
+    """get time array from file """
+    try:
+        nc = CDF(ncFile)
+        ncvars = nc.variables()
+        if 'time' in ncvars.keys():
+            es = nc.var('time')[:]
+            units = nc.var('time').units
+            nc.close()
+            return (es, units)
+    except CDFError, msg:
+        print "CDFError:", msg
+
+def nc_load(ncFile, nameType='variable_name',
+            varNames='all', ga_flag=True, va_flag=True):
+    """
+    Load netcdf file
+
+    :Parameters:
+        ncFile : string
+            Path and name of file to load
+
+    :Other Parameters:
+        nameType : string 'variable_name' (default) or 'standard_name'
+            Defines naming convention to use for variable names as data
+            are loaded.  Variable name is the name used to store data
+            in file.  'standard_name' means use variable name based on
+            variable attribute called 'standard_name' of netcdf variable.
+        varNames : string or tuple of strings
+            specific variable names to be loaded into a sequence or scalar
+            in python following specification set in nameType
+            By default, all variables will be loaded.
+        ga_flag : boolean flag
+            By default, load the global file attributes
+        va_flag : boolean flag
+            By default, load the variable file attributes
+
+    """
+    try:
+        nc = CDF(ncFile)
+        attr = nc.attributes(full=1)
+        dims = nc.dimensions(full=1)
+        ncvars = nc.variables()
+        for var in ncvars.keys():
+            # load each variable by name??
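+            # nc_load is left as a stub in this revision; one possible
+            # sketch for the loop body (names are illustrative only):
+            #   ncvar = nc.var(var)
+            #   var_values[var] = ncvar[:]
+            #   if va_flag:
+            #       var_atts[var] = ncvar.attributes()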
+            pass
+
+    except CDFError, msg:
+        print "CDFError:", msg
+
Index: raw2proc/trunk/raw2proc/proc_rdi_logdata_adcp.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/proc_rdi_logdata_adcp.py (revision 101)
@@ -1,0 +1,313 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2008-01-14 12:20:17 haines>
+"""
+how to parse data, and assert what data and info goes into
+creating and updating monthly netcdf files
+
+RDI/Wavesmon processed adcp current profile data
+
+parser : sample date and time, ensemble number, currents
+         and wave summary output from WavesMon software
+creator : lat, lon, z, time, ens, u, v
+updater : time, ens, u, v
+
+Examples
+--------
+
+>> (parse, create, update) = load_processors('proc_rdi_logdata_adcp')
+or
+>> si = get_config(cn+'.sensor_info')
+>> (parse, create, update) = load_processors(si['adcp']['process_module'])
+
+>> lines = load_data(filename)
+>> data = parse(platform_info, sensor_info, lines)
+>> create(platform_info, sensor_info, data) or
+>> update(platform_info, sensor_info, data)
+
+"""
+
+from raw2proc import *
+from procutil import *
+from ncutil import *
+
+now_dt = datetime.utcnow()
+now_dt = now_dt.replace(microsecond=0)
+
+def parser(platform_info, sensor_info, lines):
+    """
+    parse and assign currents data from RDI ADCP Log Data
+
+    """
+
+    i = 0
+
+    for line in lines:
+        # split line and parse float and integers
+        rdi = []
+        sw = re.split(',', line)
+        for s in sw:
+            m = re.search(REAL_RE_STR, s)
+            if m:
+                rdi.append(float(m.groups()[0]))
+
+        # assign specific fields
+        n = len(rdi)
+        burst_num = int(rdi[0])  # Ensemble Number
+
+        # get sample datetime from data
+        sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(rdi[1:7])
+        sample_dt = scanf_datetime(sample_str, fmt='%y-%m-%d %H:%M:%S')
+        # datetime(*strptime(sample_str, "%y-%m-%d %H:%M:%S")[0:6])
+
+        # get sample datetime from filename
+        # compare with datetime from filename
+
+        sig_wave_ht = rdi[8]       # Significant Wave Height (Hs, meters)
+        peak_wave_period = rdi[9]  # Peak Wave Period (Tp, sec)
+        peak_wave_dir = rdi[10]    # Peak Wave Direction (deg N)
+        max_wave_ht = rdi[12]      # Maximum Wave Height (Hmax, meters)
+        max_wave_period = rdi[13]  # Maximum Wave Period (Tmax, sec)
+
+        water_depth = rdi[11]/1000  # Water Depth (meters) (based on ADCP backscatter or input config??)
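+        # (assumption: WavesMon appears to log depth in millimeters,
+        # hence the /1000 above to convert to meters)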
+        nbins = int(rdi[14])  # Number of bins
+
+        current_spd = numpy.array(rdi[15::2])  # starting at idx=15 skip=2 to end
+        current_dir = numpy.array(rdi[16::2])  # starting at idx=16 skip=2 to end
+
+        if nbins!=sensor_info['nbins']:
+            print 'Number of bins reported in data ('+ \
+                  str(nbins)+') does not match config number ('+ \
+                  str(sensor_info['nbins'])+')'
+
+        if len(current_spd)!=nbins or len(current_dir)!=nbins:
+            print 'Data length does not match number of bins in data'
+
+        ibad = (current_spd==-32768) | (current_dir==-32768)
+        current_spd[ibad] = numpy.nan
+        current_dir[ibad] = numpy.nan
+
+        # these items can also be teased out of raw adcp but for now get from config file
+        th = sensor_info['transducer_ht']   # Transducer height above bottom (meters)
+        bh = sensor_info['blanking_ht']     # Blanking height above Transducer (meters)
+        bin_size = sensor_info['bin_size']  # Bin Size (meters)
+
+        # compute height for each bin above the bottom
+        bins = numpy.arange(1,nbins+1)
+        bin_habs = (bins*bin_size+bin_size/2)+th+bh
+
+        # compute water mask
+        # Using George Voulgaris' method based on water depth
+        # minus half of the significant wave height (Hs)
+        # and computed habs
+        # if positive is up, what's less than zero depth?
+        bin_depths = bin_habs-(water_depth-sig_wave_ht/2)
+        iwater = bin_depths+bin_size/2 < 0
+
+        z = bin_habs
+        # check that length of bin_depths is equal to nbins
+        u = numpy.ones(nbins)*numpy.nan
+        v = numpy.ones(nbins)*numpy.nan
+
+        u[iwater] = current_spd[iwater]*numpy.sin(current_dir[iwater]*numpy.pi/180)
+        v[iwater] = current_spd[iwater]*numpy.cos(current_dir[iwater]*numpy.pi/180)
+
+        # set up dict of data if first line
+        if i==0:
+            data = {
+                'en' : numpy.array(numpy.ones((len(lines),), dtype=int)*numpy.nan),
+                'dt' : numpy.array(numpy.ones((len(lines),), dtype=object)*numpy.nan),
+                'time' : numpy.array(numpy.ones((len(lines),), dtype=long)*numpy.nan),
+                'z' : numpy.array(numpy.ones((nbins,), dtype=float)*numpy.nan),
+                'u' : numpy.array(numpy.ones((len(lines),nbins), dtype=float)*numpy.nan),
+                'v' : numpy.array(numpy.ones((len(lines),nbins), dtype=float)*numpy.nan),
+                'water_depth' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                }
+
+        data['en'][i] = burst_num
+        data['dt'][i] = sample_dt           # sample datetime
+        data['time'][i] = dt2es(sample_dt)  # sample time in epoch seconds
+        data['z'] = z
+        data['u'][i] = u
+        data['v'][i] = v
+        data['water_depth'][i] = water_depth
+        i = i+1
+
+    return data
+
+def creator(platform_info, sensor_info, data):
+    #
+    #
+    title_str = sensor_info['description']+' at '+ platform_info['location']
+    global_atts = {
+        'title' : title_str,
+        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
+        'institution_url' : 'http://nccoos.unc.edu',
+        'institution_dods_url' : 'http://nccoos.unc.edu',
+        'metadata_url' : 'http://nccoos.unc.edu',
+        'references' : 'http://nccoos.unc.edu',
+        'contact' : 'Sara Haines (haines@email.unc.edu)',
+        #
+        'source' : 'fixed-profiler (acoustic doppler) observation',
+        'history' : 'raw2proc using ' + sensor_info['process_module'],
+        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
+        # conventions
+        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
+        # SEACOOS CDL codes
+        'format_category_code' : 'fixed-profiler',
+        'institution_code' : platform_info['institution'],
+        'platform_code' : platform_info['id'],
+        'package_code' : sensor_info['id'],
+        # institution specific
+        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
+        'project_url' : 'http://nccoos.unc.edu',
+        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
+        # first date in monthly file
+        'start_date' : data['dt'][0].strftime("%Y-%m-%d %H:%M:%S"),
+        # last date in monthly file
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        'process_level' : 'level1',
+        #
+        # must type match to data (e.g. fillvalue is real if data is real)
+        '_FillValue' : -99999.,
+        }
+
+    var_atts = {
+        # coordinate variables
+        'time' : {'short_name': 'time',
+                  'long_name': 'Time',
+                  'standard_name': 'time',
+                  'units': 'seconds since 1970-1-1 00:00:00 -0',  # UTC
+                  'axis': 'T',
+                  },
+        'lat' : {'short_name': 'lat',
+                 'long_name': 'Latitude',
+                 'standard_name': 'latitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_north',
+                 'valid_range':(-90.,90.),
+                 'axis': 'Y',
+                 },
+        'lon' : {'short_name': 'lon',
+                 'long_name': 'Longitude',
+                 'standard_name': 'longitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_east',
+                 'valid_range':(-180.,180.),
+                 'axis': 'X',
+                 },
+        'z' : {'short_name': 'z',
+               'long_name': 'Height',
+               'standard_name': 'height',
+               'reference':'zero at sea-surface',
+               'units': 'm',
+               'axis': 'Z',
+               },
+        # data variables
+        'en' : {'long_name': 'Ensemble Number',
+                'standard_name': 'ensemble_number',
+                'units': 'None',
+                },
+        'u': {'long_name': 'East/West Component of Current',
+              'standard_name': 'eastward_current',
+              'units': 'm s-1',
+              'reference': 'clockwise from True East',
+              },
+        'v': {'long_name': 'North/South Component of Current',
+              'standard_name': 'northward_current',
+              'units': 'm s-1',
+              'reference': 'clockwise from True North',
+              },
+        'water_depth': {'short_name': '',
+                        'long_name': 'Water Depth',
+                        'standard_name': 'water_depth',
+                        'units': 'm',
+                        },
+        }
+
+    # dimension names use tuple so order of initialization is maintained
+    dim_inits = (
+        ('ntime', NC.UNLIMITED),
+        ('nlat', 1),
+        ('nlon', 1),
+        ('nz', sensor_info['nbins'])
+        )
+
+    # using tuple of tuples so order of initialization is maintained
+    # using dict for attributes order of init not important
+    # use dimension names not values
+    # (varName, varType, (dimName1, [dimName2], ...))
+    var_inits = (
+        # coordinate variables
+        ('time', NC.INT, ('ntime',)),
+        ('lat', NC.FLOAT, ('nlat',)),
+        ('lon', NC.FLOAT, ('nlon',)),
+        ('z', NC.FLOAT, ('nz',)),
+        # data variables
+        ('en', NC.INT, ('ntime', )),
+        ('u', NC.FLOAT, ('ntime', 'nz')),
+        ('v', NC.FLOAT, ('ntime', 'nz')),
+        ('water_depth', NC.FLOAT, ('ntime',)),
+        )
+
+    # subset data only to month being processed (see raw2proc.process())
+    i = data['in']
+
+    # var data
+    var_data = (
+        ('lat', platform_info['lat']),
+        ('lon', platform_info['lon']),
+        ('z', data['z']),
+        #
+        ('time', data['time'][i]),
+        ('en', data['en'][i]),
+        ('u', data['u'][i]),
+        ('v', data['v'][i]),
+        ('water_depth', data['water_depth'][i]),
+        )
+
+    return (global_atts, var_atts, dim_inits, var_inits, var_data)
+
+def updater(platform_info, sensor_info, data):
+    #
+    global_atts = {
+        # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
+        # last date in monthly file
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        }
+
+    # data variables
+    # update any variable attributes like range, min, max
+    var_atts = {}
+    # var_atts = {
+    #     'u': {'max': max(data.u),
+    #           'min': min(data.u),
+    #           },
+    #     'v': {'max': max(data.v),
+    #           'min': min(data.v),
+    #           },
+    #     }
+
+    # subset data only to month being processed (see raw2proc.process())
+    i = data['in']
+
+    # data
+    var_data = (
+        ('time', data['time'][i]),
+        ('en', data['en'][i]),
+        ('u', data['u'][i]),
+        ('v', data['v'][i]),
+        ('water_depth', data['water_depth'][i]),
+        )
+
+    return (global_atts, var_atts, var_data)
+#
Index: raw2proc/trunk/raw2proc/proc_rdi_logdata_dw.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/proc_rdi_logdata_dw.py (revision 101)
@@ -1,0 +1,312 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2008-01-14 12:14:32 haines>
+"""
+how to parse data, and assert what data and info goes into
+creating and updating monthly netcdf files
+
+RDI/Wavesmon processed adcp directional wave data
+
+parser : sample date and time, ensemble number, wave summary output from WavesMon software
+creator : lat, lon, z, time, sig_wave_ht, peak_wave_period, peak_wave_dir,
+          max_wave_ht, mean_wave_period, water_depth
+updater : time, sig_wave_ht, peak_wave_period, peak_wave_dir,
+          max_wave_ht, mean_wave_period, water_depth
+
+Examples
+--------
+
+>> (parse, create, update) = load_processors('proc_rdi_logdata_dw')
+or
+>> si = get_config(cn+'.sensor_info')
+>> (parse, create, update) = load_processors(si['adcpwaves']['process_module'])
+
+>> lines = load_data(filename)
+>> data = parse(platform_info, sensor_info, lines)
+>> create(platform_info, sensor_info, data) or
+>> update(platform_info, sensor_info, data)
+
+"""
+
+from raw2proc import *
+from procutil import *
+from ncutil import *
+
+now_dt = datetime.utcnow()
+now_dt = now_dt.replace(microsecond=0)
+
+def parser(platform_info, sensor_info, lines):
+    """
+    parse and assign wave summary data from RDI ADCP Log Data
+
+    """
+
+    i = 0
+
+    for line in lines:
+        # split line and parse float and integers
+        rdi = []
+        sw = re.split(',', line)
+        for s in sw:
+            m = re.search(REAL_RE_STR, s)
+            if m:
+                rdi.append(float(m.groups()[0]))
+
+        # assign specific fields
+        n = len(rdi)
+        burst_num = int(rdi[0])  # Ensemble Number
+
+        # get sample datetime from data
+        sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(rdi[1:7])
+        sample_dt = scanf_datetime(sample_str, fmt='%y-%m-%d %H:%M:%S')
+        # datetime(*strptime(sample_str, "%y-%m-%d %H:%M:%S")[0:6])
+
+        # get sample datetime from filename
+        # compare with datetime from filename
+
+        sig_wave_ht = rdi[8]        # Significant Wave Height (Hs, meters)
+        peak_wave_period = rdi[9]   # Peak Wave Period (Tp, sec)
+        peak_wave_dir = rdi[10]     # Peak Wave Direction (deg N)
+        max_wave_ht = rdi[12]       # Maximum Wave Height (Hmax, meters)
+        mean_wave_period = rdi[13]  # Mean Wave Period (Tmean, sec)
+
+        water_depth = rdi[11]/1000  # Water Depth (meters) (based on ADCP backscatter or input config??)
+        nbins = int(rdi[14])  # Number of bins
+
+        # set up dict of data if first line
+        if i==0:
+            data = {
+                'en' : numpy.array(numpy.ones((len(lines),), dtype=int)*numpy.nan),
+                'dt' : numpy.array(numpy.ones((len(lines),), dtype=object)*numpy.nan),
+                'time' : numpy.array(numpy.ones((len(lines),), dtype=long)*numpy.nan),
+                'sig_wave_ht' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                'peak_wave_period' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                'peak_wave_dir' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                'max_wave_ht' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                'mean_wave_period' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                'water_depth' : numpy.array(numpy.ones((len(lines)), dtype=float)*numpy.nan),
+                }
+
+        data['en'][i] = burst_num
+        data['dt'][i] = sample_dt           # sample datetime
+        data['time'][i] = dt2es(sample_dt)  # sample time in epoch seconds
+        data['sig_wave_ht'][i] = sig_wave_ht
+        data['peak_wave_period'][i] = peak_wave_period
+        data['peak_wave_dir'][i] = peak_wave_dir
+        data['max_wave_ht'][i] = max_wave_ht
+        data['mean_wave_period'][i] = mean_wave_period
+        data['water_depth'][i] = water_depth
+        i = i+1
+
+    return data
+
+def creator(platform_info, sensor_info, data):
+    #
+    #
+    title_str = sensor_info['description']+' at '+ platform_info['location']
+    global_atts = {
+        'title' : title_str,
+        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
+        'institution_url' : 'http://nccoos.unc.edu',
+        'institution_dods_url' : 'http://nccoos.unc.edu',
+        'metadata_url' : 'http://nccoos.unc.edu',
+        'references' : 'http://nccoos.unc.edu',
+        'contact' : 'Sara Haines (haines@email.unc.edu)',
+        #
+        'source' : 'directional wave (acoustic doppler) observation',
+        'history' : 'raw2proc using ' + sensor_info['process_module'],
+        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
+        # conventions
+        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
+        # SEACOOS CDL codes
+        'format_category_code' : 'directional waves',
+        'institution_code' : platform_info['institution'],
+        'platform_code' : platform_info['id'],
+        'package_code' : sensor_info['id'],
+        # institution specific
+        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
+        'project_url' : 'http://nccoos.unc.edu',
+        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
+        'start_date' : data['dt'][0].strftime("%Y-%m-%d %H:%M:%S"),
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        'process_level' : 'level1',
+        #
+        # must type match to data (e.g. fillvalue is real if data is real)
+        '_FillValue' : -99999.,
+        }
+
+    var_atts = {
+        # coordinate variables
+        'time' : {'short_name': 'time',
+                  'long_name': 'Time',
+                  'standard_name': 'time',
+                  'units': 'seconds since 1970-1-1 00:00:00 -0',  # UTC
+                  'axis': 'T',
+                  },
+        'lat' : {'short_name': 'lat',
+                 'long_name': 'Latitude',
+                 'standard_name': 'latitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_north',
+                 'valid_range':(-90.,90.),
+                 'axis': 'Y',
+                 },
+        'lon' : {'short_name': 'lon',
+                 'long_name': 'Longitude',
+                 'standard_name': 'longitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_east',
+                 'valid_range':(-180.,180.),
+                 'axis': 'X',
+                 },
+        'z' : {'short_name': 'z',
+               'long_name': 'Height',
+               'standard_name': 'height',
+               'reference':'zero at sea-surface',
+               'units': 'm',
+               'axis': 'Z',
+               },
+        # data variables
+        'en' : {'short_name': 'en',
+                'long_name': 'Ensemble Number',
+                'standard_name': 'ensemble_number',
+                'units': 'None',
+                },
+        'sig_wave_ht' : {'short_name': 'Hs',
+                         'long_name': 'Significant Wave Height',
+                         'definition': 'Four times the square root of the zeroth moment of the wave spectrum (4*sqrt(m0))',
+                         'standard_name': 'significant_wave_height',
+                         'units': 'm',
+                         },
+        'peak_wave_period' : {'short_name': 'Tp',
+                              'long_name': 'Peak Wave Period',
+                              'definition': 'Period of strongest wave (wave energy maximum)',
+                              'standard_name': 'peak_wave_period',
+                              'units': 'sec',
+                              },
+        'peak_wave_dir' : {'short_name': 'Dp',
+                           'long_name': 'Peak Wave Direction',
+                           'definition': 'Direction from which strongest waves (wave energy max) are coming',
+                           'standard_name': 'peak_wave_from_direction',
+                           'units': 'deg from N',
+                           'reference': 'clockwise from True North',
+                           },
+        'max_wave_ht' : {'short_name': 'Hmax',
+                         'long_name': 'Maximum Wave Height',
+                         'standard_name': 'max_wave_height',
+                         'units': 'm',
+                         },
+        'mean_wave_period' : {'short_name': 'Tmean',
+                              'long_name': 'Mean Wave Period',
+                              'definition': 'Zeroth moment of the non-directional spectrum divided by the first moment (m0/m1)',
+                              'standard_name': 'mean_wave_period',
+                              'units': 'sec',
+                              },
+        'water_depth': {'short_name': '',
+                        'long_name': 'Water Depth',
+                        'standard_name': 'water_depth',
+                        'units': 'm',
+                        },
+        }
+
+    # integer values
+    ntime = NC.UNLIMITED
+    nlat = 1
+    nlon = 1
+    nz = 1
+
+    # dimension names use tuple so order of initialization is maintained
+    dim_inits = (
+        ('ntime', NC.UNLIMITED),
+        ('nlat', 1),
+        ('nlon', 1),
+        ('nz', 1)
+        )
+
+    # using tuple of tuples so order of initialization is maintained
+    # using dict for attributes order of init not important
+    # use dimension names not values
+    # (varName, varType, (dimName1, [dimName2], ...))
+    var_inits = (
+        # coordinate variables
+        ('time', NC.INT, ('ntime',)),
+        ('lat', NC.FLOAT, ('nlat',)),
+        ('lon', NC.FLOAT, ('nlon',)),
+        ('z', NC.FLOAT, ('nz',)),
+        # data variables
+        ('en', NC.INT, ('ntime', )),
+        ('sig_wave_ht', NC.FLOAT, ('ntime',)),
+        ('peak_wave_period', NC.FLOAT, ('ntime',)),
+        ('peak_wave_dir', NC.FLOAT, ('ntime',)),
+        ('max_wave_ht', NC.FLOAT, ('ntime',)),
+        ('mean_wave_period', NC.FLOAT, ('ntime',)),
+        ('water_depth', NC.FLOAT, ('ntime',)),
+        )
+
+    # subset data only to month being processed (see raw2proc.process())
+    i = data['in']
+
+    # var data
+    var_data = (
+        ('lat', platform_info['lat']),
+        ('lon', platform_info['lon']),
+        ('z', 0),
+        #
+        ('time', data['time'][i]),
+        ('en', data['en'][i]),
+        ('sig_wave_ht', data['sig_wave_ht'][i]),
+        ('peak_wave_period', data['peak_wave_period'][i]),
+        ('peak_wave_dir', data['peak_wave_dir'][i]),
+        ('max_wave_ht', data['max_wave_ht'][i]),
+        ('mean_wave_period', data['mean_wave_period'][i]),
+        ('water_depth', data['water_depth'][i]),
+        )
+
+    return (global_atts, var_atts, dim_inits, var_inits, var_data)
+
+def updater(platform_info, sensor_info, data):
+    #
+    global_atts = {
+        # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
+        # last date in monthly file
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
+        }
+
+    # data variables
+    # update any variable attributes like range, min, max
+    var_atts = {}
+    # var_atts = {
+    #     'u': {'max': max(data.u),
+    #           'min': min(data.u),
+    #           },
+    #     'v': {'max': max(data.v),
+    #           'min': min(data.v),
+    #           },
+    #     }
+
+    # subset data only to month being processed (see raw2proc.process())
+    i = data['in']
+
+    # data
+    var_data = (
+        ('time', data['time'][i]),
+        ('en', data['en'][i]),
+        ('sig_wave_ht', data['sig_wave_ht'][i]),
+        ('peak_wave_period', data['peak_wave_period'][i]),
+        ('peak_wave_dir', data['peak_wave_dir'][i]),
+        ('max_wave_ht', data['max_wave_ht'][i]),
+        ('mean_wave_period', data['mean_wave_period'][i]),
+        ('water_depth', data['water_depth'][i]),
+        )
+
+    return (global_atts, var_atts, var_data)
+
+#
Index: raw2proc/trunk/raw2proc/proc_rdi_rawdata_adcp.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/proc_rdi_rawdata_adcp.py (revision 101)
@@ -1,0 +1,278 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2007-12-27 15:16:33 haines>
+"""
+how to parse data, and assert what data and info goes into
+creating and updating monthly netcdf files
+
+RDI/Wavesmon processed adcp current profile data
+
+parser : sample date and time, ensemble number, currents
+         and wave summary output from WavesMon software
+nc_creator :
+nc_updater :
+
+Examples
+--------
+
+>> (parse, create, update) = load_processors('proc_rdi_logdata')
+>> data = parse(platform_info, sensor_info, lines)
+>> create(platform_info, sensor_info, data)
+>> update(platform_info, sensor_info, data)
+
+"""
+
+from raw2proc import *
+from procutil import *
+from ncutil import *
+from time import strptime
+
+now = datetime.utcnow()
+now = now.replace(microsecond=0)
+
+def parser(platform_info, sensor_info, lines):
+    """
+    parse and assign currents data from RDI ADCP Log Data
+
+    """
+    i = 0
+
+    for line in lines:
+        # split line and parse float and integers
+        rdi = []
+        sw = re.split(',', line)
+        for s in sw:
+            m = re.search(REAL_RE_STR, s)
+            if m:
+                rdi.append(float(m.groups()[0]))
+
+        # assign specific fields
+        n = len(rdi)
+        burst_num = int(rdi[0])  # Ensemble Number
+
+        # get sample datetime from data
+        sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(rdi[1:7])
+        sample_dt = datetime(*strptime(sample_str, "%y-%m-%d %H:%M:%S")[0:6])
+
+        # get sample datetime from filename
+        # compare with datetime from filename
+
+        sig_wave_ht = rdi[8]       # Significant Wave Height (Hs, meters)
+        peak_wave_period = rdi[9]  # Peak Wave Period (Tp, sec)
+        peak_wave_dir = rdi[10]    # Peak Wave Direction (deg N)
+        max_wave_ht = rdi[12]      # Maximum Wave Height (Hmax, meters)
+        max_wave_period = rdi[13]  # Maximum Wave Period (Tmax, sec)
+
+        water_depth = rdi[11]/1000  # Water Depth (meters) (based on ADCP backscatter or input config??)
+        nbins = int(rdi[14])  # Number of bins
+
+        current_spd = numpy.array(rdi[15::2])  # starting at idx=15 skip=2 to end
+        current_dir = numpy.array(rdi[16::2])  # starting at idx=16 skip=2 to end
+
+        if nbins!=sensor_info['nbins']:
+            print 'Number of bins reported in data ('+ \
+                  str(nbins)+') does not match config number ('+ \
+                  str(sensor_info['nbins'])+')'
+
+        if len(current_spd)!=nbins or len(current_dir)!=nbins:
+            print 'Data length does not match number of bins in data'
+
+        ibad = (current_spd==-32768) | (current_dir==-32768)
+        current_spd[ibad] = numpy.nan
+        current_dir[ibad] = numpy.nan
+
+        # these items can also be teased out of raw adcp but for now get from config file
+        th = sensor_info['transducer_ht']   # Transducer height above bottom (meters)
+        bh = sensor_info['blanking_ht']     # Blanking height above Transducer (meters)
+        bin_size = sensor_info['bin_size']  # Bin Size (meters)
+
+        # compute height for each bin above the bottom
+        bins = numpy.arange(1,nbins+1)
+        bin_habs = (bins*bin_size+bin_size/2)+th+bh
+
+        # compute water mask
+        # Using George Voulgaris' method based on water depth
+        # minus half of the significant wave height (Hs)
+        # and computed habs
+        # if positive is up, what's less than zero depth?
+        bin_depths = bin_habs-(water_depth-sig_wave_ht/2)
+        iwater = bin_depths+bin_size/2 < 0
+
+        z = bin_habs
+        # check that length of bin_depths is equal to nbins
+        u = numpy.ones(nbins)*numpy.nan
+        v = numpy.ones(nbins)*numpy.nan
+
+        u[iwater] = current_spd[iwater]*numpy.sin(current_dir[iwater]*numpy.pi/180)
+        v[iwater] = current_spd[iwater]*numpy.cos(current_dir[iwater]*numpy.pi/180)
+
+        # set up dict of data if first line
+        if i==0:
+            data = {
+                'en' : numpy.array(numpy.ones((len(lines),), dtype=float)*numpy.nan),
+                'dt' : numpy.array(numpy.ones((len(lines),), dtype=object)*numpy.nan),
+                'nbins' : numpy.array(numpy.zeros((len(lines),), dtype=int)),
+                'z' : numpy.array(numpy.ones((len(lines),nbins), dtype=float)*numpy.nan),
+                'u' : numpy.array(numpy.ones((len(lines),nbins), dtype=float)*numpy.nan),
+                'v' : numpy.array(numpy.ones((len(lines),nbins), dtype=float)*numpy.nan),
+                }
+
+        data['en'][i] = burst_num
+        data['dt'][i] = sample_dt
+        data['nbins'][i] = nbins
+        data['z'][i] = z
+        data['u'][i] = u
+        data['v'][i] = v
+        i = i+1
+
+    return data
+
+def creator(platform_info, sensor_info, data):
+    #
+    #
+    title_str = sensor_info['description']+' at '+ platform_info['location']
+    global_atts = {
+        'title' : title_str,
+        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
+        'institution_url' : 'http://nccoos.unc.edu',
+        'institution_dods_url' : 'http://nccoos.unc.edu',
+        'metadata_url' : 'http://nccoos.unc.edu',
+        'references' : 'http://nccoos.unc.edu',
+        'contact' : 'Sara Haines (haines@email.unc.edu)',
+        #
+        'source' : 'fixed-profiler (acoustic doppler) observation',
+        'history' : 'Data processed by NCCOOS',
+        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
+        # conventions
+        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
+        # SEACOOS CDL codes
+        'format_category_code' : 'fixed-profiler',
+        'institution_code' : platform_info['institution'],
+        'platform_code' : platform_info['id'],
+        'package_code' : sensor_info['id'],
+        # institution specific
+        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
+        'project_url' : 'http://nccoos.unc.edu',
+        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
+        'start_date' : data['dt'][0].strftime("%Y-%m-%d %H:%M:%S"),
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'creation_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        'modification_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        'process_level' : 'level1',
+        #
+        # must type match to data (e.g. fillvalue is real if data is real)
+        '_FillValue' : -99999.,
+        }
+
+    var_atts = {
+        # coordinate variables
+        'time' : {'short_name': 'time',
+                  'long_name': 'Time',
+                  'standard_name': 'time',
+                  'units': 'seconds since 1970-1-1 00:00:00 -0',  # UTC
+                  'axis': 'T',
+                  },
+        'lat' : {'short_name': 'lat',
+                 'long_name': 'Latitude',
+                 'standard_name': 'latitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_north',
+                 'valid_range':(-90.,90.),
+                 'axis': 'Y',
+                 },
+        'lon' : {'short_name': 'lon',
+                 'long_name': 'Longitude',
+                 'standard_name': 'longitude',
+                 'reference':'geographic coordinates',
+                 'units': 'degrees_east',
+                 'valid_range':(-180.,180.),
+                 'axis': 'X',
+                 },
+        'z' : {'short_name': 'z',
+               'long_name': 'Height',
+               'standard_name': 'height',
+               'reference':'zero at sea-surface',
+               'units': 'm',
+               'axis': 'Z',
+               },
+        # data variables
+        'u': {'long_name': 'East/West Component of Current',
+              'standard_name': 'eastward_current',
+              'units': 'm s-1',
+              'reference': 'clockwise from True East',
+              },
+        'v': {'long_name': 'North/South Component of Current',
+              'standard_name': 'northward_current',
+              'units': 'm s-1',
+              'reference': 'clockwise from True North',
+              },
+        'w': {'long_name': 'Upward/Downward Component of Current',
+              'standard_name': 'upward_current',
+              'units': 'm s-1',
+              'positive': 'up',
+              },
+        'back_scatter':{'long_name': 'Backscatter',
+                        'standard_name': 'back_scatter',
+                        'units': 'decibels',
+                        },
+        'wtemp': {'long_name': 'Water Temperature',
+                  'standard_name': 'water_temperature',
+                  'units': 'degrees Celsius',
+                  },
+        }
+
+    # integer values
+    ntime = NC.UNLIMITED
+    nlat = 1
+    nlon = 1
+    nz = sensor_info['nbins']
+
+    # dimension names use tuple so order of initialization is maintained
+    dimensions = ('ntime', 'nlat', 'nlon', 'nz')
+
+    # using tuple of tuples so order of initialization is maintained
+    # using dict for attributes order of init not important
+    # use dimension names not values
+    # (varName, varType, (dimName1, [dimName2], ...))
+    var_inits = (
+        # coordinate variables
+        ('time', NC.INT, ('ntime',)),
+        ('lat', NC.FLOAT, ('nlat',)),
+        ('lon', NC.FLOAT, ('nlon',)),
+        ('z', NC.FLOAT, ('nz',)),
+        # data variables
+        ('u', NC.FLOAT, ('ntime', 'nz')),
+        ('v', NC.FLOAT, ('ntime', 'nz')),
+        ('w', NC.FLOAT, ('ntime', 'nz')),
+        ('back_scatter', NC.FLOAT, ('ntime', 'nz')),
+        ('wtemp', NC.FLOAT, ('ntime',)),
+        )
+
+    # var data
+    var_data = (
+        ('lat', platform_info['lat']),
+        ('lon', platform_info['lon']),
+        ('z', []),
+        ('u', []),
+        ('v', []),
+        ('w', []),
+        ('back_scatter', []),
+        ('wtemp', []),
+        )
+
+    return (global_atts, dimensions, var_inits, var_data)
+
+def updater(platform_info, sensor_info, data):
+    #
+    global_atts = {
+        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
+        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
+        'release_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        #
+        'creation_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        'modification_date' : now.strftime("%Y-%m-%d %H:%M:%S"),
+        }
+    # var data
+    var_data = (
+        ('u', data['u']),
+        ('v', data['v']),
+        ('w', data['w']),
+        ('back_scatter', data['back_scatter']),
+        ('wtemp', data['wtemp']),
+        )
+    return (global_atts, var_data)
+#
Index: raw2proc/trunk/raw2proc/procutil.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/procutil.py (revision 101)
@@ -1,0 +1,279 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2008-01-03 09:50:53 haines>
+"""Utilities to help data processing
+
+   Mostly time functions right now
+
+   TO DO:
+     check_configs()
+"""
+
+__version__ = "v0.1"
+__author__ = "Sara Haines"
+
+from datetime import datetime, timedelta, tzinfo
+from dateutil.tz import tzlocal, tzutc
+import time
+
+def check_configs():
+    """Test config files for conformity
+
+    check either one or all for a platform
+
+    id in filename == platform.id
+    datetime in filename <= platform.config_start_date
+      (close in time, usually the same day)
+    also platform.config_start_date < platform.config_end_date
+      (there needs to be some time that the platform was operational)
+    test existence of specific structural elements (platform info and sensor info)
+      and specific fields for both platform and sensor
+    verify that for each platform_info['packages'] there is sensor_info and same id
+      for pi['packages'][0] in si.keys()
+      pi['packages'][0] == si['adcp']['id']
+    bounds on data in fields
+    show difference between two consecutive configs?
+    pretty print to screen of dictionary info for platform and sensor info
+
+    cn = os.path.splitext(os.path.basename(config))[0]
+    cndt = filt_datetime(os.path.basename(config))
+    pi = get_config(cn+'.platform_info')
+    if pi['config_start_date']:
+        config_start_dt = filt_datetime(pi['config_start_date'])
+    elif pi['config_start_date'] == None:
+        config_start_dt = now_dt
+    if pi['config_end_date']:
+        config_end_dt = filt_datetime(pi['config_end_date'])
+    elif pi['config_end_date'] == None:
+        config_end_dt = now_dt
+
+    print cn + ' -----------------'
+    print cndt
+    print config_start_dt
+    print config_end_dt
+    print now_dt
+    print 'file date ok? ' + str(cndt <= config_start_dt)
+    print 'operation date ok? ' + str(config_start_dt < config_end_dt)
+    """
+
+def dt2es(dt):
+    """Convert datetime object to epoch seconds (es) as seconds since Jan-01-1970 """
+    # microseconds of timedelta object not used
+    delta = dt - datetime(1970,1,1,0,0,0)
+    es = delta.days*24*60*60 + delta.seconds
+    return es
+
+def es2dt(es):
+    """ Convert epoch seconds (es) to datetime object"""
+    dt = datetime(*time.gmtime(es)[0:6])
+    return dt
+
+def find_months(year, month=1):
+    """Find which months to process
+
+    Since data are in subdirectories based on months, determine
+    previous, current, and next month to look in directories for data
+    of the current month or month to process.
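+    For example, processing month '2007_02' means looking in the
+    2007_01, 2007_02, and 2007_03 raw subdirectories.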
+
+    :Parameters:
+        year : int value or str 'yyyy_mm'
+        month : int value
+
+    :Returns:
+        which_months : tuple of 3 datetime objects
+            (prev_month, current_month, next_month)
+
+    Examples
+    --------
+    >>> find_months(2007, 2)
+    >>> find_months('2007_02')
+
+    """
+    if type(year) == int and type(month) == int :
+        dt = datetime(year, month, day=1)
+        this_month = dt
+    elif type(year) == str :
+        dt = filt_datetime(year)
+        this_month = dt
+    #
+    if dt.month == 1:  # if January
+        prev_month = datetime(dt.year-1, month=12, day=1)  # Dec
+        next_month = datetime(dt.year, dt.month+1, day=1)  # Feb
+    elif dt.month == 12:  # if December
+        prev_month = datetime(dt.year, dt.month-1, day=1)  # Nov
+        next_month = datetime(dt.year+1, month=1, day=1)   # Jan
+    else:
+        prev_month = datetime(dt.year, dt.month-1, day=1)
+        next_month = datetime(dt.year, dt.month+1, day=1)
+    #
+    return (prev_month, this_month, next_month)
+
+def this_month():
+    """Return this month (GMT) as formatted string (yyyy_mm) """
+    this_month_str = "%4d_%02d" % time.gmtime()[0:2]
+    return this_month_str
+
+def scanf_datetime(ts, fmt='%Y-%m-%dT%H:%M:%S'):
+    """Convert string representing date and time to datetime object"""
+    # default string format follows convention YYYY-MM-DDThh:mm:ss
+
+    t = time.strptime(ts, fmt)
+    # the '*' operator unpacks the tuple, producing the argument list.
+    dt = datetime(*t[0:6])
+    return dt
+
+def filt_datetime(input_string, remove_ext=True):
+    """
+    Following the template, (YY)YYMMDDhhmmss
+    and versions of this with decreasing time precision,
+    find the most precise, reasonable string match and
+    return its datetime object.
+    """
+
+    # remove any trailing filename extension
+    from os.path import splitext
+    import re
+    if remove_ext:
+        (s, e) = splitext(input_string)
+        input_string = s
+
+    # YYYYMMDDhhmmss and should handle most cases of the stamp
+    # other forms this should pass
+    #   YY_MM_DD_hh:mm:ss
+    #   YYYY_MM_DD_hh:mm:ss
+    #   YYYY,MM,DD,hh,mm,ss
+    #   YY,MM,DD,hh,mm,ss
+
+    case1_regex = r"""
+    # case 1: (YY)YYMMDDhhmmss
+    (\d{4}|\d{2})  # 2- or 4-digit YEAR (e.g. '07' or '2007')
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or '-')
+    (\d{2})        # 2-digit MONTH (e.g. '12')
+    \D?            # optional 1 character non-digit separator
+    (\d{2})        # 2-digit DAY of month (e.g. '10')
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or 'T')
+    (\d{2})        # 2-digit HOUR (e.g. '10')
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or ':')
+    (\d{2})        # 2-digit MINUTE (e.g. '10')
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or ':')
+    (\d{2})        # 2-digit SECOND (e.g. '10')
+    """
+
+    case2_regex = r"""
+    # case 2: (YY)YYMMDDhhmm (no seconds)
+    (\d{4}|\d{2})  # 2- or 4-digit YEAR
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or '-')
+    (\d{2})        # 2-digit MONTH
+    \D?            # optional 1 character non-digit separator
+    (\d{2})        # 2-digit DAY
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or 'T')
+    (\d{2})        # 2-digit HOUR
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or ':')
+    (\d{2})        # 2-digit MINUTE
+    """
+
+    case3_regex = r"""
+    # case 3: (YY)YYMMDDhh (no seconds, no minutes)
+    (\d{4}|\d{2})  # 2- or 4-digit YEAR
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or '-')
+    (\d{2})        # 2-digit MONTH
+    \D?            # optional 1 character non-digit separator
+    (\d{2})        # 2-digit DAY
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or 'T')
+    (\d{2})        # 2-digit HOUR
+    """
+
+    case4_regex = r"""
+    # case 4: (YY)YYMMDD (no time values, just date)
+    (\d{4}|\d{2})  # 2- or 4-digit YEAR
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or '-')
+    (\d{2})        # 2-digit MONTH
+    \D?            # optional 1 character non-digit separator
+    (\d{2})        # 2-digit DAY
+    """
+
+    case5_regex = r"""
+    # case 5: (YY)YYMM (no time values, just month year)
+    (\d{4}|\d{2})  # 2- or 4-digit YEAR
+    \D?            # optional 1 character non-digit separator (e.g. ' ' or '-')
+    (\d{2})        # 2-digit MONTH
+    """
+
+    ## Verbose regular expressions require use of re.VERBOSE flag.
+    ## so we can use multiline regexp
+
+    # cases are ordered from precise to more coarse resolution of time
+    cases = [case1_regex, case2_regex, case3_regex, case4_regex, case5_regex]
+    patterns = [re.compile(c, re.VERBOSE) for c in cases]
+    matches = [p.search(input_string) for p in patterns]
+
+    # for testing, try to compute datetime objects
+    # just because there is a match does not mean it makes sense
+    for ind in range(len(matches)):
+        if bool(matches[ind]):
+            # print matches[ind].groups()
+            bits = matches[ind].groups()
+            values = [int(yi) for yi in bits]
+            # check for 2-digit year
+            if values[0] < 50:
+                values[0] += 2000
+            elif values[0]>=50 and values[0]<100:
+                values[0] += 1900
+            #
+            # we must have at least 3 arg input to datetime
+            if len(values)==1:
+                values.extend([1,1])  # add First of January
+            elif len(values)==2:
+                values.extend([1])    # add first day of month
+
+            #
+            # compute dt
+            try:
+                dt = datetime(*values)
+            except ValueError, e:
+                # value error if something not valid for datetime
+                # e.g. month 1...12, something parsed wrong
+                dt = None
+            else:
+                # absolute difference in days from now (UTC)
+                z = dt - datetime.utcnow()
+                daysdiff = abs(z.days)
+                # if this date unreasonable (>10 years*365), throw it out
+                # something parsed wrong
+                if daysdiff > 3650:
+                    dt = None
+        else:
+            dt = None
+
+        # place datetime object or None within sequence of matches
+        matches[ind] = dt
+
+    # find the first (most precise) date match, since there may be more
+    # than one match from the coarser templates; bad parses have already
+    # been thrown out
+    b = [bool(x) for x in matches]
+    try:
+        ind = b.index(True)
+    except ValueError, e:
+        print 'filt_datetime: No date found in ', input_string
+        dt = None
+    else:
+        dt = matches[ind]
+    return dt
+
+def display_time_diff(diff):
+    """Display time difference as HH:MM, prefixed by number of weeks (W)
+    and days (D) if necessary"""
+    # weeks, days = divmod(diff.days, 7)
+    days = diff.days
+    minutes, seconds = divmod(diff.seconds, 60)
+    hours, minutes = divmod(minutes, 60)
+    # if (weeks>2 and days>0):
+    #     str = "%d Weeks, %d Days %02d:%02d" % (weeks, days, hours, minutes)
+    if (days==1):
+        str = "%02d:%02d" % (24+hours, minutes)
+    elif (days>1):
+        str = "%d Days %02d:%02d" % (days, hours, minutes)
+    else:
+        str = "%02d:%02d" % (hours, minutes)
+    return str
+
+#
Index: raw2proc/trunk/raw2proc/raw2proc.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/raw2proc.py (revision 101)
@@ -1,0 +1,377 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2008-01-14 11:03:16 haines>
+"""Process raw data to monthly netCDF data files
+
+This module processes raw ascii- or binary-data from different NCCOOS
+sensors (ctd, adcp, waves-adcp, met) based on manual or automated
+operation.  If automated processing, add raw data (level0) from all
+active sensors to current month's netcdf data files (level1) with the
+current configuration setting.
+If manual processing, determine which
+configurations to use for requested platform, sensor, and month.
+
+:Processing steps:
+  0. raw2proc auto or manual for platform, sensor, month
+  1. list of files to process
+  2. parse data
+  3. create, update netcdf
+
+  to-do
+  3. qc (measured) data
+  4. process derived data (and regrid?)
+  5. qc (measured and derived) data flags
+
+"""
+
+__version__ = "v0.1"
+__author__ = "Sara Haines"
+
+import sys
+import os
+import re
+
+# define config file location to run under cron
+defconfigs='/afs/isis.unc.edu/depts/marine/workspace/haines/nc-coos/raw2proc'
+
+import numpy
+
+from procutil import *
+from ncutil import *
+
+REAL_RE_STR = '\\s*(-?\\d(\\.\\d+|)[Ee][+\\-]\\d\\d?|-?(\\d+\\.\\d*|\\d*\\.\\d+)|-?\\d+)\\s*'
+
+def load_data(inFile):
+    lines=None
+    if os.path.exists(inFile):
+        f = open(inFile, 'r')
+        lines = f.readlines()
+        f.close()
+        if len(lines)<=0:
+            print 'Empty file: '+ inFile
+    else:
+        print 'File does not exist: '+ inFile
+    return lines
+
+def import_parser(name):
+    mod = __import__('parsers')
+    parser = getattr(mod, name)
+    return parser
+
+def import_processors(mod_name):
+    mod = __import__(mod_name)
+    parser = getattr(mod, 'parser')
+    creator = getattr(mod, 'creator')
+    updater = getattr(mod, 'updater')
+    return (parser, creator, updater)
+
+def get_config(name):
+    """Usage Example >>>sensor_info = get_config('bogue_config_20060918.sensor_info')"""
+    components = name.split('.')
+    mod = __import__(components[0])
+    for comp in components[1:]:
+        attr = getattr(mod, comp)
+    return attr
+
+def find_configs(platform, yyyy_mm, config_dir=''):
+    """Find which configuration files apply for specified platform and month
+
+    :Parameters:
+        platform : string
+            Platform id to process (e.g. 'bogue')
+        yyyy_mm : string
+            Year and month of data to process (e.g. '2007_07')
+
+    :Returns:
+        cns : list of str
+            List of configurations that overlap with desired month
+            If empty [], no configs were found
+    """
+    import glob
+    # list of config files based on platform
+    configs = glob.glob(os.path.join(config_dir, platform + '_config_*.py'))
+    now_dt = datetime.utcnow()
+    now_dt = now_dt.replace(microsecond=0)
+    # determine when month starts and ends
+    (prev_month, this_month, next_month) = find_months(yyyy_mm)
+    month_start_dt = this_month
+    month_end_dt = next_month - timedelta(seconds=1)
+    # print month_start_dt; print month_end_dt
+    #
+    cns = []
+    for config in configs:
+        # datetime from filename
+        cn = os.path.splitext(os.path.basename(config))[0]
+        cndt = filt_datetime(os.path.basename(config))
+        pi = get_config(cn+'.platform_info')
+        if pi['config_start_date']:
+            config_start_dt = filt_datetime(pi['config_start_date'])
+        elif pi['config_start_date'] == None:
+            config_start_dt = now_dt
+        if pi['config_end_date']:
+            config_end_dt = filt_datetime(pi['config_end_date'])
+        elif pi['config_end_date'] == None:
+            config_end_dt = now_dt
+        #
+        if (config_start_dt <= month_start_dt or config_start_dt <= month_end_dt) and \
+           (config_end_dt >= month_start_dt or config_end_dt >= month_end_dt):
+            cns.append(cn)
+    return cns
+
+def find_active_configs(config_dir=''):
+    """Find which configuration files are active
+
+    :Returns:
+        cns : list of str
+            List of currently active configurations (config_end_date is None)
+            If empty [], no configs were found
+    """
+    import glob
+    # list of all config files
+    configs = glob.glob(os.path.join(config_dir, '*_config_*.py'))
+    now_dt = datetime.utcnow()
+    now_dt = now_dt.replace(microsecond=0)
+    #
+    cns = []
+    for config in configs:
+        # datetime from filename
+        cn = os.path.splitext(os.path.basename(config))[0]
+        cndt = filt_datetime(os.path.basename(config))
+        pi = get_config(cn+'.platform_info')
+        if pi['config_end_date'] == None:
+            cns.append(cn)
+    return cns
+
+def find_raw(si, yyyy_mm):
+    """Determine which list of raw files to process for month """
+    import glob
+    # determine when month starts and ends
+    #
+    months = find_months(yyyy_mm)
+    # list all the raw files in prev-month, this-month, and next-month
+    all_raw_files = []
+    for mon in months:
+        mstr = mon.strftime('%Y_%m')
+        gs = os.path.join(si['raw_dir'], mstr, si['raw_file_glob'])
+        all_raw_files.extend(glob.glob(gs))
+
+    all_raw_files.sort()
+
+    # ****** (SMH) NOTE: Will need to override looking in specific
+    # subdirs of months if all data is contained in one file for long
+    # deployment, such as with adcp binary data.
+
+    #
+    dt_start = si['proc_start_dt']-timedelta(days=1)
+    dt_end = si['proc_end_dt']+timedelta(days=1)
+    raw_files = []; raw_dts = []
+    # compute datetime for each file
+    for fn in all_raw_files:
+        fndt = filt_datetime(os.path.basename(fn))
+        if fndt:
+            if dt_start <= fndt <= dt_end:
+                raw_files.append(fn)
+                raw_dts.append(fndt)
+
+    return (raw_files, raw_dts)
+
+def which_raw(pi, raw_files, dts):
+    """Further limit file names based on configuration file timeframe """
+
+    now_dt = datetime.utcnow()
+    now_dt = now_dt.replace(microsecond=0)
+    if pi['config_start_date']:
+        config_start_dt = filt_datetime(pi['config_start_date'])
+    elif pi['config_start_date'] == None:
+        config_start_dt = now_dt
+
+    if pi['config_end_date']:
+        config_end_dt = filt_datetime(pi['config_end_date'])
+    elif pi['config_end_date'] == None:
+        config_end_dt = now_dt
+
+    new_list = [raw_files[i] for i in range(len(raw_files)) \
+                if config_start_dt <= dts[i] <= config_end_dt]
+    return new_list
+
+def raw2proc(proctype, platform=None, package=None, yyyy_mm=None):
+    """
+    Process data either in auto-mode or manual-mode
+
+    If auto-mode, process newest data for all platforms, all
+    sensors.  Otherwise in manual-mode, process data for specified
+    platform, sensor package, and month.
+
+    :Parameters:
+        proctype : string
+            'auto' or 'manual'
+        platform : string
+            Platform id to process (e.g. 'bogue')
+        package : string
+            Sensor package id to process (e.g. 'adcp')
+        yyyy_mm : string
+            Year and month of data to process (e.g. '2007_07')
+
+    Examples
+    --------
+    >>> raw2proc(proctype='manual', platform='bogue', package='adcp', yyyy_mm='2007_06')
+    >>> raw2proc('manual', 'bogue', 'adcp', '2007_06')
+
+    """
+    print '\nStart time for raw2proc: %s\n' % start_dt.strftime("%Y-%b-%d %H:%M:%S UTC")
+
+    if proctype == 'auto':
+        print 'Processing in auto-mode, all platforms, all packages, latest data'
+        auto()
+    elif proctype == 'manual':
+        if platform and package and yyyy_mm:
+            print 'Processing manually ...'
+            print ' ... platform id : %s' % platform
+            print ' ... package name : %s' % package
+            print ' ... month : %s' % yyyy_mm
+            print ' ... starting at : %s' % start_dt.strftime("%Y-%m-%d %H:%M:%S UTC")
+            manual(platform, package, yyyy_mm)
+        else:
+            print 'raw2proc: Manual operation requires platform, package, and month'
+            print "  >>> raw2proc(proctype='manual', platform='bogue', package='adcp', yyyy_mm='2007_07')"
+    else:
+        print 'raw2proc: requires either auto or manual operation'
+
+def auto():
+    """Process all platforms, all packages, latest data
+
+    Notes
+    -----
+
+    1. determine which platforms (all platforms with currently active
+       config files, i.e. config_end_date is None)
+    2. for each platform
+       get latest config
+       for each package
+           (determine process for 'latest' data) copy to new area when grabbed
+           parse recent data
+           yyyy_mm is the current month
+           load this months netcdf, if new month, create this months netcdf
+           update modified date and append new data in netcdf
+
+    """
+    yyyy_mm = this_month()
+    months = find_months(yyyy_mm)
+    month_start_dt = months[1]
+    month_end_dt = months[2] - timedelta(seconds=1)
+
+    configs = find_active_configs(config_dir=defconfigs)
+    if configs:
+        # for each configuration
+        for cn in configs:
+            print ' ... config file : %s' % cn
+            pi = get_config(cn+'.platform_info')
+            asi = get_config(cn+'.sensor_info')
+            platform = pi['id']
+            # for each sensor package
+            for package in asi.keys():
+                print ' ... package name : %s' % package
+                si = asi[package]
+                si['proc_filename'] = '%s_%s_%s.nc' % (platform, package, yyyy_mm)
+                ofn = os.path.join(si['proc_dir'], si['proc_filename'])
+                si['proc_start_dt'] = month_start_dt
+                si['proc_end_dt'] = month_end_dt
+                if os.path.exists(ofn):
+                    # get last dt from current month file
+                    (es, units) = nc_get_time(ofn)
+                    last_dt = es2dt(es[-1])
+                    # if last_dt is newer than month_start_dt, start there
+                    # instead so only the newest data is processed
+                    if last_dt>=month_start_dt:
+                        si['proc_start_dt'] = last_dt
+
+                (raw_files, raw_dts) = find_raw(si, yyyy_mm)
+                raw_files = which_raw(pi, raw_files, raw_dts)
+                process(pi, si, raw_files, yyyy_mm)
+        #
+    else:
+        print ' ... ... ... \nNOTE: No active platforms\n'
+
+def manual(platform, package, yyyy_mm):
+    """Process data for specified platform, sensor package, and month
+
+    Notes
+    -----
+
+    1. determine which configs
+    2. for each config for specific platform
+       if have package in config
+           which raw files
+    """
+    # determine when month starts and ends
+    months = find_months(yyyy_mm)
+    month_start_dt = months[1]
+    month_end_dt = months[2] - timedelta(seconds=1)
+
+    configs = find_configs(platform, yyyy_mm, config_dir=defconfigs)
+
+    if configs:
+        # for each configuration
+        for index in range(len(configs)):
+            cn = configs[index]
+            print ' ... config file : %s' % cn
+            pi = get_config(cn+'.platform_info')
+            # month start and end dt to pi info
+            asi = get_config(cn+'.sensor_info')
+            if package in pi['packages']:
+                si = asi[package]
+                si['proc_start_dt'] = month_start_dt
+                si['proc_end_dt'] = month_end_dt
+                si['proc_filename'] = '%s_%s_%s.nc' % (platform, package, yyyy_mm)
+                ofn = os.path.join(si['proc_dir'], si['proc_filename'])
+                (raw_files, raw_dts) = find_raw(si, yyyy_mm)
+                raw_files = which_raw(pi, raw_files, raw_dts)
+                # remove any previous netcdf file (platform_package_yyyy_mm.nc)
+                if index==0 and os.path.exists(ofn):
+                    os.remove(ofn)
+                #
+                process(pi, si, raw_files, yyyy_mm)
+            else:
+                print ' ... ... \nNOTE: %s not operational on %s for %s\n' % (package, platform, yyyy_mm)
+    else:
+        print ' ... ... ... \nNOTE: %s not operational for %s\n' % (platform, yyyy_mm)
+
+def process(pi, si, raw_files, yyyy_mm):
+    # tailored data processing for different input file formats and control over output
+    (parse, create, update) = import_processors(si['process_module'])
+    for fn in raw_files:
+        # sys.stdout.write('... %s ... ' % fn)
+        lines = load_data(fn)
+        data = parse(pi, si, lines)
+        # determine which index of data is within the specified timeframe (usually the month)
+        data['in'] = (data['dt']>si['proc_start_dt']) & (data['dt']<=si['proc_end_dt'])
+        # if any records are in the month then write to netcdf
+        if data['in'].any():
+            sys.stdout.write('... %s ... ' % fn)
+            sys.stdout.write('%d\n' % len(data['in']))
+            ofn = os.path.join(si['proc_dir'], si['proc_filename'])
+            # update or create netcdf
+            if os.path.exists(ofn):
+                ut = update(pi,si,data)
+                nc_update(ofn, ut)
+            else:
+                ct = create(pi,si,data)
+                nc_create(ofn, ct)
+
+# globals
+start_dt = datetime.utcnow()
+start_dt = start_dt.replace(microsecond=0)
+
+if __name__ == "__main__":
+    import optparse
+    raw2proc('auto')
+
+    # for testing
+    # proctype='manual'; platform='bogue'; package='adcp'; yyyy_mm='2007_07'
+    # raw2proc(proctype='manual', platform='bogue', package='adcp', yyyy_mm='2007_07')
Index: raw2proc/trunk/raw2proc/test_raw2proc.py
===================================================================
--- (revision )
+++ raw2proc/trunk/raw2proc/test_raw2proc.py (revision 101)
@@ -1,0 +1,21 @@
+#!/usr/bin/env python
+# Last modified: Time-stamp: <2007-12-11 11:37:00 haines>
+"""
+Tests processing raw data to monthly netcdf
+"""
+
+# test interface within python
+raw2proc('auto')
+raw2proc(proctype='auto')
+raw2proc(platform='bogue',package='adcp')  # fails, proctype not defined
+
+raw2proc('manual')  # fails, not enough params
+raw2proc(proctype='manual')  # fails, not enough params
+
+raw2proc('bogue','adcp','2007_07')  # fails, proctype must be 'auto' or 'manual'
+raw2proc(proctype='manual', platform='bogue',package='adcp', yyyy_mm='2007_07')
+raw2proc(platform='bogue',package='adcp',yyyy_mm='2007_07')  # fails, proctype not given
+
+raw2proc(yyyy_mm='2007_07', package='adcp', platform='bogue')  # fails, proctype not given
+
+# test interface from shell
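+# e.g. one way to drive manual mode from the command line (a sketch;
+# raw2proc.py's __main__ only runs auto-mode in this revision, and the
+# optparse wiring is not implemented yet):
+#
+#   python -c "from raw2proc import raw2proc; \
+#              raw2proc('manual', 'bogue', 'adcp', '2007_07')"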