#!/usr/bin/env python # Last modified: Time-stamp: <2008-12-17 16:49:11 haines> """Utilities to help data processing Mostly time functions right now TO DO: check_configs() unit conversions (udunits?) """ __version__ = "v0.1" __author__ = "Sara Haines " import os.path from datetime import datetime, timedelta, tzinfo from dateutil.tz import tzlocal, tzutc import time import math from ncutil import * def check_configs(): """Test config files for comformnity check either one or all for a platform id in filename == platform.id datetime in filename <= platform.config_start_date (close in time usually the same day also platform.config_start_date < platform.config_end_date (there needs to be some time that the platform was operational) test existence of specific structural elements (platform info and sensor info) and specific fields for both platform and sensor verify that for each platform_info['packages'] there is sensor_info and same id for pi['packages'][0] in si.keys() pi['packages'][0] == si['adcp']['id'] bounds on data in fields show difference between two consecutive configs? pretty print to screen of dictionary info for platform and sensor info cn = os.path.splitext(os.path.basename(config))[0] cndt = filt_datetime(os.path.basename(config))[0] pi = get_config(cn+'.platform_info') if pi['config_start_date']: config_start_dt = filt_datetime(pi['config_start_date'])[0] elif pi['config_start_date'] == None: config_start_dt = now_dt if pi['config_end_date']: config_end_dt = filt_datetime(pi['config_end_date'])[0] elif pi['config_end_date'] == None: config_end_dt = now_dt print cn + ' -----------------' print cndt print config_start_dt print config_end_dt print now_dt print 'file date ok? ' + str(cndt <= config_start_dt) print 'operation date ok? ' + str(config_start_dt < config_end_dt) """ def dt2es(dt): """Convert datetime object to epoch seconds (es) as seconds since Jan-01-1970 """ # microseconds of timedelta object not used delta = dt - datetime(1970,1,1,0,0,0) es = delta.days*24*60*60 + delta.seconds return es def es2dt(es): """ Convert epoch seconds (es) to datetime object""" dt = datetime(*time.gmtime(es)[0:6]) return dt def find_months(year, month=1): """Find which months to process Since data are in subdirectories based on months determine previous, current, and next month to look in directories for data of the current month or month to process. :Parameters: year : int value or str 'yyyy_mm' month : int value :Returns: which_months : tuple of 3 datetime objects (prev_month, current_month, next_month) Examples -------- >>> find_months(2007, 2) >>> find_months('2007_02') """ if type(year) == int and type(month) == int : dt = datetime(year, month, day=1) this_month = dt elif type(year) == str : dt = filt_datetime(year)[0] this_month = dt # if dt.month == 1: # if January prev_month = datetime(dt.year-1, month=12, day=1) # Dec next_month = datetime(dt.year, dt.month+1, day=1) # Feb elif dt.month == 12: # if December prev_month = datetime(dt.year, dt.month-1, day=1) # Nov next_month = datetime(dt.year+1, month=1, day=1) # Jan else: prev_month = datetime(dt.year, dt.month-1, day=1) next_month = datetime(dt.year, dt.month+1, day=1) # return (prev_month, this_month, next_month) def this_month(): """Return this month (GMT) as formatted string (yyyy_mm) """ this_month_str = "%4d_%02d" % time.gmtime()[0:2] return this_month_str def scanf_datetime(ts, fmt='%Y-%m-%dT%H:%M:%S'): """Convert string representing date and time to datetime object""" # default string format follows convention YYYY-MM-DDThh:mm:ss t = time.strptime(ts, fmt) # the '*' operator unpacks the tuple, producing the argument list. dt = datetime(*t[0:6]) return dt def filt_datetime(input_string, remove_ext=True): """ Following the template, (YY)YYMMDDhhmmss and versions with of this with decreasing time precision, find the most precise, reasonable string match and return its datetime object. """ # remove any trailing filename extension from os.path import splitext import re if remove_ext: (s, e) = splitext(input_string) input_string = s # YYYYMMDDhhmmss and should handle most cases of the stamp # other forms this should pass # YY_MM_DD_hh:mm:ss # YYYY_MM_DD_hh:mm:ss # YYYY,MM,DD,hh,mm,ss # YY,MM,DD,hh,mm,ss case1_regex = r""" # case 1: (YY)YYMMDDhhmmss (\d{4}|\d{2}) # 2- or 4-digit YEAR (e.g. '07' or '2007') \D? # optional 1 character non-digit separator (e.g. ' ' or '-') (\d{2}) # 2-digit MONTH (e.g. '12') \D? # optional 1 character non-digit separator (\d{2}) # 2-digit DAY of month (e.g. '10') \D? # optional 1 character non-digit separator (e.g. ' ' or 'T') (\d{2}) # 2-digit HOUR (e.g. '10') \D? # optional 1 character non-digit separator (e.g. ' ' or ':') (\d{2}) # 2-digit MINUTE (e.g. '10') \D? # optional 1 character non-digit separator (e.g. ' ' or ':') (\d{2}) # 2-digit SECOND (e.g. '10') """ case2_regex = r""" # case 2: (YY)YYMMDDhhmm (no seconds) (\d{4}|\d{2}) # 2- or 4-digit YEAR \D? # optional 1 character non-digit separator (e.g. ' ' or '-') (\d{2}) # 2-digit MONTH \D? # optional 1 character non-digit separator (\d{2}) # 2-digit DAY \D? # optional 1 character non-digit separator (e.g. ' ' or 'T') (\d{2}) # 2-digit HOUR \D? # optional 1 character non-digit separator (e.g. ' ' or ':') (\d{2}) # 2-digit MINUTE """ case3_regex = r""" # case 3: (YY)YYMMDDhh (no seconds, no minutes) (\d{4}|\d{2}) # 2- or 4-digit YEAR \D? # optional 1 character non-digit separator (e.g. ' ' or '-') (\d{2}) # 2-digit MONTH \D? # optional 1 character non-digit separator (\d{2}) # 2-digit DAY \D? # optional 1 character non-digit separator (e.g. ' ' or 'T') (\d{2}) # 2-digit HOUR """ case4_regex = r""" # case 4: (YY)YYMMDD (no time values, just date) (\d{4}|\d{2}) # 2- or 4-digit YEAR \D? # optional 1 character non-digit separator (e.g. ' ' or '-') (\d{2}) # 2-digit MONTH \D? # optional 1 character non-digit separator (\d{2}) # 2-digit DAY """ case5_regex = r""" # case 5: (YY)YYMM (no time values, just month year) (\d{4}|\d{2}) # 2- or 4-digit YEAR \D? # optional 1 character non-digit separator (e.g. ' ' or '-') (\d{2}) # 2-digit MONTH """ ## Verbose regular expressions require use of re.VERBOSE flag. ## so we can use multiline regexp # cases are ordered from precise to more coarse resolution of time cases = [case1_regex, case2_regex, case3_regex, case4_regex, case5_regex] patterns = [re.compile(c, re.VERBOSE) for c in cases] matches = [p.search(input_string) for p in patterns] # for testing, try to computer datetime objects # just because there is a match does not mean it makes sense for ind in range(len(matches)): if bool(matches[ind]): # print matches[ind].groups() bits = matches[ind].groups() values = [int(yi) for yi in bits] # check for 2-digit year if values[0] < 50: values[0] += 2000 elif values[0]>=50 and values[0]<100: values[0] += 1900 # # we must have at least 3 arg input to datetime if len(values)==1: values.extend([1,1]) # add First of January elif len(values)==2: values.extend([1]) # add first day of month # # compute dt try: dt = datetime(*values) except ValueError, e: # value error if something not valid for datetime # e.g. month 1...12, something parsed wrong dt = None else: # absolute difference in days from now (UTC) z = dt - datetime.utcnow() daysdiff = abs(z.days) # if this date unreasonable (>10 years*365), throw it out # something parsed wrong if daysdiff > 3650: dt = None else: dt = None # place datetime object or None within sequence of matches matches[ind] = dt # find the first (most precise) date match since there might be more than # as we searched more coarse templates, but now we have thrown out b = [bool(x) for x in matches] try: ind = b.index(True) except ValueError, e: print 'filt_datetime: No date found in ', input_string dt = None else: dt = matches[ind] return dt,ind def display_time_diff(diff): """Display time difference in HH:MM:DD using number weeks (W) and days (D) if necessary""" # weeks, days = divmod(diff.days, 7) days = diff.days minutes, seconds = divmod(diff.seconds, 60) hours, minutes = divmod(minutes, 60) # if (weeks>2 and days>0): # str = "%d Weeks, %d Days %02d:%02d" % (days, hours, minutes) if (days==1): str = "%02d:%02d" % (24+hours, minutes) elif (days>1): str = "%d Days %02d:%02d" % (days, hours, minutes) else: str = "%02d:%02d" % (hours, minutes) return str def copy_loop_sequence(src, dst, fn_glob, numFiles=24): """ """ # src = '/seacoos/data/nccoos/level3/bogue/adcpwaves/dspec/'+this_month.strftime("%Y_%m") # dst = '/home/haines/rayleigh/loop/' # fn_glob = 'bogue_dspec_plot*' def addnan(dt, data): # dt to be only 1-dimension and data to be 1- or 2-dimensional from matplotlib.dates import date2num, num2date # print dt.shape # print data.shape dn = date2num(dt) delta = numpy.diff(dn) sample_interval = numpy.median(delta) maxdelta = 1.5*sample_interval # print maxdelta igap = (delta > maxdelta).nonzero()[0] ngap = len(igap) if not ngap: return (dt, data) else: # convert sample interval to dt object sample_interval = timedelta(0.5*sample_interval) # for each gap in data create NaN data_insert = [numpy.nan for gap in igap] # for each gap in time create datetime value dt_insert = [dt[gap]+sample_interval for gap in igap] # insert new sample times at indices of the gaps new_dt = numpy.insert(numpy.array(dt), igap+1, dt_insert) # insert NaN data at the indices that match the above times new_data = numpy.insert(numpy.array(data), igap+1, data_insert, axis=0) return (new_dt, new_data) # unit conversions def meters2feet(meters): """Convert meters to feet: = *3.28084 """ return meters*3.28084 def feet2meters(feet): """Convert feet to meters: = *0.3048 """ return feet*0.3048 def millibar2inches_Hg(millibar): """Convert millibars to inches Hg: = *0.0295301 """ return millibar*0.0295301 def celsius2fahrenheit(celsius): """Convert deg Celsius to deg Fahrenheit: = ((1.8*)+32) """ return (1.8*celsius)+32 def millimeters2inches(millimeters): """ Convert millimeter to inches: = *0.0393700787) """ return millimeters*0.0393700787 def inches2millimeters(inches): """ Convert = *25.4 """ return inches*25.4 def meters_sec2knots(meters_sec): """ Convert m/s to knots: = *1.94384449) """ return meters_sec*1.94384449 def wind_vector2u(wind_speed, wind_from_direction): """ Convert wind vector to U (east) component: = *sine(*pi/180) """ return wind_speed*math.sin(wind_from_direction*math.pi/180) def wind_vector2v(wind_speed, wind_from_direction): """ Convert wind vector to V (north) component: = *cosine(*pi/180) """ return wind_speed*math.cos(wind_from_direction*math.pi/180) def proc2latest(pi, si, yyyy_mm): """Select specific variables and times from current monthly netCDF and post as latest data. TEST MODE. For each active config file, load specific variables from NCCOOS monthly netCDF, make any necessary changes to data or attributes conform to SEACOOS Data Model, subset data (last 48 hours), and create new netCDF file in latest netCDF directory. NOTE: In test mode right now. See auto() function for similar action. """ platform = pi['id'] package = si['id'] si['proc_filename'] = '%s_%s_%s.nc' % (platform, package, yyyy_mm) ifn = os.path.join(si['proc_dir'], si['proc_filename']) if os.path.exists(ifn): # get dt from current month file (es, units) = nc_get_time(ifn) dt = [es2dt(e) for e in es] last_dt = dt[-1] # determine which index of data is within the specified timeframe (last 2 days) n = len(dt) idx = numpy.array([False for i in range(n)]) for i, val in enumerate(dt): if val>last_dt-timedelta(days=2) and val<=last_dt+timedelta(seconds=360): idx[i] = True dt = numpy.array(dt) dt = dt[idx] # read in data and unpack tuple d = nc_load(ifn, si['latest_vars']) global_atts, var_atts, dim_inits, var_inits, var_data = d list_of_record_vars = nc_find_record_vars(ifn) # turn off unlimited dimension (SH NOTE: As of pycdf-0.6-3b cannot # delete a dimension or reset unlimited to limited within either # CDF or CDFDim class, so doing it manually here by setting list # 'dim_init' before creating of new netcdf.) dim_inits = list(dim_inits) for i in range(len(dim_inits)): if dim_inits[i][1]==0: dim_inits[i] = ('ntime', len(dt)) dim_inits = tuple(dim_inits) # subset data varNames = [vn for vn, vt, vd in var_inits] var_data = list(var_data) for i in range(len(varNames)): vn, vd = var_data[i] if vn in list_of_record_vars: var_data[i]=(vn, vd[idx]) var_data = tuple(var_data) global_atts['start_date'] = dt[0].strftime('%Y-%m-%d %H:%M:%S') si['latest_filename'] = 'nccoos_%s_%s_latest.nc' % (platform, package) ofn = os.path.join(si['latest_dir'], si['latest_filename']) d = (global_atts, var_atts, dim_inits, var_inits, var_data) # write latest data nc_create(ofn, d) # quick way to rename dimensions nc_rename_dimension(ofn, 'ntime', 'time') nc_rename_dimension(ofn, 'nlat', 'lat') nc_rename_dimension(ofn, 'nlon', 'lon') nc_rename_dimension(ofn, 'nz', 'z') # global replace _FillValue nc_replace_fillvalue(ofn, -99999.0)