#!/usr/bin/env python
# Last modified: Time-stamp: <2010-12-09 16:15:23 haines>
"""
How to parse data, and assert what data and info goes into
creating and updating monthly NetCDF files.

parser : output delimited ASCII file from onsite perl script
creator : lat, lon, z, time, wspd, wdir, cdir, u, v, nwnd
updater : time, wspd, wdir, cdir, u, v, nwnd

Examples
--------

>> (parse, create, update) = load_processors('proc_avp_ascii_wnd')
or
>> si = get_config(cn+'.sensor_info')
>> (parse, create, update) = load_processors(si['met']['proc_module'])

>> lines = load_data(filename)
>> data = parse(platform_info, sensor_info, lines)
>> create(platform_info, sensor_info, data)
or
>> update(platform_info, sensor_info, data)

"""

from raw2proc import *
from procutil import *
from ncutil import *

import time

now_dt = datetime.utcnow()
now_dt = now_dt.replace(microsecond=0)

def parser(platform_info, sensor_info, lines):
    """
    Parse Automated Vertical Profile Station (AVP) wind data.

    Notes
    -----
    1. Wind:

       Date       Time       Speed  Dir    Compass  North   East    n-samples
                             (m/s)  (magN) (magN)   (m/s)   (m/s)
       08/11/2008 00:00:00   5.881  197    197      -5.638  -1.674  696
       08/11/2008 00:30:00   5.506  216    197      -4.448  -3.246  699
       08/11/2008 01:00:00   7.233  329    159       6.183  -3.754  705

    """
    import numpy
    from datetime import datetime, timedelta
    from time import strptime

    # get sample datetime from filename
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)

    # if a line has unexpected ascii chars -- remove it
    # (filter into a new list rather than popping while iterating)
    lines = [line for line in lines if not re.search(r"[\x1a]", line)]
    lines.sort()

    N = len(lines)
    data = {
        'dt'   : numpy.array(numpy.ones((N,), dtype=object)*numpy.nan),
        'time' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'wspd' : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'wdir' : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'cdir' : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'v'    : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'u'    : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'nwnd' : numpy.array(numpy.ones((N,), dtype=int)*numpy.nan),
        }

    i = 0
    mvar = platform_info['mvar']  # Magnetic Variation at station

    for line in lines:
        # if a line still has unexpected ascii chars -- skip it
        if re.search(r"[\x1a]", line):
            print 'skipping bad data line ... ' + str(line)
            continue

        wnd = []
        # split line and parse floats and integers
        sw = re.split('[\s\/\:]+', line)
        for s in sw:
            m = re.search(REAL_RE_STR, s)
            if m:
                wnd.append(float(m.groups()[0]))

        if len(wnd)>=11:
            # get sample datetime from data
            sample_str = '%02d-%02d-%4d %02d:%02d:%02d' % tuple(wnd[0:6])
            if sensor_info['utc_offset']:
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%Y %H:%M:%S') + \
                            timedelta(hours=sensor_info['utc_offset'])
            else:
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%Y %H:%M:%S')

            wspd = wnd[6]       # wind speed (m/s)
            wdir = int(wnd[7])  # wind dir (mag N)
            cdir = wnd[8]       # compass dir (mag N)
            u = wnd[9]          # Easterly (?) component (m/s) (mag or true??)
            v = wnd[10]         # Northerly (?) component (m/s) (mag or true??)

            if len(wnd)>=12:
                # number of samples in wind average
                nwnd = int(wnd[11])
            else:
                # prior to Sep 2008 the number of samples was not recorded
                nwnd = numpy.nan

            # still to do (see the illustrative helper sketched after this function):
            # combine wind dir and buoy compass direction
            # correct direction from magnetic N to true N
            # rotate u, v to true N
            # or
            # recompute u, v from direction and speed

            data['dt'][i] = sample_dt           # sample datetime
            data['time'][i] = dt2es(sample_dt)  # sample time in epoch seconds
            data['wspd'][i] = wspd
            data['wdir'][i] = wdir
            data['cdir'][i] = cdir
            data['u'][i] = u
            data['v'][i] = v
            data['nwnd'][i] = nwnd
            i=i+1

        # if len(wnd)>=11
    # for line

    return data
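
# The comments in parser() above note that wdir, cdir, u, and v are still
# referenced to magnetic North and should be corrected using the station
# magnetic variation (platform_info['mvar']).  Below is a minimal sketch of
# that correction, assuming mvar is the declination in degrees, positive East
# of true North, and that u and v are components in the magnetic frame.  The
# helper name is hypothetical and it is not called by parser(); it only
# illustrates the intended correction.
import math

def _magnetic_to_true(u_mag, v_mag, dir_mag, mvar):
    """Rotate wind components and a bearing from magnetic N to true N."""
    a = math.radians(mvar)
    # rotate the (east, north) vector from the magnetic frame to the true frame
    u_true = u_mag * math.cos(a) + v_mag * math.sin(a)
    v_true = -u_mag * math.sin(a) + v_mag * math.cos(a)
    # bearings measured clockwise: true = magnetic + declination (East positive)
    dir_true = (dir_mag + mvar) % 360.0
    return (u_true, v_true, dir_true)
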
def creator(platform_info, sensor_info, data):
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    title_str = sensor_info['description']+' at '+platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.org',
        'institution_dods_url' : 'http://nccoos.org',
        'metadata_url' : 'http://nccoos.org',
        'references' : 'http://nccoos.org',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        #
        'source' : 'AVP Wind Observations',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-point',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url' : 'http://nccoos.org',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        'start_date' : dt[0].strftime("%Y-%m-%d %H:%M:%S"),
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : numpy.nan,
        }
    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Sample Time',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude in Decimal Degrees',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude in Decimal Degrees',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive': 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'wspd' : {'short_name': 'wspd',
                  'long_name': 'Wind Speed',
                  'standard_name': 'wind_speed',
                  'units': 'm s-1',
                  'can_be_normalized': 'no',
                  'z' : sensor_info['anemometer_height'],
                  },
        'wdir' : {'short_name': 'wdir',
                  'long_name': 'Wind Direction from',
                  'standard_name': 'wind_from_direction',
                  'reference': 'clockwise from Magnetic North',
                  'valid_range': (0., 360),
                  'units': 'degrees',
                  'z' : sensor_info['anemometer_height'],
                  },
        'cdir' : {'short_name': 'cdir',
                  'long_name': 'Buoy Orientation',
                  'standard_name': 'compass_direction',
                  'reference': 'clockwise from Magnetic North',
                  'valid_range': (0., 360),
                  'units': 'degrees',
                  },
        'u' : {'short_name': 'u',
               'long_name': 'East/West Component of Wind',
               'standard_name': 'eastward_wind',
               'reference': 'relative to True East (?)',
               'units': 'm s-1',
               'can_be_normalized': 'no',
               'z' : sensor_info['anemometer_height'],
               },
        'v' : {'short_name': 'v',
               'long_name': 'North/South Component of Wind',
               'standard_name': 'northward_wind',
               'reference': 'relative to True North (?)',
               'units': 'm s-1',
               'can_be_normalized': 'no',
               'z' : sensor_info['anemometer_height'],
               },
        'nwnd' : {'short_name': 'nwnd',
                  'long_name': 'Number of wind samples in sample period',
                  'standard_name': 'number_of_samples',
                  'units': '',
                  },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('time', NC.UNLIMITED),
        ('lat', 1),
        ('lon', 1),
        ('z', 1),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('time',)),
        ('lat', NC.FLOAT, ('lat',)),
        ('lon', NC.FLOAT, ('lon',)),
        ('z', NC.FLOAT, ('z',)),
        # data variables
        ('wspd', NC.FLOAT, ('time',)),
        ('wdir', NC.FLOAT, ('time',)),
        ('cdir', NC.FLOAT, ('time',)),
        ('u', NC.FLOAT, ('time',)),
        ('v', NC.FLOAT, ('time',)),
        ('nwnd', NC.FLOAT, ('time',)),
        )

    # var data
    var_data = (
        ('lat', platform_info['lat']),
        ('lon', platform_info['lon']),
        ('z', sensor_info['anemometer_height']),
        #
        ('time', data['time'][i]),
        ('wspd', data['wspd'][i]),
        ('wdir', data['wdir'][i]),
        ('cdir', data['cdir'][i]),
        ('u', data['u'][i]),
        ('v', data['v'][i]),
        ('nwnd', data['nwnd'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
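
# The dictionaries below sketch the configuration keys this module reads from
# platform_info and sensor_info (supplied by raw2proc from the platform
# configuration).  All values are hypothetical placeholders for illustration
# only; the real values live in the platform configuration module.
_example_platform_info = {
    'id' : 'avp1',               # platform code (placeholder)
    'location' : 'Example Site, NC',
    'institution' : 'unc',
    'lat' : 34.0,                # decimal degrees North (placeholder)
    'lon' : -77.0,               # decimal degrees East (placeholder)
    'mvar' : -9.0,               # magnetic variation in degrees (placeholder)
    }
_example_sensor_info = {
    'id' : 'met',                # package code (placeholder)
    'description' : 'Wind Data',
    'fn' : '',                   # raw filename, set by raw2proc at run time
    'utc_offset' : 5,            # hours to add to sample time (placeholder)
    'anemometer_height' : 3.,    # meters above sea surface (placeholder)
    'process_module' : 'proc_avp_ascii_wnd',
    }
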
def updater(platform_info, sensor_info, data):
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    global_atts = {
        # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        }

    # data variables
    # update any variable attributes like range, min, max
    var_atts = {}
    # var_atts = {
    #     'u': {'max': max(data.u),
    #           'min': min(data.u),
    #           },
    #     'v': {'max': max(data.v),
    #           'min': min(data.v),
    #           },
    #     }

    # subset data only to month being processed (see raw2proc.process())
    i = data['in']

    # data
    var_data = (
        ('time', data['time'][i]),
        ('wspd', data['wspd'][i]),
        ('wdir', data['wdir'][i]),
        ('cdir', data['cdir'][i]),
        ('u', data['u'][i]),
        ('v', data['v'][i]),
        ('nwnd', data['nwnd'][i]),
        )

    return (global_atts, var_atts, var_data)
#
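
# A minimal end-to-end sketch mirroring the module docstring.  The config
# name and raw-file path are hypothetical, and the sketch assumes the platform
# configuration exposes platform_info and sensor_info the way get_config()
# returns them in the docstring example.  In production raw2proc.process()
# drives these steps, subsets the month being processed, and sets data['in']
# before creator()/updater() are called.
if __name__ == '__main__':
    cn = 'avp_config'                              # hypothetical config name
    platform_info = get_config(cn + '.platform_info')
    sensor_info = get_config(cn + '.sensor_info')['met']
    sensor_info['fn'] = './avp_wnd_sample.dat'     # hypothetical raw file
    lines = load_data(sensor_info['fn'])
    data = parser(platform_info, sensor_info, lines)
    print 'parsed %d wind records' % len(data['time'])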