#!/usr/bin/env python # Last modified: Time-stamp: <2009-12-16 15:23:36 haines> """ how to parse data, and assert what data and info goes into creating and updating monthly netcdf files parse data from YSI 6600 V2-2 on an automated vertical profiler (avp) parser : date and time, water_depth for each profile sample time, sample depth, as cast measures water temperature, conductivity, salinity, pH, dissolved oxygen, turbidity, and chlorophyll creator : lat, lon, z, stime, (time, water_depth), water_temp, cond, salin, ph, turb, chl, do updator : z, stime, (time, water_depth), water_temp, cond, salin, ph, turb, chl, do using fixed profiler CDL but modified to have raw data for each cast along each column Examples -------- >> (parse, create, update) = load_processors('proc_avp_ysi_6600_v2') or >> si = get_config(cn+'.sensor_info') >> (parse, create, update) = load_processors(si['adcp']['proc_module']) >> lines = load_data(filename) >> data = parse(platform_info, sensor_info, lines) >> create(platform_info, sensor_info, data) or >> update(platform_info, sensor_info, data) """ from raw2proc import * from procutil import * from ncutil import * now_dt = datetime.utcnow().replace(microsecond=0) def parser(platform_info, sensor_info, lines): """ parse Automated Vertical Profile Station (AVP) Water Quality Data month, day, year, hour, min, sec, temp (deg. C), conductivity (mS/cm), salinity (ppt or PSU), depth (meters), pH, turbidity (NTU), chlorophyll (micrograms per liter), DO (micrograms per liter) Notes ----- 1. 
Column Format temp, cond, salin, depth, pH, turb, chl, DO (C), (mS/cm), (ppt), (m), pH, (NTU), (ug/l), (ug/l) Profile Time: 00:30:00 Profile Date: 08/18/2008 Profile Depth: 255.0 cm Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP 08/18/08 00:30:06 26.94 41.87 26.81 0.134 8.00 3.4 4.5 6.60 08/18/08 00:30:07 26.94 41.87 26.81 0.143 8.00 3.4 4.8 6.59 08/18/08 00:30:08 26.94 41.87 26.81 0.160 8.00 3.4 4.8 6.62 08/18/08 00:30:09 26.94 41.87 26.81 0.183 8.00 3.4 4.8 6.66 2. Use a ragged array to store each uniquely measured param at each time and depth but not gridded, so this uses fixed profiler CDL but modified to have raw data for each cast along each column. For plotting, the data will need to be gridded at specified depth bins. Tony Whipple at IMS says 'The AVPs sample at one second intervals. Between the waves and the instrument descending from a spool of line with variable radius it works out to about 3-5 cm between observations on average. When I process the data to make the images, I bin the data every 10 cm and take the average of however many observations fell within that bin.' 
""" import numpy from datetime import datetime from time import strptime # get sample datetime from filename fn = sensor_info['fn'] sample_dt_start = filt_datetime(fn)[0] # how many profiles in one file, count number of "Profile Time:" in lines nprof = 0 for line in lines: m=re.search("Profile Time:", line) if m: nprof=nprof+1 # remove first occurrence of blank line if within first 40 lines for i in range(len(lines[0:40])): if re.search("^ \r\n", lines[i]): # print str(i) + " " + lines[i] + " " + lines[i+1] blank_line = lines.pop(i) # lines.append(blank_line) # ensure signal end of profile after last profile by appending a blank line to data file lines.append(' \r\n') # ensure blank line between profile casts for i, line in enumerate(lines): if re.search(r"Profile Time", line, re.IGNORECASE): if not re.search("^ \r\n", lines[i-1]): lines.insert(i, " \r\n") N = nprof nbins = sensor_info['nbins'] data = { 'dt' : numpy.array(numpy.ones((N,), dtype=object)*numpy.nan), 'time' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan), 'z' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan), # 'wd' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan), 'wl' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan), # 'ysi_sn' : numpy.array(['' for i in range(N)] , dtype='|S20'), # 'ysi_id' : numpy.array(['' for i in range(N)] , dtype='|S20'), # 'stime' : numpy.array(numpy.ones((N,nbins), dtype=long)*numpy.nan), 'wtemp' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan), 'cond' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan), 'salin' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan), 'turb' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan), 'ph' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan), 'chl' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan), 'do' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan), } # current profile count i = 0 have_date = have_time = have_wd = have_location = have_head = 
False for line in lines: # if line has weird ascii chars -- skip it and iterate to next line if re.search(r"[\x1a]", line): print 'skipping bad data line ... ' + str(line) continue ysi = [] # split line and parse float and integers sw = re.split('[\s/\:]*', line) for s in sw: m = re.search(REAL_RE_STR, s) if m: ysi.append(float(m.groups()[0])) if re.search("Profile Time:", line): have_time = True HH=ysi[0] MM=ysi[1] SS=ysi[2] elif re.search("Profile Date:", line): have_date = True mm=ysi[0] dd=ysi[1] yyyy=ysi[2] elif re.search("Profile Depth:", line): have_wd = True wd = ysi[0]/100. # cm to meters profile_str = '%02d-%02d-%4d %02d:%02d:%02d' % (mm,dd,yyyy,HH,MM,SS) if sensor_info['utc_offset']: profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S') + \ timedelta(hours=sensor_info['utc_offset']) else: profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S') elif re.search("Profile Location:", line): have_location = True # Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP sw = re.findall(r'\w+:\s(\w+)*', line) # ysi_sn = sw[1] # ysi_id = sw[2] # initialize for new profile at zero for averaging samples within each bin wtemp = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan depth =numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan cond = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan salin = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan turb = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan ph = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan chl = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan do = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan stime = numpy.array(numpy.ones(nbins,), dtype=long)*numpy.nan # keep track of number of samples in one profile so not to exceed nbins j = 0 # have all the headers stuff head = numpy.array([have_date, have_time, have_wd, have_location]) have_head = head.all() elif (len(ysi)==14 and have_head): if j>=nbins: print 'Sample number (' + 
str(j) + \ ') in profile exceeds maximum value ('+ \ str(nbins) + ') in config' # get sample datetime from data sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(ysi[0:6]) # month, day, year try: sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S') except ValueError: # day, month, year (month and day switched in some cases) try: sample_dt = scanf_datetime(sample_str, fmt='%d-%m-%y %H:%M:%S') except: sample_dt = datetime(1970,1,1) if sensor_info['utc_offset']: sample_dt = sample_dt + timedelta(hours=sensor_info['utc_offset']) if j