NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/proc_avp_ysi_6600_v2_CDL2.py

Revision 448 (checked in by cbc, 13 years ago)

Add new Billy Mitchell configs.

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2011-02-24 11:00:27 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse data from YSI 6600 V2-2 on an automated vertical profiler (avp)
8
9 parser : date and time, water_depth for each profile
10
11          sample time, sample depth, as cast measures water
12          temperature, conductivity, salinity, pH, dissolved oxygen,
13          turbidity, and chlorophyll
14          
15
16 creator : lat, lon, z, stime, (time, water_depth), water_temp, cond,
17           salin, ph, turb, chl, do
18
19 updater : z, stime, (time, water_depth), water_temp, cond, salin, ph,
20           turb, chl, do
21
22 using fixed profiler CDL but modified to have raw data for each cast
23 along each column
24
25
26 Examples
27 --------
28
29 >> (parse, create, update) = load_processors('proc_avp_ysi_6600_v2')
30 or
31 >> si = get_config(cn+'.sensor_info')
32 >> (parse, create, update) = load_processors(si['adcp']['proc_module'])
33
34 >> lines = load_data(filename)
35 >> data = parse(platform_info, sensor_info, lines)
36 >> create(platform_info, sensor_info, data) or
37 >> update(platform_info, sensor_info, data)
38
39 """
40
41
42 from raw2proc import *
43 from procutil import *
44 from ncutil import *
45
# Processing timestamp in UTC, truncated to whole seconds.
# datetime objects are immutable, so replace() must be assigned back;
# calling it as a bare statement leaves the microseconds in place.
now_dt = datetime.utcnow().replace(microsecond=0)
48
def parser(platform_info, sensor_info, lines):
    """
    Parse Automated Vertical Profile Station (AVP) water quality data.

    Each sample line holds month, day, year, hour, min, sec, temp
    (deg. C), conductivity (mS/cm), salinity (ppt or PSU), depth
    (meters), pH, turbidity (NTU), chlorophyll (micrograms per liter),
    DO (mg per liter)

    Parameters
    ----------
    platform_info : dict
        'mean_water_depth' is read to compute the water level.
    sensor_info : dict
        'fn' (data file name), 'nbins' (maximum number of samples kept
        per profile) and 'utc_offset' (hours added to every parsed
        time when truthy) are read.
    lines : list of str
        Raw lines of one data file.  The list is modified in place:
        blank lines may be removed near the top, a blank line is
        appended, and blank separators are inserted before profile
        headers.

    Returns
    -------
    dict of numpy arrays, one row per profile found in the file:
    'dt', 'time', 'wd', 'wl' have one value per profile; 'z', 'stime',
    'wtemp', 'cond', 'salin', 'turb', 'ph', 'chl', 'do' have nbins
    values per profile.  Unfilled slots are NaN.

    Notes
    -----
    1. Column Format

    temp, cond, salin, depth, pH, turb, chl, DO
    (C), (mS/cm), (ppt), (m), pH, (NTU), (ug/l), (mg/l)

    Profile Time: 00:30:00
    Profile Date: 08/18/2008
    Profile Depth: 255.0 cm
    Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
    08/18/08 00:30:06 26.94  41.87  26.81   0.134  8.00     3.4   4.5   6.60
    08/18/08 00:30:07 26.94  41.87  26.81   0.143  8.00     3.4   4.8   6.59
    08/18/08 00:30:08 26.94  41.87  26.81   0.160  8.00     3.4   4.8   6.62
    08/18/08 00:30:09 26.94  41.87  26.81   0.183  8.00     3.4   4.8   6.66

    2.  Use a ragged array to store each uniquely measured param at each
    time and depth but not gridded, so this uses fixed profiler CDL
    but modified to have raw data for each cast along each column.
    For plotting, the data will need to be gridded at specified depth bins.

    Tony Whipple at IMS says 'The AVPs sample at one second intervals.
    Between the waves and the instrument descending from a spool of
    line with variable radius it works out to about 3-5 cm between
    observations on average.  When I process the data to make the
    images, I bin the data every 10 cm and take the average of however
    many observations fell within that bin.'

    """
    import numpy
    from datetime import datetime

    def nan_array(shape, dtype=float):
        # array of the given shape filled with NaN
        return numpy.ones(shape, dtype=dtype) * numpy.nan

    blank_re = re.compile("^ \r\n")

    # sample datetime derived from the filename (not used further below)
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)

    # how many profiles in one file, count number of "Profile Time:" in lines
    nprof = sum(1 for line in lines if re.search("Profile Time:", line))

    # remove blank lines found within the first 40 lines; the line that
    # shifts into a removed slot is not re-examined.  The list shrinks
    # while we index it, so stop once the index runs past the end
    # (short files used to raise IndexError here).
    for i in range(min(40, len(lines))):
        if i >= len(lines):
            break
        if blank_re.search(lines[i]):
            lines.pop(i)

    # ensure signal end of profile after last profile by appending a blank line to data file
    lines.append(' \r\n')

    # ensure blank line between profile casts.  Built as a new list and
    # written back in place instead of inserting while iterating.  For the
    # first line the "previous" line is lines[-1], i.e. the blank line just
    # appended, so nothing is inserted at the very top.
    separated = []
    for k, line in enumerate(lines):
        if re.search(r"Profile Time", line, re.IGNORECASE):
            if not blank_re.search(lines[k-1]):
                separated.append(" \r\n")
        separated.append(line)
    lines[:] = separated

    N = nprof
    nbins = sensor_info['nbins']

    data = {
        'dt' : nan_array((N,), dtype=object),
        'time' : nan_array((N,)),
        'z' : nan_array((N, nbins)),
        #
        'wd' : nan_array((N,)),
        'wl' : nan_array((N,)),
        # 'ysi_sn' / 'ysi_id' (sonde serial number and id) are currently disabled
        #
        'stime' : nan_array((N, nbins)),
        'wtemp' : nan_array((N, nbins)),
        'cond' : nan_array((N, nbins)),
        'salin' : nan_array((N, nbins)),
        'turb' : nan_array((N, nbins)),
        'ph' : nan_array((N, nbins)),
        'chl' : nan_array((N, nbins)),
        'do' : nan_array((N, nbins)),
        }

    # current profile count
    i = 0
    have_date = have_time = have_wd = have_location = have_head = False

    for line in lines:
        # if line has weird ascii chars -- skip it and iterate to next line
        if re.search(r"[\x1a]", line):
            continue

        # split line on runs of whitespace, '/' and ':' and collect every
        # numeric token.  The separator class uses '+' (not '*') so the
        # pattern can never match the empty string, which newer Pythons
        # would treat as a split point between every character.
        ysi = []
        for s in re.split(r'[\s/:]+', line):
            m = re.search(REAL_RE_STR, s)
            if m:
                ysi.append(float(m.groups()[0]))

        if re.search("Profile Time:", line):
            have_time = True
            HH = ysi[0]
            MM = ysi[1]
            SS = ysi[2]
        elif re.search("Profile Date:", line):
            have_date = True
            mm = ysi[0]
            dd = ysi[1]
            yyyy = ysi[2]
        elif re.search("Profile Depth:", line):
            have_wd = True
            wd = ysi[0]/100.  # cm to meters
            # relies on the Time and Date header lines preceding this one
            profile_str = '%02d-%02d-%4d %02d:%02d:%02d' % (mm,dd,yyyy,HH,MM,SS)
            if sensor_info['utc_offset']:
                profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S') + \
                             timedelta(hours=sensor_info['utc_offset'])
            else:
                profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S')
        elif re.search("Profile Location:", line):
            have_location = True
            # Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
            # (serial number and id are not extracted at present)
            # start every per-sample array of the new profile as all-NaN
            wtemp = nan_array(nbins)
            depth = nan_array(nbins)
            cond = nan_array(nbins)
            salin = nan_array(nbins)
            turb = nan_array(nbins)
            ph = nan_array(nbins)
            chl = nan_array(nbins)
            do = nan_array(nbins)
            stime = nan_array(nbins)
            # keep track of number of samples in one profile so not to exceed nbins
            j = 0
            # header is complete only when all four header lines were seen
            have_head = have_date and have_time and have_wd and have_location

        elif len(ysi) == 14 and have_head:
            if j >= nbins:
                print('Sample number (' + str(j) +
                      ') in profile exceeds maximum value (' +
                      str(nbins) + ') in config')

            # get sample datetime from data
            sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(ysi[0:6])
            # month, day, year
            try:
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S')
            except ValueError:
                # day, month, year (month and day switched in some cases)
                try:
                    sample_dt = scanf_datetime(sample_str, fmt='%d-%m-%y %H:%M:%S')
                except Exception:
                    # unparseable either way: fall back to the epoch origin
                    sample_dt = datetime(1970,1,1)

            if sample_dt is not None:
                if sensor_info['utc_offset']:
                    sample_dt = sample_dt + timedelta(hours=sensor_info['utc_offset'])
                #
                if j < nbins:
                    stime[j] = dt2es(sample_dt) # sample time
                    wtemp[j] = ysi[6] # water temperature (C)
                    cond[j] = ysi[7]  # conductivity (mS/cm)
                    salin[j] = ysi[8] # salinity (ppt or PSU??)
                    #
                    depth[j] = ysi[9] # depth (m); negated into z when stored
                    #
                    ph[j] = ysi[10]   # ph
                    turb[j] = ysi[11] # turbidity (NTU)
                    chl[j] = ysi[12]  # chlorophyll (ug/l)
                    do[j] = ysi[13]   # dissolved oxygen (mg/l)
                    #
                    j = j+1
            else:
                # only reachable if scanf_datetime hands back None
                print('skipping line, ill-formed date ... ' + str(line))

        elif len(ysi) == 0 and have_head and i < N:  # each profile separated by empty line

            data['dt'][i] = profile_dt # profile datetime
            data['time'][i] = dt2es(profile_dt) # profile time in epoch seconds
            data['wd'][i] = -1.*wd
            data['wl'][i] = platform_info['mean_water_depth'] - (-1*wd)

            data['stime'][i] = stime # sample time in epoch seconds
            data['z'][i] = -1.*depth

            data['wtemp'][i] = wtemp
            data['cond'][i] = cond
            data['salin'][i] = salin
            data['turb'][i] = turb
            data['ph'][i] = ph
            data['chl'][i] = chl
            data['do'][i] = do

            i = i+1
            # Reset ALL header state, including have_head.  Leaving
            # have_head set let a second consecutive blank line store the
            # same profile again and push the last real profile out.
            have_date = have_time = have_wd = have_location = have_head = False
        else:
            print('skipping bad data line ... ' + str(line))
        # if-elif
    # for line

    return data
267  
268
def creator(platform_info, sensor_info, data):
    """
    Assemble everything needed to create a new monthly netCDF file.

    Parameters
    ----------
    platform_info : dict
        Reads 'location', 'institution', 'id', 'lat', 'lon',
        'mean_water_depth' and 'mean_water_depth_time_period'.
    sensor_info : dict
        Reads 'description', 'process_module', 'id' and 'nbins'.
    data : dict
        Parsed arrays plus 'in', the index selecting the records of the
        month being processed (see raw2proc.process()).

    Returns
    -------
    tuple
        (global_atts, var_atts, dim_inits, var_inits, var_data)
    """
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        #
        'source' : 'fixed-automated-profiler observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-profiler-ragged',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url' : 'http://nccoos.unc.edu',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : dt[0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : numpy.nan,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time of Profile',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis (latitude above is Y)
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'stime' : {'short_name': 'stime',
                  'long_name': 'Time of Sample ',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  },
        'wd': {'short_name': 'wd',
               'long_name': 'Water Depth',
               'standard_name': 'water_depth',
               'reference' : 'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               },
        'wl': {'short_name': 'wl',
               'long_name': 'Water Level',
               'standard_name': 'water_level',
               'reference':'MSL',
               'reference_to_MSL' : 0.,
               'reference_MSL_datum' : platform_info['mean_water_depth'],
               'reference_MSL_datum_time_period' : platform_info['mean_water_depth_time_period'],
               'positive' : 'up',
               'z' : 0.,
               'units': 'm',
               },
        # 'ysi_id' / 'ysi_sn' (sonde id and serial number) are currently disabled
        'wtemp': {'short_name': 'wtemp',
                        'long_name': 'Water Temperature',
                        'standard_name': 'water_temperature',
                        'units': 'degrees_Celsius',
                        },
        'cond': {'short_name': 'cond',
                        'long_name': 'Conductivity',
                        'standard_name': 'conductivity',
                        'units': 'mS cm-1',
                        },
        'salin': {'short_name': 'salin',
                        'long_name': 'Salinity',
                        'standard_name': 'salinity',
                        'units': 'PSU',
                        },
        'turb': {'short_name': 'turb',
                        'long_name': 'Turbidity',
                        'standard_name': 'turbidity',
                        'units': 'NTU',
                        },
        'ph': {'short_name': 'ph',
                        'long_name': 'pH',
                        'standard_name': 'ph',
                        'units': '',
                        },
        'chl': {'short_name': 'chl',
                        'long_name': 'Chlorophyll',
                        'standard_name': 'chlorophyll',
                        'units': 'ug l-1',
                        },
        'do': {'short_name': 'do',
                        'long_name': 'Dissolved Oxygen',
                        'standard_name': 'dissolved_oxygen',
                        'units': 'mg l-1',
                        },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('time', NC.UNLIMITED),
        ('lat', 1),
        ('lon', 1),
        ('z', sensor_info['nbins']),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('time',)),
        ('lat', NC.FLOAT, ('lat',)),
        ('lon', NC.FLOAT, ('lon',)),
        ('z',  NC.FLOAT, ('time', 'z',)),
        # data variables
        ('wd', NC.FLOAT, ('time',)),
        ('wl', NC.FLOAT, ('time',)),
        # NOTE(review): stime carries epoch seconds but is declared
        # NC.FLOAT; if that is a 32-bit float, second-level resolution is
        # lost -- confirm before changing the file schema.
        ('stime', NC.FLOAT, ('time', 'z')),
        ('wtemp', NC.FLOAT, ('time', 'z')),
        ('cond', NC.FLOAT, ('time', 'z')),
        ('salin', NC.FLOAT, ('time', 'z')),
        ('turb', NC.FLOAT, ('time', 'z')),
        ('ph', NC.FLOAT, ('time', 'z')),
        ('chl', NC.FLOAT, ('time', 'z')),
        ('do', NC.FLOAT, ('time', 'z')),
        )

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('time', data['time'][i]),
        ('wd', data['wd'][i]),
        ('wl', data['wl'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('ph', data['ph'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
469
def updater(platform_info, sensor_info, data):
    """
    Assemble what is needed to append data to an existing monthly file.

    Only the records selected by data['in'] (the month being processed,
    see raw2proc.process()) are passed on.

    Returns
    -------
    tuple
        (global_atts, var_atts, var_data)
    """
    stamp_fmt = "%Y-%m-%d %H:%M:%S"
    month_sel = data['in']
    profile_dts = data['dt'][month_sel]

    # refresh the time span and bookkeeping dates (yyyy-mm-dd HH:MM:SS)
    global_atts = {}
    # last date in monthly file
    global_atts['end_date'] = profile_dts[-1].strftime(stamp_fmt)
    global_atts['release_date'] = now_dt.strftime(stamp_fmt)
    global_atts['modification_date'] = now_dt.strftime(stamp_fmt)

    # no per-variable attributes (range, min, max, ...) are updated for now
    var_atts = {}

    # variables to append, in write order (ysi_id / ysi_sn are disabled)
    var_names = ('time', 'wd', 'wl', 'stime', 'z',
                 'wtemp', 'cond', 'salin', 'turb', 'ph', 'chl', 'do')
    var_data = tuple([(name, data[name][month_sel]) for name in var_names])

    return (global_atts, var_atts, var_data)
517 #
Note: See TracBrowser for help on using the browser.