
root/raw2proc/trunk/raw2proc/proc_avp_ysi_6600_v2_CDL2.py

Revision 219 (checked in by haines, 15 years ago)

AVP fixed profiler CDL2; hampton processing added YSI 6600 V1

#!/usr/bin/env python
# Last modified:  Time-stamp: <2009-01-08 11:50:47 haines>
"""
How to parse data, and assert what data and info go into
creating and updating monthly netCDF files.

Parse data from a YSI 6600 V2-2 on an automated vertical profiler (AVP).

parser : date and time, water_depth for each profile

         sample time, sample depth, as the cast measures water
         temperature, conductivity, salinity, pH, dissolved oxygen,
         turbidity, and chlorophyll

creator : lat, lon, z, stime, (time, water_depth), water_temp, cond,
          salin, ph, turb, chl, do

updater : z, stime, (time, water_depth), water_temp, cond, salin, ph,
          turb, chl, do

Uses the fixed-profiler CDL, but modified to hold the raw data for each
cast along each column.

Examples
--------

>> (parse, create, update) = load_processors('proc_avp_ysi_6600_v2')
or
>> si = get_config(cn+'.sensor_info')
>> (parse, create, update) = load_processors(si['adcp']['proc_module'])

>> lines = load_data(filename)
>> data = parse(platform_info, sensor_info, lines)
>> create(platform_info, sensor_info, data) or
>> update(platform_info, sensor_info, data)

"""


from raw2proc import *
from procutil import *
from ncutil import *

# datetime.replace() returns a new object, so reassign to drop microseconds
now_dt = datetime.utcnow()
now_dt = now_dt.replace(microsecond=0)

def parser(platform_info, sensor_info, lines):
    """
    parse Automated Vertical Profile Station (AVP) Water Quality Data

    month, day, year, hour, min, sec, temp (deg. C), conductivity
    (mS/cm), salinity (ppt or PSU), depth (meters), pH, turbidity (NTU),
    chlorophyll (micrograms per liter), DO (milligrams per liter)

    Notes
    -----
    1. Column Format

    temp, cond, salin, depth, pH, turb, chl, DO
    (C), (mS/cm), (ppt), (m), pH, (NTU), (ug/l), (mg/l)

    Profile Time: 00:30:00
    Profile Date: 08/18/2008
    Profile Depth: 255.0 cm
    Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
    08/18/08 00:30:06 26.94  41.87  26.81   0.134  8.00     3.4   4.5   6.60
    08/18/08 00:30:07 26.94  41.87  26.81   0.143  8.00     3.4   4.8   6.59
    08/18/08 00:30:08 26.94  41.87  26.81   0.160  8.00     3.4   4.8   6.62
    08/18/08 00:30:09 26.94  41.87  26.81   0.183  8.00     3.4   4.8   6.66

    2. Use a ragged array to store each uniquely measured param at each
       time and depth but not gridded, so this uses the fixed-profiler CDL
       but modified to have the raw data for each cast along each column.
       For plotting, the data will need to be gridded at specified depth
       bins (see the illustrative binning sketch after this function).

       Tony Whipple at IMS says 'The AVPs sample at one second intervals.
       Between the waves and the instrument descending from a spool of
       line with variable radius it works out to about 3-5 cm between
       observations on average.  When I process the data to make the
       images, I bin the data every 10 cm and take the average of however
       many observations fell within that bin.'

    """
    import numpy
    from datetime import datetime
    from time import strptime

    # get sample datetime from filename
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # how many profiles in one file: count the number of "Profile Time:" lines
    nprof = 0
    for line in lines:
        m = re.search("Profile Time:", line)
        if m:
            nprof = nprof + 1

    # remove any blank lines found within the first 40 lines (header junk)
    for i in range(len(lines[0:40])):
        if re.search("^ \r\n", lines[i]):
            # print str(i) + " " + lines[i] + " " + lines[i+1]
            blank_line = lines.pop(i)
            # lines.append(blank_line)

    # signal the end of the last profile by appending a blank line to the data
    lines.append(' \r\n')

    # ensure a blank line between profile casts
    for i, line in enumerate(lines):
        if re.search(r"Profile Time", line, re.IGNORECASE):
            if not re.search("^ \r\n", lines[i-1]):
                lines.insert(i, " \r\n")

    N = nprof
    nbins = sensor_info['nbins']

    data = {
        'dt' : numpy.array(numpy.ones((N,), dtype=object)*numpy.nan),
        'time' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'z' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        #
        'wd' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'wl' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'ysi_sn' : numpy.array(['' for i in range(N)], dtype='|S20'),
        'ysi_id' : numpy.array(['' for i in range(N)], dtype='|S20'),
        #
        'stime' : numpy.array(numpy.ones((N,nbins), dtype=long)*numpy.nan),
        'wtemp' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'cond' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'salin' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'turb' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'ph' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'chl' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'do' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        }

    # current profile count
    i = 0
    have_date = have_time = have_wd = have_location = have_head = False

    for line in lines:
        # if line has weird ascii chars -- skip it and iterate to next line
        if re.search(r"[\x1a]", line):
            print 'skipping bad data line ... ' + str(line)
            continue

        ysi = []
        # split line on whitespace, '/' and ':' and parse floats and integers
        sw = re.split('[\s/\:]+', line)
        for s in sw:
            m = re.search(REAL_RE_STR, s)
            if m:
                ysi.append(float(m.groups()[0]))

        if re.search("Profile Time:", line):
            have_time = True
            HH = ysi[0]
            MM = ysi[1]
            SS = ysi[2]
        elif re.search("Profile Date:", line):
            have_date = True
            mm = ysi[0]
            dd = ysi[1]
            yyyy = ysi[2]
        elif re.search("Profile Depth:", line):
            have_wd = True
            wd = ysi[0]/100.  # cm to meters
            profile_str = '%02d-%02d-%4d %02d:%02d:%02d' % (mm,dd,yyyy,HH,MM,SS)
            if sensor_info['utc_offset']:
                profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S') + \
                             timedelta(hours=sensor_info['utc_offset'])
            else:
                profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S')
        elif re.search("Profile Location:", line):
            have_location = True
            # Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
            sw = re.findall(r'\w+:\s(\w+)*', line)
            ysi_sn = sw[1]
            ysi_id = sw[2]
            # initialize arrays (NaN) for a new profile before filling each bin
            wtemp = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            depth = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            cond = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            salin = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            turb = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            ph = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            chl = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            do = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            stime = numpy.array(numpy.ones(nbins,), dtype=long)*numpy.nan
            # keep track of number of samples in one profile so as not to exceed nbins
            j = 0
            # have all the header info?
            head = numpy.array([have_date, have_time, have_wd, have_location])
            have_head = head.all()

        elif (len(ysi)==14 and have_head):
            if j>=nbins:
                print 'Sample number (' + str(j) + \
                      ') in profile exceeds maximum value (' + \
                      str(nbins) + ') in config'

            # get sample datetime from data
            sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(ysi[0:6])
            if sensor_info['utc_offset']:
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S') + \
                            timedelta(hours=sensor_info['utc_offset'])
            else:
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S')

            if j<nbins:
                stime[j] = dt2es(sample_dt) # sample time (epoch seconds)
                wtemp[j] = ysi[6] # water temperature (C)
                cond[j] = ysi[7]  # conductivity (mS/cm)
                salin[j] = ysi[8] # salinity (ppt or PSU??)
                #
                depth[j] = ysi[9] # depth (m, positive down; negated to height z below)
                #
                ph[j] = ysi[10]   # pH
                turb[j] = ysi[11] # turbidity (NTU)
                chl[j] = ysi[12]  # chlorophyll (ug/l)
                do[j] = ysi[13]   # dissolved oxygen (mg/l)

            j = j+1

        elif (len(ysi)==0 and have_head):  # each profile separated by empty line

            data['dt'][i] = profile_dt # profile datetime
            data['time'][i] = dt2es(profile_dt) # profile time in epoch seconds
            data['wd'][i] = -1.*wd
            data['wl'][i] = platform_info['mean_water_depth'] - (-1*wd)
            data['ysi_sn'][i] = ysi_sn
            data['ysi_id'][i] = ysi_id

            data['stime'][i] = stime # sample times in epoch seconds
            data['z'][i] = -1.*depth

            data['wtemp'][i] = wtemp
            data['cond'][i] = cond
            data['salin'][i] = salin
            data['turb'][i] = turb
            data['ph'][i] = ph
            data['chl'][i] = chl
            data['do'][i] = do

            i = i+1
            have_date = have_time = have_wd = have_location = False
        else:
            print 'skipping bad data line ... ' + str(line)
        # if-elif
    # for line

    return data

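# Illustrative sketch only (not called by raw2proc): how one cast returned by
# parser() might be gridded onto fixed depth bins for plotting, following the
# 10 cm binning described in the parser docstring.  The function name, the
# 0.10 m bin size, and the choice of plain numpy here are assumptions for
# example purposes, not part of the processing chain.
def _example_bin_cast(z, v, bin_size=0.10):
    """Average one cast's samples (v) at heights z (m, positive up) into
    uniform depth bins; returns (bin_centers, bin_means)."""
    import numpy
    good = ~numpy.isnan(z) & ~numpy.isnan(v)
    z, v = z[good], v[good]
    if len(z) == 0:
        return numpy.array([]), numpy.array([])
    edges = numpy.arange(z.min(), z.max()+bin_size, bin_size)
    centers = edges[:-1] + bin_size/2.
    means = numpy.ones(len(centers), dtype=float)*numpy.nan
    for k in range(len(centers)):
        in_bin = (z >= edges[k]) & (z < edges[k+1])
        if in_bin.any():
            means[k] = v[in_bin].mean()
    return centers, means
# e.g. zc, tc = _example_bin_cast(data['z'][0], data['wtemp'][0])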

def creator(platform_info, sensor_info, data):
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    title_str = sensor_info['description']+' at '+platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        #
        'source' : 'fixed-automated-profiler observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-profiler-ragged',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url' : 'http://nccoos.unc.edu',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : dt[0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type-match the data (e.g. fill value is real if data is real)
        '_FillValue' : numpy.nan,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time of Profile',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'stime' : {'short_name': 'stime',
                   'long_name': 'Time of Sample',
                   'standard_name': 'time',
                   'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                   },
        'wd': {'short_name': 'wd',
               'long_name': 'Water Depth',
               'standard_name': 'water_depth',
               'reference' : 'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               },
        'wl': {'short_name': 'wl',
               'long_name': 'Water Level',
               'standard_name': 'water_level',
               'reference':'MSL',
               'reference_to_MSL' : 0.,
               'reference_MSL_datum' : platform_info['mean_water_depth'],
               'reference_MSL_datum_time_period' : platform_info['mean_water_depth_time_period'],
               'positive' : 'up',
               'z' : 0.,
               'units': 'm',
               },
        'ysi_id' : {'short_name':'ysi_id',
                    'long_name':'Identification name of YSI Sonde',
                    'standard_name': 'identification_name',
                    },
        'ysi_sn' : {'short_name':'ysi_sn',
                    'long_name':'Serial number of YSI Sonde',
                    'standard_name': 'serial_number',
                    },
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'mS cm-1',
                 },
        'salin': {'short_name': 'salin',
                  'long_name': 'Salinity',
                  'standard_name': 'salinity',
                  'units': 'PSU',
                  },
        'turb': {'short_name': 'turb',
                 'long_name': 'Turbidity',
                 'standard_name': 'turbidity',
                 'units': 'NTU',
                 },
        'ph': {'short_name': 'ph',
               'long_name': 'pH',
               'standard_name': 'ph',
               'units': '',
               },
        'chl': {'short_name': 'chl',
                'long_name': 'Chlorophyll',
                'standard_name': 'chlorophyll',
                'units': 'ug l-1',
                },
        'do': {'short_name': 'do',
               'long_name': 'Dissolved Oxygen',
               'standard_name': 'dissolved_oxygen',
               'units': 'mg l-1',
               },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('time', NC.UNLIMITED),
        ('lat', 1),
        ('lon', 1),
        ('z', sensor_info['nbins']),
        ('nchar', 20),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('time',)),
        ('lat', NC.FLOAT, ('lat',)),
        ('lon', NC.FLOAT, ('lon',)),
        ('z',  NC.FLOAT, ('time', 'z',)),
        # data variables
        ('wd', NC.FLOAT, ('time',)),
        ('wl', NC.FLOAT, ('time',)),
        # ('ysi_sn', NC.CHAR, ('time', 'nchar')),
        # ('ysi_id', NC.CHAR, ('time', 'nchar')),
        ('stime', NC.INT, ('time', 'z')),
        ('wtemp', NC.FLOAT, ('time', 'z')),
        ('cond', NC.FLOAT, ('time', 'z')),
        ('salin', NC.FLOAT, ('time', 'z')),
        ('turb', NC.FLOAT, ('time', 'z')),
        ('ph', NC.FLOAT, ('time', 'z')),
        ('chl', NC.FLOAT, ('time', 'z')),
        ('do', NC.FLOAT, ('time', 'z')),
        )

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('time', data['time'][i]),
        ('wd', data['wd'][i]),
        ('wl', data['wl'][i]),
        # ('ysi_id', data['ysi_id'][i]),
        # ('ysi_sn', data['ysi_sn'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('ph', data['ph'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)

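# Illustrative note (hypothetical consumer, not part of this module): the tuple
# returned by creator() is ordered so a netCDF writer can build the file in one
# pass.  A consumer might unpack and use it roughly like this:
#
#   (global_atts, var_atts, dim_inits, var_inits, var_data) = creator(pi, si, data)
#   for dim_name, dim_size in dim_inits:
#       pass  # define each dimension; 'time' is the UNLIMITED record dimension
#   for var_name, var_type, var_dims in var_inits:
#       pass  # define each variable, then attach var_atts[var_name] attributes
#   for var_name, values in var_data:
#       pass  # write the data arrays (lat/lon scalars, time series, ragged casts)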

def updater(platform_info, sensor_info, data):
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    global_atts = {
        # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        }

    # data variables
    # update any variable attributes like range, min, max, e.g.
    var_atts = {}
    # var_atts = {
    #    'wtemp': {'max': data['wtemp'][i].max(),
    #              'min': data['wtemp'][i].min(),
    #              },
    #    'cond': {'max': data['cond'][i].max(),
    #             'min': data['cond'][i].min(),
    #             },
    #    }

    # data
    var_data = (
        ('time', data['time'][i]),
        ('wd', data['wd'][i]),
        ('wl', data['wl'][i]),
        # ('ysi_id', data['ysi_id'][i]),
        # ('ysi_sn', data['ysi_sn'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('ph', data['ph'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, var_data)
#