NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/proc_avp_ysi_6600_v2_CDL2.py

Revision 320 (checked in by haines, 14 years ago)

catch-up trunk to production code running on cromwell

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2009-12-16 15:23:36 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse data from YSI 6600 V2-2 on an automated vertical profiler (avp)
8
9 parser : date and time, water_depth for each profile
10
11          sample time, sample depth, as cast measures water
12          temperature, conductivity, salinity, pH, dissolved oxygen,
13          turbidity, and chlorophyll
14         
15
16 creator : lat, lon, z, stime, (time, water_depth), water_temp, cond,
17           salin, ph, turb, chl, do
18
19 updater : z, stime, (time, water_depth), water_temp, cond, salin, ph,
20           turb, chl, do
21
22 using fixed profiler CDL but modified to have raw data for each cast
23 along each column
24
25
26 Examples
27 --------
28
29 >> (parse, create, update) = load_processors('proc_avp_ysi_6600_v2')
30 or
31 >> si = get_config(cn+'.sensor_info')
32 >> (parse, create, update) = load_processors(si['adcp']['proc_module'])
33
34 >> lines = load_data(filename)
35 >> data = parse(platform_info, sensor_info, lines)
36 >> create(platform_info, sensor_info, data) or
37 >> update(platform_info, sensor_info, data)
38
39 """
40
41
42 from raw2proc import *
43 from procutil import *
44 from ncutil import *
45
# Module-load timestamp (naive UTC) used for the release/creation/modification
# dates written by creator() and updater().  datetime objects are immutable,
# so replace() returns a new object; its result must be assigned for the
# microseconds to actually be dropped.
now_dt = datetime.utcnow().replace(microsecond=0)
48
def parser(platform_info, sensor_info, lines):
    """
    Parse Automated Vertical Profile Station (AVP) water quality data.

    Each profile (cast) in the raw file is a header of "Profile Time:",
    "Profile Date:", "Profile Depth:" and "Profile Location:" lines
    followed by one line per sample holding 14 numbers:

    month, day, year, hour, min, sec, temp (deg. C), conductivity
    (mS/cm), salinity (ppt or PSU), depth (meters), pH, turbidity (NTU),
    chlorophyll (micrograms per liter), DO (milligrams per liter)

    Parameters
    ----------
    platform_info : dict
        Must provide 'mean_water_depth' (used to compute water level).
    sensor_info : dict
        Must provide 'fn' (raw file name), 'nbins' (maximum number of
        samples kept per profile) and 'utc_offset' (hours added to the
        logged times; a false value means no shift).
    lines : list of str
        Raw lines of the data file.  The list is copied before the
        clean-up steps, so the caller's list is left untouched.

    Returns
    -------
    dict of numpy arrays, with N = number of "Profile Time:" lines found:
        'dt'    (N,)       profile datetime
        'time'  (N,)       dt2es(profile datetime)
        'wd'    (N,)       -1 * profile depth (m)
        'wl'    (N,)       platform_info['mean_water_depth'] + profile depth
        'z'     (N, nbins) -1 * sample depth
        'stime' (N, nbins) dt2es(sample datetime)
        'wtemp', 'cond', 'salin', 'turb', 'ph', 'chl', 'do' : (N, nbins)
    Slots that were never filled hold NaN.

    Notes
    -----
    1. Column Format

    temp, cond, salin, depth, pH, turb, chl, DO
    (C), (mS/cm), (ppt), (m), pH, (NTU), (ug/l), (mg/l)

    Profile Time: 00:30:00
    Profile Date: 08/18/2008
    Profile Depth: 255.0 cm
    Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
    08/18/08 00:30:06 26.94  41.87  26.81   0.134  8.00     3.4   4.5   6.60
    08/18/08 00:30:07 26.94  41.87  26.81   0.143  8.00     3.4   4.8   6.59
    08/18/08 00:30:08 26.94  41.87  26.81   0.160  8.00     3.4   4.8   6.62
    08/18/08 00:30:09 26.94  41.87  26.81   0.183  8.00     3.4   4.8   6.66

    2.  Use a ragged array to store each uniquely measured param at each
    time and depth but not gridded, so this uses fixed profiler CDL
    but modified to have raw data for each cast along each column.
    For plotting, the data will need to be gridded at specified depth bins.

    Tony Whipple at IMS says 'The AVPs sample at one second intervals.
    Between the waves and the instrument descending from a spool of
    line with variable radius it works out to about 3-5 cm between
    observations on average.  When I process the data to make the
    images, I bin the data every 10 cm and take the average of however
    many observations fell within that bin.'

    """
    import numpy
    from datetime import datetime

    def _nan_array(shape):
        # float array pre-filled with NaN, the "no data" marker used here
        return numpy.ones(shape, dtype=float)*numpy.nan

    # work on a copy so the pop/append/insert clean-up below does not
    # modify the list owned by the caller
    lines = list(lines)

    # get sample datetime from filename (value is not used further below)
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # how many profiles in one file, count number of "Profile Time:" in lines
    nprof = 0
    for line in lines:
        if re.search("Profile Time:", line):
            nprof = nprof+1

    # remove blank lines found while scanning the first 40 line positions;
    # the list shrinks on every pop, so re-check the index against the
    # current length to avoid running off the end of a short file
    for k in range(min(40, len(lines))):
        if k >= len(lines):
            break
        if re.search("^ \r\n", lines[k]):
            lines.pop(k)

    # ensure signal end of profile after last profile by appending a blank line to data file
    lines.append(' \r\n')

    # ensure blank line between profile casts
    # (after an insert the header line is seen again one index later, but
    # by then it is preceded by the new blank line, so nothing more is added)
    for k, line in enumerate(lines):
        if re.search(r"Profile Time", line, re.IGNORECASE):
            if not re.search("^ \r\n", lines[k-1]):
                lines.insert(k, " \r\n")

    N = nprof
    nbins = sensor_info['nbins']

    data = {
        'dt' : numpy.array(numpy.ones((N,), dtype=object)*numpy.nan),
        'time' : _nan_array((N,)),
        'z' : _nan_array((N,nbins)),
        #
        'wd' : _nan_array((N,)),
        'wl' : _nan_array((N,)),
        # 'ysi_sn' : numpy.array(['' for i in range(N)] , dtype='|S20'),
        # 'ysi_id' : numpy.array(['' for i in range(N)] , dtype='|S20'),
        #
        'stime' : _nan_array((N,nbins)),
        'wtemp' : _nan_array((N,nbins)),
        'cond' : _nan_array((N,nbins)),
        'salin' : _nan_array((N,nbins)),
        'turb' : _nan_array((N,nbins)),
        'ph' : _nan_array((N,nbins)),
        'chl' : _nan_array((N,nbins)),
        'do' : _nan_array((N,nbins)),
        }

    # current profile count
    i = 0
    have_date = have_time = have_wd = have_location = have_head = False

    for line in lines:
        # if line has weird ascii chars -- skip it and iterate to next line
        if re.search(r"[\x1a]", line):
            print('skipping bad data line ... ' + str(line))
            continue

        ysi = []
        # split line and parse float and integers
        # ('+' rather than '*': a pattern that can match the empty string
        # splits between every character on newer Python versions)
        sw = re.split(r'[\s/\:]+', line)
        for s in sw:
            m = re.search(REAL_RE_STR, s)
            if m:
                ysi.append(float(m.groups()[0]))

        if re.search("Profile Time:", line):
            have_time = True
            HH=ysi[0]
            MM=ysi[1]
            SS=ysi[2]
        elif re.search("Profile Date:", line):
            have_date = True
            mm=ysi[0]
            dd=ysi[1]
            yyyy=ysi[2]
        elif re.search("Profile Depth:", line):
            have_wd = True
            wd = ysi[0]/100.  # cm to meters
            # relies on the Time and Date header lines having been seen first
            profile_str = '%02d-%02d-%4d %02d:%02d:%02d' % (mm,dd,yyyy,HH,MM,SS)
            profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S')
            if sensor_info['utc_offset']:
                profile_dt = profile_dt + timedelta(hours=sensor_info['utc_offset'])
        elif re.search("Profile Location:", line):
            have_location = True
            # Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
            # (serial number and id are not extracted at present)
            # start every per-sample column of the new profile as NaN
            wtemp = _nan_array(nbins)
            depth = _nan_array(nbins)
            cond = _nan_array(nbins)
            salin = _nan_array(nbins)
            turb = _nan_array(nbins)
            ph = _nan_array(nbins)
            chl = _nan_array(nbins)
            do = _nan_array(nbins)
            stime = _nan_array(nbins)
            # keep track of number of samples in one profile so not to exceed nbins
            j = 0
            # have all the headers stuff
            have_head = have_date and have_time and have_wd and have_location

        elif (len(ysi)==14 and have_head):
            if j>=nbins:
                print('Sample number (' + str(j) +
                      ') in profile exceeds maximum value (' +
                      str(nbins) + ') in config')

            # get sample datetime from data
            sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(ysi[0:6])
            # month, day, year
            try:
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S')
            except ValueError:
                # day, month, year (month and day switched in some cases)
                try:
                    sample_dt = scanf_datetime(sample_str, fmt='%d-%m-%y %H:%M:%S')
                except Exception:
                    # unparseable timestamp: fall back to the epoch origin
                    # rather than dropping the sample
                    sample_dt = datetime(1970,1,1)

            if sensor_info['utc_offset']:
                sample_dt = sample_dt + timedelta(hours=sensor_info['utc_offset'])

            # samples beyond nbins are counted (for the warning) but not stored
            if j<nbins:
                stime[j] = dt2es(sample_dt) # sample time
                wtemp[j] = ysi[6] # water temperature (C)
                cond[j] = ysi[7]  # conductivity (mS/cm)
                salin[j] = ysi[8] # salinity (ppt or PSU??)
                #
                depth[j] = ysi[9] # depth (m); stored below as z = -depth
                #
                ph[j] = ysi[10]   # ph
                turb[j] = ysi[11] # turbidity (NTU)
                chl[j] = ysi[12]  # chlorophyll (ug/l)
                do[j] = ysi[13]   # dissolved oxygen (mg/l)

            j = j+1

        elif (len(ysi)==0 and have_head and i<N):  # each profile separated by empty line

            data['dt'][i] = profile_dt # profile datetime
            data['time'][i] = dt2es(profile_dt) # profile time in epoch seconds
            data['wd'][i] = -1.*wd
            data['wl'][i] = platform_info['mean_water_depth'] - (-1*wd)
            # data['ysi_sn'][i] = ysi_sn
            # data['ysi_id'][i] = ysi_id

            data['stime'][i] =  stime # sample time in epoch seconds
            data['z'][i] = -1.*depth

            data['wtemp'][i] =  wtemp
            data['cond'][i] = cond
            data['salin'][i] = salin
            data['turb'][i] = turb
            data['ph'][i] = ph
            data['chl'][i] = chl
            data['do'][i] = do

            i=i+1
            # profile is closed: require a complete new header before any
            # further samples or another end-of-profile are accepted, so a
            # second blank line cannot store the same profile twice
            have_date = have_time = have_wd = have_location = have_head = False
        else:
            print('skipping bad data line ... ' + str(line))
        # if-elif
    # for line

    return data
263  
264
def creator(platform_info, sensor_info, data):
    """
    Assemble everything needed to create a new monthly netCDF file.

    Parameters
    ----------
    platform_info : dict
        Reads 'location', 'institution', 'id', 'lat', 'lon',
        'mean_water_depth' and 'mean_water_depth_time_period'.
    sensor_info : dict
        Reads 'description', 'process_module', 'id' and 'nbins'.
    data : dict
        Output of parser() plus an 'in' entry used to index every array
        down to the month being processed (see raw2proc.process()).

    Returns
    -------
    tuple : (global_atts, var_atts, dim_inits, var_inits, var_data)
        global_atts : dict of file-level attributes
        var_atts    : dict mapping variable name to its attribute dict
        dim_inits   : tuple of (dimName, size) in creation order
        var_inits   : tuple of (varName, varType, (dimName, ...))
        var_data    : tuple of (varName, values)
    """
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    time_fmt = "%Y-%m-%d %H:%M:%S"
    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        #
        'source' : 'fixed-automated-profiler observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-profiler-ragged',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url' : 'http://nccoos.unc.edu',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : dt[0].strftime(time_fmt),
        # last date in monthly file
        'end_date' : dt[-1].strftime(time_fmt),
        'release_date' : now_dt.strftime(time_fmt),
        #
        'creation_date' : now_dt.strftime(time_fmt),
        'modification_date' : now_dt.strftime(time_fmt),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : numpy.nan,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time of Profile',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis ('Y' belongs to latitude)
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'stime' : {'short_name': 'stime',
                  'long_name': 'Time of Sample ',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  },
        'wd': {'short_name': 'wd',
               'long_name': 'Water Depth',
               'standard_name': 'water_depth',
               'reference' : 'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               },
        'wl': {'short_name': 'wl',
               'long_name': 'Water Level',
               'standard_name': 'water_level',
               'reference':'MSL',
               'reference_to_MSL' : 0.,
               'reference_MSL_datum' : platform_info['mean_water_depth'],
               'reference_MSL_datum_time_period' : platform_info['mean_water_depth_time_period'],
               'positive' : 'up',
               'z' : 0.,
               'units': 'm',
               },
        # 'ysi_id' : {'short_name':'ysi_id',
        #             'long_name':'Identification name of YSI Sonde',
        #             'standard_name': 'identification_name'
        #             },
        # 'ysi_sn' : {'short_name':'ysi_sn',
        #             'long_name':'Serial number of YSI Sonde',
        #             'standard_name': 'serial_number'
        #             },
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees_Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'mS cm-1',
                 },
        'salin': {'short_name': 'salin',
                  'long_name': 'Salinity',
                  'standard_name': 'salinity',
                  'units': 'PSU',
                  },
        'turb': {'short_name': 'turb',
                 'long_name': 'Turbidity',
                 'standard_name': 'turbidity',
                 'units': 'NTU',
                 },
        'ph': {'short_name': 'ph',
               'long_name': 'pH',
               'standard_name': 'ph',
               'units': '',
               },
        'chl': {'short_name': 'chl',
                'long_name': 'Chlorophyll',
                'standard_name': 'chlorophyll',
                'units': 'ug l-1',
                },
        'do': {'short_name': 'do',
               'long_name': 'Dissolved Oxygen',
               'standard_name': 'dissolved_oxygen',
               'units': 'mg l-1',
               },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('time', NC.UNLIMITED),
        ('lat', 1),
        ('lon', 1),
        ('z', sensor_info['nbins']),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('time',)),
        ('lat', NC.FLOAT, ('lat',)),
        ('lon', NC.FLOAT, ('lon',)),
        ('z',  NC.FLOAT, ('time', 'z',)),
        # data variables
        ('wd', NC.FLOAT, ('time',)),
        ('wl', NC.FLOAT, ('time',)),
        # ('ysi_sn', NC.CHAR, ('time', 'nchar')),
        # ('ysi_id', NC.CHAR, ('time', 'nchar')),
        # epoch seconds (~1.2e9) need double precision: a 32-bit float
        # resolves such values only to about 128 s, which would merge
        # the one-second sample times
        ('stime', NC.DOUBLE, ('time', 'z')),
        ('wtemp', NC.FLOAT, ('time', 'z')),
        ('cond', NC.FLOAT, ('time', 'z')),
        ('salin', NC.FLOAT, ('time', 'z')),
        ('turb', NC.FLOAT, ('time', 'z')),
        ('ph', NC.FLOAT, ('time', 'z')),
        ('chl', NC.FLOAT, ('time', 'z')),
        ('do', NC.FLOAT, ('time', 'z')),
        )

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('time', data['time'][i]),
        ('wd', data['wd'][i]),
        ('wl', data['wl'][i]),
        # ('ysi_id', data['ysi_id'][i]),
        # ('ysi_sn', data['ysi_sn'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('ph', data['ph'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
465
def updater(platform_info, sensor_info, data):
    """
    Assemble what is needed to append to an existing monthly netCDF file.

    platform_info and sensor_info are accepted for a uniform processor
    signature but are not read here.  data is the parser() output plus
    an 'in' entry that indexes each array down to the month being
    processed (see raw2proc.process()).

    Returns (global_atts, var_atts, var_data):
        global_atts : refreshed 'end_date', 'release_date' and
                      'modification_date' strings (yyyy-mm-dd HH:MM:SS)
        var_atts    : empty dict (no per-variable attributes are updated)
        var_data    : tuple of (varName, values) pairs
    """
    stamp_fmt = "%Y-%m-%d %H:%M:%S"

    # restrict everything to the records of the month being processed
    month_sel = data['in']
    profile_dts = data['dt'][month_sel]

    # the last profile in the month marks the new end of the file's timeframe
    last_profile_stamp = profile_dts[-1].strftime(stamp_fmt)
    run_stamp = now_dt.strftime(stamp_fmt)
    global_atts = {
        'end_date' : last_profile_stamp,
        'release_date' : run_stamp,
        'modification_date' : run_stamp,
        }

    # no variable attributes (range, min, max, ...) are refreshed at present
    var_atts = {}

    # variables to append, in write order (lat/lon are fixed at creation)
    appended_vars = (
        'time', 'wd', 'wl',
        'stime', 'z',
        'wtemp', 'cond', 'salin', 'turb', 'ph', 'chl', 'do',
        )
    var_data = tuple([(name, data[name][month_sel]) for name in appended_vars])

    return (global_atts, var_atts, var_data)
513 #
514
Note: See TracBrowser for help on using the browser.