
root/raw2proc/trunk/raw2proc/proc_avp_ysi_6600_v1_CDL2.py

Revision 448 (checked in by cbc, 13 years ago)

Add new Billy Mitchell configs.

#!/usr/bin/env python
# Last modified:  Time-stamp: <2010-12-09 16:15:11 haines>
"""
How to parse data, and assert what data and info goes into
creating and updating monthly netcdf files

parse data from YSI 6600 V1 on an automated vertical profiler (avp)

parser : date and time, water_depth for each profile

         sample time, sample depth, as cast measures water
         temperature, conductivity, salinity, dissolved oxygen,
         turbidity, and chlorophyll (no pH)

creator : lat, lon, z, stime, (time, water_depth), water_temp, cond,
          salin, turb, chl, do

updater : z, stime, (time, water_depth), water_temp, cond, salin,
          turb, chl, do

using fixed profiler CDL but modified to have raw data for each cast
along each column


Examples
--------

>> (parse, create, update) = load_processors('proc_avp_ysi_6600_v1')
or
>> si = get_config(cn+'.sensor_info')
>> (parse, create, update) = load_processors(si['adcp']['proc_module'])

>> lines = load_data(filename)
>> data = parse(platform_info, sensor_info, lines)
>> create(platform_info, sensor_info, data) or
>> update(platform_info, sensor_info, data)

(A hypothetical driver sketch at the end of this module ties these steps
together.)
"""

from raw2proc import *
from procutil import *
from ncutil import *

now_dt = datetime.utcnow()
now_dt = now_dt.replace(microsecond=0)  # replace() returns a new datetime; assign it

def parser(platform_info, sensor_info, lines):
    """
    parse Automated Vertical Profile Station (AVP) Water Quality Data

    month, day, year, hour, min, sec, temp (deg. C), conductivity
    (mS/cm), salinity (ppt or PSU), depth (meters), pH, turbidity (NTU),
    chlorophyll (micrograms per liter), DO (micrograms per liter)

    Notes
    -----
    1. Column format: the YSI 6600 V1 reports no pH

    temp, cond,   salin,  DO,    depth, turb,  chl
    (C), (mS/cm), (ppt), (ug/l), (m),   (NTU), (ug/l)


    (from Aug 2005 to Sep 03 2008)
    profile time: 00:00:56
    profile date: 01/31/2006
    profile location: P180, Instrument Serial No: 0001119E
    01/31/06 00:01:31 10.99  7.501   4.16  13.22   0.516     6.0  11.5
    01/31/06 00:01:32 11.00  7.463   4.13  13.22   0.526     6.0  11.4
    01/31/06 00:01:33 11.00  7.442   4.12  13.22   0.538     6.0  11.4
    01/31/06 00:01:34 11.00  7.496   4.15  13.11   0.556     6.0  11.3
    (no data from Sep 03 to 30, 2008)
    (from Sep 30 2008 to now, still YSI 6600 v1, just header change)
    Profile Time: 11:38:00
    Profile Date: 01/06/2009
    Profile Depth: 380.0 cm
    Profile Location: Hampton Shoal Serial No: 000109DD, ID: Delta
    01/06/09 11:38:44 11.16  14.59   8.49  17.86   0.171     4.5  50.4
    01/06/09 11:38:45 11.16  14.59   8.49  17.86   0.190     4.5  51.8
    01/06/09 11:38:46 11.16  14.59   8.49  17.88   0.220     4.6  53.0
    01/06/09 11:38:47 11.16  14.59   8.49  17.88   0.257     4.6  53.9
    01/06/09 11:38:48 11.16  14.59   8.49  17.88   0.448     4.6  54.3

    2. Use a ragged array to store each uniquely measured param at each
    time and depth but not gridded, so this uses fixed profiler CDL
    but modified to have raw data for each cast along each column.
    For plotting, the data will need to be gridded at specified depth bins.

    Tony Whipple at IMS says 'The AVPs sample at one second intervals.
    Between the waves and the instrument descending from a spool of
    line with variable radius it works out to about 3-5 cm between
    observations on average.  When I process the data to make the
    images, I bin the data every 10 cm and take the average of however
    many observations fell within that bin.'
    (An illustrative depth-binning sketch follows this function.)

    """
    import re
    import numpy
    from datetime import datetime, timedelta
    from time import strptime
    # (re and timedelta are also expected from the module-level star imports;
    #  they are imported explicitly here so the parser reads self-contained)

    # get sample datetime from filename
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)

    # how many profiles in one file, count number of "Profile Time:" in lines
    nprof = 0
    for line in lines:
        m = re.search("Profile Time:", line, re.IGNORECASE)
        if m:
            nprof = nprof + 1

    # remove any blank lines found within the first 40 lines (header area)
    for i in range(len(lines[0:40])):
        if re.search("^ \r\n", lines[i]):
            # print str(i) + " " + lines[i] + " " + lines[i+1]
            blank_line = lines.pop(i)
            # lines.append(blank_line)
    # signal the end of the last profile by appending a blank line to the data
    lines.append(' \r\n')

    # ensure blank line between profile casts
    for i, line in enumerate(lines):
        if re.search(r"Profile Time", line, re.IGNORECASE):
            if not re.search("^ \r\n", lines[i-1]):
                lines.insert(i, " \r\n")

    N = nprof
    nbins = sensor_info['nbins']

    data = {
        'dt' : numpy.array(numpy.ones((N,), dtype=object)*numpy.nan),
        'time' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'z' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        #
        # 'ysi_sn' : numpy.array(['' for i in range(N)] , dtype='|S20'),
        # 'ysi_id' : numpy.array(['' for i in range(N)] , dtype='|S20'),
        #
        'stime' : numpy.array(numpy.ones((N,nbins), dtype=long)*numpy.nan),
        'wtemp' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'cond' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'salin' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'turb' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'chl' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'do' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        }

    # current profile count
    i = 0
    have_date = have_time = have_location = have_head = False
    verbose = False

    for line in lines:
        # if line has weird ascii chars -- skip it and iterate to next line
        if re.search(r"[\x1a]", line):
            if verbose:
                print 'skipping bad data line ... ' + str(line)
            continue

        ysi = []
        # split line and parse float and integers
        sw = re.split('[\s/\:]*', line)
        for s in sw:
            m = re.search(REAL_RE_STR, s)
            if m:
                ysi.append(float(m.groups()[0]))

        if re.search("Profile Time:", line, re.IGNORECASE):
            have_time = True
            HH = ysi[0]
            MM = ysi[1]
            SS = ysi[2]
        elif re.search("Profile Date:", line, re.IGNORECASE):
            have_date = True
            mm = ysi[0]
            dd = ysi[1]
            yyyy = ysi[2]

            profile_str = '%02d-%02d-%4d %02d:%02d:%02d' % (mm,dd,yyyy,HH,MM,SS)
            if sensor_info['utc_offset']:
                profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S') + \
                             timedelta(hours=sensor_info['utc_offset'])
            else:
                profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S')
        elif re.search("Profile Location:", line):
            have_location = True
            # profile location: P180, Instrument Serial No: 0001119E
            # Profile Location: Hampton Shoal Serial No: 000109DD, ID: Delta
            sw = re.findall(r'\w+:\s(\w+)*', line)
            # ysi_sn = sw[1]
            # ysi_id = sw[2]

            # initialize NaN-filled arrays for a new profile (one slot per raw sample)
            wtemp = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            depth = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            cond = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            salin = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            turb = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            chl = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            do = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            stime = numpy.array(numpy.ones(nbins,), dtype=long)*numpy.nan
            # keep track of number of samples in one profile so as not to exceed nbins
            j = 0
            # check that all the header info has been seen
            head = numpy.array([have_date, have_time, have_location])
            have_head = head.all()

        elif re.search("Error", line):
            # ignore this line
            if verbose:
                print 'skipping bad data line ... ' + str(line)
            continue

        elif (len(ysi)==13 and have_head):
            if j>=nbins:
                print 'Sample number (' + str(j) + \
                      ') in profile exceeds maximum value ('+ \
                      str(nbins) + ') in config'

            # get sample datetime from data
            sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(ysi[0:6])
            try:
                if sensor_info['utc_offset']:
                    sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S') + \
                                timedelta(hours=sensor_info['utc_offset'])
                else:
                    sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S')
            except TypeError:
                if verbose:
                    print 'bad time stamp, skipping data line .... ' + str(line)
                continue


            if j<nbins:
                stime[j] = dt2es(sample_dt) # sample time
                wtemp[j] = ysi[6] # water temperature (C)
                cond[j] = ysi[7]  # conductivity (mS/cm)
                salin[j] = ysi[8] # salinity (ppt or PSU??)
                do[j] = ysi[9]   # dissolved oxygen (mg/l)
                #
                depth[j] = ysi[10] # depth (m, positive down; negated to height z below)
                #
                turb[j] = ysi[11] # turbidity (NTU)
                chl[j] = ysi[12]  # chlorophyll (ug/l)

            j = j+1

        elif (len(ysi)==0 and have_head and i<N):  # each profile separated by empty line

            data['dt'][i] = profile_dt # profile datetime
            data['time'][i] = dt2es(profile_dt) # profile time in epoch seconds
            # data['ysi_sn'][i] = ysi_sn
            # data['ysi_id'][i] = ysi_id
            #
            data['stime'][i] = stime # sample time in epoch seconds
            data['z'][i] = -1.*depth
            #
            data['wtemp'][i] = wtemp
            data['cond'][i] = cond
            data['salin'][i] = salin
            data['turb'][i] = turb
            data['chl'][i] = chl
            data['do'][i] = do

            i = i+1
            have_date = have_time = have_wd = have_location = False
        else:
            if verbose:
                print 'skipping bad data line ... ' + str(line)
        # if-elif
    # for line

    return data

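# The parser docstring (Note 2) explains that each raw cast is stored as a
# ragged array and must be gridded into fixed depth bins (about 10 cm) before
# plotting.  The helper below is a minimal illustrative sketch of that binning
# step; it is not part of the original processing chain, and its name, bin
# size, and depth-range defaults are assumptions for demonstration only.
def bin_profile_sketch(z, values, bin_size=0.10, zmin=-5.0, zmax=0.0):
    """Average one cast onto fixed depth bins (illustrative sketch only).

    z : heights for one cast (m, negative down, e.g. one row of data['z'])
    values : same-length samples (e.g. one row of data['wtemp'])
    """
    import numpy

    edges = numpy.arange(zmin, zmax + bin_size, bin_size)
    centers = edges[:-1] + bin_size/2.
    binned = numpy.ones(centers.shape, dtype=float)*numpy.nan
    for k in range(len(centers)):
        # samples whose height falls inside this bin and are not missing
        in_bin = (z >= edges[k]) & (z < edges[k+1]) & ~numpy.isnan(values)
        if in_bin.any():
            binned[k] = values[in_bin].mean()
    return (centers, binned)
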
def creator(platform_info, sensor_info, data):
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    title_str = sensor_info['description'] + ' at ' + platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        #
        'source' : 'fixed-automated-profiler observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf '+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-profiler-ragged',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url' : 'http://nccoos.unc.edu',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : dt[0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : numpy.nan,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time of Profile',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'stime' : {'short_name': 'stime',
                   'long_name': 'Time of Sample',
                   'standard_name': 'time',
                   'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                   },
        # 'ysi_id' : {'short_name':'ysi_id',
        #             'long_name':'Identification name of YSI Sonde',
        #             'standard_name': 'identification_name'
        #             },
        # 'ysi_sn' : {'short_name':'ysi_sn',
        #             'long_name':'Serial number of YSI Sonde',
        #             'standard_name': 'serial_number'
        #             },
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees_Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'mS cm-1',
                 },
        'salin': {'short_name': 'salin',
                  'long_name': 'Salinity',
                  'standard_name': 'salinity',
                  'units': 'PSU',
                  },
        'turb': {'short_name': 'turb',
                 'long_name': 'Turbidity',
                 'standard_name': 'turbidity',
                 'units': 'NTU',
                 },
        'chl': {'short_name': 'chl',
                'long_name': 'Chlorophyll',
                'standard_name': 'chlorophyll',
                'units': 'ug l-1',
                },
        'do': {'short_name': 'do',
               'long_name': 'Dissolved Oxygen',
               'standard_name': 'dissolved_oxygen',
               'units': 'mg l-1',
               },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('time', NC.UNLIMITED),
        ('lat', 1),
        ('lon', 1),
        ('z', sensor_info['nbins']),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    # (see the illustrative writer sketch after this function for how these
    #  tuples are consumed)
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('time',)),
        ('lat', NC.FLOAT, ('lat',)),
        ('lon', NC.FLOAT, ('lon',)),
        ('z',  NC.FLOAT, ('time', 'z',)),
        # data variables
        # ('ysi_sn', NC.CHAR, ('time', 'nchar')),
        # ('ysi_id', NC.CHAR, ('time', 'nchar')),
        ('stime', NC.FLOAT, ('time', 'z')),
        ('wtemp', NC.FLOAT, ('time', 'z')),
        ('cond', NC.FLOAT, ('time', 'z')),
        ('salin', NC.FLOAT, ('time', 'z')),
        ('turb', NC.FLOAT, ('time', 'z')),
        ('chl', NC.FLOAT, ('time', 'z')),
        ('do', NC.FLOAT, ('time', 'z')),
        )

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('time', data['time'][i]),
        # ('ysi_id', data['ysi_id'][i]),
        # ('ysi_sn', data['ysi_sn'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)

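# The project writes its netCDF files through ncutil (pycdf); those call
# signatures are not shown in this module, so the function below is only a
# hedged illustration of how the tuples returned by creator() could be
# consumed, using the separate netCDF4-python package instead.  The function
# name and output filename are hypothetical, and the type handling is
# simplified (everything is written as 32-bit float).
def write_netcdf_sketch(global_atts, var_atts, dim_inits, var_inits, var_data,
                        ncfile='avp_ysi_sketch.nc'):
    import numpy
    from netCDF4 import Dataset

    nc = Dataset(ncfile, 'w')
    # global attributes (skip leading-underscore names such as _FillValue,
    # which netCDF4 only accepts at variable-creation time)
    for name, value in global_atts.items():
        if not name.startswith('_'):
            setattr(nc, name, value)
    # dimensions, in the order given ('time' is the unlimited dimension)
    for name, size in dim_inits:
        nc.createDimension(name, None if name == 'time' else size)
    # variables and their attributes
    # (vartype, e.g. NC.INT/NC.FLOAT, is ignored in this simplified sketch)
    for name, vartype, dims in var_inits:
        v = nc.createVariable(name, 'f4', dims)
        for attname, attvalue in var_atts.get(name, {}).items():
            setattr(v, attname, attvalue)
    # data (index by length so the unlimited time dimension grows as needed)
    for name, values in var_data:
        values = numpy.asarray(values)
        if values.ndim == 0:
            nc.variables[name][0] = values
        else:
            nc.variables[name][0:values.shape[0]] = values
    nc.close()
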
def updater(platform_info, sensor_info, data):
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    global_atts = {
        # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        }

    # data variables
    # update any variable attributes like range, min, max
    var_atts = {}
    # var_atts = {
    #    'wtemp': {'max': numpy.nanmax(data['wtemp'][i]),
    #              'min': numpy.nanmin(data['wtemp'][i]),
    #              },
    #    'cond': {'max': numpy.nanmax(data['cond'][i]),
    #             'min': numpy.nanmin(data['cond'][i]),
    #             },
    #    }

    # data
    var_data = (
        ('time', data['time'][i]),
        # ('ysi_id', data['ysi_id'][i]),
        # ('ysi_sn', data['ysi_sn'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, var_data)
#

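# Hypothetical driver sketch (not part of the original module): it ties the
# docstring examples together for a quick manual check.  The data filename and
# the platform_info/sensor_info dictionaries, normally supplied by the raw2proc
# configuration files, are placeholder assumptions only.
if __name__ == '__main__':
    platform_info = {'location': 'some profiler site', 'lat': 35.0, 'lon': -76.0,
                     'institution': 'unc', 'id': 'site'}
    sensor_info = {'id': 'avp', 'description': 'YSI 6600 V1 profiles',
                   'process_module': 'proc_avp_ysi_6600_v1_CDL2',
                   'fn': 'some_avp_file.dat', 'nbins': 150, 'utc_offset': 5}
    lines = load_data(sensor_info['fn'])  # load_data comes from raw2proc
    data = parser(platform_info, sensor_info, lines)
    print 'parsed %d profiles from %s' % (len(data['dt']), sensor_info['fn'])
    # creator()/updater() are not exercised here: they expect data['in'],
    # the monthly subset index normally set by raw2proc.process().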