NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/proc_avp_ysi_6600_v1_CDL2.py

Revision 320 (checked in by haines, 14 years ago)

catch-up trunk to production code running on cromwell

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2009-12-16 16:33:00 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse data from YSI 6600 V1 on an automated vertical profiler (avp)
8
9 parser : date and time, water_depth for each profile
10
11          sample time, sample depth, as cast measures water
12          temperature, conductivity, salinity, dissolved oxygen,
13          turbidity, and chlorophyll (no pH)
14         
15
16 creator : lat, lon, z, stime, (time, water_depth), water_temp, cond,
17           salin, turb, chl, do
18
19 updater : z, stime, (time, water_depth), water_temp, cond, salin,
20           turb, chl, do
21
22 using fixed profiler CDL but modified to have raw data for each cast
23 along each column
24
25
26 Examples
27 --------
28
29 >> (parse, create, update) = load_processors('proc_avp_ysi_6600_v1')
30 or
31 >> si = get_config(cn+'.sensor_info')
32 >> (parse, create, update) = load_processors(si['adcp']['proc_module'])
33
34 >> lines = load_data(filename)
35 >> data = parse(platform_info, sensor_info, lines)
36 >> create(platform_info, sensor_info, data) or
37 >> update(platform_info, sensor_info, data)
38
39 """
40
41 from raw2proc import *
42 from procutil import *
43 from ncutil import *
44
# Processing timestamp (naive UTC) used for the release/creation/modification
# date attributes.  datetime objects are immutable, so the value returned by
# replace() has to be kept; calling it as a bare statement changes nothing.
now_dt = datetime.utcnow().replace(microsecond=0)
47
def _nan_array(shape):
    """Return a float array of the requested shape with every element NaN."""
    import numpy
    return numpy.ones(shape, dtype=float) * numpy.nan


def parser(platform_info, sensor_info, lines):
    """
    Parse Automated Vertical Profiler (AVP) water quality data (YSI 6600 V1).

    Parameters
    ----------
    platform_info : not referenced by this function
    sensor_info : dict; the keys read here are
        'fn'         -- data file name
        'nbins'      -- maximum number of samples kept per profile
        'utc_offset' -- hours added to every parsed timestamp (skipped
                        when the value is false/zero)
    lines : list of str, the raw lines of one data file.  NOTE: the list
        is modified in place (one blank line may be removed, blank
        separator lines are inserted/appended).

    Returns
    -------
    dict of numpy arrays, N = number of "Profile Time:" headers found:
        'dt'    (N,)        profile datetime (object array)
        'time'  (N,)        profile time, as returned by dt2es()
        'stime' (N, nbins)  sample time, as returned by dt2es()
        'z'     (N, nbins)  negated sample depth
        'wtemp', 'cond', 'salin', 'turb', 'chl', 'do'  (N, nbins)
    Slots that are never filled stay NaN.

    Notes
    -----
    1. Column Format YSI 6600 V1 has no pH

    date, time, temp, cond,   salin,  DO,    depth, turb,  chl
                (C), (mS/cm), (ppt), (mg/l), (m),   (NTU), (ug/l)

    (DO unit follows the 'mg l-1' units attribute written by creator();
    the original header comment said ug/l -- TODO confirm with IMS.)

(from Aug 2005 to Sep 03 2008)
    profile time: 00:00:56
    profile date: 01/31/2006
    profile location: P180, Instrument Serial No: 0001119E
    01/31/06 00:01:31 10.99  7.501   4.16  13.22   0.516     6.0  11.5
    01/31/06 00:01:32 11.00  7.463   4.13  13.22   0.526     6.0  11.4
    01/31/06 00:01:33 11.00  7.442   4.12  13.22   0.538     6.0  11.4
    01/31/06 00:01:34 11.00  7.496   4.15  13.11   0.556     6.0  11.3
(no data from Sep 03 to 30, 2008)
(from Sep 30 2008 to now, still YSI 6600 v1, just header change)
    Profile Time: 11:38:00
    Profile Date: 01/06/2009
    Profile Depth: 380.0 cm
    Profile Location: Hampton Shoal Serial No: 000109DD, ID: Delta
    01/06/09 11:38:44 11.16  14.59   8.49  17.86   0.171     4.5  50.4
    01/06/09 11:38:45 11.16  14.59   8.49  17.86   0.190     4.5  51.8
    01/06/09 11:38:46 11.16  14.59   8.49  17.88   0.220     4.6  53.0
    01/06/09 11:38:47 11.16  14.59   8.49  17.88   0.257     4.6  53.9
    01/06/09 11:38:48 11.16  14.59   8.49  17.88   0.448     4.6  54.3

    The header lines must arrive in the order time, date, location: the
    date line builds the profile timestamp from the time line, and the
    location line starts sample collection.

2.  Use a ragged array to store each uniquely measured param at each
    time and depth but not gridded, so this uses fixed profiler CDL
    but modified to have raw data for each cast along each column.
    For plotting, the data will need to be gridded at specified depth
    bins.

    Tony Whipple at IMS says 'The AVPs sample at one second intervals.
    Between the waves and the instrument descending from a spool of
    line with variable radius it works out to about 3-5 cm between
    observations on average.  When I process the data to make the
    images, I bin the data every 10 cm and take the average of however
    many observations fell within that bin.'

    """
    import re
    import numpy
    from datetime import timedelta

    # A "blank" line in these files is one space followed by CR LF.
    blank_re = "^ \r\n"
    blank_line = ' \r\n'

    def to_utc(stamp, fmt):
        # Parse a timestamp string and add the configured hour offset, if any.
        when = scanf_datetime(stamp, fmt=fmt)
        if sensor_info['utc_offset']:
            when = when + timedelta(hours=sensor_info['utc_offset'])
        return when

    # get sample datetime from filename (not used further in this function)
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # how many profiles in one file: count the "Profile Time:" headers
    nprof = sum(1 for text in lines
                if re.search("Profile Time:", text, re.IGNORECASE))

    # Remove the FIRST blank line if it sits within the first 40 lines.
    # Iterate over a slice copy and stop after one removal so the list is
    # never indexed past its (shrunken) end.
    for idx, text in enumerate(lines[:40]):
        if re.search(blank_re, text):
            lines.pop(idx)
            break

    # A blank line marks the end of a profile, so make sure one follows
    # the last profile ...
    lines.append(blank_line)

    # ... and that one precedes every profile header.  For idx == 0 the
    # "previous" line is lines[-1], i.e. the blank just appended, so nothing
    # is inserted ahead of the very first header.
    idx = 0
    while idx < len(lines):
        if re.search(r"Profile Time", lines[idx], re.IGNORECASE) and \
               not re.search(blank_re, lines[idx-1]):
            lines.insert(idx, blank_line)
            idx += 1  # step over the separator just inserted
        idx += 1

    N = nprof
    nbins = sensor_info['nbins']

    data = {
        # object dtype so the slots can later hold datetime instances
        'dt' : numpy.array(numpy.ones((N,), dtype=object)*numpy.nan),
        'time' : _nan_array((N,)),
        'z' : _nan_array((N, nbins)),
        'stime' : _nan_array((N, nbins)),
        'wtemp' : _nan_array((N, nbins)),
        'cond' : _nan_array((N, nbins)),
        'salin' : _nan_array((N, nbins)),
        'turb' : _nan_array((N, nbins)),
        'chl' : _nan_array((N, nbins)),
        'do' : _nan_array((N, nbins)),
        }

    iprof = 0   # index of the profile currently being filled
    have_date = have_time = have_location = have_head = False
    verbose = False

    for line in lines:
        # if line has weird ascii chars -- skip it and iterate to next line
        if re.search(r"[\x1a]", line):
            if verbose:
                print('skipping bad data line ... ' + str(line))
            continue

        # Split on runs of whitespace, '/' and ':' and keep the numeric tokens.
        # '+' (not '*') so the pattern can never match the empty string, which
        # newer Python versions would treat as a split point between every
        # character.
        ysi = []
        for token in re.split(r'[\s/:]+', line):
            m = re.search(REAL_RE_STR, token)
            if m:
                ysi.append(float(m.groups()[0]))

        if re.search("Profile Time:", line, re.IGNORECASE):
            have_time = True
            HH = ysi[0]
            MM = ysi[1]
            SS = ysi[2]

        elif re.search("Profile Date:", line, re.IGNORECASE):
            have_date = True
            mm = ysi[0]
            dd = ysi[1]
            yyyy = ysi[2]
            profile_str = '%02d-%02d-%4d %02d:%02d:%02d' % (mm,dd,yyyy,HH,MM,SS)
            profile_dt = to_utc(profile_str, '%m-%d-%Y %H:%M:%S')

        elif re.search("Profile Location:", line, re.IGNORECASE):
            # Matched case-insensitively like the time/date headers so the
            # older lowercase header layout is recognised as well:
            #   profile location: P180, Instrument Serial No: 0001119E
            #   Profile Location: Hampton Shoal Serial No: 000109DD, ID: Delta
            # (serial number and ID are not extracted)
            have_location = True

            # fresh NaN-filled per-sample buffers for the new profile
            wtemp = _nan_array(nbins)
            depth = _nan_array(nbins)
            cond = _nan_array(nbins)
            salin = _nan_array(nbins)
            turb = _nan_array(nbins)
            chl = _nan_array(nbins)
            do = _nan_array(nbins)
            stime = _nan_array(nbins)
            # number of samples seen in this profile, so as not to exceed nbins
            nsamp = 0
            # the header is complete only once all three lines were seen
            have_head = have_date and have_time and have_location

        elif re.search("Error", line):
            # ignore this line
            if verbose:
                print('skipping bad data line ... ' + str(line))
            continue

        elif (len(ysi)==13 and have_head):
            # 6 date/time fields + 7 measurements
            if nsamp >= nbins:
                print('Sample number (' + str(nsamp) +
                      ') in profile exceeds maximum value (' +
                      str(nbins) + ') in config')

            # get sample datetime from data
            sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(ysi[0:6])
            sample_dt = to_utc(sample_str, '%m-%d-%y %H:%M:%S')

            if nsamp < nbins:
                stime[nsamp] = dt2es(sample_dt) # sample time
                wtemp[nsamp] = ysi[6]   # water temperature (C)
                cond[nsamp] = ysi[7]    # conductivity (mS/cm)
                salin[nsamp] = ysi[8]   # salinity (ppt or PSU??)
                do[nsamp] = ysi[9]      # dissolved oxygen (mg/l)
                depth[nsamp] = ysi[10]  # depth (m); negated into z below
                turb[nsamp] = ysi[11]   # turbidity (NTU)
                chl[nsamp] = ysi[12]    # chlorophyll (ug/l)

            nsamp = nsamp + 1

        elif (len(ysi)==0 and have_head and iprof<N):
            # a line without numbers (blank separator) closes the profile
            data['dt'][iprof] = profile_dt          # profile datetime
            data['time'][iprof] = dt2es(profile_dt) # profile time
            data['stime'][iprof] = stime            # sample times
            data['z'][iprof] = -1.*depth
            data['wtemp'][iprof] = wtemp
            data['cond'][iprof] = cond
            data['salin'][iprof] = salin
            data['turb'][iprof] = turb
            data['chl'][iprof] = chl
            data['do'][iprof] = do

            iprof = iprof + 1
            # Require a complete new header before accepting more samples;
            # clearing have_head also keeps a second consecutive blank line
            # from storing the same profile again.
            have_date = have_time = have_location = have_head = False

        else:
            if verbose:
                print('skipping bad data line ... ' + str(line))
        # if-elif
    # for line

    return data
267  
268
def creator(platform_info, sensor_info, data):
    """
    Assemble everything needed to create a new monthly netCDF file.

    Parameters
    ----------
    platform_info : dict; keys read: 'location', 'institution', 'id',
        'lat', 'lon'
    sensor_info : dict; keys read: 'description', 'process_module', 'id',
        'nbins'
    data : dict as returned by parser() plus an 'in' entry used to index
        every array down to the month being processed
        (see raw2proc.process())

    Returns
    -------
    (global_atts, var_atts, dim_inits, var_inits, var_data)
        global_atts : dict of global attributes
        var_atts    : dict of per-variable attribute dicts
        dim_inits   : tuple of (dimName, size), in creation order
        var_inits   : tuple of (varName, varType, (dimName, ...))
        var_data    : tuple of (varName, values)
    """
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        #
        'source' : 'fixed-automated-profiler observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-profiler-ragged',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url' : 'http://nccoos.unc.edu',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : dt[0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : numpy.nan,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time of Profile',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis (latitude above is Y)
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'stime' : {'short_name': 'stime',
                   'long_name': 'Time of Sample ',
                   'standard_name': 'time',
                   'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                   },
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees_Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'mS cm-1',
                 },
        'salin': {'short_name': 'salin',
                  'long_name': 'Salinity',
                  'standard_name': 'salinity',
                  'units': 'PSU',
                  },
        'turb': {'short_name': 'turb',
                 'long_name': 'Turbidity',
                 'standard_name': 'turbidity',
                 'units': 'NTU',
                 },
        'chl': {'short_name': 'chl',
                'long_name': 'Chlorophyll',
                'standard_name': 'chlorophyll',
                'units': 'ug l-1',
                },
        'do': {'short_name': 'do',
               'long_name': 'Dissolved Oxygen',
               'standard_name': 'dissolved_oxygen',
               'units': 'mg l-1',
               },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('time', NC.UNLIMITED),
        ('lat', 1),
        ('lon', 1),
        ('z', sensor_info['nbins']),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('time',)),
        ('lat', NC.FLOAT, ('lat',)),
        ('lon', NC.FLOAT, ('lon',)),
        ('z',  NC.FLOAT, ('time', 'z',)),
        # data variables
        # NOTE(review): stime holds epoch seconds; if NC.FLOAT is a 32-bit
        # float it cannot resolve one-second steps at values near 1e9 --
        # confirm, and consider a wider type for newly created files.
        ('stime', NC.FLOAT, ('time', 'z')),
        ('wtemp', NC.FLOAT, ('time', 'z')),
        ('cond', NC.FLOAT, ('time', 'z')),
        ('salin', NC.FLOAT, ('time', 'z')),
        ('turb', NC.FLOAT, ('time', 'z')),
        ('chl', NC.FLOAT, ('time', 'z')),
        ('do', NC.FLOAT, ('time', 'z')),
        )

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('time', data['time'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
440
def updater(platform_info, sensor_info, data):
    """
    Assemble what is needed to append to an existing monthly netCDF file.

    Parameters
    ----------
    platform_info, sensor_info : not referenced by this function
    data : dict as returned by parser() plus an 'in' entry used to index
        every array down to the month being processed
        (see raw2proc.process())

    Returns
    -------
    (global_atts, var_atts, var_data)
        global_atts : dict with refreshed end/release/modification dates
        var_atts    : empty dict (no per-variable attributes are updated)
        var_data    : tuple of (varName, values)
    """
    stamp_fmt = "%Y-%m-%d %H:%M:%S"

    # restrict everything to the month being processed
    in_month = data['in']
    profile_dts = data['dt'][in_month]
    processed_at = now_dt.strftime(stamp_fmt)

    # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
    global_atts = {
        # last date in monthly file
        'end_date' : profile_dts[-1].strftime(stamp_fmt),
        'release_date' : processed_at,
        'modification_date' : processed_at,
        }

    # no variable attributes (range, min, max, ...) are updated at present
    var_atts = {}

    # variables appended along the time dimension, in write order
    names = ('time', 'stime', 'z',
             'wtemp', 'cond', 'salin', 'turb', 'chl', 'do')
    var_data = tuple((name, data[name][in_month]) for name in names)

    return (global_atts, var_atts, var_data)
485 #
486
Note: See TracBrowser for help on using the browser.