
root/raw2proc/trunk/raw2proc/proc_avp_ysi_6600_v2_CDL2.py

Revision 218 (checked in by haines, 15 years ago)

AVP fixed profiler CDL2 implemented

#!/usr/bin/env python
# Last modified:  Time-stamp: <2009-01-07 17:54:32 haines>
"""
how to parse data, and specify what data and info go into creating
and updating the monthly netcdf files

parse data from YSI 6600 V2-2 on an automated vertical profiler (avp)

parser : date and time, water_depth for each profile

         sample time, sample depth, as cast measures water
         temperature, conductivity, salinity, pH, dissolved oxygen,
         turbidity, and chlorophyll

creator : lat, lon, z, stime, (time, water_depth), water_temp, cond,
          salin, ph, turb, chl, do

updater : z, stime, (time, water_depth), water_temp, cond, salin, ph,
          turb, chl, do

uses the fixed-profiler CDL, modified to hold the raw data for each
cast along each column

Examples
--------

>> (parse, create, update) = load_processors('proc_avp_ysi_6600_v2')
or
>> si = get_config(cn+'.sensor_info')
>> (parse, create, update) = load_processors(si['adcp']['proc_module'])

>> lines = load_data(filename)
>> data = parse(platform_info, sensor_info, lines)
>> create(platform_info, sensor_info, data) or
>> update(platform_info, sensor_info, data)

"""


from raw2proc import *
from procutil import *
from ncutil import *

now_dt = datetime.utcnow()
now_dt = now_dt.replace(microsecond=0)  # replace() returns a new datetime; reassign to drop microseconds

def parser(platform_info, sensor_info, lines):
    """
    parse Automated Vertical Profile Station (AVP) Water Quality Data

    month, day, year, hour, min, sec, temp (deg. C), conductivity
    (mS/cm), salinity (ppt or PSU), depth (meters), pH, turbidity (NTU),
    chlorophyll (micrograms per liter), DO (milligrams per liter)

    Notes
    -----
    1. Column Format

    temp, cond, salin, depth, pH, turb, chl, DO
    (C), (mS/cm), (ppt), (m), pH, (NTU), (ug/l), (mg/l)

    Profile Time: 00:30:00
    Profile Date: 08/18/2008
    Profile Depth: 255.0 cm
    Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
    08/18/08 00:30:06 26.94  41.87  26.81   0.134  8.00     3.4   4.5   6.60
    08/18/08 00:30:07 26.94  41.87  26.81   0.143  8.00     3.4   4.8   6.59
    08/18/08 00:30:08 26.94  41.87  26.81   0.160  8.00     3.4   4.8   6.62
    08/18/08 00:30:09 26.94  41.87  26.81   0.183  8.00     3.4   4.8   6.66

    2. Each parameter is sampled along the cast as a time series, so
    temp(t) and z(t) line up in time.  We want to grid depth (every
    1 cm) so that each parameter becomes param(t,z).

    Tony Whipple at IMS says 'The AVPs sample at one second intervals.
    Between the waves and the instrument descending from a spool of
    line with variable radius it works out to about 3-5 cm between
    observations on average.  When I process the data to make the
    images, I bin the data every 10 cm and take the average of however
    many observations fell within that bin.'

    Do we interpolate or average the samples in each bin?  (See the
    illustrative bin-averaging sketch after this function.)

    """
    import numpy
    from datetime import datetime
    from time import strptime

    # get sample datetime from filename
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # how many profiles in one file, count number of "Profile Time:" in lines
    nprof = 0
    for line in lines:
        m=re.search("Profile Time:", line)
        if m:
            nprof=nprof+1

    # remove first occurrence of blank line if within first 40 lines
    for i in range(len(lines[0:40])):
        if re.search("^ \r\n", lines[i]):
            # print str(i) + " " + lines[i] + " " + lines[i+1]
            blank_line = lines.pop(i)
            # lines.append(blank_line)
            break  # stop at first occurrence; indices shift after pop()

    # ensure signal end of profile after last profile by appending a blank line to data file
    lines.append(' \r\n')

    N = nprof
    nbins = sensor_info['nbins']

    data = {
        'dt' : numpy.array(numpy.ones((N,), dtype=object)*numpy.nan),
        'time' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'z' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        #
        'wd' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'wl' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'ysi_sn' : numpy.array(['' for i in range(N)] , dtype='|S20'),
        'ysi_id' : numpy.array(['' for i in range(N)] , dtype='|S20'),
        #
        'stime' : numpy.array(numpy.ones((N,nbins), dtype=long)*numpy.nan),
        'wtemp' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'cond' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'salin' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'turb' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'ph' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'chl' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'do' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        }

    # current profile count
    i = 0
    have_date = have_time = have_wd = have_location = have_head = False

    for line in lines:
        # if line has weird ascii chars -- skip it and iterate to next line
        if re.search(r"[\x1a]", line):
            print 'skipping bad data line ... ' + str(line)
            continue

        ysi = []
        # split line and parse floats and integers
        sw = re.split('[\s/\:]*', line)
        for s in sw:
            m = re.search(REAL_RE_STR, s)
            if m:
                ysi.append(float(m.groups()[0]))

        if re.search("Profile Time:", line):
            have_time = True
            HH=ysi[0]
            MM=ysi[1]
            SS=ysi[2]
        elif re.search("Profile Date:", line):
            have_date = True
            mm=ysi[0]
            dd=ysi[1]
            yyyy=ysi[2]
        elif re.search("Profile Depth:", line):
            have_wd = True
            wd = ysi[0]/100.  # cm to meters
            profile_str = '%02d-%02d-%4d %02d:%02d:%02d' % (mm,dd,yyyy,HH,MM,SS)
            if sensor_info['utc_offset']:
                profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S') + \
                             timedelta(hours=sensor_info['utc_offset'])
            else:
                profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S')
        elif re.search("Profile Location:", line):
            have_location = True
            # Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
            sw = re.findall(r'\w+:\s(\w+)*', line)
            ysi_sn = sw[1]
            ysi_id = sw[2]
            # initialize NaN-filled arrays for a new profile, one sample per bin
            wtemp = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            depth = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            cond = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            salin = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            turb = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            ph = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            chl = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            do = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            stime = numpy.array(numpy.ones(nbins,), dtype=long)*numpy.nan
            # keep track of number of samples in one profile so not to exceed nbins
            j = 0
            # have all the header info
            head = numpy.array([have_date, have_time, have_wd, have_location])
            have_head = head.all()

        elif (len(ysi)==14 and have_head):
            if j>=nbins:
                print 'Sample number (' + str(j) + \
                      ') in profile exceeds maximum value ('+ \
                      str(nbins) + ') in config'

            # get sample datetime from data
            sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(ysi[0:6])
            if sensor_info['utc_offset']:
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S') + \
                            timedelta(hours=sensor_info['utc_offset'])
            else:
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S')

            if j<nbins:
                stime[j] = dt2es(sample_dt) # sample time
                wtemp[j] = ysi[6] # water temperature (C)
                cond[j] = ysi[7]  # conductivity (mS/cm)
                salin[j] = ysi[8] # salinity (ppt or PSU??)
                #
                depth[j] = ysi[9] # depth (m, positive down as sampled; negated below so z is positive up)
                #
                ph[j] = ysi[10]   # pH
                turb[j] = ysi[11] # turbidity (NTU)
                chl[j] = ysi[12]  # chlorophyll (ug/l)
                do[j] = ysi[13]   # dissolved oxygen (mg/l)

            j = j+1

        elif (len(ysi)==0 and have_head):  # each profile separated by empty line

            data['dt'][i] = profile_dt # profile datetime
            data['time'][i] = dt2es(profile_dt) # profile time in epoch seconds
            data['wd'][i] = -1.*wd
            data['wl'][i] = platform_info['mean_water_depth'] - (-1*wd)
            data['ysi_sn'][i] = ysi_sn
            data['ysi_id'][i] = ysi_id

            data['stime'][i] = stime # sample time in epoch seconds
            data['z'][i] = -1.*depth

            data['wtemp'][i] = wtemp
            data['cond'][i] = cond
            data['salin'][i] = salin
            data['turb'][i] = turb
            data['ph'][i] = ph
            data['chl'][i] = chl
            data['do'][i] = do

            i=i+1
            have_date = have_time = have_wd = have_location = False
        else:
            print 'skipping bad data line ... ' + str(line)
        # if-elif
    # for line

    return data


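# The parser above keeps each raw cast sample in its own slot (up to nbins
# per profile).  The docstring raises the question of gridding/binning the
# raw samples onto a regular depth axis.  The helper below is only an
# illustrative sketch of one answer -- bin-average onto regular depth bins,
# in the spirit of the 10-cm binning described above.  It is NOT called by
# parser(); its name, default bin size, and return layout are assumptions
# for illustration, not part of the processing chain.
def bin_average_profile_sketch(depth, values, bin_size=0.10, max_depth=3.0):
    """Average profile samples into regular depth bins (illustrative only).

    depth, values : 1-d arrays of equal length; NaN-padded entries ignored
    bin_size      : bin width in meters (0.10 m follows the 10-cm example)
    max_depth     : deepest bin edge in meters (assumed value)

    Returns (bin_centers, bin_means); bins with no samples stay NaN.
    """
    import numpy
    depth = numpy.asarray(depth, dtype=float)
    values = numpy.asarray(values, dtype=float)
    # keep only finite (depth, value) sample pairs
    ok = ~(numpy.isnan(depth) | numpy.isnan(values))
    depth, values = depth[ok], values[ok]
    # regular bin edges and centers from the surface down
    edges = numpy.arange(0., max_depth + bin_size, bin_size)
    centers = edges[:-1] + bin_size/2.
    means = numpy.ones(len(centers), dtype=float)*numpy.nan
    for k in range(len(centers)):
        in_bin = (depth >= edges[k]) & (depth < edges[k+1])
        if in_bin.any():
            means[k] = values[in_bin].mean()
    return (centers, means)

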
def creator(platform_info, sensor_info, data):
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        #
        'source' : 'fixed-automated-profiler observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-profiler-ragged',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url' : 'http://nccoos.unc.edu',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : dt[0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : numpy.nan,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time of Profile',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'stime' : {'short_name': 'stime',
                   'long_name': 'Time of Sample',
                   'standard_name': 'time',
                   'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                   },
        'wd': {'short_name': 'wd',
               'long_name': 'Water Depth',
               'standard_name': 'water_depth',
               'reference' : 'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               },
        'wl': {'short_name': 'wl',
               'long_name': 'Water Level',
               'standard_name': 'water_level',
               'reference':'MSL',
               'reference_to_MSL' : 0.,
               'reference_MSL_datum' : platform_info['mean_water_depth'],
               'reference_MSL_datum_time_period' : platform_info['mean_water_depth_time_period'],
               'positive' : 'up',
               'z' : 0.,
               'units': 'm',
               },
        'ysi_id' : {'short_name':'ysi_id',
                    'long_name':'Identification name of YSI Sonde',
                    'standard_name': 'identification_name',
                    },
        'ysi_sn' : {'short_name':'ysi_sn',
                    'long_name':'Serial number of YSI Sonde',
                    'standard_name': 'serial_number',
                    },
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'mS cm-1',
                 },
        'salin': {'short_name': 'salin',
                  'long_name': 'Salinity',
                  'standard_name': 'salinity',
                  'units': 'PSU',
                  },
        'turb': {'short_name': 'turb',
                 'long_name': 'Turbidity',
                 'standard_name': 'turbidity',
                 'units': 'NTU',
                 },
        'ph': {'short_name': 'ph',
               'long_name': 'pH',
               'standard_name': 'ph',
               'units': '',
               },
        'chl': {'short_name': 'chl',
                'long_name': 'Chlorophyll',
                'standard_name': 'chlorophyll',
                'units': 'ug l-1',
                },
        'do': {'short_name': 'do',
               'long_name': 'Dissolved Oxygen',
               'standard_name': 'dissolved_oxygen',
               'units': 'mg l-1',
               },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('time', NC.UNLIMITED),
        ('lat', 1),
        ('lon', 1),
        ('z', sensor_info['nbins']),
        ('nchar', 20),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('time',)),
        ('lat', NC.FLOAT, ('lat',)),
        ('lon', NC.FLOAT, ('lon',)),
        ('z',  NC.FLOAT, ('time', 'z',)),
        # data variables
        ('wd', NC.FLOAT, ('time',)),
        ('wl', NC.FLOAT, ('time',)),
        # ('ysi_sn', NC.CHAR, ('time', 'nchar')),
        # ('ysi_id', NC.CHAR, ('time', 'nchar')),
        ('stime', NC.INT, ('time', 'z')),
        ('wtemp', NC.FLOAT, ('time', 'z')),
        ('cond', NC.FLOAT, ('time', 'z')),
        ('salin', NC.FLOAT, ('time', 'z')),
        ('turb', NC.FLOAT, ('time', 'z')),
        ('ph', NC.FLOAT, ('time', 'z')),
        ('chl', NC.FLOAT, ('time', 'z')),
        ('do', NC.FLOAT, ('time', 'z')),
        )

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('time', data['time'][i]),
        ('wd', data['wd'][i]),
        ('wl', data['wl'][i]),
        # ('ysi_id', data['ysi_id'][i]),
        # ('ysi_sn', data['ysi_sn'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('ph', data['ph'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)

def updater(platform_info, sensor_info, data):
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    global_atts = {
        # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        }

    # data variables
    # update any variable attributes like range, min, max
    var_atts = {}
    # var_atts = {
    #    'wtemp': {'max': data['wtemp'][i].max(),
    #              'min': data['wtemp'][i].min(),
    #              },
    #    'cond': {'max': data['cond'][i].max(),
    #             'min': data['cond'][i].min(),
    #             },
    #    }

    # data
    var_data = (
        ('time', data['time'][i]),
        ('wd', data['wd'][i]),
        ('wl', data['wl'][i]),
        # ('ysi_id', data['ysi_id'][i]),
        # ('ysi_sn', data['ysi_sn'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('ph', data['ph'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, var_data)
#
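
# Minimal sketch, NOT part of the processing chain: the dictionaries below
# only illustrate the configuration keys that parser(), creator() and
# updater() read from platform_info and sensor_info (lat, lon,
# mean_water_depth, nbins, utc_offset, ...).  Every value and name here is
# a made-up placeholder; real values come from the raw2proc config files
# loaded with get_config().
if __name__ == '__main__':
    example_platform_info = {
        'id' : 'avp1',                           # placeholder platform code
        'location' : 'Stones Bay, NC',           # placeholder
        'institution' : 'nccoos',
        'lat' : 34.59,                           # placeholder decimal degrees
        'lon' : -77.42,                          # placeholder decimal degrees
        'mean_water_depth' : -4.0,               # placeholder (m)
        'mean_water_depth_time_period' : 'Not determined',
        }
    example_sensor_info = {
        'id' : 'avp',                            # placeholder package code
        'description' : 'Automated profiler data',
        'process_module' : 'proc_avp_ysi_6600_v2_CDL2',
        'fn' : 'avp1_2008_08_18.dat',            # placeholder raw filename
        'nbins' : 150,                           # placeholder max samples per cast
        'utc_offset' : 5,                        # placeholder hours added to get UTC
        }
    # With real config and a real raw file, the call chain mirrors the
    # module docstring:
    #   lines = load_data(filename)
    #   data = parser(example_platform_info, example_sensor_info, lines)
    #   creator(...) or updater(...) then hand their outputs to ncutil.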