NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/proc_avp_ysi_6600_v2_moving_point.py

Revision 451 (checked in by cbc, 13 years ago)

Add various proc and config files not already under SVN.

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2011-05-05 14:43:47 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse data from YSI 6600 V2-2 on an automated vertical profiler (avp)
8
9 parser : date and time, water_depth for each profile
10
11          sample time, sample depth, as cast measures water
12          temperature, conductivity, salinity, pH, dissolved oxygen,
13          turbidity, and chlorophyll
14          
15
16 creator : lat, lon, z, time, water_depth, water_temp, cond,
17           salin, ph, turb, chl, do
18
19 updater : z, time, water_depth, water_temp, cond, salin, ph,
20           turb, chl, do
21
22 using moving point CDL
23
24
25 Examples
26 --------
27
28 >> (parse, create, update) = load_processors('proc_avp_ysi_6600_v2')
29 or
30 >> si = get_config(cn+'.sensor_info')
31 >> (parse, create, update) = load_processors(si['adcp']['proc_module'])
32
33 >> lines = load_data(filename)
34 >> data = parse(platform_info, sensor_info, lines)
35 >> create(platform_info, sensor_info, data) or
36 >> update(platform_info, sensor_info, data)
37
38 """
39
40
41 from raw2proc import *
42 from procutil import *
43 from ncutil import *
44
# Processing timestamp (UTC) truncated to whole seconds; used for the
# release/creation/modification dates written by creator() and updater().
# datetime.replace() returns a new object, so its result must be kept.
now_dt = datetime.utcnow().replace(microsecond=0)
47
def parser(platform_info, sensor_info, lines):
    """
    Parse Automated Vertical Profile Station (AVP) water quality data.

    Each sample line holds month, day, year, hour, min, sec, temp
    (deg. C), conductivity (mS/cm), salinity (ppt or PSU), depth
    (meters), pH, turbidity (NTU), chlorophyll (micrograms per liter)
    and DO.

    Parameters
    ----------
    platform_info : dict
        Must provide 'mean_water_depth' (used to derive water level).
    sensor_info : dict
        Must provide 'fn', the raw data file name.
    lines : sequence of str
        Raw lines of the data file.  Iterated twice, so it must not be
        a one-shot iterator.

    Returns
    -------
    data : dict
        Arrays of length N, where N is the number of lines that start
        with a mm/dd/yy date.  Keys: 'dt' (datetime objects), 'time'
        (value of dt2es(dt)), 'z', 'wd', 'wl', 'batt', 'wtemp', 'cond',
        'salin', 'turb', 'ph', 'chl', 'do'.  Slots never filled keep
        dt = 1970-01-01 and NaN in every numeric array.

    Notes
    -----
    1. Column Format

    temp, cond, salin, depth, pH, turb, chl, DO
    (C), (mS/cm), (ppt), (m), pH, (NTU), (ug/l), (ug/l)

    NOTE(review): DO is labelled ug/l here but creator() declares
    'mg l-1' for it -- confirm which unit the instrument reports.

    Profile Time: 00:30:00
    Profile Date: 08/18/2008
    Profile Depth: 255.0 cm
    Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
    08/18/08 00:30:06 26.94  41.87  26.81   0.134  8.00     3.4   4.5   6.60
    08/18/08 00:30:07 26.94  41.87  26.81   0.143  8.00     3.4   4.8   6.59
    08/18/08 00:30:08 26.94  41.87  26.81   0.160  8.00     3.4   4.8   6.62
    08/18/08 00:30:09 26.94  41.87  26.81   0.183  8.00     3.4   4.8   6.66

    2. Profile-level values (water depth, water level, battery voltage)
    are written only at the index of the next sample to be stored,
    i.e. the first sample following the header line; the remaining
    samples of that profile keep NaN for those fields.

    """
    import re
    import numpy
    from datetime import datetime

    # get sample datetime from filename
    # NOTE(review): the result is not used below; the call is kept in case
    # callers rely on it raising for an unexpected file name -- confirm.
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)

    # patterns are compiled once instead of once per line
    bad_char_re = re.compile(r"[\x1a]")           # stray ctrl-Z in raw file
    sample_re = re.compile(r"^\d{2}/\d{2}/\d{2}")  # line starts with a date
    # '+' (not '*') so the pattern can never match the empty string;
    # an empty-matching split pattern splits between every character
    # on Python >= 3.7, while on Python 2 both forms give the same result.
    split_re = re.compile(r"[\s/:]+")

    # placeholder date: old enough that unfilled slots are eliminated
    # when data is subset to the month being processed
    epoch = datetime(1970, 1, 1)

    # how many samples (lines with weird ascii chars are not counted)
    N = sum(1 for line in lines
            if not bad_char_re.search(line) and sample_re.search(line))

    data = {'dt': numpy.empty((N,), dtype=object)}
    for k in range(N):
        data['dt'][k] = epoch
    for key in ('time', 'z', 'wd', 'wl', 'batt', 'wtemp', 'cond',
                'salin', 'turb', 'ph', 'chl', 'do'):
        data[key] = numpy.ones((N,), dtype=float)*numpy.nan

    # sample count
    i = 0

    for line in lines:
        # if line has weird ascii chars -- skip it and iterate to next line
        if bad_char_re.search(line):
            continue

        # split line and collect every token that parses as a real number
        ysi = []
        for s in split_re.split(line):
            m = re.search(REAL_RE_STR, s)
            if m:
                ysi.append(float(m.groups()[0]))

        if "Profile Depth:" in line and i < N:
            wd = numpy.nan
            sw = re.match("Profile Depth: " + REAL_RE_STR + r"(\w+)", line)
            # 'ysi' may be empty on a malformed header; leave wd as NaN
            if ysi and sw is not None:
                unit_str = sw.groups()[-1]
                if unit_str is not None:
                    (wd, unit_str) = udconvert(ysi[0], unit_str, 'm') # to meters

            wl = platform_info['mean_water_depth'] - (-1*wd)
            data['wl'][i] = wl
            data['wd'][i] = -1*wd

        if "Voltage" in line and i < N and ysi:
            data['batt'][i] = ysi[0]  # volts

        # "Profile Location:" lines, e.g.
        #   Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
        # carry the YSI serial number and id; they are not used here.

        # date (3) + time (3) + 8 measurements = 14 numeric fields
        if sample_re.search(line) and len(ysi) == 14 and i < N:
            # get sample datetime from data
            sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(ysi[0:6])

            # month, day, year
            try:
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S')
            except ValueError:
                # day, month, year (month and day switched in some cases)
                try:
                    sample_dt = scanf_datetime(sample_str, fmt='%d-%m-%y %H:%M:%S')
                except Exception:
                    # best effort: keep the sample but give it the
                    # placeholder date so it is dropped later
                    sample_dt = epoch

            if sample_dt is not None:
                data['dt'][i] = sample_dt           # sample datetime
                data['time'][i] = dt2es(sample_dt)  # sample time in epoch seconds
                #
                data['wtemp'][i] = ysi[6]    # water temperature (C)
                data['cond'][i] = ysi[7]     # conductivity (mS/cm)
                data['salin'][i] = ysi[8]    # salinity (ppt or PSU??)
                data['z'][i] = -1*ysi[9]     # depth (m), relative to surface
                #
                data['ph'][i] = ysi[10]      # ph
                data['turb'][i] = ysi[11]    # turbidity (NTU)
                data['chl'][i] = ysi[12]     # chlorophyll (ug/l)
                data['do'][i] = ysi[13]      # dissolved oxygen
                i = i+1
            else:
                # single-argument print(...) behaves the same on Python 2 and 3
                print('skipping line, ill-formed date ... ' + str(line))

        elif 6 <= len(ysi) < 14:
            print('skipping bad data line ... ' + str(line))

        # if-elif
    # for line

    return data
206  
207
def creator(platform_info, sensor_info, data):
    """
    Assemble everything needed to create a new monthly netCDF file.

    Parameters
    ----------
    platform_info : dict
        Reads 'location', 'institution', 'id', 'lat', 'lon',
        'mean_water_depth' and 'mean_water_depth_time_period'.
    sensor_info : dict
        Reads 'description', 'process_module' and 'id'.
    data : dict
        Output of parser() plus 'in', the index selecting the samples
        of the month being processed (see raw2proc.process()).

    Returns
    -------
    (global_atts, var_atts, dim_inits, var_inits, var_data) : tuple
        Global attributes, per-variable attributes, dimension
        definitions, variable definitions and the variable data.

    Notes
    -----
    data['dt'][data['in']] must select at least one sample: the first
    and last selected datetimes are indexed to fill 'start_date' and
    'end_date'.
    """
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        #
        'source' : 'fixed-automated-profiler observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-profiler-ragged',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url' : 'http://nccoos.unc.edu',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : dt[0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : numpy.nan,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time of Profile',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis (latitude above is Y)
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'z',
               'standard_name': 'z',
               'reference':'zero is surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'batt': {'short_name': 'batt',
               'long_name': 'Battery',
               'standard_name': 'battery_voltage',
               'units': 'volts',
               },
        'wd': {'short_name': 'wd',
               'long_name': 'Water Depth',
               'standard_name': 'water_depth',
               'reference' : 'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               },
        'wl': {'short_name': 'wl',
               'long_name': 'Water Level',
               'standard_name': 'water_level',
               'reference':'MSL',
               'reference_to_MSL' : 0.,
               'reference_MSL_datum' : platform_info['mean_water_depth'],
               'reference_MSL_datum_time_period' : platform_info['mean_water_depth_time_period'],
               'positive' : 'up',
               'z' : 0.,
               'units': 'm',
               },
        'wtemp': {'short_name': 'wtemp',
                        'long_name': 'Water Temperature',
                        'standard_name': 'water_temperature',
                        'units': 'degrees_Celsius',
                        },
        'cond': {'short_name': 'cond',
                        'long_name': 'Conductivity',
                        'standard_name': 'conductivity',
                        'units': 'mS cm-1',
                        },
        'salin': {'short_name': 'salin',
                        'long_name': 'Salinity',
                        'standard_name': 'salinity',
                        'units': 'PSU',
                        },
        'turb': {'short_name': 'turb',
                        'long_name': 'Turbidity',
                        'standard_name': 'turbidity',
                        'units': 'NTU',
                        },
        'ph': {'short_name': 'ph',
                        'long_name': 'pH',
                        'standard_name': 'ph',
                        'units': '',
                        },
        'chl': {'short_name': 'chl',
                        'long_name': 'Chlorophyll',
                        'standard_name': 'chlorophyll',
                        'units': 'ug l-1',
                        },
        'do': {'short_name': 'do',
                        'long_name': 'Dissolved Oxygen',
                        'standard_name': 'dissolved_oxygen',
                        'units': 'mg l-1',
                        },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('ntime', NC.UNLIMITED),
        ('nlat', 1),
        ('nlon', 1),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('ntime',)),
        ('lat', NC.FLOAT, ('nlat',)),
        ('lon', NC.FLOAT, ('nlon',)),
        ('z',  NC.FLOAT, ('ntime',)),
        # data variables
        ('batt', NC.FLOAT, ('ntime',)),
        ('wd', NC.FLOAT, ('ntime',)),
        ('wl', NC.FLOAT, ('ntime',)),
        #
        ('wtemp', NC.FLOAT, ('ntime',)),
        ('cond', NC.FLOAT, ('ntime',)),
        ('salin', NC.FLOAT, ('ntime',)),
        ('turb', NC.FLOAT, ('ntime',)),
        ('ph', NC.FLOAT, ('ntime',)),
        ('chl', NC.FLOAT, ('ntime',)),
        ('do', NC.FLOAT, ('ntime',)),
        )

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('time', data['time'][i]),
        ('z', data['z'][i]),
        #
        ('batt', data['batt'][i]),
        ('wd', data['wd'][i]),
        ('wl', data['wl'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('ph', data['ph'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
397
def updater(platform_info, sensor_info, data):
    """
    Assemble what is needed to append to an existing monthly netCDF file.

    Parameters
    ----------
    platform_info, sensor_info : dict
        Accepted for a signature parallel to creator(); not read here.
    data : dict
        Output of parser() plus 'in', the index selecting the samples
        of the month being processed (see raw2proc.process()).

    Returns
    -------
    (global_atts, var_atts, var_data) : tuple
        Global attributes to refresh, per-variable attribute updates
        (currently none) and the variable data to append.
    """
    # keep only the samples that fall in the month being processed
    in_month = data['in']
    sample_dts = data['dt'][in_month]

    date_fmt = "%Y-%m-%d %H:%M:%S"   # yyyy-mm-dd HH:MM:SS
    processed_at = now_dt.strftime(date_fmt)

    # refresh the time span and bookkeeping dates of the file
    global_atts = {}
    global_atts['end_date'] = sample_dts[-1].strftime(date_fmt)  # last date in monthly file
    global_atts['release_date'] = processed_at
    global_atts['modification_date'] = processed_at

    # no variable attributes (e.g. range, min, max) are updated for now
    var_atts = {}

    # same variables and order as written at creation, minus lat/lon
    appended = ('time', 'z',
                'batt', 'wd', 'wl',
                'wtemp', 'cond', 'salin', 'turb', 'ph', 'chl', 'do')
    var_data = tuple([(name, data[name][in_month]) for name in appended])

    return (global_atts, var_atts, var_data)
444 #
Note: See TracBrowser for help on using the browser.