NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/proc_avp_ysi_6600_v2_CDL1.py

Revision 451 (checked in by cbc, 13 years ago)

Add various proc and config files not already under SVN.

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2008-10-09 17:31:44 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse data from YSI 6600 V2-2 on an automated vertical profiler (avp)
8
9 parser : sample date and time, water_depth for each profile
10          water temperature, conductivity, pressure (depth), salinity, pH, dissolved oxygen, turbidity, and chlorophyll
11          raw data averaged to 10 cm bins
12
13 creator : lat, lon, z, time, water_depth, water_temp, cond, salin, ph, turb, chl, do
14 updater : time, water_depth, water_temp, cond, salin, ph, turb, chl, do
15
16
17 Examples
18 --------
19
20 >> (parse, create, update) = load_processors('proc_avp_ysi_6600_v2')
21 or
22 >> si = get_config(cn+'.sensor_info')
23 >> (parse, create, update) = load_processors(si['adcp']['proc_module'])
24
25 >> lines = load_data(filename)
26 >> data = parse(platform_info, sensor_info, lines)
27 >> create(platform_info, sensor_info, data) or
28 >> update(platform_info, sensor_info, data)
29
30 """
31
32
33 from raw2proc import *
34 from procutil import *
35 from ncutil import *
36
# Module load time (UTC), truncated to whole seconds.  datetime objects are
# immutable, so replace() returns a new object that has to be bound back to
# the name; calling it as a bare statement has no effect.
now_dt = datetime.utcnow().replace(microsecond=0)
39
def parser(platform_info, sensor_info, lines):
    """
    Parse Automated Vertical Profile Station (AVP) water quality data.

    Each profile in the raw file is a small header ("Profile Time:",
    "Profile Date:", "Profile Depth:", "Profile Location:") followed by one
    sample per line and terminated by a line containing no numbers.
    Samples are averaged into depth bins of ``sensor_info['bin_size']``
    meters between the surface and ``platform_info['water_depth']``.

    Parameters
    ----------
    platform_info : dict
        Reads 'water_depth' (nominal water depth, meters).
    sensor_info : dict
        Reads 'fn', 'bin_size', 'nbins' and 'utc_offset'.
    lines : list of str
        Raw file lines.  NOTE: modified in place -- the first blank line
        found in the first 40 lines is moved to the end of the list.

    Returns
    -------
    dict
        'dt', 'time', 'wd'  : one entry per profile (N = number of
                              "Profile Time:" lines)
        'z'                 : bin coordinates, length nbins
        'wtemp', 'cond', 'salin', 'turb', 'ph', 'chl', 'do'
                            : (N, nbins) bin averages, NaN for empty bins

    Notes
    -----
    1. Column Format

    month, day, year, hour, min, sec, then
    temp, cond, salin, depth, pH, turb, chl, DO
    (C), (mS/cm), (ppt), (m), pH, (NTU), (ug/l), DO units: see note

    NOTE(review): earlier header text listed DO as ug/l while the code
    comments and netCDF attributes say mg/l -- confirm against the
    instrument setup.

    Profile Time: 00:30:00
    Profile Date: 08/18/2008
    Profile Depth: 255.0 cm
    Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
    08/18/08 00:30:06 26.94  41.87  26.81   0.134  8.00     3.4   4.5   6.60
    08/18/08 00:30:07 26.94  41.87  26.81   0.143  8.00     3.4   4.8   6.59
    08/18/08 00:30:08 26.94  41.87  26.81   0.160  8.00     3.4   4.8   6.62
    08/18/08 00:30:09 26.94  41.87  26.81   0.183  8.00     3.4   4.8   6.66

    """
    import re
    import numpy
    from datetime import timedelta

    # get sample datetime from filename (not used further below; kept so a
    # missing 'fn' entry is still reported here)
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # how many profiles in one file, count number of "Profile Time:" in lines
    nprof = sum(1 for line in lines if "Profile Time:" in line)

    # remove FIRST occurrence of blank line if within first 40 lines
    # and put it on the end to signal end of profile after last profile.
    # Stop after the first one: moving later blank lines as well would
    # delete the separator between short profiles.
    for idx, line in enumerate(lines[:40]):
        if re.search("^ \r\n", line):
            lines.append(lines.pop(idx))
            break

    bin_size = sensor_info['bin_size']  # Bin Size (meters)
    # Mean sea level at station (meters) or nominal water depth
    nominal_depth = platform_info['water_depth']
    # bin coordinates: 0, -bin_size, -2*bin_size, ... (positive up)
    z = numpy.arange(0, -1*nominal_depth, -1*bin_size, dtype=float)

    N = nprof
    nbins = len(z)

    if nbins != sensor_info['nbins']:
        print('Number of bins computed from water_depth and bin_size (' +
              str(nbins) + ') does not match config number (' +
              str(sensor_info['nbins']) + ')')

    def nan_array(shape):
        # float array pre-filled with NaN (NaN cannot be held by an
        # integer array, so every numeric field is float)
        return numpy.ones(shape, dtype=float)*numpy.nan

    data = {
        'dt': numpy.array(numpy.ones((N,), dtype=object)*numpy.nan),
        'time': nan_array((N,)),
        'z': nan_array((nbins,)),
        #
        'wd': nan_array((N,)),
        'wtemp': nan_array((N, nbins)),
        'cond': nan_array((N, nbins)),
        'salin': nan_array((N, nbins)),
        'turb': nan_array((N, nbins)),
        'ph': nan_array((N, nbins)),
        'chl': nan_array((N, nbins)),
        'do': nan_array((N, nbins)),
        }

    # current profile count
    i = 0
    # True between a "Profile Depth:" header and the line that closes the
    # profile, so stray or repeated blank lines are ignored instead of
    # raising NameError or storing the same profile twice
    profile_open = False

    for line in lines:
        ysi = []
        # split on whitespace, '/' and ':' and collect every numeric token.
        # '+' (not '*') so the pattern can never match the empty string.
        sw = re.split(r'[\s/:]+', line)
        for s in sw:
            m = re.search(REAL_RE_STR, s)
            if m:
                ysi.append(float(m.groups()[0]))

        if re.search("Profile Time:", line):
            HH = ysi[0]
            MM = ysi[1]
            SS = ysi[2]
        elif re.search("Profile Date:", line):
            mm = ysi[0]
            dd = ysi[1]
            yyyy = ysi[2]
        elif re.search("Profile Depth:", line):
            wd = ysi[0]/100.  # cm to meters
            sample_str = '%02d-%02d-%4d %02d:%02d:%02d' % (mm, dd, yyyy, HH, MM, SS)
            sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%Y %H:%M:%S')
            if sensor_info['utc_offset']:
                sample_dt = sample_dt + timedelta(hours=sensor_info['utc_offset'])

            # initialize for new profile at zero for averaging samples within each bin
            wtemp = numpy.zeros(nbins)
            cond = numpy.zeros(nbins)
            salin = numpy.zeros(nbins)
            turb = numpy.zeros(nbins)
            ph = numpy.zeros(nbins)
            chl = numpy.zeros(nbins)
            do = numpy.zeros(nbins)
            Ns = numpy.zeros(nbins)  # count samples per bin for averaging
            profile_open = True
        elif len(ysi) == 14:
            # 6 date/time fields followed by 8 measurements
            depth = -1*ysi[9]  # depth (m, positive up)
            # boolean mask of the bin with z <= depth < z + bin_size
            ibin = (z <= depth) & (depth < (z + bin_size))

            Ns[ibin] = Ns[ibin] + 1
            wtemp[ibin] = wtemp[ibin] + ysi[6]   # water temperature (C)
            cond[ibin] = cond[ibin] + ysi[7]     # conductivity (mS/cm)
            salin[ibin] = salin[ibin] + ysi[8]   # salinity (ppt or PSU??)
            #
            ph[ibin] = ph[ibin] + ysi[10]        # ph
            turb[ibin] = turb[ibin] + ysi[11]    # turbidity (NTU)
            chl[ibin] = chl[ibin] + ysi[12]      # chlorophyll (ug/l)
            do[ibin] = do[ibin] + ysi[13]        # dissolved oxygen (mg/l)

        elif len(ysi) == 0 and profile_open:
            # each profile separated by empty line
            # average summations by sample count per bin
            # where count is zero make it NaN so average is not divide by zero
            Ns[Ns == 0] = numpy.nan

            data['dt'][i] = sample_dt  # sample datetime
            data['time'][i] = dt2es(sample_dt)  # sample time in epoch seconds
            data['wd'][i] = wd
            data['z'] = z
            # divide by counts
            data['wtemp'][i] = wtemp/Ns
            data['cond'][i] = cond/Ns
            data['salin'][i] = salin/Ns
            data['turb'][i] = turb/Ns
            data['ph'][i] = ph/Ns
            data['chl'][i] = chl/Ns
            data['do'][i] = do/Ns

            i = i + 1
            profile_open = False

        # if-elif
    # for line

    return data
199  
200
def creator(platform_info, sensor_info, data):
    """
    Assemble everything needed to create a new monthly netCDF file.

    Parameters
    ----------
    platform_info : dict
        Reads 'location', 'institution', 'id', 'lat' and 'lon'.
    sensor_info : dict
        Reads 'description', 'process_module' and 'id'.
    data : dict
        Parsed data.  Reads 'dt', 'in' (index selecting the month being
        processed, see raw2proc.process()), 'time', 'wtemp', 'cond',
        'salin', 'depth', 'turb', 'ph', 'chl' and 'do'.

    Returns
    -------
    tuple
        (global_atts, var_atts, dim_inits, var_inits, var_data)

    NOTE(review): parser() in this module does not put a 'depth' entry in
    the data dict (it stores water depth under 'wd'), so data['depth']
    raises KeyError unless the caller adds it -- confirm intended source.
    NOTE(review): 'nz' is 1 and the data variables are declared on
    ('ntime',) only, while parser() produces (nprofiles, nbins) arrays --
    confirm this layout is intended for the CDL1 output.
    """
    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        #
        'source' : 'fixed-automated-profiler observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-profiler',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url' : 'http://nccoos.unc.edu',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : data['dt'][0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : -99999.,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis (latitude above is Y)
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'mS cm-1',
                 },
        'salin': {'short_name': 'salin',
                  'long_name': 'Salinity',
                  'standard_name': 'salinity',
                  'units': 'PSU',
                  },
        'depth': {'short_name': 'depth',
                  'long_name': 'Depth',
                  'standard_name': 'depth',
                  'units': 'm',
                  'reference':'zero at sea-surface',
                  'positive' : 'up',
                  },
        'turb': {'short_name': 'turb',
                 'long_name': 'Turbidity',
                 'standard_name': 'turbidity',
                 'units': 'NTU',
                 },
        'ph': {'short_name': 'ph',
               'long_name': 'pH',
               'standard_name': 'ph',
               'units': '',
               },
        'chl': {'short_name': 'chl',
                'long_name': 'Chlorophyll',
                'standard_name': 'chlorophyll',
                'units': 'ug l-1',
                },
        'do': {'short_name': 'do',
               'long_name': 'Dissolved Oxygen',
               'standard_name': 'dissolved_oxygen',
               'units': 'mg l-1',
               },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('ntime', NC.UNLIMITED),
        ('nlat', 1),
        ('nlon', 1),
        ('nz', 1),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('ntime',)),
        ('lat', NC.FLOAT, ('nlat',)),
        ('lon', NC.FLOAT, ('nlon',)),
        ('z',  NC.FLOAT, ('nz',)),
        # data variables
        ('wtemp', NC.FLOAT, ('ntime',)),
        ('cond', NC.FLOAT, ('ntime',)),
        ('salin', NC.FLOAT, ('ntime',)),
        ('depth', NC.FLOAT, ('ntime',)),
        ('turb', NC.FLOAT, ('ntime',)),
        ('ph', NC.FLOAT, ('ntime',)),
        ('chl', NC.FLOAT, ('ntime',)),
        ('do', NC.FLOAT, ('ntime',)),
        )

    # subset data only to month being processed (see raw2proc.process())
    i = data['in']

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        #
        ('time', data['time'][i]),
        ('z', 0),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('depth', data['depth'][i]),
        ('turb', data['turb'][i]),
        ('ph', data['ph'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
370
def updater(platform_info, sensor_info, data):
    """
    Assemble what is needed to append new samples to an existing monthly
    netCDF file.

    platform_info and sensor_info are accepted for interface parity with
    creator() but are not read here.

    data is the parsed-data dict; reads 'dt', 'in' (index selecting the
    month being processed, see raw2proc.process()) and the per-variable
    arrays listed in ``appended`` below.

    Returns (global_atts, var_atts, var_data).

    NOTE(review): parser() in this module does not create a 'depth' entry,
    so data['depth'] raises KeyError unless the caller supplies it.
    """
    stamp_fmt = "%Y-%m-%d %H:%M:%S"

    # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
    last_sample = data['dt'][-1].strftime(stamp_fmt)  # last date in monthly file
    processed_at = now_dt.strftime(stamp_fmt)
    global_atts = {
        'end_date' : last_sample,
        'release_date' : processed_at,
        'modification_date' : processed_at,
        }

    # no per-variable attributes (range, min, max, ...) are updated yet
    var_atts = {}

    # subset data only to month being processed (see raw2proc.process())
    month_sel = data['in']

    # variables appended along the time dimension, in write order
    appended = ('time', 'wtemp', 'cond', 'salin', 'depth',
                'turb', 'ph', 'chl', 'do')
    var_data = tuple([(name, data[name][month_sel]) for name in appended])

    return (global_atts, var_atts, var_data)
411 #
412
Note: See TracBrowser for help on using the browser.