NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/proc_avp_ysi_6600_v2.py

Revision 208 (checked in by haines, 16 years ago)

avp and met for profiler stations morgan and stones

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2008-09-09 15:09:47 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse data from YSI 6600 V2-2 on an automated vertical profiler (avp)
8
9 parser : sample date and time, water_depth for each profile
10          water temperature, conductivity, pressure (depth), salinity, pH, dissolved oxygen, turbidity, and chlorophyll
11          raw data averaged to 10 cm bins
12
13 creator : lat, lon, z, time, water_depth, water_temp, cond, salin, ph, turb, chl, do
14 updater : time, water_depth, water_temp, cond, salin, ph, turb, chl, do
15
16
17 Examples
18 --------
19
20 >> (parse, create, update) = load_processors('proc_avp_ysi_6600_v2')
21 or
22 >> si = get_config(cn+'.sensor_info')
23 >> (parse, create, update) = load_processors(si['adcp']['proc_module'])
24
25 >> lines = load_data(filename)
26 >> data = parse(platform_info, sensor_info, lines)
27 >> create(platform_info, sensor_info, data) or
28 >> update(platform_info, sensor_info, data)
29
30 """
31
32
33 from raw2proc import *
34 from procutil import *
35 from ncutil import *
36
# Processing timestamp (UTC) shared by creator() and updater(), truncated to
# whole seconds.  datetime objects are immutable, so the value returned by
# replace() has to be kept -- calling it as a bare statement changes nothing.
now_dt = datetime.utcnow().replace(microsecond=0)
39
def parser(platform_info, sensor_info, lines):
    """
    parse Automated Vertical Profile Station (AVP) Water Quality Data

    month, day, year, hour, min, sec, temp (deg. C), conductivity
    (mS/cm), salinity (ppt or PSU), depth (meters), pH, turbidity (NTU),
    chlorophyll (micrograms per liter), DO (milligrams per liter)

    Parameters
    ----------
    platform_info : dict, reads 'water_depth' (nominal water depth, meters)
    sensor_info : dict, reads 'fn', 'bin_size' (meters), 'nbins' and
        'utc_offset' (hours added to the profile time; skipped when false)
    lines : list of raw text lines from one data file (not modified)

    Returns
    -------
    data : dict with one entry per profile in
        'dt' (datetime), 'time' (epoch seconds), 'wd' (water depth, m),
        the bin heights in 'z' (nbins), and bin-averaged
        'wtemp', 'cond', 'salin', 'turb', 'ph', 'chl', 'do' (nprof x nbins).
        Bins without any sample are NaN.

    Notes
    -----
    1. Column Format

    temp, cond, salin, depth, pH, turb, chl, DO
    (C), (mS/cm), (ppt), (m), pH, (NTU), (ug/l), (mg/l)

    Profile Time: 00:30:00
    Profile Date: 08/18/2008
    Profile Depth: 255.0 cm
    Profile Location: Stones Bay Serial No: 00016B79, ID: AVP1_SERDP
    08/18/08 00:30:06 26.94  41.87  26.81   0.134  8.00     3.4   4.5   6.60
    08/18/08 00:30:07 26.94  41.87  26.81   0.143  8.00     3.4   4.8   6.59
    08/18/08 00:30:08 26.94  41.87  26.81   0.160  8.00     3.4   4.8   6.62
    08/18/08 00:30:09 26.94  41.87  26.81   0.183  8.00     3.4   4.8   6.66

    2. While each parameter is measured uniquely with time and depth such that, temp(t) and z(t)
    match up with time, we want to grid depth in bins of sensor_info['bin_size']
    and make each param as temp(t,z).

    Tony Whipple at IMS says 'The AVPs sample at one second intervals.
    Between the waves and the instrument descending from a spool of
    line with variable radius it works out to about 3-5 cm between
    observations on average.  When I process the data to make the
    images, I bin the data every 10 cm and take the average of however
    many observations fell within that bin.'

    Samples are averaged (not interpolated) within each bin.

    3. A profile is stored when the blank line that follows it is read, or
    at end of input if that blank line is missing.

    """
    import numpy

    # get sample datetime from filename
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]  # NOTE(review): not used below

    # work on a copy so the caller's list is not reordered
    lines = list(lines)

    # how many profiles in one file, count number of "Profile Time:" in lines
    nprof = 0
    for line in lines:
        if re.search("Profile Time:", line):
            nprof = nprof+1

    # remove first occurrence of blank line if within first 40 lines
    # and put it on the end to signal end of profile after last profile.
    # Stop after the first one so later separators stay where they are.
    for k, line in enumerate(lines[0:40]):
        if re.search("^ \r\n", line):
            lines.append(lines.pop(k))
            break

    bin_size = sensor_info['bin_size'] # Bin Size (meters)
    nominal_depth = platform_info['water_depth']  # Mean sea level at station (meters) or nominal water depth
    z = numpy.arange(0, -1*nominal_depth, -1*bin_size, dtype=float)

    N = nprof
    nbins = len(z)

    if nbins != sensor_info['nbins']:
        print('Number of bins computed from water_depth and bin_size ('+ \
              str(nbins)+') does not match config number ('+ \
              str(sensor_info['nbins'])+')')

    # (data key, column index in a parsed data row) for each binned parameter;
    # columns 0-5 are the row's date and time, column 9 is depth
    columns = (
        ('wtemp', 6),   # water temperature (C)
        ('cond', 7),    # conductivity (mS/cm)
        ('salin', 8),   # salinity (ppt or PSU??)
        ('ph', 10),     # ph
        ('turb', 11),   # turbidity (NTU)
        ('chl', 12),    # chlorophyll (ug/l)
        ('do', 13),     # dissolved oxygen (mg/l)
        )

    data = {
        'dt' : numpy.array(numpy.ones((N,), dtype=object)*numpy.nan),
        'time' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'z' : numpy.array(numpy.ones((nbins,), dtype=float)*numpy.nan),
        #
        'wd' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        }
    for name, col in columns:
        data[name] = numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan)

    def store_profile(k, sample_dt, wd, sums, counts):
        # average the per-bin sums and write them as profile number k;
        # where count is zero make it NaN so average is not divide by zero
        counts = numpy.where(counts==0, numpy.nan, counts)
        data['dt'][k] = sample_dt # sample datetime
        data['time'][k] = dt2es(sample_dt) # sample time in epoch seconds
        data['wd'][k] = wd
        data['z'] = z
        for name in sums:
            data[name][k] = sums[name]/counts

    # one or more separators; '+' (not '*') so the pattern never matches the
    # empty string, which newer versions of re.split would also split on
    split_re = re.compile(r'[\s/:]+')

    # current profile count
    i = 0
    # True from a "Profile Depth:" header until that profile has been stored
    pending = False

    for line in lines:
        ysi = []
        # split line and parse float and integers
        for s in split_re.split(line):
            m = re.search(REAL_RE_STR, s)
            if m:
                ysi.append(float(m.groups()[0]))

        if re.search("Profile Time:", line):
            HH=ysi[0]
            MM=ysi[1]
            SS=ysi[2]
        elif re.search("Profile Date:", line):
            mm=ysi[0]
            dd=ysi[1]
            yyyy=ysi[2]
        elif re.search("Profile Depth:", line):
            wd = ysi[0]/100.  # cm to meters
            sample_str = '%02d-%02d-%4d %02d:%02d:%02d' % (mm,dd,yyyy,HH,MM,SS)
            sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%Y %H:%M:%S')
            if sensor_info['utc_offset']:
                sample_dt = sample_dt + timedelta(hours=sensor_info['utc_offset'])

            # initialize for new profile at zero for averaging samples within each bin
            sums = dict((name, numpy.zeros(nbins)) for name, col in columns)
            Ns = numpy.zeros(nbins) # count samples per bin for averaging
            pending = True
        elif len(ysi)==14:
            depth = -1*ysi[9] # depth (m, positive up)
            # the bin labelled z collects samples with z <= depth < z+bin_size
            ibin = ((z)<=depth)*(depth<(z+bin_size))

            Ns[ibin] = Ns[ibin]+1
            for name, col in columns:
                sums[name][ibin] = sums[name][ibin]+ysi[col]

        elif (len(ysi)==0):  # each profile separated by empty line
            # only store when a profile is actually open, so stray or
            # repeated blank lines are harmless
            if pending:
                store_profile(i, sample_dt, wd, sums, Ns)
                i=i+1
                pending = False

        # if-elif
    # for line

    # input ended without the blank line that closes the last profile
    if pending:
        store_profile(i, sample_dt, wd, sums, Ns)

    return data
211  
212
def creator(platform_info, sensor_info, data):
    """
    assemble everything needed to create a new monthly netcdf file

    Parameters
    ----------
    platform_info : dict, reads 'location', 'institution', 'id', 'lat', 'lon'
    sensor_info : dict, reads 'description', 'process_module', 'id', 'nbins'
    data : dict as returned by parser() plus 'in', the index selecting the
        samples of the month being processed (see raw2proc.process())

    Returns
    -------
    (global_atts, var_atts, dim_inits, var_inits, var_data)
    """
    date_fmt = "%Y-%m-%d %H:%M:%S"
    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        #
        'source' : 'fixed-automated-profiler observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-profiler',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url' : 'http://nccoos.unc.edu',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # NOTE(review): first/last are taken from all parsed samples, not
        # from the data['in'] month subset used for var_data -- confirm
        # first date in monthly file
        'start_date' : data['dt'][0].strftime(date_fmt),
        # last date in monthly file
        'end_date' : data['dt'][-1].strftime(date_fmt),
        'release_date' : now_dt.strftime(date_fmt),
        #
        'creation_date' : now_dt.strftime(date_fmt),
        'modification_date' : now_dt.strftime(date_fmt),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : -99999.,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        # longitude is the X axis and its CF standard name is 'longitude'
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'wd': {'short_name': 'wd',
               'long_name': 'Water Depth',
               'standard_name': 'water_depth',
               'units': 'm',
               },
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'mS cm-1',
                 },
        'salin': {'short_name': 'salin',
                  'long_name': 'Salinity',
                  'standard_name': 'salinity',
                  'units': 'PSU',
                  },
        'turb': {'short_name': 'turb',
                 'long_name': 'Turbidity',
                 'standard_name': 'turbidity',
                 'units': 'NTU',
                 },
        'ph': {'short_name': 'ph',
               'long_name': 'pH',
               'standard_name': 'ph',
               'units': '',
               },
        'chl': {'short_name': 'chl',
                'long_name': 'Chlorophyll',
                'standard_name': 'chlorophyll',
                'units': 'ug l-1',
                },
        'do': {'short_name': 'do',
               'long_name': 'Dissolved Oxygen',
               'standard_name': 'dissolved_oxygen',
               'units': 'mg l-1',
               },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('ntime', NC.UNLIMITED),
        ('nlat', 1),
        ('nlon', 1),
        ('nz', sensor_info['nbins'])
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('ntime',)),
        ('lat', NC.FLOAT, ('nlat',)),
        ('lon', NC.FLOAT, ('nlon',)),
        ('z',  NC.FLOAT, ('nz',)),
        # data variables
        ('wd', NC.FLOAT, ('ntime',)),
        ('wtemp', NC.FLOAT, ('ntime', 'nz')),
        ('cond', NC.FLOAT, ('ntime', 'nz')),
        ('salin', NC.FLOAT, ('ntime', 'nz')),
        ('turb', NC.FLOAT, ('ntime', 'nz')),
        ('ph', NC.FLOAT, ('ntime', 'nz')),
        ('chl', NC.FLOAT, ('ntime', 'nz')),
        ('do', NC.FLOAT, ('ntime', 'nz')),
        )

    # subset data only to month being processed (see raw2proc.process())
    i = data['in']

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('z', data['z']),
        #
        ('time', data['time'][i]),
        ('wd', data['wd'][i]),
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('ph', data['ph'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
379
def updater(platform_info, sensor_info, data):
    """
    assemble the attributes and data appended to an existing monthly netcdf file

    Reads data['dt'], data['in'] and the time-dependent variables from data;
    platform_info and sensor_info are accepted but not read here.

    Returns
    -------
    (global_atts, var_atts, var_data)
    """
    stamp_fmt = "%Y-%m-%d %H:%M:%S"

    # times of data contained in file (yyyy-mm-dd HH:MM:SS)
    newest_sample = data['dt'][-1].strftime(stamp_fmt)  # last date in monthly file
    processed_at = now_dt.strftime(stamp_fmt)

    global_atts = {}
    global_atts['end_date'] = newest_sample
    global_atts['release_date'] = processed_at
    global_atts['modification_date'] = processed_at

    # no per-variable attributes (range, min, max, ...) are updated
    var_atts = {}

    # subset data only to month being processed (see raw2proc.process())
    month = data['in']

    # every variable that grows along time, in the order it is written
    appended = ('time', 'wd', 'wtemp', 'cond', 'salin',
                'turb', 'ph', 'chl', 'do')
    var_data = tuple([(name, data[name][month]) for name in appended])

    return (global_atts, var_atts, var_data)
420 #
Note: See TracBrowser for help on using the browser.