
root/raw2proc/trunk/raw2proc/proc_avp_ysi_6600_v1_CDL2.py

Revision 219 (checked in by haines, 15 years ago)

AVP fixed profiler CDL2; added YSI 6600 V1 to Hampton processing

#!/usr/bin/env python
# Last modified:  Time-stamp: <2009-01-08 19:45:28 haines>
"""
How to parse data, and assert what data and info goes into
creating and updating monthly netCDF files.

Parse data from a YSI 6600 V1 on an automated vertical profiler (AVP).

parser : date and time, water_depth for each profile

         sample time and sample depth as each cast measures water
         temperature, conductivity, salinity, dissolved oxygen,
         turbidity, and chlorophyll (no pH)

creator : lat, lon, z, stime, (time, water_depth), water_temp, cond,
          salin, turb, chl, do

updater : z, stime, (time, water_depth), water_temp, cond, salin,
          turb, chl, do

Uses the fixed-profiler CDL, but modified to have raw data for each
cast along each column.

Examples
--------

>> (parse, create, update) = load_processors('proc_avp_ysi_6600_v1')
or
>> si = get_config(cn+'.sensor_info')
>> (parse, create, update) = load_processors(si['adcp']['proc_module'])

>> lines = load_data(filename)
>> data = parse(platform_info, sensor_info, lines)
>> create(platform_info, sensor_info, data) or
>> update(platform_info, sensor_info, data)

(An illustrative sketch of the platform_info and sensor_info keys this
module reads follows the imports below.)

"""

from raw2proc import *
from procutil import *
from ncutil import *

now_dt = datetime.utcnow()
now_dt = now_dt.replace(microsecond=0)

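# Illustrative sketch only (not part of the original module): example
# platform_info and sensor_info dictionaries showing the keys that the
# parser/creator/updater below actually read.  Every value is a made-up
# placeholder; real values come from the raw2proc configuration files
# loaded via get_config (see the module docstring).
_example_platform_info = {
    'id' : 'hampton',               # placeholder platform code
    'location' : 'Hampton Shoal, Neuse River, NC',   # placeholder
    'institution' : 'unc',          # placeholder institution code
    'lat' : 35.0,                   # placeholder, decimal degrees north
    'lon' : -76.0,                  # placeholder, decimal degrees east
    }
_example_sensor_info = {
    'id' : 'avp',                   # placeholder package code
    'description' : 'Automated profiler water quality',   # placeholder
    'process_module' : 'proc_avp_ysi_6600_v1_CDL2',
    'fn' : 'raw_data_file.dat',     # placeholder raw file name (parsed for start time)
    'nbins' : 150,                  # placeholder, max samples kept per cast
    'utc_offset' : 5,               # placeholder, hours added to convert local time to UTC
    }
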
def parser(platform_info, sensor_info, lines):
    """
    parse Automated Vertical Profile Station (AVP) Water Quality Data

    month, day, year, hour, min, sec, temp (deg. C), conductivity
    (mS/cm), salinity (ppt or PSU), depth (meters), pH, turbidity (NTU),
    chlorophyll (micrograms per liter), DO (micrograms per liter)

    Notes
    -----
    1. Column format: YSI 6600 V1 has no pH

    temp, cond,   salin,  DO,    depth, turb,  chl
    (C), (mS/cm), (ppt), (ug/l), (m),   (NTU), (ug/l)

(from Aug 2005 to Sep 03 2008)
    profile time: 00:00:56
    profile date: 01/31/2006
    profile location: P180, Instrument Serial No: 0001119E
    01/31/06 00:01:31 10.99  7.501   4.16  13.22   0.516     6.0  11.5
    01/31/06 00:01:32 11.00  7.463   4.13  13.22   0.526     6.0  11.4
    01/31/06 00:01:33 11.00  7.442   4.12  13.22   0.538     6.0  11.4
    01/31/06 00:01:34 11.00  7.496   4.15  13.11   0.556     6.0  11.3
(no data from Sep 03 to 30, 2008)
(from Sep 30 2008 to now, still YSI 6600 V1, just a header change)
    Profile Time: 11:38:00
    Profile Date: 01/06/2009
    Profile Depth: 380.0 cm
    Profile Location: Hampton Shoal Serial No: 000109DD, ID: Delta
    01/06/09 11:38:44 11.16  14.59   8.49  17.86   0.171     4.5  50.4
    01/06/09 11:38:45 11.16  14.59   8.49  17.86   0.190     4.5  51.8
    01/06/09 11:38:46 11.16  14.59   8.49  17.88   0.220     4.6  53.0
    01/06/09 11:38:47 11.16  14.59   8.49  17.88   0.257     4.6  53.9
    01/06/09 11:38:48 11.16  14.59   8.49  17.88   0.448     4.6  54.3

2.  Use a ragged array to store each uniquely measured param at each
    time and depth but not gridded, so this uses the fixed-profiler CDL
    but modified to have raw data for each cast along each column.
    For plotting, the data will need to be gridded at specified depth
    bins (see the illustrative binning sketch after this function).

    Tony Whipple at IMS says 'The AVPs sample at one second intervals.
    Between the waves and the instrument descending from a spool of
    line with variable radius it works out to about 3-5 cm between
    observations on average.  When I process the data to make the
    images, I bin the data every 10 cm and take the average of however
    many observations fell within that bin.'

    """
    import re
    import numpy
    from datetime import datetime, timedelta
    from time import strptime

    # get sample datetime from filename
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # how many profiles in one file, count number of "Profile Time:" in lines
    nprof = 0
    for line in lines:
        m = re.search("Profile Time:", line, re.IGNORECASE)
        if m:
            nprof = nprof + 1

    # remove first occurrence of blank line if within first 40 lines
    for i in range(len(lines[0:40])):
        if re.search("^ \r\n", lines[i]):
            # print str(i) + " " + lines[i] + " " + lines[i+1]
            blank_line = lines.pop(i)
            # lines.append(blank_line)
    # ensure signal end of profile after last profile by appending a blank line to data file
    lines.append(' \r\n')

    # ensure blank line between profile casts
    for i, line in enumerate(lines):
        if re.search(r"Profile Time", line, re.IGNORECASE):
            if not re.search("^ \r\n", lines[i-1]):
                lines.insert(i, " \r\n")

    N = nprof
    nbins = sensor_info['nbins']

    data = {
        'dt' : numpy.array(numpy.ones((N,), dtype=object)*numpy.nan),
        'time' : numpy.array(numpy.ones((N,), dtype=long)*numpy.nan),
        'z' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        #
        'ysi_sn' : numpy.array(['' for i in range(N)], dtype='|S20'),
        'ysi_id' : numpy.array(['' for i in range(N)], dtype='|S20'),
        #
        'stime' : numpy.array(numpy.ones((N,nbins), dtype=long)*numpy.nan),
        'wtemp' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'cond' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'salin' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'turb' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'chl' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        'do' : numpy.array(numpy.ones((N,nbins), dtype=float)*numpy.nan),
        }

    # current profile count
    i = 0
    have_date = have_time = have_location = have_head = False
    verbose = False

    for line in lines:
        # if line has weird ascii chars -- skip it and iterate to next line
        if re.search(r"[\x1a]", line):
            if verbose:
                print 'skipping bad data line ... ' + str(line)
            continue

        ysi = []
        # split line and parse float and integers
        sw = re.split('[\s/\:]*', line)
        for s in sw:
            m = re.search(REAL_RE_STR, s)
            if m:
                ysi.append(float(m.groups()[0]))

        if re.search("Profile Time:", line, re.IGNORECASE):
            have_time = True
            HH = ysi[0]
            MM = ysi[1]
            SS = ysi[2]
        elif re.search("Profile Date:", line, re.IGNORECASE):
            have_date = True
            mm = ysi[0]
            dd = ysi[1]
            yyyy = ysi[2]

            profile_str = '%02d-%02d-%4d %02d:%02d:%02d' % (mm,dd,yyyy,HH,MM,SS)
            if sensor_info['utc_offset']:
                profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S') + \
                             timedelta(hours=sensor_info['utc_offset'])
            else:
                profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S')
        elif re.search("Profile Location:", line, re.IGNORECASE):
            have_location = True
            # profile location: P180, Instrument Serial No: 0001119E
            # Profile Location: Hampton Shoal Serial No: 000109DD, ID: Delta
            sw = re.findall(r'\w+:\s(\w+)*', line)
            if len(sw)>=2: ysi_sn = sw[1]
            else: ysi_sn = 'not known'
            if len(sw)>=3: ysi_id = sw[2]
            else: ysi_id = 'not known'

            # initialize for new profile at zero for averaging samples within each bin
            wtemp = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            depth = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            cond = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            salin = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            turb = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            chl = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            do = numpy.array(numpy.ones(nbins,), dtype=float)*numpy.nan
            stime = numpy.array(numpy.ones(nbins,), dtype=long)*numpy.nan
            # keep track of number of samples in one profile so as not to exceed nbins
            j = 0
            # have all the header stuff
            head = numpy.array([have_date, have_time, have_location])
            have_head = head.all()

        elif (len(ysi)==13 and have_head):
            if j>=nbins:
                print 'Sample number (' + str(j) + \
                      ') in profile exceeds maximum value (' + \
                      str(nbins) + ') in config'

            # get sample datetime from data
            sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(ysi[0:6])
            if sensor_info['utc_offset']:
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S') + \
                            timedelta(hours=sensor_info['utc_offset'])
            else:
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S')

            if j<nbins:
                stime[j] = dt2es(sample_dt) # sample time
                wtemp[j] = ysi[6] # water temperature (C)
                cond[j] = ysi[7]  # conductivity (mS/cm)
                salin[j] = ysi[8] # salinity (ppt or PSU??)
                do[j] = ysi[9]    # dissolved oxygen (mg/l)
                #
                depth[j] = ysi[10] # depth (m, positive down)
                #
                turb[j] = ysi[11] # turbidity (NTU)
                chl[j] = ysi[12]  # chlorophyll (ug/l)

            j = j+1

        elif (len(ysi)==0 and have_head and i<N):  # each profile separated by empty line

            data['dt'][i] = profile_dt # profile datetime
            data['time'][i] = dt2es(profile_dt) # profile time in epoch seconds
            data['ysi_sn'][i] = ysi_sn
            data['ysi_id'][i] = ysi_id
            #
            data['stime'][i] = stime # sample time in epoch seconds
            data['z'][i] = -1.*depth
            #
            data['wtemp'][i] = wtemp
            data['cond'][i] = cond
            data['salin'][i] = salin
            data['turb'][i] = turb
            data['chl'][i] = chl
            data['do'][i] = do

            i = i+1
            have_date = have_time = have_location = have_head = False
        else:
            if verbose:
                print 'skipping bad data line ... ' + str(line)
        # if-elif
    # for line

    return data

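# Illustrative sketch only (not part of the original module): the parser
# notes say that, for plotting, the raw cast samples need to be gridded
# at specified depth bins (e.g. averaged into 10 cm bins, as described
# by Tony Whipple).  A minimal example of that binning, assuming z is
# height (m, negative down) as returned by parser() and values is the
# same length as z; the function name and default bin limits are
# hypothetical.
def _example_bin_profile(z, values, bin_size=0.1, zmin=-20.0, zmax=0.0):
    """Average raw cast samples into fixed depth bins (illustration only)."""
    import numpy
    edges = numpy.arange(zmin, zmax + bin_size, bin_size)  # bin edges (m)
    centers = edges[:-1] + bin_size/2.0                    # bin mid-points (m)
    binned = numpy.array(numpy.ones(len(centers),), dtype=float)*numpy.nan
    for k in range(len(centers)):
        # samples whose height falls within this bin, ignoring NaNs
        sel = (z >= edges[k]) & (z < edges[k+1]) & ~numpy.isnan(values)
        if sel.any():
            binned[k] = values[sel].mean()
    return (centers, binned)
# e.g. (zc, wtemp_binned) = _example_bin_profile(data['z'][0], data['wtemp'][0])
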

def creator(platform_info, sensor_info, data):
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    title_str = sensor_info['description'] + ' at ' + platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        #
        'source' : 'fixed-automated-profiler observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-profiler-ragged',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url' : 'http://nccoos.unc.edu',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : dt[0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : numpy.nan,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time of Profile',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference': 'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range': (-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference': 'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range': (-180.,180.),
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference': 'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'stime' : {'short_name': 'stime',
                   'long_name': 'Time of Sample',
                   'standard_name': 'time',
                   'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                   },
        'ysi_id' : {'short_name': 'ysi_id',
                    'long_name': 'Identification name of YSI Sonde',
                    'standard_name': 'identification_name',
                    },
        'ysi_sn' : {'short_name': 'ysi_sn',
                    'long_name': 'Serial number of YSI Sonde',
                    'standard_name': 'serial_number',
                    },
        'wtemp' : {'short_name': 'wtemp',
                   'long_name': 'Water Temperature',
                   'standard_name': 'water_temperature',
                   'units': 'degrees Celsius',
                   },
        'cond' : {'short_name': 'cond',
                  'long_name': 'Conductivity',
                  'standard_name': 'conductivity',
                  'units': 'mS cm-1',
                  },
        'salin' : {'short_name': 'salin',
                   'long_name': 'Salinity',
                   'standard_name': 'salinity',
                   'units': 'PSU',
                   },
        'turb' : {'short_name': 'turb',
                  'long_name': 'Turbidity',
                  'standard_name': 'turbidity',
                  'units': 'NTU',
                  },
        'chl' : {'short_name': 'chl',
                 'long_name': 'Chlorophyll',
                 'standard_name': 'chlorophyll',
                 'units': 'ug l-1',
                 },
        'do' : {'short_name': 'do',
                'long_name': 'Dissolved Oxygen',
                'standard_name': 'dissolved_oxygen',
                'units': 'mg l-1',
                },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('time', NC.UNLIMITED),
        ('lat', 1),
        ('lon', 1),
        ('z', sensor_info['nbins']),
        ('nchar', 20),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('time',)),
        ('lat', NC.FLOAT, ('lat',)),
        ('lon', NC.FLOAT, ('lon',)),
        ('z', NC.FLOAT, ('time', 'z',)),
        # data variables
        # ('ysi_sn', NC.CHAR, ('time', 'nchar')),
        # ('ysi_id', NC.CHAR, ('time', 'nchar')),
        ('stime', NC.INT, ('time', 'z')),
        ('wtemp', NC.FLOAT, ('time', 'z')),
        ('cond', NC.FLOAT, ('time', 'z')),
        ('salin', NC.FLOAT, ('time', 'z')),
        ('turb', NC.FLOAT, ('time', 'z')),
        ('chl', NC.FLOAT, ('time', 'z')),
        ('do', NC.FLOAT, ('time', 'z')),
        )

    # var data
    var_data = (
        ('lat', platform_info['lat']),
        ('lon', platform_info['lon']),
        ('time', data['time'][i]),
        # ('ysi_id', data['ysi_id'][i]),
        # ('ysi_sn', data['ysi_sn'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)

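# Illustrative sketch only (not part of the original module): raw2proc
# and ncutil consume the (global_atts, var_atts, dim_inits, var_inits,
# var_data) tuples returned by creator(); that API is not reproduced
# here.  Purely to show how those tuples map onto a netCDF file, below
# is a hypothetical writer using the netCDF4 package instead of pycdf.
# The function name and the NC.FLOAT -> 'f4' / else 'i4' type mapping
# are assumptions for illustration; NC.* constants come from the
# module's existing pycdf-based imports.
def _example_write_netcdf(ncfile, global_atts, var_atts, dim_inits, var_inits, var_data):
    """Write creator() output to a netCDF file (illustration only)."""
    from netCDF4 import Dataset
    nc = Dataset(ncfile, 'w')
    # global attributes (skip _FillValue, which netCDF4 treats per-variable)
    for name, value in global_atts.items():
        if not name.startswith('_'):
            setattr(nc, name, value)
    # dimensions, in declared order (None means unlimited)
    for name, size in dim_inits:
        nc.createDimension(name, None if size == NC.UNLIMITED else size)
    # variables and their attributes
    for name, nctype, dims in var_inits:
        v = nc.createVariable(name, 'f4' if nctype == NC.FLOAT else 'i4', dims)
        for aname, avalue in var_atts.get(name, {}).items():
            setattr(v, aname, avalue)
    # variable data
    for name, values in var_data:
        nc.variables[name][:] = values
    nc.close()
# e.g. _example_write_netcdf('avp.nc', *creator(platform_info, sensor_info, data))
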
def updater(platform_info, sensor_info, data):
    #
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']
    dt = data['dt'][i]
    #
    global_atts = {
        # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
        # last date in monthly file
        'end_date' : dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        }

    # data variables
    # update any variable attributes like range, min, max
    var_atts = {}
    # var_atts = {
    #    'wtemp': {'max': max(data['wtemp']),
    #              'min': min(data['wtemp']),
    #              },
    #    'cond': {'max': max(data['cond']),
    #             'min': min(data['cond']),
    #             },
    #    }

    # data
    var_data = (
        ('time', data['time'][i]),
        # ('ysi_id', data['ysi_id'][i]),
        # ('ysi_sn', data['ysi_sn'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
        )

    return (global_atts, var_atts, var_data)
#