NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/tags/raw2proc-1.0/proc_cr10x_wq_v2.py

Revision 320 (checked in by haines, 14 years ago)

catch-up trunk to production code running on cromwell

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2009-12-15 08:32:33 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse water quality data from YSI 6600 V2
8 collected on Campbell Scientific DataLogger (loggernet) (csi)
9
10 parser : sample date and time,
11          water temperature, conductivity, pH, dissolved oxygen, turbidity, and system battery
12
13 creator : lat, lon, z, time, wtemp, cond, ph, turb, do_sat, do_mg, battvolts
14 updater : time, wtemp, cond, ph, turb, do_sat, do_mg, battvolts
15
16
17 Examples
18 --------
19
20 >> (parse, create, update) = load_processors('proc_csi_wq_v2')
21 or
22 >> si = get_config(cn+'.sensor_info')
23 >> (parse, create, update) = load_processors(si['adcp']['proc_module'])
24
25 >> lines = load_data(filename)
26 >> data = parse(platform_info, sensor_info, lines)
27 >> create(platform_info, sensor_info, data) or
28 >> update(platform_info, sensor_info, data)
29
30 """
31
32
33 from raw2proc import *
34 from procutil import *
35 from ncutil import *
36
# Module-load timestamp (UTC) used for the release/creation/modification
# date attributes.  datetime objects are immutable, so replace() returns a
# new object; the result must be assigned for the microseconds to be dropped.
now_dt = datetime.utcnow().replace(microsecond=0)
39
def parser(platform_info, sensor_info, lines):
    """
    Parse hourly water-quality records from CSI datalogger output.

    Only lines belonging to output table 1 (those starting with "1,") are
    parsed; all other lines (e.g. the 15-minute table "15,") are skipped.
    A table-1 line is used only if exactly 11 numeric fields are found.

    From FSL (CSI datalogger program files):

    15 Output_Table  15.00 Min
    1 15 L
    2 Year_RTM  L
    3 Day_RTM  L
    4 Hour_Minute_RTM  L
    5 Rain15sec_TOT  L
    6 SonLevlft  L
    7 SonFlow  L
    8 PrDepthft  L
    9 PrFlowcfs  L

    1 Output_Table  60.00 Min
    1 1 L
    2 Year_RTM  L
    3 Day_RTM  L
    4 Hour_Minute_RTM  L
    5 H2OTempC  L
    6 SpCond  L
    7 DOSat  L
    8 DOmg  L
    9 PH  L
    10 Turb  L
    11 BattVolts  L

    Example data:

    1,2005,83,1600,16.47,0,.4,.04,8.14,115.5,14.25
    15,2005,83,1615,0,4.551,-.547,.897,.885
    15,2005,83,1630,0,4.541,.727,.908,1.005
    15,2005,83,1645,0,4.537,6.731,.878,.676
    15,2005,83,1700,0,4.537,6.731,.83,.167
    1,2005,83,1700,16.57,0,.4,.03,8.03,145.7,13.08
    15,2005,83,1715,0,4.547,5.29,.847,.347
    15,2005,83,1730,0,4.541,.908,.842,.287
    15,2005,83,1745,0,4.547,7.3,.853,.407
    15,2005,83,1800,0,4.551,6.939,.855,.437
    1,2005,83,1800,15.65,0,.2,.02,7.91,111.3,12.98

    Parameters
    ----------
    platform_info : not read by this parser
    sensor_info : mapping; only sensor_info['fn'] (raw data filename) is read
    lines : iterable of strings, one raw data line each

    Returns
    -------
    dict of numpy arrays keyed by 'dt' (datetime objects), 'time' (value
    returned by dt2es), 'wtemp', 'cond', 'do_sat', 'do_mg', 'ph', 'turb'
    and 'battvolts'.  If at least one record has a resolved datetime, every
    array is reduced to the records that do; otherwise the NaN-filled
    arrays are returned as allocated.
    """
    import numpy
    from datetime import datetime

    # get sample datetime from filename
    # NOTE(review): sample_dt_start is not used below; the call is kept so a
    # missing sensor_info['fn'] or a filename filt_datetime cannot handle
    # still fails here as before -- confirm and remove if that is not wanted.
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # select the table-1 records once; this also gives the sample count
    records = [line for line in lines if line.startswith('1,')]
    N = len(records)

    # ones()*nan always produces a float array, so float is requested
    # directly for 'time' as well; 'dt' stays an object array so it can
    # hold datetime instances.
    data = {
        'dt' : numpy.array(numpy.ones((N,), dtype=object)*numpy.nan),
        'time' : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'wtemp' : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'cond' : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'do_sat' : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'do_mg' : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'ph' : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'turb' : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        'battvolts' : numpy.array(numpy.ones((N,), dtype=float)*numpy.nan),
        }

    # index of the next slot to fill; advances only for accepted records
    i = 0

    for line in records:
        # keep every comma-separated field that looks like a real number
        csi = []
        for s in line.split(','):
            m = re.search(REAL_RE_STR, s)
            if m:
                csi.append(float(m.groups()[0]))

        if len(csi) != 11:
            # malformed or truncated record
            continue

        # get sample datetime from data
        yyyy = csi[1]
        yday = csi[2]
        # Split HHMM with integer arithmetic.  Taking the fractional part of
        # HHMM/100. and applying ceil() can round a tiny positive
        # floating-point error up to the next minute (e.g. 0130 -> 01:31).
        (HH, MM) = divmod(int(round(csi[3])), 100)
        if HH == 24:
            # logger reports midnight as 2400 of the ending day
            yday = yday+1
            HH = 0

        sample_str = '%04d-%03d %02d:%02d' % (yyyy, yday, HH, MM)
        # if  sensor_info['utc_offset']:
        #     sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%Y %H:%M:%S') + \
        #                 timedelta(hours=sensor_info['utc_offset'])
        # else:
        sample_dt = scanf_datetime(sample_str, fmt='%Y-%j %H:%M')

        data['dt'][i] = sample_dt # sample datetime
        data['time'][i] = dt2es(sample_dt) # sample time in epoch seconds
        #
        data['wtemp'][i] = csi[4] # water temperature (C)
        data['cond'][i] = csi[5] # specific conductivity (mS/cm)
        data['do_sat'][i] = csi[6] # saturated dissolved oxygen (% air sat)
        data['do_mg'][i] = csi[7] # dissolved oxygen (mg/l)
        data['ph'][i] = csi[8] # ph
        data['turb'][i] = csi[9] # turbidity (NTU)
        data['battvolts'][i] = csi[10] # battery (volts)

        i = i+1
    # for line

    # check that no data[dt] is set to Nan or anything but datetime
    # keep only data that has a resolved datetime
    keep = numpy.array([isinstance(dt, datetime) for dt in data['dt'][:]])
    if keep.any():
        for param in data.keys():
            data[param] = data[param][keep]

    return data
171
172  
173
def creator(platform_info, sensor_info, data):
    """
    Assemble everything needed to create a new monthly netCDF file.

    Parameters
    ----------
    platform_info : mapping; keys read here are 'location', 'institution',
        'id', 'lat', 'lon' and 'altitude'
    sensor_info : mapping; keys read here are 'description',
        'process_module' and 'id'
    data : mapping of arrays as returned by parser(), plus data['in'], the
        index selecting the month being processed (see raw2proc.process())

    Returns
    -------
    (global_atts, var_atts, dim_inits, var_inits, var_data)
        global_atts : dict of global attributes
        var_atts    : dict of per-variable attribute dicts
        dim_inits   : tuple of (dimName, size)
        var_inits   : tuple of (varName, varType, (dimName1, ...))
        var_data    : tuple of (varName, values)
    """
    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://ehs.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        'station_owner' : 'Environment, Health, and Safety Office',
        'station_contact' : 'Sharon Myers (samyers@ehs.unc.edu)',
        #
        'source' : 'fixed-point observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-point',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'Environment, Health, and Safety (EHS)',
        'project_url' : 'http://ehs.unc.edu/environment/water_quality',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : data['dt'][0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : -99999.,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis (latitude is Y)
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees_Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'mS cm-1',
                 },
        'turb': {'short_name': 'turb',
                 'long_name': 'Turbidity',
                 'standard_name': 'turbidity',
                 'units': 'NTU',
                 },
        'ph': {'short_name': 'ph',
               'long_name': 'pH',
               'standard_name': 'ph',
               'units': '',
               },
        'do_mg': {'short_name': 'do_mg',
                  'long_name': 'ROX Optical Dissolved Oxygen, Derived Concentration',
                  'standard_name': 'dissolved_oxygen_concentration',
                  'units': 'mg l-1',
                  },
        'do_sat': {'short_name': 'do_sat',
                   'long_name': 'ROX Optical Dissolved Oxygen, Percent of Air Saturation',
                   'standard_name': 'dissolved_oxygen_relative_to_air_saturation',
                   'units': '%',
                   },
        'battvolts': {'short_name': 'battery',
                      'long_name': 'Battery Voltage of the Station',
                      'standard_name': 'battery_voltage',
                      'units': 'volts',
                      },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('ntime', NC.UNLIMITED),
        ('nlat', 1),
        ('nlon', 1),
        ('nz', 1),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('ntime',)),
        ('lat', NC.FLOAT, ('nlat',)),
        ('lon', NC.FLOAT, ('nlon',)),
        ('z',  NC.FLOAT, ('nz',)),
        # data variables
        ('wtemp', NC.FLOAT, ('ntime',)),
        ('cond', NC.FLOAT, ('ntime',)),
        ('turb', NC.FLOAT, ('ntime',)),
        ('ph', NC.FLOAT, ('ntime',)),
        ('do_mg', NC.FLOAT, ('ntime',)),
        ('do_sat', NC.FLOAT, ('ntime',)),
        ('battvolts', NC.FLOAT, ('ntime',)),
        )

    # subset data only to month being processed (see raw2proc.process())
    i = data['in']

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('z', platform_info['altitude']),
        #
        ('time', data['time'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('turb', data['turb'][i]),
        ('ph', data['ph'][i]),
        ('do_mg', data['do_mg'][i]),
        ('do_sat', data['do_sat'][i]),
        ('battvolts', data['battvolts'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
336
def updater(platform_info, sensor_info, data):
    """
    Assemble what is needed to append new samples to an existing monthly
    netCDF file.

    Reads data['dt'] for the last sample time, data['in'] for the index of
    the month being processed (see raw2proc.process()), and the per-variable
    arrays in data.  platform_info and sensor_info are not read here.

    Returns (global_atts, var_atts, var_data):
        global_atts : dict with refreshed 'end_date', 'release_date' and
                      'modification_date' strings (yyyy-mm-dd HH:MM:SS)
        var_atts    : dict of variable attributes to update (currently empty)
        var_data    : tuple of (varName, values) for the selected month
    """
    stamp_fmt = "%Y-%m-%d %H:%M:%S"

    # last date in monthly file
    end_str = data['dt'][-1].strftime(stamp_fmt)
    # processing time, shared by the release and modification dates
    run_str = now_dt.strftime(stamp_fmt)

    global_atts = {
        'end_date' : end_str,
        'release_date' : run_str,
        'modification_date' : run_str,
        }

    # no per-variable attributes (range, min, max, ...) are updated yet
    var_atts = {}

    # subset data only to month being processed (see raw2proc.process())
    month_index = data['in']

    # order matches the original explicit listing
    var_names = ('time', 'wtemp', 'cond', 'turb', 'ph',
                 'do_mg', 'do_sat', 'battvolts')
    var_data = tuple([(name, data[name][month_index]) for name in var_names])

    return (global_atts, var_atts, var_data)
376 #
Note: See TracBrowser for help on using the browser.