NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/proc_cr10x_wq_v1.py

Revision 448 (checked in by cbc, 13 years ago)

Add new Billy Mitchell configs.

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2010-12-09 16:14:19 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse water quality data from YSI 6600 V2
8 collected on Campbell Scientific DataLogger (loggernet) (csi)
9
10 parser : sample date and time,
11          water temperature, conductivity, pH, dissolved oxygen, turbidity, and system battery
12
13 creator : lat, lon, z, time, wtemp, cond, ph, turb, do_sat, do_mg, battvolts
14 updater : time, wtemp, cond, ph, turb, do_sat, do_mg, battvolts
15
16
17 Examples
18 --------
19
20 >> (parse, create, update) = load_processors('proc_cr10x_wq_v1')
21 or
22 >> si = get_config(cn+'.sensor_info')
23 >> (parse, create, update) = load_processors(si['adcp']['proc_module'])
24
25 >> lines = load_data(filename)
26 >> data = parse(platform_info, sensor_info, lines)
27 >> create(platform_info, sensor_info, data) or
28 >> update(platform_info, sensor_info, data)
29
30 """
31
32
33 from raw2proc import *
34 from procutil import *
35 from ncutil import *
36
# Processing timestamp (naive UTC, truncated to whole seconds) used by
# creator() and updater() for the release/creation/modification dates.
# datetime.replace() returns a new object rather than modifying in place,
# so the truncated value has to be the one bound to now_dt.
now_dt = datetime.utcnow().replace(microsecond=0)
39
def parser(platform_info, sensor_info, lines):
    """
    Parse water-quality samples from CSI datalogger output lines.

    Only lines that start with ``1,`` are treated as sample records.  Each
    comma-separated field that matches ``REAL_RE_STR`` is converted to
    float, and a record is used only when exactly 14 numbers result.
    Positions read from the parsed numbers (0-based):

        1  year             2  day of year      3  time as HHMM
        4  wtemp (C)        5  cond (mS/cm)     6  do_sat (% air sat)
        7  do_mg (mg/l)     8  ph               9  turb (NTU)
        13 battvolts (volts)

    Positions 10-12 are present in the record but not stored.

    Parameters
    ----------
    platform_info : dict
        Platform configuration (not used by this parser).
    sensor_info : dict
        Sensor configuration; ``sensor_info['fn']`` is read.
    lines : iterable of str
        Raw lines of the datalogger file.

    Returns
    -------
    dict
        Numpy arrays keyed by 'dt' (datetime objects), 'time' (value of
        ``dt2es(dt)``), 'wtemp', 'cond', 'do_sat', 'do_mg', 'ph', 'turb'
        and 'battvolts'.  Entries without a resolved datetime are dropped
        when at least one entry has one; if none has, the NaN-filled
        arrays are returned unfiltered.
    """
    import numpy
    from datetime import datetime

    # The file-name timestamp is not used below; the lookup and the call
    # are retained so existing behavior is preserved.
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)

    # Sample records are the lines whose first field is 1.
    records = [line for line in lines if line.startswith('1,')]
    N = len(records)

    def nan_array(dtype):
        # One slot per candidate record, pre-filled with NaN.
        return numpy.ones((N,), dtype=dtype) * numpy.nan

    data = {
        'dt': nan_array(object),
        # Multiplying by NaN always yields a float array, so the former
        # integer dtype had no effect; float is stated explicitly.
        'time': nan_array(float),
        'wtemp': nan_array(float),
        'cond': nan_array(float),
        'do_sat': nan_array(float),
        'do_mg': nan_array(float),
        'ph': nan_array(float),
        'turb': nan_array(float),
        'battvolts': nan_array(float),
        }

    real_re = re.compile(REAL_RE_STR)

    # index of the next slot to fill
    i = 0

    for line in records:
        # keep every field that parses as a real number
        csi = []
        for s in line.split(','):
            m = real_re.search(s)
            if m:
                csi.append(float(m.groups()[0]))

        if len(csi) != 14:
            # incomplete or malformed record: its slot stays NaN
            continue

        # get sample datetime from data
        yyyy = csi[1]
        yday = csi[2]
        # Split HHMM with integer arithmetic.  Going through
        # modf(x/100.) and ceil(frac*100.) pushes the minute up by one
        # whenever the binary fraction is slightly high (e.g. 1215
        # became 12:16).
        HH, MM = divmod(int(round(csi[3])), 100)
        if HH == 24:
            # the logger reports midnight as 2400 of the ending day
            yday = yday + 1
            HH = 0

        sample_str = '%04d-%03d %02d:%02d' % (yyyy, yday, HH, MM)
        # No utc_offset is applied; the time is used as recorded.
        sample_dt = scanf_datetime(sample_str, fmt='%Y-%j %H:%M')

        data['dt'][i] = sample_dt           # sample datetime
        data['time'][i] = dt2es(sample_dt)  # sample time in epoch seconds
        #
        data['wtemp'][i] = csi[4]       # water temperature (C)
        data['cond'][i] = csi[5]        # specific conductivity (mS/cm)
        data['do_sat'][i] = csi[6]      # saturated dissolved oxygen (% air sat)
        data['do_mg'][i] = csi[7]       # dissolved oxygen (mg/l)
        data['ph'][i] = csi[8]          # ph
        data['turb'][i] = csi[9]        # turbidity (NTU)
        data['battvolts'][i] = csi[13]  # battery (volts)

        i = i + 1

    # keep only data that has a resolved datetime (slots still NaN are
    # records that were skipped above)
    keep = numpy.array([isinstance(dt, datetime) for dt in data['dt']])
    if keep.any():
        for param in list(data):
            data[param] = data[param][keep]

    return data
142
143  
144
def creator(platform_info, sensor_info, data):
    """
    Assemble the attributes, dimensions, variables and data for a new
    monthly netCDF file.

    Parameters
    ----------
    platform_info : dict
        Reads 'location', 'institution', 'id', 'lat', 'lon', 'altitude'.
    sensor_info : dict
        Reads 'description', 'process_module', 'id'.
    data : dict
        Output of parser() plus an 'in' entry used to index every data
        array (per the original note, the subset for the month being
        processed; see raw2proc.process()).

    Returns
    -------
    tuple
        (global_atts, var_atts, dim_inits, var_inits, var_data)
    """
    stamp = "%Y-%m-%d %H:%M:%S"
    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        # NOTE(review): 'Unversity' looks like a typo for 'University';
        # left unchanged because it is emitted into file metadata.
        'institution' : 'Unversity of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://ehs.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        'station_owner' : 'Environment, Health, and Safety Office',
        'station_contact' : 'Sharon Myers (samyers@ehs.unc.edu)',
        #
        'source' : 'fixed-point observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-point',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'Environment, Health, and Safety (EHS)',
        'project_url' : 'http://ehs.unc.edu/environment/water_quality',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : data['dt'][0].strftime(stamp),
        # last date in monthly file
        'end_date' : data['dt'][-1].strftime(stamp),
        'release_date' : now_dt.strftime(stamp),
        #
        'creation_date' : now_dt.strftime(stamp),
        'modification_date' : now_dt.strftime(stamp),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : -99999.,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis (was 'Y', duplicating latitude)
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees_Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'mS cm-1',
                 },
        'turb': {'short_name': 'turb',
                 'long_name': 'Turbidity',
                 'standard_name': 'turbidity',
                 'units': 'NTU',
                 },
        'ph': {'short_name': 'ph',
               'long_name': 'pH',
               'standard_name': 'ph',
               'units': '',
               },
        'do_mg': {'short_name': 'do_mg',
                  'long_name': 'ROX Optical Dissolved Oxygen, Derived Concentration',
                  'standard_name': 'dissolved_oxygen_concentration',
                  'units': 'mg l-1',
                  },
        'do_sat': {'short_name': 'do_sat',
                   'long_name': 'ROX Optical Dissolved Oxygen, Percent of Air Saturation',
                   'standard_name': 'dissolved_oxygen_relative_to_air_saturation',
                   'units': '%',
                   },
        'battvolts': {'short_name': 'battery',
                      'long_name': 'Battery Voltage of the Station',
                      'standard_name': 'battery_voltage',
                      'units': 'volts',
                      },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('ntime', NC.UNLIMITED),
        ('nlat', 1),
        ('nlon', 1),
        ('nz', 1),
        )

    # Time-dependent data variables, in the order they are initialized and
    # written; the same list drives var_inits and var_data so the two
    # cannot drift apart.
    data_vars = ('wtemp', 'cond', 'turb', 'ph', 'do_mg', 'do_sat', 'battvolts')

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('ntime',)),
        ('lat', NC.FLOAT, ('nlat',)),
        ('lon', NC.FLOAT, ('nlon',)),
        ('z',  NC.FLOAT, ('nz',)),
        ) + tuple((name, NC.FLOAT, ('ntime',)) for name in data_vars)

    # subset data only to month being processed (see raw2proc.process())
    i = data['in']

    # var data: fixed platform position first, then the time series
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('z', platform_info['altitude']),
        #
        ('time', data['time'][i]),
        ) + tuple((name, data[name][i]) for name in data_vars)

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
307
def updater(platform_info, sensor_info, data):
    """
    Assemble the attribute changes and data needed to append to an
    existing monthly netCDF file.

    Reads data['dt'], data['in'] and the per-variable arrays;
    platform_info and sensor_info are accepted but not read.

    Returns
    -------
    tuple
        (global_atts, var_atts, var_data)
    """
    stamp_fmt = "%Y-%m-%d %H:%M:%S"

    # times of data contained in file (yyyy-mm-dd HH:MM:SS)
    last_sample = data['dt'][-1].strftime(stamp_fmt)  # last date in monthly file
    processed_at = now_dt.strftime(stamp_fmt)

    global_atts = {}
    global_atts['end_date'] = last_sample
    global_atts['release_date'] = processed_at
    global_atts['modification_date'] = processed_at

    # No per-variable attributes (range, min, max, ...) are updated.
    var_atts = {}

    # subset data only to month being processed (see raw2proc.process())
    subset = data['in']

    # variables appended to the file, in write order
    names = ('time', 'wtemp', 'cond', 'turb', 'ph',
             'do_mg', 'do_sat', 'battvolts')
    var_data = tuple((name, data[name][subset]) for name in names)

    return (global_atts, var_atts, var_data)
347 #
Note: See TracBrowser for help on using the browser.