NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/proc_cr1000_wq.py

Revision 320 (checked in by haines, 14 years ago)

catch-up trunk to production code running on cromwell

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2009-12-15 08:31:52 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse water quality data (water temperature, specific conductivity,
8 dissolved oxygen, pH, turbidity) and battery voltage collected on
9 Campbell Scientific CR1000 DataLogger (loggernet) (csi)
10 parser : sample date and time, wtemp, cond, do_sat, do_mg, ph, turb, battvolts
11
12 creator : lat, lon, z, time, wtemp, cond, turb, ph, do_mg, do_sat, battvolts
13 updater : time, wtemp, cond, turb, ph, do_mg, do_sat, battvolts
14
15
16 Examples
17 --------
18
19 >> (parse, create, update) = load_processors('proc_cr1000_wq')
20 or
21 >> si = get_config(cn+'.sensor_info')
22 >> (parse, create, update) = load_processors(si['adcp']['proc_module'])
23
24 >> lines = load_data(filename)
25 >> data = parse(platform_info, sensor_info, lines)
26 >> create(platform_info, sensor_info, data) or
27 >> update(platform_info, sensor_info, data)
28
29 """
30
31
32 from raw2proc import *
33 from procutil import *
34 from ncutil import *
35
# UTC timestamp taken once, when this module is imported, and reused for the
# release/creation/modification dates written by creator() and updater().
# datetime objects are immutable: replace() returns a new object, so its
# result has to be assigned for the microseconds to actually be dropped.
now_dt = datetime.utcnow().replace(microsecond=0)
38
def parser(platform_info, sensor_info, lines):
    """
    Parse water-quality records from a CR1000 (TOA5) data file.

    Example of the expected input -- four header lines followed by one
    comma-separated record per line:

    "TOA5","CR1000_CBC","CR1000","5498","CR1000.Std.11","CPU:UNC_CrowBranch.CR1","1554","DataHourly"
    "TIMESTAMP","RECORD","SondeTempC","SpCond","DOSat","DOmg","pH","Turb","BattVolt_Min"
    "TS","RN","","","","","","",""
    "","","Smp","Smp","Smp","Smp","Smp","Smp","Min"
    "2009-06-01 00:00:00",3066,20.54,0.551,7.17,-10.3,30.5,0,12.88
    "2009-06-01 01:00:00",3067,20.38,0.551,7.16,-9.7,29.7,0,12.86
    "2009-06-01 02:00:00",3068,20.18,0.55,7.15,-9.2,30.1,0,12.84
    "2009-06-01 03:00:00",3069,19.99,0.549,7.16,-9.5,27.6,0,12.83
    "2009-06-01 04:00:00",3070,"NaN",0.549,7.16,-9.5,27.6,0,12.83

    Parameters
    ----------
    platform_info : not used by this parser.
    sensor_info : dict; only 'utc_offset' is read.  When it is truthy it is
        added (as hours) to every timestamp found in the file.
    lines : sequence of str, the raw lines of the file.  The first four
        lines are treated as header and ignored.

    Returns
    -------
    dict of numpy arrays, one element per accepted record:
        'dt'   -- sample datetime (object array)
        'time' -- dt2es(sample datetime)
        'wtemp', 'cond', 'do_sat', 'do_mg', 'ph', 'turb', 'battvolts'
               -- float values
    A record is accepted only when exactly eight numeric fields follow the
    timestamp; everything else is reported on stdout and dropped.  Fields
    matching NAN_RE_STR are stored as -99999.
    """

    import numpy
    from datetime import datetime

    # everything after the four header lines is data
    records = lines[4:]
    N = len(records)

    def _nan_array(dtype):
        # N placeholders, all NaN, overwritten as records are accepted
        return numpy.array(numpy.ones((N,), dtype=dtype)*numpy.nan)

    data = {'dt': _nan_array(object)}
    for name in ('time', 'wtemp', 'cond', 'do_sat', 'do_mg', 'ph', 'turb',
                 'battvolts'):
        data[name] = _nan_array(float)

    # number of records accepted so far == index of the next free slot
    i = 0

    for line in records:
        # blank lines (e.g. at the end of the file) carry no record
        if not line.strip():
            print(' ... skipping line %d ' % (i,))
            continue

        sw = line.split(',')

        # replace "NAN" by the fill value so it still counts as a number
        for index, s in enumerate(sw):
            if re.search(NAN_RE_STR, s):
                sw[index] = '-99999'

        # every numeric field after the timestamp (csi[0] is the RECORD number)
        csi = []
        for s in sw[1:]:
            m = re.search(REAL_RE_STR, s)
            if m:
                csi.append(float(m.groups()[0]))

        if len(csi) != 8:
            print(' ... skipping line %d -- %s ' % (i, line))
            continue

        # Only touch the output arrays once the whole record is known to be
        # good, so a rejected record can never leave a timestamp behind
        # with NaN data attached to it.
        sample_dt = scanf_datetime(sw[0], fmt='"%Y-%m-%d %H:%M:%S"')
        if sensor_info['utc_offset']:
            sample_dt = sample_dt + timedelta(hours=sensor_info['utc_offset'])

        data['dt'][i] = sample_dt # sample datetime
        data['time'][i] = dt2es(sample_dt) # sample time in epoch seconds

        # NOTE(review): this mapping does not follow the order of the column
        # names in the header shown in the docstring (csi[3] is stored as pH
        # although that column is labelled DOSat).  The sample values (about
        # 7.2 in that column) suggest the header labels, not the mapping,
        # are off -- confirm against the logger program before changing.
        data['wtemp'][i] = csi[1] # water temperature (C)
        data['cond'][i] = csi[2] # specific conductivity (mS/cm)
        data['ph'][i] = csi[3] # ph
        data['do_mg'][i] = csi[4] # dissolved oxygen (mg/l)
        data['turb'][i] = csi[5] # turbidity (NTU)
        data['do_sat'][i] = csi[6] # saturated dissolved oxygen (% air sat)
        data['battvolts'][i] = csi[7] # battery (volts)
        i = i + 1
    # for line

    # keep only the slots that hold a resolved datetime; unused slots still
    # contain the NaN placeholder.  The mask is applied even when nothing
    # was accepted so that no placeholder rows are ever returned.
    keep = numpy.array([isinstance(dt, datetime) for dt in data['dt']],
                       dtype=bool)
    for param in list(data.keys()):
        data[param] = data[param][keep]

    return data
135
def creator(platform_info, sensor_info, data):
    """
    Describe a new netCDF file for the parsed water-quality data.

    Parameters
    ----------
    platform_info : dict; keys read here are 'location', 'institution',
        'id', 'lat', 'lon' and 'altitude'.
    sensor_info : dict; keys read here are 'description', 'process_module'
        and 'id'.
    data : dict as returned by parser(), plus an 'in' entry used to index
        every data array (per the original comment, the subset for the
        month being processed, set in raw2proc.process()).

    Returns
    -------
    tuple (global_atts, var_atts, dim_inits, var_inits, var_data)
        global_atts -- dict of global attributes
        var_atts    -- dict of per-variable attribute dicts
        dim_inits   -- tuple of (dimension name, size)
        var_inits   -- tuple of (variable name, type, dimension names)
        var_data    -- tuple of (variable name, values)
    """
    date_fmt = "%Y-%m-%d %H:%M:%S"
    title_str = sensor_info['description']+' at '+ platform_info['location']

    global_atts = {
        'title' : title_str,
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://ehs.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        'station_owner' : 'Environment, Health, and Safety Office',
        'station_contact' : 'Sharon Myers (samyers@ehs.unc.edu)',
        #
        'source' : 'fixed-point observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-point',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'Environment, Health, and Safety (EHS)',
        'project_url' : 'http://ehs.unc.edu/environment/water_quality',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # NOTE(review): these are taken from the whole parsed array, not
        # from the data['in'] subset that is actually written -- confirm
        # that is intended when a raw file spans more than one month.
        'start_date' : data['dt'][0].strftime(date_fmt),
        'end_date' : data['dt'][-1].strftime(date_fmt),
        # now_dt is set once when the module is imported
        'release_date' : now_dt.strftime(date_fmt),
        #
        'creation_date' : now_dt.strftime(date_fmt),
        'modification_date' : now_dt.strftime(date_fmt),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : -99999.,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis (latitude is Y)
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Height',
               'standard_name': 'height',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees_Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'mS cm-1',
                 },
        'turb': {'short_name': 'turb',
                 'long_name': 'Turbidity',
                 'standard_name': 'turbidity',
                 'units': 'NTU',
                 },
        'ph': {'short_name': 'ph',
               'long_name': 'pH',
               'standard_name': 'ph',
               'units': '',
               },
        'do_mg': {'short_name': 'do_mg',
                  'long_name': 'ROX Optical Dissolved Oxygen, Derived Concentration',
                  'standard_name': 'dissolved_oxygen_concentration',
                  'units': 'mg l-1',
                  },
        'do_sat': {'short_name': 'do_sat',
                   'long_name': 'ROX Optical Dissolved Oxygen, Percent of Air Saturation',
                   'standard_name': 'dissolved_oxygen_relative_to_air_saturation',
                   'units': '%',
                   },
        'battvolts': {'short_name': 'battery',
                      'long_name': 'Battery Voltage of the Station',
                      'standard_name': 'battery_voltage',
                      'units': 'volts',
                      },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('ntime', NC.UNLIMITED),
        ('nlat', 1),
        ('nlon', 1),
        ('nz', 1),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('ntime',)),
        ('lat', NC.FLOAT, ('nlat',)),
        ('lon', NC.FLOAT, ('nlon',)),
        ('z',  NC.FLOAT, ('nz',)),
        # data variables
        ('wtemp', NC.FLOAT, ('ntime',)),
        ('cond', NC.FLOAT, ('ntime',)),
        ('turb', NC.FLOAT, ('ntime',)),
        ('ph', NC.FLOAT, ('ntime',)),
        ('do_mg', NC.FLOAT, ('ntime',)),
        ('do_sat', NC.FLOAT, ('ntime',)),
        ('battvolts', NC.FLOAT, ('ntime',)),
        )

    # subset data only to month being processed (see raw2proc.process())
    i = data['in']

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('z', platform_info['altitude']),
        #
        ('time', data['time'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('turb', data['turb'][i]),
        ('ph', data['ph'][i]),
        ('do_mg', data['do_mg'][i]),
        ('do_sat', data['do_sat'][i]),
        ('battvolts', data['battvolts'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
298
def updater(platform_info, sensor_info, data):
    """
    Describe what has to change when new samples are added to a file.

    Parameters
    ----------
    platform_info, sensor_info : not used here.
    data : dict with 'dt', 'in', 'time' and the data arrays 'wtemp',
        'cond', 'turb', 'ph', 'do_mg', 'do_sat' and 'battvolts'.
        data['in'] is used to index every array (per the original
        comment, the subset for the month being processed, set in
        raw2proc.process()).

    Returns
    -------
    tuple (global_atts, var_atts, var_data)
        global_atts -- dict with the refreshed end/release/modification dates
        var_atts    -- dict of variable attributes to update (always empty)
        var_data    -- tuple of (variable name, values)
    """
    date_fmt = "%Y-%m-%d %H:%M:%S"
    stamp = now_dt.strftime(date_fmt)

    # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
    global_atts = dict(
        end_date=data['dt'][-1].strftime(date_fmt),
        release_date=stamp,
        modification_date=stamp,
    )

    # no variable attributes (range, min, max, ...) are updated
    var_atts = {}

    # subset data only to month being processed (see raw2proc.process())
    in_month = data['in']

    names = ('time', 'wtemp', 'cond', 'turb', 'ph', 'do_mg', 'do_sat',
             'battvolts')
    var_data = tuple([(name, data[name][in_month]) for name in names])

    return (global_atts, var_atts, var_data)
338 #
Note: See TracBrowser for help on using the browser.