NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/proc_cr10x_flow_v1.py

Revision 233 (checked in by haines, 15 years ago)

EHS processing and python virtual env

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2009-03-19 14:44:02 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse data water level and flow data (pressure sensor only) collected
8 on Campbell Scientific DataLogger (loggernet) (csi)
9
10 parser : sample date and time, rain, water level and flow from pressure sensor
11
12 creator : lat, lon, z, time, rain, press_wl, press_flow
13 updater : time, rain, press_wl, press_flow
14
15
16 Examples
17 --------
18
19 >> (parse, create, update) = load_processors('proc_csi_adcp_v2')
20 or
21 >> si = get_config(cn+'.sensor_info')
22 >> (parse, create, update) = load_processors(si['adcp']['proc_module'])
23
24 >> lines = load_data(filename)
25 >> data = parse(platform_info, sensor_info, lines)
26 >> create(platform_info, sensor_info, data) or
27 >> update(platform_info, sensor_info, data)
28
29 """
30
31
32 from raw2proc import *
33 from procutil import *
34 from ncutil import *
35
# Processing timestamp (UTC) shared by creator() and updater() for the
# release/creation/modification dates.  datetime.replace() returns a new
# object, so its result must be assigned for the microseconds to be dropped.
now_dt = datetime.utcnow().replace(microsecond=0)
38
def parser(platform_info, sensor_info, lines):
    """
    Parse CR10X datalogger records into time, rain, water level and flow.

    Only lines that start with "1," are considered, and only records that
    yield exactly 14 numeric fields are stored.

    From FSL (CSI datalogger program files):

    1 Output_Table  60.00 Min
    1 1 L
    2 Year_RTM  L
    3 Day_RTM  L
    4 Hour_Minute_RTM  L
    5 H2OTempC  L
    6 SpCond  L
    7 DOSat  L
    8 DOmg  L
    9 PH  L
    10 TURB L
    11 PrDepthft  L
    12 Rain  L
    13 PrFlowcfs  L
    14 BattVolts  L

    Example data:

    1,2001,130,2000,19.27,.292,.1,.01,7.44,3.5,.123,0,12.77,0
    1,2001,130,2100,19.17,.291,.1,.01,7.38,3.1,.119,0,12.58,0
    1,2001,130,2200,19.06,.288,.1,.01,7.35,3.2,.12,0,12.72,0
    1,2001,130,2300,18.89,.282,.1,.01,7.35,2.8,.127,0,12.68,0
    1,2001,130,2400,18.68,.277,.1,.01,7.36,2.7,1.347,0,13.47,12.75
    1,2001,131,100,18.45,.275,.1,.01,7.36,2.7,1.292,0,12.92,12.62

    Parameters
    ----------
    platform_info : not used by this parser
    sensor_info : mapping; reads 'fn' (data filename) and 'utc_offset'
        (hours added to each sample time when truthy)
    lines : iterable of text lines from the raw data file

    Returns
    -------
    dict of numpy arrays keyed by 'dt' (datetime objects), 'time' (value
    returned by dt2es for each sample), 'rain', 'press_wl' and 'press_flow'.
    """

    import numpy
    from datetime import datetime

    # get sample datetime from filename
    # NOTE(review): the result is not used below; the lookup and call are
    # kept in case callers rely on them failing for a bad 'fn' -- confirm.
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # only records from output table 1 are of interest
    records = [line for line in lines if line.startswith('1,')]

    # how many samples
    N = len(records)
    data = {
        'dt' : numpy.ones((N,), dtype=object)*numpy.nan,
        # multiplying by NaN always gave a float array here, so the array is
        # declared float directly instead of the Python-2-only `long`
        'time' : numpy.ones((N,), dtype=float)*numpy.nan,
        'rain' : numpy.ones((N,), dtype=float)*numpy.nan,
        'press_wl' : numpy.ones((N,), dtype=float)*numpy.nan,
        'press_flow' : numpy.ones((N,), dtype=float)*numpy.nan,
        }

    # sample count
    i = 0

    for line in records:
        # split line and parse floats and integers
        csi = []
        for s in line.split(','):
            m = re.search(REAL_RE_STR, s)
            if m:
                csi.append(float(m.groups()[0]))

        # skip malformed records (missing or unparsable fields)
        if len(csi) != 14:
            continue

        # get sample datetime from data
        yyyy = csi[1]
        yday = csi[2]
        # Hour_Minute is encoded as HHMM; integer arithmetic avoids the
        # float error of modf()/ceil() (e.g. 1030 used to become 10:31)
        (HH, MM) = divmod(int(round(csi[3])), 100)

        # the logger reports midnight as 2400 of the ending day; parse it as
        # 00:00 and advance one day afterwards so that the last day of a
        # year rolls over correctly instead of producing day 366/367
        next_day = (HH == 24)
        if next_day:
            HH = 0

        sample_str = '%04d-%03d %02d:%02d' % (yyyy, yday, HH, MM)
        sample_dt = scanf_datetime(sample_str, fmt='%Y-%j %H:%M')
        if next_day:
            sample_dt = sample_dt + timedelta(days=1)
        if sensor_info['utc_offset']:
            sample_dt = sample_dt + timedelta(hours=sensor_info['utc_offset'])

        data['dt'][i] = sample_dt # sample datetime
        data['time'][i] = dt2es(sample_dt) # sample time in epoch seconds

        # columns not stored by this processor:
        #   csi[4] water temperature (C), csi[5] specific conductivity (mS/cm),
        #   csi[6] saturated dissolved oxygen (% air sat),
        #   csi[7] dissolved oxygen (mg/l), csi[8] ph, csi[9] turbidity (NTU),
        #   csi[13] battery (volts)
        # no adcp's prior to March 2005, so no sontek water level or flow

        data['press_wl'][i] = csi[10] # pressure water level (ft)
        data['rain'][i] = csi[11] # 15 sec rain count ??
        data['press_flow'][i] = csi[12] # pressure flow (cfs)

        i = i+1
    # for line

    # check that no data[dt] is set to Nan or anything but datetime
    # keep only data that has a resolved datetime (this also drops the
    # unfilled slots left at the end by skipped records)
    keep = numpy.array([isinstance(dt, datetime) for dt in data['dt']])
    if keep.any():
        for param in list(data):
            data[param] = data[param][keep]

    return data
162  
163
def creator(platform_info, sensor_info, data):
    """
    Assemble everything needed to create a new monthly netCDF file.

    Parameters
    ----------
    platform_info : mapping; reads 'location', 'institution', 'id', 'lat',
        'lon' and 'altitude'
    sensor_info : mapping; reads 'description', 'process_module' and 'id'
    data : dict as returned by parser() with an added 'in' entry used to
        index the samples belonging to the month being processed
        (see raw2proc.process())

    Returns
    -------
    tuple (global_atts, var_atts, dim_inits, var_inits, var_data)
    """
    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        # spelling corrected from 'Unversity'
        'institution' : 'University of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        'station_owner' : 'Environment, Health, and Safety Office',
        'station_contact' : 'Sharon Myers (samyers@ehs.unc.edu)',
        #
        'source' : 'fixed-observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-point',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'Environment, Health, and Safety (EHS)',
        'project_url' : 'http://ehs.unc.edu/environment/water_quality',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : data['dt'][0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : -99999.,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis (was 'Y', duplicating latitude)
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Altitude',
               'standard_name': 'altitude',
               'reference':'zero at mean sea level',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'rain': {'short_name': 'rain',
                 'long_name': '15-Minute Rain',
                 'standard_name': 'rain',
                 'units': 'inches',
                 },
        'press_wl': {'short_name': 'press_wl',
                     'long_name': 'Pressure Water Level',
                     'standard_name': 'water_level',
                     'units': 'feet',
                     'reference':'zero at station altitude',
                     'positive' : 'up',
                     },
        'press_flow': {'short_name': 'flow',
                       'long_name': 'Pressure Stream Flow',
                       'standard_name': 'water_flux',
                       'units': 'cfs',
                       },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('ntime', NC.UNLIMITED),
        ('nlat', 1),
        ('nlon', 1),
        ('nz', 1),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('ntime',)),
        ('lat', NC.FLOAT, ('nlat',)),
        ('lon', NC.FLOAT, ('nlon',)),
        ('z',  NC.FLOAT, ('nz',)),
        # data variables
        ('rain', NC.FLOAT, ('ntime',)),
        ('press_wl', NC.FLOAT, ('ntime',)),
        ('press_flow', NC.FLOAT, ('ntime',)),
        )

    # subset data only to month being processed (see raw2proc.process())
    i = data['in']

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('z', platform_info['altitude']),
        #
        ('time', data['time'][i]),
        #
        ('rain', data['rain'][i]),
        ('press_wl', data['press_wl'][i]),
        ('press_flow', data['press_flow'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
300
def updater(platform_info, sensor_info, data):
    """
    Assemble what is needed to append new samples to an existing monthly file.

    Parameters
    ----------
    platform_info : not used here
    sensor_info : not used here
    data : dict as returned by parser() with an added 'in' entry used to
        index the samples belonging to the month being processed
        (see raw2proc.process())

    Returns
    -------
    tuple (global_atts, var_atts, var_data)
    """
    stamp_fmt = "%Y-%m-%d %H:%M:%S"

    # last date in monthly file, then the processing time stamp
    last_sample = data['dt'][-1].strftime(stamp_fmt)
    processed_at = now_dt.strftime(stamp_fmt)

    # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
    global_atts = {
        'end_date' : last_sample,
        'release_date' : processed_at,
        'modification_date' : processed_at,
        }

    # no variable attributes (range, min, max, ...) are updated for now
    var_atts = {}

    # subset data only to month being processed (see raw2proc.process())
    month_idx = data['in']

    # same variable order as the original: time first, then data variables
    var_names = ('time', 'rain', 'press_wl', 'press_flow')
    var_data = tuple((name, data[name][month_idx]) for name in var_names)

    return (global_atts, var_atts, var_data)
337 #
Note: See TracBrowser for help on using the browser.