NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/proc_cr1000_flow.py

Revision 233 (checked in by haines, 15 years ago)

EHS processing and python virtual env

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2009-06-25 15:12:53 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse data water level and flow data (sontek argonaut and pressure
8 sensor) collected on Campbell Scientific DataLogger (loggernet) (csi)
9
10 parser : sample date and time, rain, water level and flow from sontek and pressure
11
12 creator : lat, lon, z, time, rain, sontek_wl, sontek_flow, press_wl, press_flow
13 updater : time, rain, sontek_wl, sontek_flow, press_wl, press_flow
14
15
16 Examples
17 --------
18
19 >> (parse, create, update) = load_processors('proc_cr1000_flow')
20 or
21 >> si = get_config(cn+'.sensor_info')
22 >> (parse, create, update) = load_processors(si['adcp']['proc_module'])
23
24 >> lines = load_data(filename)
25 >> data = parse(platform_info, sensor_info, lines)
26 >> create(platform_info, sensor_info, data) or
27 >> update(platform_info, sensor_info, data)
28
29 """
30
31
32 from raw2proc import *
33 from procutil import *
34 from ncutil import *
35
# Processing time (UTC, naive) stamped into the release/creation/modification
# attributes written by creator() and updater().
# datetime objects are immutable: replace() returns a new object, so its
# result has to be bound, otherwise the microseconds are never dropped.
now_dt = datetime.utcnow().replace(microsecond=0)
38
def parser(platform_info, sensor_info, lines):
    """
    Parse rain, water level and flow samples from a CR1000 data file.

    From FSL (CSI datalogger program files):

    Example data: NO Sontek

    TOA5,CR1000_CBC,CR1000,5498,CR1000.Std.11,CPU:UNC_CrowBranch.CR1,1554,Data15Min
    TIMESTAMP,RECORD,RainIn_Tot,WaterLevelFt,Flow
    TS,RN,,,
    ,,Tot,Smp,Smp
    2009-01-22 15:30:00,0,0,0,0
    2009-01-22 15:45:00,1,0,0,0
    2009-01-22 16:00:00,2,0.01,0,0
    2009-01-22 16:15:00,3,0,0,0

    Example data: with Sontek

    Parameters
    ----------
    platform_info : not used by this parser
    sensor_info : mapping; 'fn' (data file name) and 'utc_offset' (hours
        added to each sample time when non-zero) are read here
    lines : sequence of text lines; the first four are treated as header

    Returns
    -------
    dict of numpy arrays of equal length keyed by 'dt' (datetime objects),
    'time' (value returned by dt2es), 'rain', 'sontek_wl', 'sontek_flow',
    'press_wl' and 'press_flow'.

    A row is accepted only when it yields 6 or 4 numeric fields after the
    timestamp; any other row is reported and skipped.  Rows with 4 numeric
    fields get press_wl and press_flow set to 0.
    """

    import numpy
    from datetime import datetime

    # get sample datetime from filename
    # NOTE(review): sample_dt_start is not used below; the lookup is kept so
    # that a missing 'fn' still fails here exactly as it did before.
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # how many samples (don't count header 4 lines)
    records = lines[4:]
    N = len(records)

    def nan_column(dtype=float):
        # one NaN-filled slot per candidate sample
        return numpy.array(numpy.ones((N,), dtype=dtype)*numpy.nan)

    data = {
        'dt' : nan_column(object),
        # multiplying by NaN always yields a float array, so float is asked
        # for directly (the former dtype=long exists only in Python 2)
        'time' : nan_column(),
        'rain' : nan_column(),
        'sontek_wl' : nan_column(),
        'sontek_flow' : nan_column(),
        'press_wl' : nan_column(),
        'press_flow' : nan_column(),
        }

    # count of accepted samples, also the next free slot in each array
    i = 0

    for line in records:
        # re.split always returns at least one element, so a blank line has
        # to be detected on the text itself
        if not line.strip():
            print(' ... skipping line %d ' % (i,))
            continue

        # split line
        sw = re.split(',', line)

        # replace any "NAN" text with a number
        for index, s in enumerate(sw):
            if re.search(NAN_RE_STR, s):
                sw[index] = '-99999'

        # parse all float and integer fields after the timestamp
        csi = []
        for s in sw[1:]:
            m = re.search(REAL_RE_STR, s)
            if m:
                csi.append(float(m.groups()[0]))

        # validate the row BEFORE storing anything, so a rejected row (e.g. a
        # truncated last line) cannot leave a timestamp behind in slot i
        if len(csi)==6:
            # MOW has all six fields but no sontek now
            press_wl = csi[4]   # pressure water level (ft)
            press_flow = csi[5] # pressure flow (cfs)
        elif len(csi)==4:
            # CBC is not reporting pressure level and flow (THIS IS NOT RIGHT)
            # Need Kevin Simpson at YSI to fix this.
            press_wl = 0.
            press_flow = 0.
        else:
            print(' ... skipping line %d -- %s ' % (i,line))
            continue

        sample_dt = scanf_datetime(sw[0], fmt='"%Y-%m-%d %H:%M:%S"')
        if sensor_info['utc_offset']:
            sample_dt = sample_dt + timedelta(hours=sensor_info['utc_offset'])

        data['dt'][i] = sample_dt # sample datetime
        data['time'][i] = dt2es(sample_dt) # sample time in epoch seconds
        data['rain'][i] = csi[1] # 15 min rain count (inches)
        data['sontek_wl'][i] = csi[2] # sontek water level (ft)
        data['sontek_flow'][i] = csi[3] # sontek flow (cfs)
        data['press_wl'][i] = press_wl
        data['press_flow'][i] = press_flow
        i = i+1
    # for line

    # check that no data[dt] is set to Nan or anything but datetime
    # keep only data that has a resolved datetime
    keep = numpy.array([isinstance(dt, datetime) for dt in data['dt']],
                       dtype=bool)
    # as before, when nothing resolved the arrays are returned unfiltered
    if keep.any():
        for param in data.keys():
            data[param] = data[param][keep]

    return data
144  
145
def creator(platform_info, sensor_info, data):
    """
    Assemble everything needed to create a new monthly netCDF file.

    Reads 'location', 'institution', 'id', 'lat', 'lon' and 'altitude'
    from platform_info, 'description', 'process_module' and 'id' from
    sensor_info, and the arrays plus the 'in' selector from data.

    Returns the tuple
    (global_atts, var_atts, dim_inits, var_inits, var_data).
    """
    #
    #
    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        # NOTE(review): 'Unversity' looks like a typo; left untouched because
        # it is written into the files -- confirm before changing
        'institution' : 'Unversity of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        'station_owner' : 'Environment, Health, and Safety Office',
        'station_contact' : 'Sharon Myers (samyers@ehs.unc.edu)',
        #
        'source' : 'fixed-observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-point',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'Environment, Health, and Safety (EHS)',
        'project_url' : 'http://ehs.unc.edu/environment/water_quality',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : data['dt'][0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : data['dt'][-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : -99999.,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis in CF; it was mislabelled 'Y',
                 # which duplicated the latitude axis
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Altitude',
               'standard_name': 'altitude',
               'reference':'zero at mean sea level',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'rain': {'short_name': 'rain',
                 'long_name': '15-Minute Rain',
                 'standard_name': 'rain',
                 'units': 'inches',
                 },
        'sontek_wl': {'short_name': 'sontek_wl',
                      'long_name': 'Sontek Water Level',
                      'standard_name': 'water_level',
                      'units': 'feet',
                      'reference':'zero at station altitude',
                      'positive' : 'up',
                      },
        'sontek_flow': {'short_name': 'flow',
                        'long_name': 'Sontek Stream Flow',
                        'standard_name': 'water_flux',
                        'units': 'cfs',
                        },
        'press_wl': { 'short_name': 'press_wl',
                      'long_name': 'Pressure Water Level',
                      'standard_name': 'water_level',
                      'units': 'feet',
                      'reference':'zero at station altitude',
                      'positive' : 'up',
                      },
        'press_flow': { 'short_name': 'flow',
                        'long_name': 'Pressure Stream Flow',
                        'standard_name': 'water_flux',
                        'units': 'cfs',
                        },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('ntime', NC.UNLIMITED),
        ('nlat', 1),
        ('nlon', 1),
        ('nz', 1),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('ntime',)),
        ('lat', NC.FLOAT, ('nlat',)),
        ('lon', NC.FLOAT, ('nlon',)),
        ('z',  NC.FLOAT, ('nz',)),
        # data variables
        ('rain', NC.FLOAT, ('ntime',)),
        ('sontek_wl', NC.FLOAT, ('ntime',)),
        ('sontek_flow', NC.FLOAT, ('ntime',)),
        ('press_wl', NC.FLOAT, ('ntime',)),
        ('press_flow', NC.FLOAT, ('ntime',)),
        )

    # subset data only to month being processed (see raw2proc.process())
    i = data['in']

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('z', platform_info['altitude']),
        #
        ('time', data['time'][i]),
        #
        ('rain', data['rain'][i]),
        ('sontek_wl', data['sontek_wl'][i]),
        ('sontek_flow', data['sontek_flow'][i]),
        ('press_wl', data['press_wl'][i]),
        ('press_flow', data['press_flow'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
298
def updater(platform_info, sensor_info, data):
    """
    Assemble what is needed to append samples to an existing monthly file.

    Returns the tuple (global_atts, var_atts, var_data):
    global_atts refreshes the end, release and modification dates,
    var_atts is empty (no per-variable attributes are updated), and
    var_data pairs each time-dependent variable name with its samples for
    the month being processed.
    """
    stamp_fmt = "%Y-%m-%d %H:%M:%S"

    # times of data contained in file (yyyy-mm-dd HH:MM:SS)
    global_atts = {}
    # last date in monthly file
    global_atts['end_date'] = data['dt'][-1].strftime(stamp_fmt)
    processed_at = now_dt.strftime(stamp_fmt)
    global_atts['release_date'] = processed_at
    global_atts['modification_date'] = processed_at

    # nothing such as range, min or max is refreshed for the variables
    var_atts = {}

    # subset data only to month being processed (see raw2proc.process())
    month_sel = data['in']

    # same variables, in the same order, as the time-dependent ones
    # written at creation
    var_names = (
        'time',
        'rain',
        'sontek_wl',
        'sontek_flow',
        'press_wl',
        'press_flow',
        )
    var_data = tuple([(name, data[name][month_sel]) for name in var_names])

    return (global_atts, var_atts, var_data)
337 #
Note: See TracBrowser for help on using the browser.