1 |
#!/usr/bin/env python |
---|
2 |
# Last modified: Time-stamp: <2009-12-07 15:03:02 haines> |
---|
3 |
""" |
---|
4 |
how to parse data, and assert what data and info goes into |
---|
5 |
creating and updating monthly netcdf files |
---|
6 |
|
---|
7 |
Parse water level and flow data (Sontek Argonaut and pressure |
---|
8 |
sensor) collected on Campbell Scientific DataLogger (loggernet) (csi) |
---|
9 |
|
---|
10 |
parser : sample date and time, water_depth and flow from sontek and pressure |
---|
11 |
|
---|
12 |
creator : lat, lon, z, time, sontek_wl, sontek_flow, press_wl, press_flow |
---|
13 |
updater : time, sontek_wl, sontek_flow, press_wl, press_flow |
---|
14 |
|
---|
15 |
|
---|
16 |
Examples |
---|
17 |
-------- |
---|
18 |
|
---|
19 |
>> (parse, create, update) = load_processors('proc_csi_adcp_v2') |
---|
20 |
or |
---|
21 |
>> si = get_config(cn+'.sensor_info') |
---|
22 |
>> (parse, create, update) = load_processors(si['adcp']['proc_module']) |
---|
23 |
|
---|
24 |
>> lines = load_data(filename) |
---|
25 |
>> data = parse(platform_info, sensor_info, lines) |
---|
26 |
>> create(platform_info, sensor_info, data) or |
---|
27 |
>> update(platform_info, sensor_info, data) |
---|
28 |
|
---|
29 |
""" |
---|
30 |
|
---|
31 |
|
---|
32 |
from raw2proc import * |
---|
33 |
from procutil import * |
---|
34 |
from ncutil import * |
---|
35 |
|
---|
36 |
# Processing time (UTC) truncated to whole seconds.  datetime objects are
# immutable, so the result of replace() has to be bound back to the name;
# calling replace() on its own leaves the microseconds in place.
now_dt = datetime.utcnow().replace(microsecond=0)
---|
38 |
|
---|
39 |
def parser(platform_info, sensor_info, lines):
    """
    Parse the 15-minute output table (array id 15) of a CSI datalogger file.

    From FSL (CSI datalogger program files):

    15 Output_Table  15.00 Min
        1 15 L
        2 Year_RTM  L
        3 Day_RTM  L
        4 Hour_Minute_RTM  L
        5 Rain15sec_TOT  L
        6 SonLevlft  L
        7 SonFlow  L
        8 PrDepthft  L
        9 PrFlowcfs  L

    1 Output_Table  60.00 Min
        1 1 L
        2 Year_RTM  L
        3 Day_RTM  L
        4 Hour_Minute_RTM  L
        5 H2OTempC  L
        6 SpCond  L
        7 DOSat  L
        8 DOmg  L
        9 PH  L
        10 Turb  L
        11 BattVolts  L

    Example data:

    1,2005,83,1600,16.47,0,.4,.04,8.14,115.5,14.25
    15,2005,83,1615,0,4.551,-.547,.897,.885
    15,2005,83,1630,0,4.541,.727,.908,1.005
    15,2005,83,1645,0,4.537,6.731,.878,.676
    15,2005,83,1700,0,4.537,6.731,.83,.167
    1,2005,83,1700,16.57,0,.4,.03,8.03,145.7,13.08
    15,2005,83,1715,0,4.547,5.29,.847,.347
    15,2005,83,1730,0,4.541,.908,.842,.287
    15,2005,83,1745,0,4.547,7.3,.853,.407
    15,2005,83,1800,0,4.551,6.939,.855,.437
    1,2005,83,1800,15.65,0,.2,.02,7.91,111.3,12.98

    15,2008,187,1715,.36,0,0
    15,2008,187,1730,.06,0,0
    15,2008,187,1745,.02,0,0
    15,2008,187,1800,.01,0,0

    Parameters
    ----------
    platform_info : dict
        Not read by this function; part of the common processor signature.
    sensor_info : dict
        Reads 'fn' (raw data file name), 'utc_offset' (hours added to the
        logger time; skipped when falsy) and 'press_offset' (added to the
        computed pressure water level).
    lines : sequence of str
        Raw file lines.  Iterated twice, so it must not be a one-shot
        iterator.  Only lines starting with '15,' are used.

    Returns
    -------
    dict of numpy arrays, one element per parsed record, keyed by
    'dt', 'time', 'rain', 'sontek_wl', 'sontek_flow', 'press', 'press_wl',
    'press_flow', 'press_csi_ft' and 'press_csi_cfs'.
    """

    import numpy
    from datetime import datetime

    # get sample datetime from filename
    # (the value is not used further in this function)
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # how many samples: one per 15-minute table record
    N = sum(1 for line in lines if line.startswith('15,'))

    def nan_array(dtype=float):
        # every slot starts out as NaN so unfilled values are recognizable
        return numpy.ones((N,), dtype=dtype) * numpy.nan

    data = {
        'dt': nan_array(object),
        'time': nan_array(),
        'rain': nan_array(),
        'sontek_wl': nan_array(),
        'sontek_flow': nan_array(),
        'press': nan_array(),
        'press_wl': nan_array(),
        'press_flow': nan_array(),
        'press_csi_ft': nan_array(),
        'press_csi_cfs': nan_array(),
        }

    # sample count
    i = 0

    for line in lines:
        if not line.startswith('15,'):
            continue

        # split line and keep the fields that parse as real numbers
        csi = []
        for s in line.split(','):
            m = re.search(REAL_RE_STR, s)
            if m:
                csi.append(float(m.groups()[0]))

        # need at least table id, year, day of year and HHMM
        if len(csi) < 4:
            continue

        # get sample datetime from data
        yyyy = int(csi[1])
        yday = int(csi[2])
        # Split HHMM with integer arithmetic.  Going through a float
        # fraction (e.g. 1630/100.) is inexact and can come out one minute
        # high after rounding up.
        (HH, MM) = divmod(int(round(csi[3])), 100)
        # The logger reports midnight as 2400 of the ending day.  Parse it
        # as 00:00 and add one day afterwards so the roll-over also works
        # on the last day of a year.
        next_day = (HH == 24)
        if next_day:
            HH = 0
        sample_str = '%04d-%03d %02d:%02d' % (yyyy, yday, HH, MM)
        sample_dt = scanf_datetime(sample_str, fmt='%Y-%j %H:%M')
        if next_day:
            sample_dt = sample_dt + timedelta(days=1)
        if sensor_info['utc_offset']:
            sample_dt = sample_dt + timedelta(hours=sensor_info['utc_offset'])
        data['dt'][i] = sample_dt           # sample datetime
        data['time'][i] = dt2es(sample_dt)  # sample time in epoch seconds

        if len(csi) == 9:
            data['rain'][i] = csi[4]           # 15 min rain count (inches)
            data['sontek_wl'][i] = csi[5]      # sontek water level (ft)
            data['sontek_flow'][i] = csi[6]    # sontek flow (cfs)
            data['press_csi_ft'][i] = csi[7]   # pressure water level (ft)
            data['press_csi_cfs'][i] = csi[8]  # pressure flow (cfs)
        elif len(csi) == 7:
            # Record without the two sontek fields; sontek_wl and
            # sontek_flow keep their initial NaN.  The logger values go
            # into the press_csi_* arrays (as in the 9-field case) because
            # press_wl and press_flow are recomputed from press_csi_ft
            # after the loop and anything stored in them here would be
            # overwritten.
            data['rain'][i] = csi[4]           # 15 min rain count (inches)
            data['press_csi_ft'][i] = csi[5]   # pressure water level (ft)
            data['press_csi_cfs'][i] = csi[6]  # pressure flow (cfs)

        i = i + 1

    # revert press_csi_ft back to raw pressure reading (eventually
    # want csi to just report the raw pressure reading so we can just
    # do this ourselves.
    data['press'] = (data['press_csi_ft']+1.5)/27.6778  # raw pressure (psi)
    # convert psi to height of water column based on hydrostatic eqn
    data['press_wl'] = data['press']*2.3059+sensor_info['press_offset']  # (feet)

    # flow based on parameter as computed by data logger
    # data['press_flow'] = data['press_csi_cfs']

    # flow based on calculation from data logger but applied to offset calibration
    # SMH does not know what equation is based on or how these values are derived
    data['press_flow'] = ((data['press_wl']*12))*10.81 - 8.81  # cfs

    # check that no data[dt] is set to Nan or anything but datetime
    # keep only data that has a resolved datetime
    keep = numpy.array([isinstance(dt, datetime) for dt in data['dt']])
    if keep.any():
        for param in list(data):
            data[param] = data[param][keep]

    return data
---|
196 |
|
---|
197 |
|
---|
198 |
def creator(platform_info, sensor_info, data):
    """
    Assemble the pieces needed to create a new monthly netCDF file.

    Parameters
    ----------
    platform_info : dict
        Reads 'location', 'institution', 'id', 'lat', 'lon' and 'altitude'.
    sensor_info : dict
        Reads 'description', 'process_module' and 'id'.
    data : dict
        Output of parser() plus an 'in' entry used to index every time
        series down to the month being processed (see raw2proc.process()).

    Returns
    -------
    tuple
        (global_atts, var_atts, dim_inits, var_inits, var_data)
    """
    timestamp_fmt = "%Y-%m-%d %H:%M:%S"
    now_str = now_dt.strftime(timestamp_fmt)

    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : 'Unversity of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url' : 'http://nccoos.unc.edu',
        'institution_dods_url' : 'http://nccoos.unc.edu',
        'metadata_url' : 'http://nccoos.unc.edu',
        'references' : 'http://nccoos.unc.edu',
        'contact' : 'Sara Haines (haines@email.unc.edu)',
        'station_owner' : 'Environment, Health, and Safety Office',
        'station_contact' : 'Sharon Myers (samyers@ehs.unc.edu)',
        #
        'source' : 'fixed-observation',
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : 'CF-1.0; SEACOOS-CDL-v2.0',
        # SEACOOS CDL codes
        'format_category_code' : 'fixed-point',
        'institution_code' : platform_info['institution'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : 'Environment, Health, and Safety (EHS)',
        'project_url' : 'http://ehs.unc.edu/environment/water_quality',
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : data['dt'][0].strftime(timestamp_fmt),
        # last date in monthly file
        'end_date' : data['dt'][-1].strftime(timestamp_fmt),
        'release_date' : now_str,
        #
        'creation_date' : now_str,
        'modification_date' : now_str,
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : -99999.,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis; 'Y' belongs to latitude only
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Altitude',
               'standard_name': 'altitude',
               'reference':'zero at mean sea level',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'rain': {'short_name': 'rain',
                 'long_name': '15-Minute Rain',
                 'standard_name': 'rain',
                 'units': 'inches',
                 },
        'sontek_wl': {'short_name': 'sontek_wl',
                      'long_name': 'Sontek Water Level',
                      'standard_name': 'water_level',
                      'units': 'feet',
                      'reference':'zero at station altitude',
                      'positive' : 'up',
                      },
        'sontek_flow': {'short_name': 'flow',
                        'long_name': 'Sontek Stream Flow',
                        'standard_name': 'water_flux',
                        'units': 'cfs',
                        },
        'press_wl': { 'short_name': 'press_wl',
                      'long_name': 'Pressure Water Level',
                      'standard_name': 'water_level',
                      'units': 'feet',
                      'reference':'zero at station altitude',
                      'positive' : 'up',
                      },
        'press_flow': { 'short_name': 'flow',
                        'long_name': 'Pressure Stream Flow',
                        'standard_name': 'water_flux',
                        'units': 'cfs',
                        },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('ntime', NC.UNLIMITED),
        ('nlat', 1),
        ('nlon', 1),
        ('nz', 1),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('ntime',)),
        ('lat', NC.FLOAT, ('nlat',)),
        ('lon', NC.FLOAT, ('nlon',)),
        ('z', NC.FLOAT, ('nz',)),
        # data variables
        ('rain', NC.FLOAT, ('ntime',)),
        ('sontek_wl', NC.FLOAT, ('ntime',)),
        ('sontek_flow', NC.FLOAT, ('ntime',)),
        ('press_wl', NC.FLOAT, ('ntime',)),
        ('press_flow', NC.FLOAT, ('ntime',)),
        )

    # subset data only to month being processed (see raw2proc.process())
    i = data['in']

    # var data: fixed station position first, then the time series
    var_data = (
        ('lat', platform_info['lat']),
        ('lon', platform_info['lon']),
        ('z', platform_info['altitude']),
        #
        ('time', data['time'][i]),
        #
        ('rain', data['rain'][i]),
        ('sontek_wl', data['sontek_wl'][i]),
        ('sontek_flow', data['sontek_flow'][i]),
        ('press_wl', data['press_wl'][i]),
        ('press_flow', data['press_flow'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
---|
350 |
|
---|
351 |
def updater(platform_info, sensor_info, data):
    """
    Assemble the pieces needed to append to an existing monthly netCDF file.

    Only `data` is read here: data['dt'] supplies the new end date and
    data['in'] indexes every time series down to the month being processed
    (see raw2proc.process()).

    Returns
    -------
    tuple
        (global_atts, var_atts, var_data)
    """
    stamp_fmt = "%Y-%m-%d %H:%M:%S"

    # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
    last_sample = data['dt'][-1].strftime(stamp_fmt)  # last date in monthly file
    processed = now_dt.strftime(stamp_fmt)
    global_atts = dict(
        end_date=last_sample,
        release_date=processed,
        modification_date=processed,
        )

    # no per-variable attributes (range, min, max, ...) are updated
    var_atts = {}

    # subset data only to month being processed (see raw2proc.process())
    month = data['in']

    # time first, then the data variables
    names = ('time', 'rain', 'sontek_wl', 'sontek_flow',
             'press_wl', 'press_flow')
    var_data = tuple((name, data[name][month]) for name in names)

    return (global_atts, var_atts, var_data)
---|
389 |
# |
---|