1 |
|
---|
2 |
|
---|
3 |
""" |
---|
4 |
how to parse data, and assert what data and info goes into |
---|
5 |
creating and updating monthly netcdf files |
---|
6 |
|
---|
7 |
parse data from YSI 6600 V1 on an automated vertical profiler (avp) |
---|
8 |
|
---|
9 |
parser : date and time, water_depth for each profile |
---|
10 |
|
---|
11 |
sample time, sample depth, as cast measures water |
---|
12 |
temperature, conductivity, salinity, dissolved oxygen, |
---|
13 |
turbidity, and chlorophyll (no pH) |
---|
14 |
|
---|
15 |
|
---|
16 |
creator : lat, lon, z, stime, (time, water_depth), water_temp, cond, |
---|
17 |
salin, turb, chl, do |
---|
18 |
|
---|
19 |
updater : z, stime, (time, water_depth), water_temp, cond, salin, |
---|
20 |
turb, chl, do |
---|
21 |
|
---|
22 |
using fixed profiler CDL but modified to have raw data for each cast |
---|
23 |
along each column |
---|
24 |
|
---|
25 |
|
---|
26 |
Examples |
---|
27 |
-------- |
---|
28 |
|
---|
29 |
>> (parse, create, update) = load_processors('proc_avp_ysi_6600_v1') |
---|
30 |
or |
---|
31 |
>> si = get_config(cn+'.sensor_info') |
---|
32 |
>> (parse, create, update) = load_processors(si['adcp']['proc_module']) |
---|
33 |
|
---|
34 |
>> lines = load_data(filename) |
---|
35 |
>> data = parse(platform_info, sensor_info, lines) |
---|
36 |
>> create(platform_info, sensor_info, data) or |
---|
37 |
>> update(platform_info, sensor_info, data) |
---|
38 |
|
---|
39 |
""" |
---|
40 |
|
---|
41 |
from raw2proc import * |
---|
42 |
from procutil import * |
---|
43 |
from ncutil import * |
---|
44 |
|
---|
45 |
# Processing timestamp (naive UTC) taken once at import and truncated to whole
# seconds.  datetime objects are immutable, so replace() returns a new object
# that has to be bound back to the name for the truncation to take effect.
now_dt = datetime.utcnow().replace(microsecond=0)
---|
47 |
|
---|
48 |
def parser(platform_info, sensor_info, lines):
    """
    Parse Automated Vertical Profiler (AVP) water quality data logged by a
    YSI 6600 V1 sonde into per-profile (ragged) arrays.

    Parameters
    ----------
    platform_info : not read by this parser (kept for the common
        parse/create/update call signature)
    sensor_info : mapping; this function reads
        'fn'         -- passed to filt_datetime()
        'nbins'      -- maximum number of samples stored per profile
        'utc_offset' -- hours added to every logged time when truthy
    lines : list of raw text lines of one data file; the list passed in
        is not modified

    Returns
    -------
    dict of numpy arrays, N = number of "Profile Time:" headers found:
        'dt'    (N,)        profile datetime objects (object dtype)
        'time'  (N,)        dt2es(profile datetime)
        'z'     (N, nbins)  negated sample depth
        'stime' (N, nbins)  dt2es(sample datetime)
        'wtemp', 'cond', 'salin', 'turb', 'chl', 'do'  (N, nbins)
    Unfilled slots are NaN.

    Notes
    -----
    1. Column Format YSI 6600 V1 has no pH.  After the six date/time
       fields each sample line carries seven values, read in this order:

       temp, cond, salin, DO, depth, turb, chl
       (C), (mS/cm), (ppt), (ug/l), (m), (NTU), (ug/l)

       NOTE(review): creator() in this module labels DO as 'mg l-1';
       confirm which unit is correct.

       (from Aug 2005 to Sep 03 2008)
       profile time: 00:00:56
       profile date: 01/31/2006
       profile location: P180, Instrument Serial No: 0001119E
       01/31/06 00:01:31 10.99 7.501 4.16 13.22 0.516 6.0 11.5
       01/31/06 00:01:32 11.00 7.463 4.13 13.22 0.526 6.0 11.4
       01/31/06 00:01:33 11.00 7.442 4.12 13.22 0.538 6.0 11.4
       01/31/06 00:01:34 11.00 7.496 4.15 13.11 0.556 6.0 11.3
       (no data from Sep 03 to 30, 2008)
       (from Sep 30 2008 to now, still YSI 6600 v1, just header change)
       Profile Time: 11:38:00
       Profile Date: 01/06/2009
       Profile Depth: 380.0 cm
       Profile Location: Hampton Shoal Serial No: 000109DD, ID: Delta
       01/06/09 11:38:44 11.16 14.59 8.49 17.86 0.171 4.5 50.4
       01/06/09 11:38:45 11.16 14.59 8.49 17.86 0.190 4.5 51.8
       01/06/09 11:38:46 11.16 14.59 8.49 17.88 0.220 4.6 53.0
       01/06/09 11:38:47 11.16 14.59 8.49 17.88 0.257 4.6 53.9
       01/06/09 11:38:48 11.16 14.59 8.49 17.88 0.448 4.6 54.3

       Header lines must arrive in the order Time, Date, Location: the
       profile datetime is built when the Date line is read and the
       per-profile buffers are (re)allocated when the Location line is read.
       All three headers are matched case-insensitively so both header
       generations above are recognised.

    2. Use a ragged array to store each uniquely measured param at each
       time and depth but not gridded, so this uses fixed profiler CDL
       but modified to have raw data for each cast along each column.
       For plotting, the data will need to be gridded at specified depth
       bins.

       Tony Whipple at IMS says 'The AVPs sample at one second intervals.
       Between the waves and the instrument descending from a spool of
       line with variable radius it works out to about 3-5 cm between
       observations on average.  When I process the data to make the
       images, I bin the data every 10 cm and take the average of however
       many observations fell within that bin.'

    3. A profile is closed (copied into the output) by the first blank
       line (a single space followed by CR LF) that follows its header.
    """
    import re
    from datetime import timedelta

    import numpy

    def nan_array(shape):
        # NaN marks "no sample"; the arrays are float so they can hold it.
        return numpy.ones(shape, dtype=float) * numpy.nan

    blank_re = re.compile("^ \r\n")
    real_re = re.compile(REAL_RE_STR)
    # '+' instead of '*': a pattern that can match the empty string splits
    # between every character on Python 3.7+, while Python 2 ignored empty
    # matches.  '+' gives the Python 2 result on both.
    token_split_re = re.compile(r"[\s/:]+")

    # NOTE(review): the value is not used below.  The lookup and call are
    # kept so that any error they raised for an unexpected sensor_info or
    # file name still surfaces -- confirm whether this can be dropped.
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)[0]

    # One output row per "Profile Time:" header.
    nprof = sum(1 for line in lines
                if re.search("Profile Time:", line, re.IGNORECASE))

    # Work on a copy so the caller's list is left untouched.
    lines = list(lines)

    # Drop blank lines found among the first 40 lines.  Filtering the slice
    # (instead of popping while walking a precomputed index range) cannot
    # skip a neighbouring line or run past the end of a short file.
    lines[:40] = [line for line in lines[:40] if not blank_re.search(line)]

    # A trailing blank line guarantees the last profile gets closed.
    lines.append(' \r\n')

    # Make sure every profile header is preceded by a blank line so the
    # previous profile is closed before the next one starts.
    spaced = []
    for line in lines:
        if (re.search(r"Profile Time", line, re.IGNORECASE)
                and spaced and not blank_re.search(spaced[-1])):
            spaced.append(" \r\n")
        spaced.append(line)
    lines = spaced

    N = nprof
    nbins = sensor_info['nbins']

    data = {
        'dt': numpy.array(numpy.ones((N,), dtype=object) * numpy.nan),
        'time': nan_array((N,)),
        'z': nan_array((N, nbins)),
        'stime': nan_array((N, nbins)),
        'wtemp': nan_array((N, nbins)),
        'cond': nan_array((N, nbins)),
        'salin': nan_array((N, nbins)),
        'turb': nan_array((N, nbins)),
        'chl': nan_array((N, nbins)),
        'do': nan_array((N, nbins)),
    }

    i = 0  # index of the profile currently being filled
    have_date = have_time = have_location = have_head = False
    verbose = False

    for line in lines:
        # Lines carrying a 0x1A (Ctrl-Z) byte are treated as corrupt.
        if '\x1a' in line:
            if verbose:
                print('skipping bad data line ... ' + str(line))
            continue

        # Every numeric token on the line, split on whitespace, '/' and ':'.
        ysi = []
        for s in token_split_re.split(line):
            m = real_re.search(s)
            if m:
                ysi.append(float(m.group(1)))

        if re.search("Profile Time:", line, re.IGNORECASE):
            have_time = True
            HH, MM, SS = ysi[0], ysi[1], ysi[2]

        elif re.search("Profile Date:", line, re.IGNORECASE):
            have_date = True
            mm, dd, yyyy = ysi[0], ysi[1], ysi[2]

            # Uses HH/MM/SS from the preceding "Profile Time:" line.
            profile_str = '%02d-%02d-%4d %02d:%02d:%02d' % (mm, dd, yyyy, HH, MM, SS)
            profile_dt = scanf_datetime(profile_str, fmt='%m-%d-%Y %H:%M:%S')
            if sensor_info['utc_offset']:
                profile_dt = profile_dt + timedelta(hours=sensor_info['utc_offset'])

        elif re.search("Profile Location:", line, re.IGNORECASE):
            # Case-insensitive like the Time/Date headers, so the lowercase
            # old-format header is recognised as well.
            have_location = True

            # Fresh NaN-filled buffers for the cast that follows.
            wtemp = nan_array(nbins)
            depth = nan_array(nbins)
            cond = nan_array(nbins)
            salin = nan_array(nbins)
            turb = nan_array(nbins)
            chl = nan_array(nbins)
            do = nan_array(nbins)
            stime = nan_array(nbins)

            j = 0  # sample counter within the cast

            have_head = have_date and have_time and have_location

        elif "Error" in line:
            if verbose:
                print('skipping bad data line ... ' + str(line))
            continue

        elif len(ysi) == 13 and have_head:
            # 6 date/time fields + 7 measurements
            if j < nbins:
                sample_str = '%02d-%02d-%02d %02d:%02d:%02d' % tuple(ysi[0:6])
                sample_dt = scanf_datetime(sample_str, fmt='%m-%d-%y %H:%M:%S')
                if sensor_info['utc_offset']:
                    sample_dt = sample_dt + timedelta(hours=sensor_info['utc_offset'])

                stime[j] = dt2es(sample_dt)
                wtemp[j] = ysi[6]
                cond[j] = ysi[7]
                salin[j] = ysi[8]
                do[j] = ysi[9]
                depth[j] = ysi[10]
                turb[j] = ysi[11]
                chl[j] = ysi[12]
            else:
                # More samples than configured slots: report and discard.
                print('Sample number (' + str(j) +
                      ') in profile exceeds maximum value (' +
                      str(nbins) + ') in config')
            j = j + 1

        elif len(ysi) == 0 and have_head and i < N:
            # Blank line after a complete header: the cast is finished.
            data['dt'][i] = profile_dt
            data['time'][i] = dt2es(profile_dt)

            data['stime'][i] = stime
            data['z'][i] = -1. * depth  # depth is positive down, z is positive up

            data['wtemp'][i] = wtemp
            data['cond'][i] = cond
            data['salin'][i] = salin
            data['turb'][i] = turb
            data['chl'][i] = chl
            data['do'][i] = do

            i = i + 1
            # Clear have_head too, so a second consecutive blank line cannot
            # store the same cast again and use up a profile slot.
            have_date = have_time = have_location = have_head = False

        else:
            if verbose:
                print('skipping bad data line ... ' + str(line))

    return data
---|
267 |
|
---|
268 |
|
---|
269 |
def creator(platform_info, sensor_info, data):
    """
    Assemble everything needed to create a new netCDF file for this sensor.

    Parameters
    ----------
    platform_info : mapping; reads 'location', 'institution', 'id',
        'lat' and 'lon'
    sensor_info : mapping; reads 'description', 'process_module', 'id'
        and 'nbins'
    data : dict as returned by parser() plus an 'in' entry that is used to
        index every data array (presumably the selection of profiles that
        belong in the file -- set by the caller, verify there)

    Returns
    -------
    (global_atts, var_atts, dim_inits, var_inits, var_data)
        global_atts : dict of file-level attributes
        var_atts    : dict of per-variable attribute dicts
        dim_inits   : tuple of (dimension name, size)
        var_inits   : tuple of (variable name, NC type, dimension names)
        var_data    : tuple of (variable name, values)
    """
    i = data['in']
    dt = data['dt'][i]

    # One formatted string for every "now" attribute below.
    now_str = now_dt.strftime("%Y-%m-%d %H:%M:%S")

    title_str = sensor_info['description'] + ' at ' + platform_info['location']
    global_atts = {
        'title': title_str,
        # NOTE(review): 'Unversity' is misspelled; left byte-identical so new
        # files match the metadata already published -- confirm before fixing.
        'institution': 'Unversity of North Carolina at Chapel Hill (UNC-CH)',
        'institution_url': 'http://nccoos.unc.edu',
        'institution_dods_url': 'http://nccoos.unc.edu',
        'metadata_url': 'http://nccoos.unc.edu',
        'references': 'http://nccoos.unc.edu',
        'contact': 'Sara Haines (haines@email.unc.edu)',

        'source': 'fixed-automated-profiler observation',
        'history': 'raw2proc using ' + sensor_info['process_module'],
        'comment': 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),

        'Conventions': 'CF-1.0; SEACOOS-CDL-v2.0',

        'format_category_code': 'fixed-profiler-ragged',
        'institution_code': platform_info['institution'],
        'platform_code': platform_info['id'],
        'package_code': sensor_info['id'],

        'project': 'North Carolina Coastal Ocean Observing System (NCCOOS)',
        'project_url': 'http://nccoos.unc.edu',

        # first and last profile time among the selected profiles
        'start_date': dt[0].strftime("%Y-%m-%d %H:%M:%S"),
        'end_date': dt[-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date': now_str,

        'creation_date': now_str,
        'modification_date': now_str,
        'process_level': 'level1',

        '_FillValue': numpy.nan,
    }

    var_atts = {
        # coordinate variables
        'time': {'short_name': 'time',
                 'long_name': 'Time of Profile',
                 'standard_name': 'time',
                 'units': 'seconds since 1970-1-1 00:00:00 -0',
                 'axis': 'T',
                 },
        'lat': {'short_name': 'lat',
                'long_name': 'Latitude',
                'standard_name': 'latitude',
                'reference': 'geographic coordinates',
                'units': 'degrees_north',
                'valid_range': (-90., 90.),
                'axis': 'Y',
                },
        'lon': {'short_name': 'lon',
                'long_name': 'Longitude',
                'standard_name': 'longitude',
                'reference': 'geographic coordinates',
                'units': 'degrees_east',
                'valid_range': (-180., 180.),
                # longitude is the X axis ('Y' belongs to latitude above)
                'axis': 'X',
                },
        'z': {'short_name': 'z',
              'long_name': 'Height',
              'standard_name': 'height',
              'reference': 'zero at sea-surface',
              'positive': 'up',
              'units': 'm',
              'axis': 'Z',
              },
        # per-sample data variables
        'stime': {'short_name': 'stime',
                  'long_name': 'Time of Sample ',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0',
                  },
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees_Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'mS cm-1',
                 },
        'salin': {'short_name': 'salin',
                  'long_name': 'Salinity',
                  'standard_name': 'salinity',
                  'units': 'PSU',
                  },
        'turb': {'short_name': 'turb',
                 'long_name': 'Turbidity',
                 'standard_name': 'turbidity',
                 'units': 'NTU',
                 },
        'chl': {'short_name': 'chl',
                'long_name': 'Chlorophyll',
                'standard_name': 'chlorophyll',
                'units': 'ug l-1',
                },
        'do': {'short_name': 'do',
               'long_name': 'Dissolved Oxygen',
               'standard_name': 'dissolved_oxygen',
               'units': 'mg l-1',
               },
    }

    # dimension names and lengths
    dim_inits = (
        ('time', NC.UNLIMITED),
        ('lat', 1),
        ('lon', 1),
        ('z', sensor_info['nbins']),
    )

    # (variable name, NC type, dimensions); names must match var_atts and
    # var_data.
    # NOTE(review): 'stime' holds epoch seconds but is declared NC.FLOAT; if
    # that is a 32-bit float, values near 1e9 cannot resolve one-second
    # steps.  Left unchanged because it alters the file schema -- confirm.
    var_inits = (
        ('time', NC.INT, ('time',)),
        ('lat', NC.FLOAT, ('lat',)),
        ('lon', NC.FLOAT, ('lon',)),
        ('z', NC.FLOAT, ('time', 'z',)),
        ('stime', NC.FLOAT, ('time', 'z')),
        ('wtemp', NC.FLOAT, ('time', 'z')),
        ('cond', NC.FLOAT, ('time', 'z')),
        ('salin', NC.FLOAT, ('time', 'z')),
        ('turb', NC.FLOAT, ('time', 'z')),
        ('chl', NC.FLOAT, ('time', 'z')),
        ('do', NC.FLOAT, ('time', 'z')),
    )

    # values for each variable, restricted to the selected profiles
    var_data = (
        ('lat', platform_info['lat']),
        ('lon', platform_info['lon']),
        ('time', data['time'][i]),
        ('stime', data['stime'][i]),
        ('z', data['z'][i]),
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('salin', data['salin'][i]),
        ('turb', data['turb'][i]),
        ('chl', data['chl'][i]),
        ('do', data['do'][i]),
    )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
---|
440 |
|
---|
441 |
def updater(platform_info, sensor_info, data):
    """
    Assemble the attributes and data needed to append to an existing file.

    Parameters
    ----------
    platform_info, sensor_info : not read here (kept for the common
        parse/create/update call signature)
    data : dict of arrays plus an 'in' entry used to index each of them

    Returns
    -------
    (global_atts, var_atts, var_data)
        global_atts : dict with the refreshed end/release/modification dates
        var_atts    : empty dict (no variable attributes are changed)
        var_data    : tuple of (variable name, values)
    """
    selected = data['in']
    profile_times = data['dt'][selected]

    date_fmt = "%Y-%m-%d %H:%M:%S"
    stamp = now_dt.strftime(date_fmt)

    # Only the attributes that change when new profiles are appended.
    global_atts = {
        'end_date': profile_times[-1].strftime(date_fmt),
        'release_date': stamp,
        'modification_date': stamp,
    }

    var_atts = {}

    # Same variables, in the same order, as the original hand-written tuple.
    names = ('time', 'stime', 'z', 'wtemp', 'cond', 'salin', 'turb', 'chl', 'do')
    var_data = tuple((name, data[name][selected]) for name in names)

    return (global_atts, var_atts, var_data)
---|
485 |
|
---|
486 |
|
---|