1 |
#!/usr/bin/env python |
---|
2 |
# Last modified: Time-stamp: <2009-06-19 15:42:44 haines> |
---|
3 |
"""Utilities to help data processing |
---|
4 |
|
---|
5 |
Mostly time functions right now |
---|
6 |
|
---|
7 |
TO DO: |
---|
8 |
check_configs() |
---|
9 |
unit conversions (udunits?) |
---|
10 |
""" |
---|
11 |
|
---|
12 |
__version__ = "v0.1" |
---|
13 |
__author__ = "Sara Haines <sara_haines@unc.edu>" |
---|
14 |
|
---|
15 |
import os.path |
---|
16 |
from datetime import datetime, timedelta, tzinfo |
---|
17 |
from dateutil.tz import tzlocal, tzutc |
---|
18 |
import time |
---|
19 |
import math |
---|
20 |
|
---|
21 |
from ncutil import * |
---|
22 |
|
---|
23 |
def check_configs(): |
---|
24 |
"""Test config files for comformnity |
---|
25 |
|
---|
26 |
check either one or all for a platform |
---|
27 |
|
---|
28 |
id in filename == platform.id |
---|
29 |
datetime in filename <= platform.config_start_date |
---|
30 |
(close in time usually the same day |
---|
31 |
also platform.config_start_date < platform.config_end_date |
---|
32 |
(there needs to be some time that the platform was operational) |
---|
33 |
test existence of specific structural elements (platform info and sensor info) |
---|
34 |
and specific fields for both platform and sensor |
---|
35 |
verify that for each platform_info['packages'] there is sensor_info and same id |
---|
36 |
for pi['packages'][0] in si.keys() |
---|
37 |
pi['packages'][0] == si['adcp']['id'] |
---|
38 |
bounds on data in fields |
---|
39 |
show difference between two consecutive configs? |
---|
40 |
pretty print to screen of dictionary info for platform and sensor info |
---|
41 |
|
---|
42 |
cn = os.path.splitext(os.path.basename(config))[0] |
---|
43 |
cndt = filt_datetime(os.path.basename(config))[0] |
---|
44 |
pi = get_config(cn+'.platform_info') |
---|
45 |
if pi['config_start_date']: |
---|
46 |
config_start_dt = filt_datetime(pi['config_start_date'])[0] |
---|
47 |
elif pi['config_start_date'] == None: |
---|
48 |
config_start_dt = now_dt |
---|
49 |
if pi['config_end_date']: |
---|
50 |
config_end_dt = filt_datetime(pi['config_end_date'])[0] |
---|
51 |
elif pi['config_end_date'] == None: |
---|
52 |
config_end_dt = now_dt |
---|
53 |
|
---|
54 |
print cn + ' -----------------' |
---|
55 |
print cndt |
---|
56 |
print config_start_dt |
---|
57 |
print config_end_dt |
---|
58 |
print now_dt |
---|
59 |
print 'file date ok? ' + str(cndt <= config_start_dt) |
---|
60 |
print 'operation date ok? ' + str(config_start_dt < config_end_dt) |
---|
61 |
""" |
---|
62 |
|
---|
63 |
def dt2es(dt): |
---|
64 |
"""Convert datetime object to epoch seconds (es) as seconds since Jan-01-1970 """ |
---|
65 |
# microseconds of timedelta object not used |
---|
66 |
delta = dt - datetime(1970,1,1,0,0,0) |
---|
67 |
es = delta.days*24*60*60 + delta.seconds |
---|
68 |
return es |
---|
69 |
|
---|
70 |
def es2dt(es): |
---|
71 |
""" Convert epoch seconds (es) to datetime object""" |
---|
72 |
dt = datetime(*time.gmtime(es)[0:6]) |
---|
73 |
return dt |
---|
74 |
|
---|
75 |
def find_months(year, month=1): |
---|
76 |
"""Find which months to process |
---|
77 |
|
---|
78 |
Since data are in subdirectories based on months determine |
---|
79 |
previous, current, and next month to look in directories for data |
---|
80 |
of the current month or month to process. |
---|
81 |
|
---|
82 |
:Parameters: |
---|
83 |
year : int value or str 'yyyy_mm' |
---|
84 |
month : int value |
---|
85 |
|
---|
86 |
:Returns: |
---|
87 |
which_months : tuple of 3 datetime objects |
---|
88 |
(prev_month, current_month, next_month) |
---|
89 |
|
---|
90 |
Examples |
---|
91 |
-------- |
---|
92 |
>>> find_months(2007, 2) |
---|
93 |
>>> find_months('2007_02') |
---|
94 |
|
---|
95 |
""" |
---|
96 |
if type(year) == int and type(month) == int : |
---|
97 |
dt = datetime(year, month, day=1) |
---|
98 |
this_month = dt |
---|
99 |
elif type(year) == str : |
---|
100 |
dt = filt_datetime(year)[0] |
---|
101 |
this_month = dt |
---|
102 |
# |
---|
103 |
if dt.month == 1: # if January |
---|
104 |
prev_month = datetime(dt.year-1, month=12, day=1) # Dec |
---|
105 |
next_month = datetime(dt.year, dt.month+1, day=1) # Feb |
---|
106 |
elif dt.month == 12: # if December |
---|
107 |
prev_month = datetime(dt.year, dt.month-1, day=1) # Nov |
---|
108 |
next_month = datetime(dt.year+1, month=1, day=1) # Jan |
---|
109 |
else: |
---|
110 |
prev_month = datetime(dt.year, dt.month-1, day=1) |
---|
111 |
next_month = datetime(dt.year, dt.month+1, day=1) |
---|
112 |
# |
---|
113 |
return (prev_month, this_month, next_month) |
---|
114 |
|
---|
115 |
def this_month(): |
---|
116 |
"""Return this month (GMT) as formatted string (yyyy_mm) """ |
---|
117 |
this_month_str = "%4d_%02d" % time.gmtime()[0:2] |
---|
118 |
return this_month_str |
---|
119 |
|
---|
120 |
def scanf_datetime(ts, fmt='%Y-%m-%dT%H:%M:%S'): |
---|
121 |
"""Convert string representing date and time to datetime object""" |
---|
122 |
# default string format follows convention YYYY-MM-DDThh:mm:ss |
---|
123 |
|
---|
124 |
t = time.strptime(ts, fmt) |
---|
125 |
# the '*' operator unpacks the tuple, producing the argument list. |
---|
126 |
dt = datetime(*t[0:6]) |
---|
127 |
return dt |
---|
128 |
|
---|
129 |
def filt_datetime(input_string, remove_ext=True): |
---|
130 |
""" |
---|
131 |
Following the template, (YY)YYMMDDhhmmss |
---|
132 |
and versions with of this with decreasing time precision, |
---|
133 |
find the most precise, reasonable string match and |
---|
134 |
return its datetime object. |
---|
135 |
""" |
---|
136 |
|
---|
137 |
# remove any trailing filename extension |
---|
138 |
from os.path import splitext |
---|
139 |
import re |
---|
140 |
if remove_ext: |
---|
141 |
(s, e) = splitext(input_string) |
---|
142 |
input_string = s |
---|
143 |
|
---|
144 |
# YYYYMMDDhhmmss and should handle most cases of the stamp |
---|
145 |
# other forms this should pass |
---|
146 |
# YY_MM_DD_hh:mm:ss |
---|
147 |
# YYYY_MM_DD_hh:mm:ss |
---|
148 |
# YYYY,MM,DD,hh,mm,ss |
---|
149 |
# YY,MM,DD,hh,mm,ss |
---|
150 |
|
---|
151 |
case1_regex = r""" |
---|
152 |
# case 1: (YY)YYMMDDhhmmss |
---|
153 |
(\d{4}|\d{2}) # 2- or 4-digit YEAR (e.g. '07' or '2007') |
---|
154 |
\D? # optional 1 character non-digit separator (e.g. ' ' or '-') |
---|
155 |
(\d{2}) # 2-digit MONTH (e.g. '12') |
---|
156 |
\D? # optional 1 character non-digit separator |
---|
157 |
(\d{2}) # 2-digit DAY of month (e.g. '10') |
---|
158 |
\D? # optional 1 character non-digit separator (e.g. ' ' or 'T') |
---|
159 |
(\d{2}) # 2-digit HOUR (e.g. '10') |
---|
160 |
\D? # optional 1 character non-digit separator (e.g. ' ' or ':') |
---|
161 |
(\d{2}) # 2-digit MINUTE (e.g. '10') |
---|
162 |
\D? # optional 1 character non-digit separator (e.g. ' ' or ':') |
---|
163 |
(\d{2}) # 2-digit SECOND (e.g. '10') |
---|
164 |
""" |
---|
165 |
|
---|
166 |
case2_regex = r""" |
---|
167 |
# case 2: (YY)YYMMDDhhmm (no seconds) |
---|
168 |
(\d{4}|\d{2}) # 2- or 4-digit YEAR |
---|
169 |
\D? # optional 1 character non-digit separator (e.g. ' ' or '-') |
---|
170 |
(\d{2}) # 2-digit MONTH |
---|
171 |
\D? # optional 1 character non-digit separator |
---|
172 |
(\d{2}) # 2-digit DAY |
---|
173 |
\D? # optional 1 character non-digit separator (e.g. ' ' or 'T') |
---|
174 |
(\d{2}) # 2-digit HOUR |
---|
175 |
\D? # optional 1 character non-digit separator (e.g. ' ' or ':') |
---|
176 |
(\d{2}) # 2-digit MINUTE |
---|
177 |
""" |
---|
178 |
|
---|
179 |
case3_regex = r""" |
---|
180 |
# case 3: (YY)YYMMDDhh (no seconds, no minutes) |
---|
181 |
(\d{4}|\d{2}) # 2- or 4-digit YEAR |
---|
182 |
\D? # optional 1 character non-digit separator (e.g. ' ' or '-') |
---|
183 |
(\d{2}) # 2-digit MONTH |
---|
184 |
\D? # optional 1 character non-digit separator |
---|
185 |
(\d{2}) # 2-digit DAY |
---|
186 |
\D? # optional 1 character non-digit separator (e.g. ' ' or 'T') |
---|
187 |
(\d{2}) # 2-digit HOUR |
---|
188 |
""" |
---|
189 |
|
---|
190 |
case4_regex = r""" |
---|
191 |
# case 4: (YY)YYMMDD (no time values, just date) |
---|
192 |
(\d{4}|\d{2}) # 2- or 4-digit YEAR |
---|
193 |
\D? # optional 1 character non-digit separator (e.g. ' ' or '-') |
---|
194 |
(\d{2}) # 2-digit MONTH |
---|
195 |
\D? # optional 1 character non-digit separator |
---|
196 |
(\d{2}) # 2-digit DAY |
---|
197 |
""" |
---|
198 |
|
---|
199 |
case5_regex = r""" |
---|
200 |
# case 5: (YY)YYMM (no time values, just month year) |
---|
201 |
(\d{4}|\d{2}) # 2- or 4-digit YEAR |
---|
202 |
\D? # optional 1 character non-digit separator (e.g. ' ' or '-') |
---|
203 |
(\d{2}) # 2-digit MONTH |
---|
204 |
""" |
---|
205 |
|
---|
206 |
## Verbose regular expressions require use of re.VERBOSE flag. |
---|
207 |
## so we can use multiline regexp |
---|
208 |
|
---|
209 |
# cases are ordered from precise to more coarse resolution of time |
---|
210 |
cases = [case1_regex, case2_regex, case3_regex, case4_regex, case5_regex] |
---|
211 |
patterns = [re.compile(c, re.VERBOSE) for c in cases] |
---|
212 |
matches = [p.search(input_string) for p in patterns] |
---|
213 |
|
---|
214 |
# for testing, try to computer datetime objects |
---|
215 |
# just because there is a match does not mean it makes sense |
---|
216 |
for ind in range(len(matches)): |
---|
217 |
if bool(matches[ind]): |
---|
218 |
# print matches[ind].groups() |
---|
219 |
bits = matches[ind].groups() |
---|
220 |
values = [int(yi) for yi in bits] |
---|
221 |
# check for 2-digit year |
---|
222 |
if values[0] < 50: |
---|
223 |
values[0] += 2000 |
---|
224 |
elif values[0]>=50 and values[0]<100: |
---|
225 |
values[0] += 1900 |
---|
226 |
# |
---|
227 |
# we must have at least 3 arg input to datetime |
---|
228 |
if len(values)==1: |
---|
229 |
values.extend([1,1]) # add First of January |
---|
230 |
elif len(values)==2: |
---|
231 |
values.extend([1]) # add first day of month |
---|
232 |
|
---|
233 |
# |
---|
234 |
# compute dt |
---|
235 |
try: |
---|
236 |
dt = datetime(*values) |
---|
237 |
except ValueError, e: |
---|
238 |
# value error if something not valid for datetime |
---|
239 |
# e.g. month 1...12, something parsed wrong |
---|
240 |
dt = None |
---|
241 |
else: |
---|
242 |
# absolute difference in days from now (UTC) |
---|
243 |
z = dt - datetime.utcnow() |
---|
244 |
daysdiff = abs(z.days) |
---|
245 |
# if this date unreasonable (>10 years*365), throw it out |
---|
246 |
# something parsed wrong |
---|
247 |
if daysdiff > 3650: |
---|
248 |
dt = None |
---|
249 |
else: |
---|
250 |
dt = None |
---|
251 |
|
---|
252 |
# place datetime object or None within sequence of matches |
---|
253 |
matches[ind] = dt |
---|
254 |
|
---|
255 |
# find the first (most precise) date match since there might be more than |
---|
256 |
# as we searched more coarse templates, but now we have thrown out |
---|
257 |
|
---|
258 |
b = [bool(x) for x in matches] |
---|
259 |
try: |
---|
260 |
ind = b.index(True) |
---|
261 |
except ValueError, e: |
---|
262 |
print 'filt_datetime: No date found in ', input_string |
---|
263 |
dt = None |
---|
264 |
else: |
---|
265 |
dt = matches[ind] |
---|
266 |
return dt,ind |
---|
267 |
|
---|
268 |
def display_time_diff(diff): |
---|
269 |
"""Display time difference in HH:MM:DD using number weeks (W) |
---|
270 |
and days (D) if necessary""" |
---|
271 |
# weeks, days = divmod(diff.days, 7) |
---|
272 |
days = diff.days |
---|
273 |
minutes, seconds = divmod(diff.seconds, 60) |
---|
274 |
hours, minutes = divmod(minutes, 60) |
---|
275 |
# if (weeks>2 and days>0): |
---|
276 |
# str = "%d Weeks, %d Days %02d:%02d" % (days, hours, minutes) |
---|
277 |
if (days==1): |
---|
278 |
str = "%02d:%02d" % (24+hours, minutes) |
---|
279 |
elif (days>1): |
---|
280 |
str = "%d Days %02d:%02d" % (days, hours, minutes) |
---|
281 |
else: |
---|
282 |
str = "%02d:%02d" % (hours, minutes) |
---|
283 |
return str |
---|
284 |
|
---|
285 |
def copy_loop_sequence(src, dst, fn_glob, numFiles=24): |
---|
286 |
""" """ |
---|
287 |
# src = '/seacoos/data/nccoos/level3/bogue/adcpwaves/dspec/'+this_month.strftime("%Y_%m") |
---|
288 |
# dst = '/home/haines/rayleigh/loop/' |
---|
289 |
# fn_glob = 'bogue_dspec_plot*' |
---|
290 |
|
---|
291 |
def addnan(dt, data): |
---|
292 |
# dt to be only 1-dimension and data to be 1- or 2-dimensional |
---|
293 |
|
---|
294 |
from matplotlib.dates import date2num, num2date |
---|
295 |
|
---|
296 |
# print dt.shape |
---|
297 |
# print data.shape |
---|
298 |
|
---|
299 |
dn = date2num(dt) |
---|
300 |
delta = numpy.diff(dn) |
---|
301 |
sample_interval = numpy.median(delta) |
---|
302 |
maxdelta = 1.5*sample_interval |
---|
303 |
# print maxdelta |
---|
304 |
igap = (delta > maxdelta).nonzero()[0] |
---|
305 |
ngap = len(igap) |
---|
306 |
if not ngap: |
---|
307 |
return (dt, data) |
---|
308 |
else: |
---|
309 |
# convert sample interval to dt object |
---|
310 |
sample_interval = timedelta(0.5*sample_interval) |
---|
311 |
# for each gap in data create NaN |
---|
312 |
data_insert = [numpy.nan for gap in igap] |
---|
313 |
# for each gap in time create datetime value |
---|
314 |
dt_insert = [dt[gap]+sample_interval for gap in igap] |
---|
315 |
# insert new sample times at indices of the gaps |
---|
316 |
new_dt = numpy.insert(numpy.array(dt), igap+1, dt_insert) |
---|
317 |
# insert NaN data at the indices that match the above times |
---|
318 |
new_data = numpy.insert(numpy.array(data), igap+1, data_insert, axis=0) |
---|
319 |
return (new_dt, new_data) |
---|
320 |
|
---|
321 |
# unit conversions |
---|
322 |
def meters2feet(meters): |
---|
323 |
"""Convert meters to feet: <feet> = <meters>*3.28084 """ |
---|
324 |
return meters*3.28084 |
---|
325 |
|
---|
326 |
def feet2meters(feet): |
---|
327 |
"""Convert feet to meters: <meters> = <feet>*0.3048 """ |
---|
328 |
return feet*0.3048 |
---|
329 |
|
---|
330 |
def millibar2inches_Hg(millibar): |
---|
331 |
"""Convert millibars to inches Hg: <inches_Hg> = <millibar>*0.0295301 """ |
---|
332 |
return millibar*0.0295301 |
---|
333 |
|
---|
334 |
def celsius2fahrenheit(celsius): |
---|
335 |
"""Convert deg Celsius to deg Fahrenheit: <fahrenheit> = ((1.8*<celsius>)+32) """ |
---|
336 |
return (1.8*celsius)+32 |
---|
337 |
|
---|
338 |
def millimeters2inches(millimeters): |
---|
339 |
""" Convert millimeter to inches: <inches> = <millimeters>*0.0393700787) """ |
---|
340 |
return millimeters*0.0393700787 |
---|
341 |
|
---|
342 |
def inches2millimeters(inches): |
---|
343 |
""" Convert <mm> = <inches>*25.4 """ |
---|
344 |
return inches*25.4 |
---|
345 |
|
---|
346 |
def meters_sec2knots(meters_sec): |
---|
347 |
""" Convert m/s to knots: <knots> = <meters_sec>*1.94384449) """ |
---|
348 |
return meters_sec*1.94384449 |
---|
349 |
|
---|
350 |
def wind_vector2u(wind_speed, wind_from_direction): |
---|
351 |
""" Convert wind vector to U (east) component: <u> = <wind_speed>*sine(<wind_from_direction>*pi/180) """ |
---|
352 |
return wind_speed*math.sin(wind_from_direction*math.pi/180) |
---|
353 |
|
---|
354 |
def wind_vector2v(wind_speed, wind_from_direction): |
---|
355 |
""" Convert wind vector to V (north) component: <v> = <wind_speed>*cosine(<wind_from_direction>*pi/180) """ |
---|
356 |
return wind_speed*math.cos(wind_from_direction*math.pi/180) |
---|
357 |
|
---|
358 |
def proc2latest(pi, si, yyyy_mm): |
---|
359 |
"""Select specific variables and times from current monthly netCDF |
---|
360 |
and post as latest data. TEST MODE. |
---|
361 |
|
---|
362 |
For each active config file, load specific variables from NCCOOS |
---|
363 |
monthly netCDF, make any necessary changes to data or attributes |
---|
364 |
conform to SEACOOS Data Model, subset data (last 48 hours), and |
---|
365 |
create new netCDF file in latest netCDF directory. |
---|
366 |
|
---|
367 |
NOTE: In test mode right now. See auto() function for similar action. |
---|
368 |
|
---|
369 |
""" |
---|
370 |
|
---|
371 |
platform = pi['id'] |
---|
372 |
package = si['id'] |
---|
373 |
# input file |
---|
374 |
si['proc_filename'] = '%s_%s_%s.nc' % (platform, package, yyyy_mm) |
---|
375 |
ifn = os.path.join(si['proc_dir'], si['proc_filename']) |
---|
376 |
# output file |
---|
377 |
si['latest_filename'] = 'nccoos_%s_%s_latest.nc' % (platform, package) |
---|
378 |
ofn = os.path.join(si['latest_dir'], si['latest_filename']) |
---|
379 |
if os.path.exists(ifn): |
---|
380 |
print ' ... ... latest : %s ' % (ofn,) |
---|
381 |
# get dt from current month file |
---|
382 |
(es, units) = nc_get_time(ifn) |
---|
383 |
dt = [es2dt(e) for e in es] |
---|
384 |
last_dt = dt[-1] |
---|
385 |
else: |
---|
386 |
# no input then remove output if exists and exit |
---|
387 |
print " ... ... latest: NO latest file created" |
---|
388 |
if os.path.exists(ofn): |
---|
389 |
os.remove(ofn) |
---|
390 |
return |
---|
391 |
|
---|
392 |
# determine which index of data is within the specified timeframe (last 2 days) |
---|
393 |
n = len(dt) |
---|
394 |
idx = numpy.array([False for i in range(n)]) |
---|
395 |
for i, val in enumerate(dt): |
---|
396 |
if val>last_dt-timedelta(days=2) and val<=last_dt+timedelta(seconds=360): |
---|
397 |
idx[i] = True |
---|
398 |
dt = numpy.array(dt) |
---|
399 |
dt = dt[idx] |
---|
400 |
|
---|
401 |
# read in data and unpack tuple |
---|
402 |
d = nc_load(ifn, si['latest_vars']) |
---|
403 |
global_atts, var_atts, dim_inits, var_inits, var_data = d |
---|
404 |
list_of_record_vars = nc_find_record_vars(ifn) |
---|
405 |
|
---|
406 |
# turn off unlimited dimension (SH NOTE: As of pycdf-0.6-3b cannot |
---|
407 |
# delete a dimension or reset unlimited to limited within either |
---|
408 |
# CDF or CDFDim class, so doing it manually here by setting list |
---|
409 |
# 'dim_init' before creating of new netcdf.) |
---|
410 |
dim_inits = list(dim_inits) |
---|
411 |
for i in range(len(dim_inits)): |
---|
412 |
if dim_inits[i][1]==0: |
---|
413 |
dim_inits[i] = ('ntime', len(dt)) |
---|
414 |
dim_inits = tuple(dim_inits) |
---|
415 |
|
---|
416 |
# subset data |
---|
417 |
varNames = [vn for vn, vt, vd in var_inits] |
---|
418 |
var_data = list(var_data) |
---|
419 |
for i in range(len(varNames)): |
---|
420 |
vn, vd = var_data[i] |
---|
421 |
|
---|
422 |
if vn in list_of_record_vars: |
---|
423 |
var_data[i]=(vn, vd[idx]) |
---|
424 |
var_data = tuple(var_data) |
---|
425 |
|
---|
426 |
global_atts['start_date'] = dt[0].strftime('%Y-%m-%d %H:%M:%S') |
---|
427 |
d = (global_atts, var_atts, dim_inits, var_inits, var_data) |
---|
428 |
|
---|
429 |
# write latest data |
---|
430 |
nc_create(ofn, d) |
---|
431 |
|
---|
432 |
# quick way to rename dimensions |
---|
433 |
nc_rename_dimension(ofn, 'ntime', 'time') |
---|
434 |
nc_rename_dimension(ofn, 'nlat', 'lat') |
---|
435 |
nc_rename_dimension(ofn, 'nlon', 'lon') |
---|
436 |
nc_rename_dimension(ofn, 'nz', 'z') |
---|
437 |
|
---|
438 |
# global replace _FillValue |
---|
439 |
nc_replace_fillvalue(ofn, -99999.0) |
---|