1 |
#!/usr/bin/env python |
---|
2 |
# Last modified: Time-stamp: <2008-01-03 09:50:53 haines> |
---|
3 |
"""Utilities to help data processing

Mostly time functions right now

TO DO:
    check_configs()
"""
---|
10 |
|
---|
11 |
__version__ = "v0.1" |
---|
12 |
__author__ = "Sara Haines <sara_haines@unc.edu>" |
---|
13 |
|
---|
14 |
from datetime import datetime, timedelta, tzinfo |
---|
15 |
from dateutil.tz import tzlocal, tzutc |
---|
16 |
import time |
---|
17 |
|
---|
18 |
def check_configs():
    """Test config files for conformity (placeholder, not yet implemented).

    The function has no executable logic yet and simply returns None.
    The notes below record the checks it is meant to perform and a draft
    of the implementation.

    Check either one or all configs for a platform:

    - id in filename == platform.id
    - datetime in filename <= platform.config_start_date
      (close in time, usually the same day)
    - also platform.config_start_date < platform.config_end_date
      (there needs to be some time that the platform was operational)
    - test existence of specific structural elements (platform info and
      sensor info) and specific fields for both platform and sensor
    - verify that for each platform_info['packages'] there is sensor_info
      and same id::

          for pi['packages'][0] in si.keys()
          pi['packages'][0] == si['adcp']['id']

    - bounds on data in fields
    - show difference between two consecutive configs?
    - pretty print to screen of dictionary info for platform and sensor info

    Draft implementation::

        cn = os.path.splitext(os.path.basename(config))[0]
        cndt = filt_datetime(os.path.basename(config))
        pi = get_config(cn+'.platform_info')
        if pi['config_start_date']:
            config_start_dt = filt_datetime(pi['config_start_date'])
        elif pi['config_start_date'] == None:
            config_start_dt = now_dt
        if pi['config_end_date']:
            config_end_dt = filt_datetime(pi['config_end_date'])
        elif pi['config_end_date'] == None:
            config_end_dt = now_dt

        print cn + ' -----------------'
        print cndt
        print config_start_dt
        print config_end_dt
        print now_dt
        print 'file date ok? ' + str(cndt <= config_start_dt)
        print 'operation date ok? ' + str(config_start_dt < config_end_dt)
    """
    # Nothing is implemented yet; make the "no result" outcome explicit.
    return None
---|
57 |
|
---|
58 |
def dt2es(dt):
    """Convert datetime object to epoch seconds (es) as seconds since Jan-01-1970

    :Parameters:
        dt : datetime
            A naive datetime is used as-is, i.e. measured against a naive
            1970-01-01 00:00:00.  A timezone-aware datetime is first
            shifted to UTC using its own utcoffset().

    :Returns:
        es : int
            Whole seconds since the epoch; the microseconds of ``dt``
            are not used.  Negative for datetimes before 1970.
    """
    offset = dt.utcoffset()
    if offset is not None:
        # Aware datetimes cannot be subtracted from the naive epoch below
        # (TypeError), so normalise them to naive UTC first.
        dt = dt.replace(tzinfo=None) - offset
    delta = dt - datetime(1970, 1, 1, 0, 0, 0)
    # delta.seconds is always in [0, 86400), also for negative deltas,
    # so days and seconds combine correctly on both sides of the epoch.
    return delta.days * 24 * 60 * 60 + delta.seconds
---|
64 |
|
---|
65 |
def es2dt(es):
    """Convert epoch seconds (es) to a naive datetime object in UTC"""
    # gmtime() breaks the epoch value into UTC calendar fields; only the
    # year..second fields are needed to build the datetime.
    utc = time.gmtime(es)
    return datetime(utc.tm_year, utc.tm_mon, utc.tm_mday,
                    utc.tm_hour, utc.tm_min, utc.tm_sec)
---|
69 |
|
---|
70 |
def find_months(year, month=1):
    """Find which months to process

    Since data are in subdirectories based on months determine
    previous, current, and next month to look in directories for data
    of the current month or month to process.

    :Parameters:
        year : int value or str 'yyyy_mm'
            When a str is given it is parsed with filt_datetime() and
            ``month`` is ignored.
        month : int value

    :Returns:
        which_months : tuple of 3 datetime objects
             (prev_month, current_month, next_month)
             prev_month and next_month are always the first day of their
             month.  current_month is the first of the month for int
             input; for str input it is whatever filt_datetime() returned.

    :Raises:
        TypeError : if ``year`` is neither an int (with int ``month``)
            nor a str
        ValueError : if no date could be parsed from a str ``year``, or
            the int values do not form a valid date

    Examples
    --------
    >>> find_months(2007, 2)
    (datetime.datetime(2007, 1, 1, 0, 0), datetime.datetime(2007, 2, 1, 0, 0), datetime.datetime(2007, 3, 1, 0, 0))
    >>> find_months('2007_02')

    """
    if isinstance(year, int) and isinstance(month, int):
        current = datetime(year, month, day=1)
    elif isinstance(year, str):
        current = filt_datetime(year)
        if current is None:
            # filt_datetime() returns None when nothing plausible is found;
            # fail here with a clear message instead of an AttributeError.
            raise ValueError("find_months: no date found in %r" % (year,))
    else:
        # Previously this fell through and failed later on an unbound name.
        raise TypeError("find_months: expected int year and month, or a "
                        "'yyyy_mm' str, got %r, %r" % (year, month))

    # Step one month back/forward, wrapping across the year boundary.
    if current.month == 1:
        prev_month = datetime(current.year - 1, 12, 1)
    else:
        prev_month = datetime(current.year, current.month - 1, 1)
    if current.month == 12:
        next_month = datetime(current.year + 1, 1, 1)
    else:
        next_month = datetime(current.year, current.month + 1, 1)

    return (prev_month, current, next_month)
---|
109 |
|
---|
110 |
def this_month():
    """Return the current month (GMT) as a string formatted 'yyyy_mm'"""
    now = time.gmtime()
    return "%4d_%02d" % (now.tm_year, now.tm_mon)
---|
114 |
|
---|
115 |
def scanf_datetime(ts, fmt='%Y-%m-%dT%H:%M:%S'):
    """Parse a date/time string into a datetime object

    The default format follows the convention YYYY-MM-DDThh:mm:ss.
    ``fmt`` is passed unchanged to time.strptime().
    """
    parsed = time.strptime(ts, fmt)
    # Only year..second are carried over into the datetime.
    return datetime(parsed.tm_year, parsed.tm_mon, parsed.tm_mday,
                    parsed.tm_hour, parsed.tm_min, parsed.tm_sec)
---|
123 |
|
---|
124 |
def filt_datetime(input_string, remove_ext=True, ref_dt=None, max_days=3650):
    """
    Following the template, (YY)YYMMDDhhmmss
    and versions of this with decreasing time precision,
    find the most precise, reasonable string match and
    return its datetime object.

    The string is searched (anywhere, not only at the start) for

        (YY)YYMMDDhhmmss, (YY)YYMMDDhhmm, (YY)YYMMDDhh, (YY)YYMMDD, (YY)YYMM

    in that order.  Each field may be followed by at most one non-digit
    separator, so forms such as YYYY_MM_DD_hh:mm:ss or YY,MM,DD,hh,mm,ss
    also pass.  Two-digit years below 50 become 20YY, 50-99 become 19YY.

    A match is accepted only if it forms a valid datetime that lies
    within ``max_days`` of ``ref_dt``.  This plausibility test is also
    what rejects mis-parses from the 2-digit-year alternative (e.g.
    '2007_02' read as year 20, month 07, day 02).

    :Parameters:
        input_string : str
        remove_ext : bool
            Strip a trailing filename extension before searching.
        ref_dt : naive datetime or None
            Reference time for the plausibility test.  None (default)
            uses the current UTC time.  Pass an explicit value when
            parsing stamps of archived data.
        max_days : int or None
            Largest accepted distance from ``ref_dt`` in whole days
            (default 3650, about 10 years).  None disables the test.

    :Returns:
        dt : datetime or None
            None when nothing acceptable is found; a message is printed
            in that case.
    """
    # Imported here, as before, because the module does not import them
    # at file level.
    from os.path import splitext
    import re

    if remove_ext:
        input_string = splitext(input_string)[0]

    if ref_dt is None:
        # current UTC time as a naive datetime (whole seconds)
        ref_dt = datetime(*time.gmtime()[0:6])

    year_pat = r'(\d{4}|\d{2})'     # 4- or 2-digit YEAR (e.g. '2007' or '07')
    field_pat = r'\D?(\d{2})'       # optional 1-char separator + 2-digit field

    # Number of 2-digit fields after the year, from precise to coarse:
    # 5 = MM DD hh mm ss, 4 = MM DD hh mm, 3 = MM DD hh, 2 = MM DD, 1 = MM
    for n_fields in (5, 4, 3, 2, 1):
        match = re.search(year_pat + field_pat * n_fields, input_string)
        if match is None:
            continue
        values = [int(group) for group in match.groups()]

        # expand a 2-digit year
        if values[0] < 50:
            values[0] += 2000
        elif values[0] < 100:
            values[0] += 1900

        # datetime() needs at least year, month, day
        if len(values) == 2:
            values.append(1)        # first day of month

        try:
            dt = datetime(*values)
        except ValueError:
            # a match does not mean it makes sense (e.g. month 13);
            # something parsed wrong, try the next coarser template
            continue

        # abs() of the timedelta, not of its .days: negative timedeltas
        # floor their days, which made past dates count one day further
        # away than future ones.
        if max_days is None or abs(dt - ref_dt).days <= max_days:
            return dt

    # Single-argument print() gives the same output on Python 2 and 3.
    print('filt_datetime: No date found in  %s' % (input_string,))
    return None
---|
261 |
|
---|
262 |
def display_time_diff(diff):
    """Display time difference as HH:MM, using number of days (D)
    if necessary

    :Parameters:
        diff : timedelta

    :Returns:
        text : str
            'HH:MM' below one day, 'HH:MM' with hours 24-47 for exactly
            one whole day plus remainder, 'N Days HH:MM' from two days
            on.  Seconds are dropped.  Negative differences are shown as
            the same text for the absolute value prefixed with '-'.
    """
    sign = ''
    if diff.days < 0:
        # A negative timedelta is stored as negative days plus positive
        # seconds (-1 hour is days=-1, seconds=82800), so formatting its
        # fields directly would show a wrong positive time.
        sign = '-'
        diff = -diff

    days = diff.days
    hours, remainder = divmod(diff.seconds, 3600)
    minutes = remainder // 60

    if days == 1:
        text = "%02d:%02d" % (24 + hours, minutes)
    elif days > 1:
        text = "%d Days %02d:%02d" % (days, hours, minutes)
    else:
        text = "%02d:%02d" % (hours, minutes)
    return sign + text
---|
278 |
|
---|
279 |
# |
---|