NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/procutil.py

Revision 218 (checked in by haines, 15 years ago)

AVP fixed profiler CDL2 implemented

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2008-12-17 16:49:11 haines>
3 """Utilities to help data processing
4
5    Mostly time functions right now
6
7    TO DO:
8    check_configs()
9    unit conversions (udunits?)
10 """
11
12 __version__ = "v0.1"
13 __author__ = "Sara Haines <sara_haines@unc.edu>"
14
15 import os.path
16 from datetime import datetime, timedelta, tzinfo
17 from dateutil.tz import tzlocal, tzutc
18 import time
19 import math
20
21 from ncutil import *
22
23 def check_configs():
24     """Test config files for comformnity
25
26     check either one or all for a platform
27     
28     id in filename == platform.id
29     datetime in filename <= platform.config_start_date
30        (close in time usually the same day
31     also platform.config_start_date < platform.config_end_date
32        (there needs to be some time that the platform was operational)
33     test existence of specific structural elements (platform info and sensor info)
34     and specific fields for both platform and sensor
35     verify that for each platform_info['packages'] there is sensor_info and same id
36       for pi['packages'][0] in si.keys()
37       pi['packages'][0] == si['adcp']['id']
38     bounds on data in fields
39     show difference between two consecutive configs?
40     pretty print to screen of dictionary info for platform and sensor info
41     
42         cn = os.path.splitext(os.path.basename(config))[0]
43         cndt = filt_datetime(os.path.basename(config))[0]
44         pi = get_config(cn+'.platform_info')
45         if pi['config_start_date']:
46             config_start_dt = filt_datetime(pi['config_start_date'])[0]
47         elif pi['config_start_date'] == None:
48             config_start_dt = now_dt
49         if pi['config_end_date']:
50             config_end_dt = filt_datetime(pi['config_end_date'])[0]
51         elif pi['config_end_date'] == None:
52             config_end_dt = now_dt
53
54         print cn + ' -----------------'
55         print cndt
56         print config_start_dt
57         print config_end_dt
58         print now_dt
59         print 'file date ok? ' + str(cndt <= config_start_dt)
60         print 'operation date ok? ' + str(config_start_dt < config_end_dt)
61     """
62
63 def dt2es(dt):
64     """Convert datetime object to epoch seconds (es) as seconds since Jan-01-1970 """
65     # microseconds of timedelta object not used
66     delta = dt - datetime(1970,1,1,0,0,0)
67     es = delta.days*24*60*60 + delta.seconds
68     return es
69
70 def es2dt(es):
71     """ Convert epoch seconds (es) to datetime object"""
72     dt = datetime(*time.gmtime(es)[0:6])
73     return dt
74
75 def find_months(year, month=1):
76     """Find which months to process
77
78     Since data are in subdirectories based on months determine
79     previous, current, and next month to look in directories for data
80     of the current month or month to process.
81
82     :Parameters:
83         year : int value or str 'yyyy_mm'
84         month : int value
85
86     :Returns:
87         which_months : tuple of 3 datetime objects
88              (prev_month, current_month, next_month)
89
90     Examples
91     --------
92     >>> find_months(2007, 2)
93     >>> find_months('2007_02')
94     
95     """
96     if type(year) == int and type(month) == int :
97         dt = datetime(year, month, day=1)
98         this_month = dt
99     elif type(year) == str :
100         dt = filt_datetime(year)[0]
101         this_month = dt
102     #
103     if dt.month == 1: # if January
104         prev_month = datetime(dt.year-1, month=12, day=1) # Dec
105         next_month = datetime(dt.year, dt.month+1, day=1) # Feb
106     elif dt.month == 12: # if December
107         prev_month = datetime(dt.year, dt.month-1, day=1) # Nov
108         next_month = datetime(dt.year+1, month=1, day=1)  # Jan
109     else:
110         prev_month = datetime(dt.year, dt.month-1, day=1)
111         next_month = datetime(dt.year, dt.month+1, day=1)
112     #
113     return (prev_month, this_month, next_month)
114
115 def this_month():
116     """Return this month (GMT) as formatted string (yyyy_mm) """
117     this_month_str = "%4d_%02d" % time.gmtime()[0:2]
118     return this_month_str
119
120 def scanf_datetime(ts, fmt='%Y-%m-%dT%H:%M:%S'):
121     """Convert string representing date and time to datetime object"""
122     # default string format follows convention YYYY-MM-DDThh:mm:ss
123     
124     t = time.strptime(ts, fmt)
125     # the '*' operator unpacks the tuple, producing the argument list.
126     dt = datetime(*t[0:6])
127     return dt
128
129 def filt_datetime(input_string, remove_ext=True):
130     """
131     Following the template, (YY)YYMMDDhhmmss
132     and versions with of this with decreasing time precision,
133     find the most precise, reasonable string match and
134     return its datetime object.
135     """
136
137     # remove any trailing filename extension
138     from os.path import splitext
139     import re
140     if remove_ext:
141         (s, e) = splitext(input_string)
142         input_string = s
143    
144     # YYYYMMDDhhmmss and should handle most cases of the stamp
145     # other forms this should pass
146     # YY_MM_DD_hh:mm:ss
147     # YYYY_MM_DD_hh:mm:ss
148     # YYYY,MM,DD,hh,mm,ss
149     # YY,MM,DD,hh,mm,ss
150
151     case1_regex = r"""
152     # case 1: (YY)YYMMDDhhmmss
153     (\d{4}|\d{2})     # 2- or 4-digit YEAR (e.g. '07' or '2007')
154     \D?               # optional 1 character non-digit separator (e.g. ' ' or '-')
155     (\d{2})           # 2-digit MONTH (e.g. '12')
156     \D?               # optional 1 character non-digit separator
157     (\d{2})           # 2-digit DAY of month (e.g. '10')
158     \D?               # optional 1 character non-digit separator (e.g. ' ' or 'T')
159     (\d{2})           # 2-digit HOUR (e.g. '10')
160     \D?               # optional 1 character non-digit separator (e.g. ' ' or ':')
161     (\d{2})           # 2-digit MINUTE (e.g. '10')
162     \D?               # optional 1 character non-digit separator (e.g. ' ' or ':')
163     (\d{2})           # 2-digit SECOND (e.g. '10')
164     """
165
166     case2_regex = r"""
167     # case 2: (YY)YYMMDDhhmm (no seconds)
168     (\d{4}|\d{2})     # 2- or 4-digit YEAR
169     \D?               # optional 1 character non-digit separator (e.g. ' ' or '-')
170     (\d{2})           # 2-digit MONTH
171     \D?               # optional 1 character non-digit separator
172     (\d{2})           # 2-digit DAY
173     \D?               # optional 1 character non-digit separator (e.g. ' ' or 'T')
174     (\d{2})           # 2-digit HOUR
175     \D?               # optional 1 character non-digit separator (e.g. ' ' or ':')
176     (\d{2})           # 2-digit MINUTE
177     """
178
179     case3_regex = r"""
180     # case 3: (YY)YYMMDDhh (no seconds, no minutes)
181     (\d{4}|\d{2})     # 2- or 4-digit YEAR
182     \D?               # optional 1 character non-digit separator (e.g. ' ' or '-')
183     (\d{2})           # 2-digit MONTH
184     \D?               # optional 1 character non-digit separator
185     (\d{2})           # 2-digit DAY
186     \D?               # optional 1 character non-digit separator (e.g. ' ' or 'T')
187     (\d{2})           # 2-digit HOUR
188     """
189
190     case4_regex = r"""
191     # case 4: (YY)YYMMDD (no time values, just date)
192     (\d{4}|\d{2})     # 2- or 4-digit YEAR
193     \D?               # optional 1 character non-digit separator (e.g. ' ' or '-')
194     (\d{2})           # 2-digit MONTH
195     \D?               # optional 1 character non-digit separator
196     (\d{2})           # 2-digit DAY
197     """
198
199     case5_regex = r"""
200     # case 5: (YY)YYMM (no time values, just month year)
201     (\d{4}|\d{2})     # 2- or 4-digit YEAR
202     \D?               # optional 1 character non-digit separator (e.g. ' ' or '-')
203     (\d{2})           # 2-digit MONTH
204     """
205
206     ##  Verbose regular expressions require use of re.VERBOSE flag.
207     ##  so we can use multiline regexp
208
209     # cases are ordered from precise to more coarse resolution of time
210     cases = [case1_regex, case2_regex, case3_regex, case4_regex, case5_regex]
211     patterns = [re.compile(c, re.VERBOSE) for c in cases]
212     matches = [p.search(input_string) for p in patterns]
213
214     # for testing, try to computer datetime objects
215     # just because there is a match does not mean it makes sense
216     for ind in range(len(matches)):
217         if bool(matches[ind]):
218             # print matches[ind].groups()
219             bits = matches[ind].groups()
220             values = [int(yi) for yi in bits]
221             # check for 2-digit year
222             if values[0] < 50:
223                 values[0] += 2000
224             elif values[0]>=50 and values[0]<100:
225                 values[0] += 1900
226             #
227             # we must have at least 3 arg input to datetime
228             if len(values)==1:
229                 values.extend([1,1]) # add First of January
230             elif len(values)==2:
231                 values.extend([1]) # add first day of month
232
233             #
234             # compute dt
235             try:
236                 dt = datetime(*values)
237             except ValueError, e:
238                 # value error if something not valid for datetime
239                 # e.g. month 1...12, something parsed wrong
240                 dt = None
241             else:
242                 # absolute difference in days from now (UTC)
243                 z = dt - datetime.utcnow()
244                 daysdiff = abs(z.days)
245                 # if this date unreasonable (>10 years*365), throw it out
246                 # something parsed wrong
247                 if daysdiff > 3650:
248                     dt = None               
249         else:
250             dt = None
251
252         # place datetime object or None within sequence of matches
253         matches[ind] = dt
254
255     # find the first (most precise) date match since there might be more than
256     # as we searched more coarse templates, but now we have thrown out
257     
258     b = [bool(x) for x in matches]
259     try:
260         ind = b.index(True)
261     except ValueError, e:
262         print 'filt_datetime: No date found in ', input_string
263         dt = None
264     else:
265        dt = matches[ind]
266        return dt,ind
267
268 def display_time_diff(diff):
269     """Display time difference in HH:MM:DD using number weeks (W)
270     and days (D) if necessary"""
271     # weeks, days = divmod(diff.days, 7)
272     days = diff.days
273     minutes, seconds = divmod(diff.seconds, 60)
274     hours, minutes = divmod(minutes, 60)   
275     # if (weeks>2 and days>0):
276     #    str = "%d Weeks, %d Days %02d:%02d" % (days, hours, minutes)
277     if (days==1):
278         str = "%02d:%02d" % (24+hours, minutes)
279     elif (days>1):
280         str = "%d Days %02d:%02d" % (days, hours, minutes)
281     else:
282         str = "%02d:%02d" % (hours, minutes)
283     return str
284
285 def copy_loop_sequence(src, dst, fn_glob, numFiles=24):
286     """ """
287     # src = '/seacoos/data/nccoos/level3/bogue/adcpwaves/dspec/'+this_month.strftime("%Y_%m")
288     # dst = '/home/haines/rayleigh/loop/'
289     # fn_glob = 'bogue_dspec_plot*'
290
291 def addnan(dt, data):
292     # dt to be only 1-dimension and data to be 1- or 2-dimensional
293     
294     from matplotlib.dates import date2num, num2date
295
296     # print dt.shape
297     # print data.shape
298     
299     dn = date2num(dt)
300     delta = numpy.diff(dn)
301     sample_interval = numpy.median(delta)
302     maxdelta = 1.5*sample_interval
303     # print maxdelta
304     igap = (delta > maxdelta).nonzero()[0]
305     ngap = len(igap)
306     if not ngap:
307         return (dt, data)
308     else:
309         # convert sample interval to dt object
310         sample_interval = timedelta(0.5*sample_interval)
311         # for each gap in data create NaN
312         data_insert = [numpy.nan for gap in igap]
313         # for each gap in time create datetime value
314         dt_insert = [dt[gap]+sample_interval for gap in igap]
315         # insert new sample times at indices of the gaps
316         new_dt = numpy.insert(numpy.array(dt), igap+1, dt_insert)
317         # insert NaN data at the indices that match the above times
318         new_data = numpy.insert(numpy.array(data), igap+1, data_insert, axis=0)
319         return (new_dt, new_data)
320
321 # unit conversions
322 def meters2feet(meters):
323     """Convert meters to feet: <feet> = <meters>*3.28084 """
324     return meters*3.28084
325        
326 def feet2meters(feet):
327     """Convert feet to meters: <meters> = <feet>*0.3048 """
328     return feet*0.3048
329        
330 def millibar2inches_Hg(millibar):
331     """Convert millibars to inches Hg: <inches_Hg> = <millibar>*0.0295301 """
332     return millibar*0.0295301
333
334 def celsius2fahrenheit(celsius):
335     """Convert deg Celsius to deg Fahrenheit: <fahrenheit> = ((1.8*<celsius>)+32) """
336     return (1.8*celsius)+32
337
338 def millimeters2inches(millimeters):
339     """ Convert millimeter to inches: <inches> = <millimeters>*0.0393700787) """
340     return millimeters*0.0393700787
341
342 def inches2millimeters(inches):
343     """ Convert <mm> = <inches>*25.4 """
344     return inches*25.4
345
346 def meters_sec2knots(meters_sec):
347     """ Convert m/s to knots: <knots> = <meters_sec>*1.94384449) """
348     return meters_sec*1.94384449
349
350 def wind_vector2u(wind_speed, wind_from_direction):
351     """ Convert wind vector to U (east) component: <u> = <wind_speed>*sine(<wind_from_direction>*pi/180) """
352     return wind_speed*math.sin(wind_from_direction*math.pi/180)
353
354 def wind_vector2v(wind_speed, wind_from_direction):
355     """ Convert wind vector to V (north) component: <v> = <wind_speed>*cosine(<wind_from_direction>*pi/180) """
356     return wind_speed*math.cos(wind_from_direction*math.pi/180)
357
358 def proc2latest(pi, si, yyyy_mm):
359     """Select specific variables and times from current monthly netCDF
360     and post as latest data.  TEST MODE.
361
362     For each active config file, load specific variables from NCCOOS
363     monthly netCDF, make any necessary changes to data or attributes
364     conform to SEACOOS Data Model, subset data (last 48 hours), and
365     create new netCDF file in latest netCDF directory.
366
367     NOTE: In test mode right now. See auto() function for similar action.
368
369     """
370    
371     platform = pi['id']
372     package = si['id']
373     si['proc_filename'] = '%s_%s_%s.nc' % (platform, package, yyyy_mm)
374     ifn = os.path.join(si['proc_dir'], si['proc_filename'])
375     if os.path.exists(ifn):
376         # get dt from current month file
377         (es, units) = nc_get_time(ifn)
378         dt = [es2dt(e) for e in es]
379         last_dt = dt[-1]
380
381     # determine which index of data is within the specified timeframe (last 2 days)
382     n = len(dt)
383     idx = numpy.array([False for i in range(n)])
384     for i, val in enumerate(dt):
385         if val>last_dt-timedelta(days=2) and val<=last_dt+timedelta(seconds=360):
386             idx[i] = True
387     dt = numpy.array(dt)
388     dt = dt[idx]
389
390     # read in data and unpack tuple
391     d = nc_load(ifn, si['latest_vars'])
392     global_atts, var_atts, dim_inits, var_inits, var_data = d
393     list_of_record_vars = nc_find_record_vars(ifn)
394
395     # turn off unlimited dimension (SH NOTE: As of pycdf-0.6-3b cannot
396     # delete a dimension or reset unlimited to limited within either
397     # CDF or CDFDim class, so doing it manually here by setting list
398     # 'dim_init' before creating of new netcdf.)
399     dim_inits = list(dim_inits)
400     for i in range(len(dim_inits)):
401         if dim_inits[i][1]==0:
402             dim_inits[i] = ('ntime', len(dt))       
403     dim_inits = tuple(dim_inits)
404
405     # subset data
406     varNames = [vn for vn, vt, vd in var_inits]
407     var_data = list(var_data)
408     for i in range(len(varNames)):
409         vn, vd = var_data[i]
410
411         if vn in list_of_record_vars:
412             var_data[i]=(vn, vd[idx])
413     var_data = tuple(var_data)
414
415     global_atts['start_date'] = dt[0].strftime('%Y-%m-%d %H:%M:%S')
416     si['latest_filename'] = 'nccoos_%s_%s_latest.nc' % (platform, package)
417     ofn = os.path.join(si['latest_dir'], si['latest_filename'])
418     d = (global_atts, var_atts, dim_inits, var_inits, var_data)
419
420     # write latest data
421     nc_create(ofn, d)
422
423     # quick way to rename dimensions
424     nc_rename_dimension(ofn, 'ntime', 'time')
425     nc_rename_dimension(ofn, 'nlat', 'lat')
426     nc_rename_dimension(ofn, 'nlon', 'lon')
427     nc_rename_dimension(ofn, 'nz', 'z')
428
429     # global replace _FillValue
430     nc_replace_fillvalue(ofn, -99999.0)
Note: See TracBrowser for help on using the browser.