NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/tags/raw2proc-v0.1/raw2proc.py

Revision 102 (checked in by haines, 16 years ago)

commit v0.1 in tags

  • Property svn:executable set to
Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2008-01-14 11:03:16 haines>
3 """Process raw data to monthly netCDF data files
4
5 This module processes raw ascii- or binary-data from different NCCOOS
6 sensors (ctd, adcp, waves-adcp, met) based on manual or automated
7 operation.  If automated processing, add raw data (level0) from all
8 active sensors to current month's netcdf data files (level1) with the
9 current configuration setting.  If manual processing, determine which
10 configurations to use for requested platform, sensor, and month.
11
12 :Processing steps:
13   0. raw2proc auto or manual for platform, sensor, month
14   1. list of files to process
15   2. parse data
16   3. create, update netcdf
17
18   to-do
19   3. qc (measured) data
20   4. process derived data (and regrid?)
21   5. qc (measured and derived) data flags
22
23 """
24
25 __version__ = "v0.1"
26 __author__ = "Sara Haines <sara_haines@unc.edu>"
27
28 import sys
29 import os
30 import re
31
# Directory that is searched for the platform configuration modules
# (files matching '*_config_*.py').  It is an absolute path so the
# script can be run from cron.
defconfigs='/afs/isis.unc.edu/depts/marine/workspace/haines/nc-coos/raw2proc'
34
35 import numpy
36
37 from procutil import *
38 from ncutil import *
39
# Regular-expression fragment matching a single real number with optional
# surrounding whitespace; group 1 captures the number itself.  The
# alternatives are tried in this order:
#   1. scientific notation: one mantissa digit, optional fraction, and a
#      signed exponent of one or two digits (e.g. -1.25E+03)
#   2. decimal with digits on at least one side of the point (e.g. 12. or .5)
#   3. plain integer
REAL_RE_STR = '\\s*(-?\\d(\\.\\d+|)[Ee][+\\-]\\d\\d?|-?(\\d+\\.\\d*|\\d*\\.\\d+)|-?\\d+)\\s*'
41
def load_data(inFile):
    """Read a raw data file and return its lines.

    :Parameters:
       inFile : string
           Path of the file to read

    :Returns:
       lines : list of str or None
           Lines of the file as returned by readlines() (line endings
           kept); [] if the file is empty (a message is printed);
           None if the file does not exist (a message is printed)
    """
    if not os.path.exists(inFile):
        print('File does not exist: ' + inFile)
        return None

    f = open(inFile, 'r')
    try:
        lines = f.readlines()
    finally:
        # release the handle even when the read itself fails
        f.close()

    if not lines:
        print('Empty file: ' + inFile)
    return lines
53
def import_parser(name):
    """Return the attribute called `name` from the `parsers` module.

    :Parameters:
       name : string
           Name of the attribute to fetch from `parsers`

    :Returns:
       The object bound to `name` in the imported `parsers` module.
       AttributeError propagates if there is no such attribute.
    """
    parsers_module = __import__('parsers')
    return getattr(parsers_module, name)
58
def import_processors(mod_name):
    """Import a processing module and return its three entry points.

    :Parameters:
       mod_name : string
           Name of the module to import

    :Returns:
       (parser, creator, updater) : tuple
           The module attributes of those names, in that order.
           AttributeError propagates if any of them is missing.
    """
    module = __import__(mod_name)
    entry_points = ('parser', 'creator', 'updater')
    return tuple([getattr(module, entry) for entry in entry_points])
65    
66
def get_config(name):
    """Import a module and return the object reached by a dotted path.

    Usage Example >>>sensor_info = get_config('bogue_config_20060918.sensor_info')

    :Parameters:
       name : string
           Dotted name; the first component is the module to import and
           each following component is looked up as an attribute of the
           previous result

    :Returns:
       The object at the end of the path.  A name without any dot
       returns the imported module itself.  AttributeError propagates
       if a component is missing.
    """
    components = name.split('.')
    obj = __import__(components[0])
    # walk the path one attribute at a time, each lookup starting from the
    # previous result rather than from the top-level module
    for comp in components[1:]:
        obj = getattr(obj, comp)
    return obj
74
def find_configs(platform, yyyy_mm, config_dir=''):
    """Find which configuration files apply to a platform and month

    :Parameters:
       platform : string
           Platform id to process (e.g. 'bogue')
       yyyy_mm : string
           Year and month of data to process (e.g. '2007_07')
       config_dir : string
           Directory searched for '<platform>_config_*.py' files

    :Returns:
       cns : list of str
           Names (file name without extension) of the configurations
           whose time span overlaps the desired month, in sorted
           file-name order.
           If empty [], no configs were found

    A configuration's span comes from 'config_start_date' and
    'config_end_date' in its platform_info; a date that is None (or
    otherwise empty) is taken to be the current UTC time.
    """
    import glob
    # list of config files based on platform; sorted because glob returns
    # files in arbitrary order and callers process the configs in sequence
    configs = sorted(glob.glob(os.path.join(config_dir, platform + '_config_*.py')))
    # datetime objects are immutable: keep the truncated copy
    now_dt = datetime.utcnow().replace(microsecond=0)
    # determine when month starts and ends
    (prev_month, this_month, next_month) = find_months(yyyy_mm)
    month_start_dt = this_month
    month_end_dt = next_month - timedelta(seconds=1)

    cns = []
    for config in configs:
        cn = os.path.splitext(os.path.basename(config))[0]
        # NOTE(review): the config module is imported by name, so it has to
        # be importable (on sys.path) in addition to being in config_dir
        pi = get_config(cn+'.platform_info')
        if pi['config_start_date']:
            config_start_dt = filt_datetime(pi['config_start_date'])
        else:
            config_start_dt = now_dt
        if pi['config_end_date']:
            config_end_dt = filt_datetime(pi['config_end_date'])
        else:
            config_end_dt = now_dt
        # two intervals overlap when each one starts before the other ends
        if config_start_dt <= month_end_dt and config_end_dt >= month_start_dt:
            cns.append(cn)
    return cns
119
120
def find_active_configs(config_dir=''):
    """Find which configuration files are active

    A configuration is active when 'config_end_date' in its
    platform_info is None.

    :Parameters:
       config_dir : string
           Directory searched for '*_config_*.py' files

    :Returns:
       cns : list of str
           Names (file name without extension) of the active
           configurations, in sorted file-name order.
           If empty [], no configs were found
    """
    import glob
    # list of all config files; sorted because glob returns files in
    # arbitrary order
    configs = sorted(glob.glob(os.path.join(config_dir, '*_config_*.py')))

    cns = []
    for config in configs:
        cn = os.path.splitext(os.path.basename(config))[0]
        pi = get_config(cn+'.platform_info')
        if pi['config_end_date'] is None:
            cns.append(cn)
    return cns
144
145
def find_raw(si, yyyy_mm):
    """Determine which raw files to process for a month

    :Parameters:
       si : dict
           Sensor info; reads 'raw_dir', 'raw_file_glob',
           'proc_start_dt' and 'proc_end_dt'
       yyyy_mm : string
           Year and month of data to process (e.g. '2007_07')

    :Returns:
       (raw_files, raw_dts) : tuple of lists
           Paths of the selected raw files and, in the same order, the
           datetime that filt_datetime() derived from each file name
    """
    import glob
    # gather candidates from the <raw_dir>/<YYYY_MM> subdirectory of every
    # month returned by find_months (prev-month, this-month, next-month)
    candidates = []
    for month_dt in find_months(yyyy_mm):
        pattern = os.path.join(si['raw_dir'],
                               month_dt.strftime('%Y_%m'),
                               si['raw_file_glob'])
        candidates += glob.glob(pattern)
    candidates.sort()

    # ****** ((SMH) NOTE: Will need to override looking in specific
    # subdirs of months if all data is contained in one file for long
    # deployment, such as with adcp binary data.

    # accept files dated up to one day outside the processing window
    window_start = si['proc_start_dt'] - timedelta(days=1)
    window_end = si['proc_end_dt'] + timedelta(days=1)

    raw_files = []
    raw_dts = []
    for path in candidates:
        file_dt = filt_datetime(os.path.basename(path))
        if not file_dt:
            # no usable datetime for this file name
            continue
        if window_start <= file_dt <= window_end:
            raw_files.append(path)
            raw_dts.append(file_dt)

    return (raw_files, raw_dts)
178
def which_raw(pi, raw_files, dts):
    """Further limit file names based on configuration file timeframe

    :Parameters:
       pi : dict
           Platform info; reads 'config_start_date' and
           'config_end_date'.  A date that is None (or otherwise
           empty) is taken to be the current UTC time.
       raw_files : list of str
           Candidate raw file names
       dts : list of datetime
           Datetime of each entry of raw_files, in the same order

    :Returns:
       new_list : list of str
           Entries of raw_files whose datetime lies within the
           configuration's start and end (both inclusive)
    """
    # datetime objects are immutable: keep the truncated copy
    now_dt = datetime.utcnow().replace(microsecond=0)

    if pi['config_start_date']:
        config_start_dt = filt_datetime(pi['config_start_date'])
    else:
        config_start_dt = now_dt

    if pi['config_end_date']:
        config_end_dt = filt_datetime(pi['config_end_date'])
    else:
        config_end_dt = now_dt

    new_list = [fn for (fn, dt) in zip(raw_files, dts)
                if config_start_dt <= dt <= config_end_dt]
    return new_list
197        
198
def raw2proc(proctype, platform=None, package=None, yyyy_mm=None):
    """
    Entry point: run the processing in auto-mode or manual-mode

    Auto-mode hands over to auto(); manual-mode hands over to
    manual() and needs platform, package and month.  Anything else
    only prints a usage message.

    :Parameters:
       proctype : string
           'auto' or 'manual'

       platform : string
           Platform id to process (e.g. 'bogue')
       package : string
           Sensor package id to process (e.g. 'adcp')
       yyyy_mm : string
           Year and month of data to process (e.g. '2007_07')

    Examples
    --------
    >>> raw2proc(proctype='manual', platform='bogue', package='adcp', yyyy_mm='2007_06')
    >>> raw2proc('manual', 'bogue', 'adcp', '2007_06')

    """
    # start_dt is the module-level start time of this run
    print('\nStart time for raw2proc: %s\n' % start_dt.strftime("%Y-%b-%d %H:%M:%S UTC"))

    if proctype == 'auto':
        print('Processing in auto-mode, all platforms, all packages, latest data')
        auto()
        return

    if proctype != 'manual':
        print('raw2proc: requires either auto or manual operation')
        return

    # manual mode: all three selectors are mandatory
    if not (platform and package and yyyy_mm):
        print('raw2proc: Manual operation requires platform, package, and month')
        print("   >>> raw2proc(proctype='manual', platform='bogue', package='adcp', yyyy_mm='2007_07')")
        return

    print('Processing in manually ...')
    print(' ...  platform id : %s' % platform)
    print(' ... package name : %s' % package)
    print(' ...        month : %s' % yyyy_mm)
    print(' ...  starting at : %s' % start_dt.strftime("%Y-%m-%d %H:%M:%S UTC"))
    manual(platform, package, yyyy_mm)
242
243
def auto():
    """Process the current month for every active configuration

    Notes
    -----

    1. the active configurations come from find_active_configs()
       (i.e. config_end_date is None), searched in defconfigs
    2. for each configuration, for each package in its sensor_info
         yyyy_mm is the current month
         output file is <platform>_<package>_<yyyy_mm>.nc in proc_dir
         if that file exists and its last time stamp is within the
           month, processing starts from that time stamp
         find the raw files, limit them to the configuration
           timeframe, and hand them to process()

    """
    yyyy_mm = this_month()
    (prev_month, month_start_dt, next_month) = find_months(yyyy_mm)
    month_end_dt = next_month - timedelta(seconds=1)

    configs = find_active_configs(config_dir=defconfigs)
    if not configs:
        print(' ... ... ... \nNOTE: No active platforms\n')
        return

    for cn in configs:
        print(' ... config file : %s' % cn)
        pi = get_config(cn+'.platform_info')
        asi = get_config(cn+'.sensor_info')
        platform = pi['id']
        for (package, si) in asi.items():
            print(' ... package name : %s' % package)
            si['proc_filename'] = '%s_%s_%s.nc' % (platform, package, yyyy_mm)
            ofn = os.path.join(si['proc_dir'], si['proc_filename'])
            # default window is the whole month
            si['proc_start_dt'] = month_start_dt
            si['proc_end_dt'] = month_end_dt
            if os.path.exists(ofn):
                # last time stamp already stored in this month's file
                (es, units) = nc_get_time(ofn)
                last_dt = es2dt(es[-1])
                # when it falls on or after the start of the month, begin
                # there so only newer data is processed
                if last_dt >= month_start_dt:
                    si['proc_start_dt'] = last_dt

            (raw_files, raw_dts) = find_raw(si, yyyy_mm)
            raw_files = which_raw(pi, raw_files, raw_dts)
            process(pi, si, raw_files, yyyy_mm)
297
def manual(platform, package, yyyy_mm):
    """Process data for specified platform, sensor package, and month

    :Parameters:
       platform : string
           Platform id to process (e.g. 'bogue')
       package : string
           Sensor package id to process (e.g. 'adcp')
       yyyy_mm : string
           Year and month of data to process (e.g. '2007_07')

    Notes
    -----

    1. determine which configs overlap the month
    2. for each config for specific platform
           if have package in config
               which raw files
               remove the previous output file (first time only)
               process
    """
    # determine when month starts and ends
    months = find_months(yyyy_mm)
    month_start_dt = months[1]
    month_end_dt = months[2] - timedelta(seconds=1)

    configs = find_configs(platform, yyyy_mm, config_dir=defconfigs)
    if not configs:
        print(' ... ... ... \nNOTE: %s not operational for %s\n' % (platform, yyyy_mm))
        return

    # output files already cleared during this run; each one is removed only
    # the first time it is about to be written, so later configs append to
    # the file produced by earlier ones
    cleared = set()
    for cn in configs:
        print(' ... config file : %s' % cn)
        pi = get_config(cn+'.platform_info')
        asi = get_config(cn+'.sensor_info')
        if package not in pi['packages']:
            print(' ... ... \nNOTE: %s not operational on %s for %s\n' % (package, platform, yyyy_mm))
            continue

        si = asi[package]
        si['proc_start_dt'] = month_start_dt
        si['proc_end_dt'] = month_end_dt
        si['proc_filename'] = '%s_%s_%s.nc' % (platform, package, yyyy_mm)
        ofn = os.path.join(si['proc_dir'], si['proc_filename'])
        (raw_files, raw_dts) = find_raw(si, yyyy_mm)
        raw_files = which_raw(pi, raw_files, raw_dts)
        # remove any previous netcdf file (platform_package_yyyy_mm.nc).
        # This is tied to the first config that actually handles the
        # package, not to the first config in the list, which may not
        # contain the package at all.
        if ofn not in cleared:
            if os.path.exists(ofn):
                os.remove(ofn)
            cleared.add(ofn)

        process(pi, si, raw_files, yyyy_mm)
341    
def process(pi, si, raw_files, yyyy_mm):
    """Parse each raw file and write its records to the netCDF file

    The parser, creator and updater are taken from the module named
    by si['process_module'], which tailors the processing to the
    input file format and controls the output.

    :Parameters:
       pi : dict
           Platform info, passed through to the processors
       si : dict
           Sensor info; reads 'process_module', 'proc_start_dt',
           'proc_end_dt', 'proc_dir' and 'proc_filename'
       raw_files : list of str
           Raw files to process, in order
       yyyy_mm : string
           Year and month being processed (not used here)
    """
    (parse, create, update) = import_processors(si['process_module'])
    for fn in raw_files:
        lines = load_data(fn)
        if not lines:
            # missing or empty file: load_data already reported it
            continue
        data = parse(pi, si, lines)
        # flag which records are within the specified timeframe (usually
        # the month): after proc_start_dt, up to and including proc_end_dt.
        # The test must be element-wise; Python's `and` would ask for the
        # truth value of a whole array.
        # NOTE(review): assumes data['dt'] is a numpy array of datetimes -- confirm
        data['in'] = numpy.logical_and(data['dt'] > si['proc_start_dt'],
                                       data['dt'] <= si['proc_end_dt'])
        # if any records are in the timeframe then write to netcdf
        if data['in'].any():
            sys.stdout.write('... %s ... ' % fn)
            sys.stdout.write('%d\n' % len(data['in']))
            ofn = os.path.join(si['proc_dir'], si['proc_filename'])
            # update or create netcdf
            if os.path.exists(ofn):
                ut = update(pi,si,data)
                nc_update(ofn, ut)
            else:
                ct = create(pi,si,data)
                nc_create(ofn, ct)
363
364
365        
366    
# globals
# UTC time at which the module was loaded, truncated to whole seconds;
# raw2proc() prints it as the start time of the run.  datetime objects are
# immutable, so the value returned by replace() has to be kept.
start_dt = datetime.utcnow().replace(microsecond=0)

if __name__ == "__main__":
    import optparse
    # run as a script: process the latest data for all active platforms
    raw2proc('auto')

    # for testing
    # proctype='manual'; platform='bogue'; package='adcp'; yyyy_mm='2007_07'
    # raw2proc(proctype='manual', platform='bogue', package='adcp', yyyy_mm='2007_07')
Note: See TracBrowser for help on using the browser.