NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/raw2proc/trunk/raw2proc/proc_sbe37_ctd.py

Revision 494 (checked in by haines, 12 years ago)

Processing mods for buoy data

Line 
1 #!/usr/bin/env python
2 # Last modified:  Time-stamp: <2012-06-28 14:44:14 haines>
3 """
4 how to parse data, and assert what data and info goes into
5 creating and updating monthly netcdf files
6
7 parse ctd data collected on Seabird CTD -- SBE37
8 derive salinity, depth, and density using seawater.csiro toolbox
9
10 parser : sample date and time, wtemp, cond, press, (derive) depth, salin, dens
11
12 creator : lat, lon, z, time, wtemp, cond, press, depth, salin, dens
13 updater : time, wtemp, cond, press, depth, salin, dens
14
15 Examples
16 --------
17 >>> sensor_info = sensor_info['ctd1']
18 >>> (parse, create, update) = import_processors(sensor_info['process_module'])
19
20 >>> data = parse(platform_info, sensor_info, lines)
21 >>> create(platform_info, sensor_info, data) or
22 >>> update(platform_info, sensor_info, data)
23
24 Testing
25 -------
26 from raw2proc import *
27 cn = 'b1_config_20111112'
28 sensor_info = get_config(cn+'.sensor_info')
29 sensor_info = sensor_info['ctd1']
30 platform_info = get_config(cn+'.platform_info')
31 (parse, create, update) = import_processors(sensor_info['process_module'])
32
33 filename = '/seacoos/data/nccoos/level0/b1/ctd1/store/B1_CTD1_2011_11_12.asc'
34 lines = load_data(filename)
35 sensor_info['fn'] = filename
36 data = parse(platform_info, sensor_info, lines)
37
38 create(platform_info, sensor_info, data)
39 update(platform_info, sensor_info, data)
40
41 """
42 from raw2proc import *
43 from procutil import *
44 from ncutil import *
45
# Module-level processing timestamp (UTC) used for the release, creation and
# modification dates written by creator() and updater().
# datetime objects are immutable: replace() returns a new object, so the
# result has to be re-bound for the microseconds to actually be dropped.
now_dt = datetime.utcnow()
now_dt = now_dt.replace(microsecond=0)
48
def parser(platform_info, sensor_info, lines):
    r"""
    Parse Sea-Bird SBE37 CTD ascii output and derive depth, salinity, density.

    Header comments start with '*'
    Last line of comments *END*

    * Sea-Bird SBE37 Data File:
    * FileName = C:\Documents and Settings\haines\Desktop\nc-wind 2012 CTD Recovery\B1_CTD1_3085_2012-04-07.asc
    ...
    * S>
    *END*
    start time =  12 Nov 2011  15:28:43
    sample interval = 360 seconds
    start sample number = 1
    17.4036,  4.35264,    3.521, 12 Nov 2011, 15:28:43
    17.4289,  4.35624,    3.593, 12 Nov 2011, 15:34:44
    17.4110,  4.35376,    3.600, 12 Nov 2011, 15:40:44
    ... EOF

    Parameters
    ----------
    platform_info : dict
        Platform configuration; 'lat' is read for the depth calculation.
    sensor_info : dict
        Sensor configuration; 'fn' (raw data filename) and 'utc_offset'
        (hours added to each sample time, skipped when falsy) are read.
    lines : list of str
        Lines of the raw file, header included.

    Returns
    -------
    dict of numpy arrays, one element per accepted record, keyed by
    'dt' (datetime), 'time' (epoch seconds from dt2es), 'wtemp' (C),
    'cond' (S/m), 'press' (decibar), 'depth' (m, negative below surface),
    'salin' (psu) and 'density' (kg/m^3).

    Notes
    -----
    A record is accepted only if it has at least five comma-separated
    fields, a 'dd mmm yyyy, HH:MM:SS' time stamp and three numeric values;
    anything else is reported and skipped.  wtemp outside (5, 30) and cond
    outside (2, 7) are set to NaN.
    """

    import numpy
    from datetime import datetime

    # get sample datetime from filename
    # NOTE(review): the result is not used further down; the lookup and call
    # are kept so a missing or unparsable 'fn' behaves as before -- confirm
    # whether it can be dropped.
    fn = sensor_info['fn']
    sample_dt_start = filt_datetime(fn)

    # tease out the header info and where it ends
    # **** may want more info extracted later for data attributes (e.g. sensor coeff)
    serial_number, end_idx, sample_interval_str = (None, None, None)
    for idx, k in enumerate(lines[0:100]):
        # serial number
        m = re.search(r'^\*.*(SERIAL NO\.)\s+(\d*)', k)
        if m:
            serial_number = m.group(2)
        # sample interval line (shown without a leading '*' in the sample
        # above; an optional '*' is tolerated)
        m = re.search(r'^\*?\s*sample interval\s*=\s*(.*)', k)
        if m:
            sample_interval_str = m.group(1)
        m = re.search(r'^(\*END\*).*', k)
        if m:
            end_idx = idx
    # TODO: check that sensor_info serial_number, sample_interval matches

    # split data from header info (start count 3 lines past *END*);
    # compare against None so that an *END* on the very first line still counts
    if end_idx is not None:
        lines = lines[end_idx+3:]
    N = len(lines)

    # Pre-allocate NaN-filled arrays, one slot per candidate line.  'time'
    # is float as well: multiplying an integer array by NaN yields float.
    data = {
        'dt' : numpy.ones((N,), dtype=object)*numpy.nan,
        'time' : numpy.ones((N,), dtype=float)*numpy.nan,
        'wtemp' : numpy.ones((N,), dtype=float)*numpy.nan,
        'cond' : numpy.ones((N,), dtype=float)*numpy.nan,
        'press' : numpy.ones((N,), dtype=float)*numpy.nan,
        'salin' : numpy.ones((N,), dtype=float)*numpy.nan,
        'density' : numpy.ones((N,), dtype=float)*numpy.nan,
        'depth' : numpy.ones((N,), dtype=float)*numpy.nan,
        }

    # count of accepted samples == next free slot in the arrays
    i = 0

    for line_num, line in enumerate(lines):
        # split line
        sw = re.split(',', line)

        # replace "NAN"
        for index, s in enumerate(sw):
            if re.search(NAN_RE_STR, s):
                sw[index] = '-99999'

        # first three fields are the numeric values
        csi = []
        for s in sw[0:3]:
            m = re.search(REAL_RE_STR, s)
            if m:
                csi.append(float(m.groups()[0]))

        # fields four and five are date and time
        m = None
        if len(sw) >= 5:
            dstr = sw[3]+' '+sw[4]
            m = re.search(r'\s*(\d{2})\s*(\w{2,3})\s*(\d{4})\s*(\d{2}):(\d{2}):(\d{2}).*', dstr)

        # Validate the whole record before storing anything, so a skipped
        # line cannot leave a time stamp behind in slot i.
        if m is None or len(csi) != 3:
            print(' ... skipping line %d -- %s ' % (line_num, line))
            continue

        dstr = '%s %s %s %s:%s:%s' % m.groups()
        sample_dt = scanf_datetime(dstr, fmt='%d %b %Y %H:%M:%S')
        if sensor_info['utc_offset']:
            sample_dt = sample_dt + timedelta(hours=sensor_info['utc_offset'])

        # ***** TO DO: may need to adjust any offset in CTD sample time to UTC clock
        # This requires knowing what UTC time is at a CTD sample
        data['dt'][i] = sample_dt # sample datetime
        data['time'][i] = dt2es(sample_dt) # sample time in epoch seconds

        # (pg 31 SBE IMP Microcat User Manual)
        # "#iiFORMAT=1 (default) Output converted to data
        # date format dd mmm yyyy,
        # conductivity = S/m,
        # temperature precedes conductivity"
        data['wtemp'][i] = csi[0] # water temperature (C)
        data['cond'][i] = csi[1] # specific conductivity (S/m)
        data['press'][i] = csi[2] # pressure decibars
        i = i+1
    # for line

    # keep only data that has a resolved datetime (unused slots still hold NaN)
    keep = numpy.array([isinstance(dt, datetime) for dt in data['dt'][:]])
    if keep.any():
        for param in list(data):
            data[param] = data[param][keep]

    # Quality Control steps for temp, depth, and cond
    # (1) within range
    # (2) if not pumped
    good = (5<data['wtemp']) & (data['wtemp']<30)
    data['wtemp'][~good] = numpy.nan

    good = (2<data['cond']) & (data['cond']<7)
    data['cond'][~good] = numpy.nan

    # press range depends on deployment depth and instrument transducer rating

    # calculate depth, salinity and density
    import seawater.csiro
    import seawater.constants

    # seawater.constants.C3515 is units of mS/cm
    # data['cond'] is units of S/m
    #     <S m-1> = <mS cm-1>/10, hence the factor of 10 below

    data['depth'] = -1*seawater.csiro.depth(data['press'], platform_info['lat']) # meters
    data['salin'] = seawater.csiro.salt(10*data['cond']/seawater.constants.C3515, data['wtemp'], data['press']) # psu
    data['density'] = seawater.csiro.dens(data['salin'], data['wtemp'], data['press']) # kg/m^3

    return data
216
def creator(platform_info, sensor_info, data):
    """
    Assemble everything needed to create a new monthly netCDF file.

    Parameters
    ----------
    platform_info : dict
        Platform configuration (institution, location, lat, lon, codes ...).
    sensor_info : dict
        Sensor configuration ('description', 'source', 'process_module',
        'id', 'nominal_depth').
    data : dict
        Parsed data arrays plus data['in'], the index selecting the samples
        of the month being processed (see raw2proc.process()).

    Returns
    -------
    tuple
        (global_atts, var_atts, dim_inits, var_inits, var_data)
    """
    # subset data only to month being processed (see raw2proc.process())
    i = data['in']

    title_str = sensor_info['description']+' at '+ platform_info['location']
    global_atts = {
        'title' : title_str,
        'institution' : platform_info['institution'],
        'institution_url' : platform_info['institution_url'],
        'institution_dods_url' : platform_info['institution_dods_url'],
        'metadata_url' : platform_info['metadata_url'],
        'references' : platform_info['references'],
        'contact' : platform_info['contact'],
        #
        'source' : platform_info['source']+' '+sensor_info['source'],
        'history' : 'raw2proc using ' + sensor_info['process_module'],
        'comment' : 'File created using pycdf'+pycdfVersion()+' and numpy '+pycdfArrayPkg(),
        # conventions
        'Conventions' : platform_info['conventions'],
        # SEACOOS CDL codes
        'format_category_code' : platform_info['format_category_code'],
        'institution_code' : platform_info['institution_code'],
        'platform_code' : platform_info['id'],
        'package_code' : sensor_info['id'],
        # institution specific
        'project' : platform_info['project'],
        'project_url' : platform_info['project_url'],
        # timeframe of data contained in file yyyy-mm-dd HH:MM:SS
        # first date in monthly file
        'start_date' : data['dt'][i][0].strftime("%Y-%m-%d %H:%M:%S"),
        # last date in monthly file
        'end_date' : data['dt'][i][-1].strftime("%Y-%m-%d %H:%M:%S"),
        'release_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        #
        'creation_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'modification_date' : now_dt.strftime("%Y-%m-%d %H:%M:%S"),
        'process_level' : 'level1',
        #
        # must type match to data (e.g. fillvalue is real if data is real)
        '_FillValue' : -99999.,
        }

    var_atts = {
        # coordinate variables
        'time' : {'short_name': 'time',
                  'long_name': 'Time',
                  'standard_name': 'time',
                  'units': 'seconds since 1970-1-1 00:00:00 -0', # UTC
                  'axis': 'T',
                  },
        'lat' : {'short_name': 'lat',
                 'long_name': 'Latitude',
                 'standard_name': 'latitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_north',
                 'valid_range':(-90.,90.),
                 'axis': 'Y',
                 },
        'lon' : {'short_name': 'lon',
                 'long_name': 'Longitude',
                 'standard_name': 'longitude',
                 'reference':'geographic coordinates',
                 'units': 'degrees_east',
                 'valid_range':(-180.,180.),
                 # longitude is the X axis (latitude above already uses Y)
                 'axis': 'X',
                 },
        'z' : {'short_name': 'z',
               'long_name': 'Depth',
               'standard_name': 'depth',
               'reference':'zero at sea-surface',
               'positive' : 'up',
               'units': 'm',
               'axis': 'Z',
               },
        # data variables
        'wtemp': {'short_name': 'wtemp',
                  'long_name': 'Water Temperature',
                  'standard_name': 'water_temperature',
                  'units': 'degrees_Celsius',
                  },
        'cond': {'short_name': 'cond',
                 'long_name': 'Conductivity',
                 'standard_name': 'conductivity',
                 'units': 'S m-1',
                 },
        'press': {'short_name': 'press',
                  'long_name': 'Pressure',
                  'standard_name': 'water_pressure',
                  'units': 'decibar',
                  },
        'depth': {'short_name': 'depth',
                  'long_name': 'Depth',
                  'standard_name': 'depth',
                  'reference':'zero at sea-surface',
                  'positive' : 'up',
                  'units': 'm',
                  'comment': 'Derived using seawater.csiro.depth(press,lat)',
                  },
        'salin': {'short_name': 'salin',
                  'long_name': 'Salinity',
                  'standard_name': 'salinity',
                  'units': 'psu',
                  'comment': 'Derived using seawater.csiro.salt(cond/C3515,wtemp,press)',
                  },
        'density': {'short_name': 'density',
                    'long_name': 'Density',
                    'standard_name': 'density',
                    'units': 'kg m-3',
                    # names the routine the parser actually calls (dens, not dens0)
                    'comment': 'Derived using seawater.csiro.dens(salin,wtemp,press)',
                    },
        }

    # dimension names use tuple so order of initialization is maintained
    dim_inits = (
        ('ntime', NC.UNLIMITED),
        ('nlat', 1),
        ('nlon', 1),
        ('nz', 1),
        )

    # using tuple of tuples so order of initialization is maintained
    # using dict for attributes order of init not important
    # use dimension names not values
    # (varName, varType, (dimName1, [dimName2], ...))
    var_inits = (
        # coordinate variables
        ('time', NC.INT, ('ntime',)),
        ('lat', NC.FLOAT, ('nlat',)),
        ('lon', NC.FLOAT, ('nlon',)),
        ('z',  NC.FLOAT, ('nz',)),
        # data variables
        ('wtemp', NC.FLOAT, ('ntime',)),
        ('cond', NC.FLOAT, ('ntime',)),
        ('press', NC.FLOAT, ('ntime',)),
        # derived variables
        ('depth', NC.FLOAT, ('ntime',)),
        ('salin', NC.FLOAT, ('ntime',)),
        ('density', NC.FLOAT, ('ntime',)),
        )

    # var data
    var_data = (
        ('lat',  platform_info['lat']),
        ('lon', platform_info['lon']),
        ('z', sensor_info['nominal_depth']),
        #
        ('time', data['time'][i]),
        #
        ('wtemp', data['wtemp'][i]),
        ('cond', data['cond'][i]),
        ('press', data['press'][i]),
        # derived variables
        ('depth', data['depth'][i]),
        ('salin',  data['salin'][i]),
        ('density', data['density'][i]),
        )

    return (global_atts, var_atts, dim_inits, var_inits, var_data)
377
def updater(platform_info, sensor_info, data):
    """
    Assemble what is needed to append to an existing monthly netCDF file.

    Only the samples selected by data['in'] (the month being processed,
    see raw2proc.process()) are passed on.

    Returns
    -------
    tuple
        (global_atts, var_atts, var_data) where global_atts carries the
        refreshed end/release/modification dates, var_atts is empty and
        var_data pairs each variable name with its subset of samples.
    """
    month_idx = data['in']
    stamp_fmt = "%Y-%m-%d %H:%M:%S"

    # update times of data contained in file (yyyy-mm-dd HH:MM:SS)
    global_atts = {}
    # last date in monthly file
    global_atts['end_date'] = data['dt'][month_idx][-1].strftime(stamp_fmt)
    global_atts['release_date'] = now_dt.strftime(stamp_fmt)
    global_atts['modification_date'] = now_dt.strftime(stamp_fmt)

    # no variable attributes (e.g. range, min, max) are updated at present
    var_atts = {}

    # measured variables first, then the derived ones
    var_names = ('time', 'wtemp', 'cond', 'press', 'depth', 'salin', 'density')
    var_data = tuple([(name, data[name][month_idx]) for name in var_names])

    return (global_atts, var_atts, var_data)
418 #
419
Note: See TracBrowser for help on using the browser.