
root/raw2proc/trunk/raw2proc/split_cr1000_by_month.py

Revision 320 (checked in by haines, 14 years ago)

catch-up trunk to production code running on cromwell

  • Property svn:executable set to *
#!/usr/bin/env python
# Last modified:  Time-stamp: <2009-04-01 08:47:51 haines>
3 """
4 parse datestr from cr1000 files to create monthly files
5
6 input file
7 /seacoos/data/nccoos/level0/crow/crow_csi_loggernet_yyyymmdd-yyyymmdd.dat
8
9 Output form
10 /seacoos/data/nccoos/level0/crow/yyyy_mm/wq/csi_wq_yyyy_mm.dat
11 /seacoos/data/nccoos/level0/crow/yyyy_mm/flow/csi_flow_yyyy_mm.dat
12
13 load data file
14 parse lines for time YYYY, jjj, HHMM
15 what year and month?
16
17 create YYYY_MM directory and output file if does not exist.
18 write line to YYYY_MM/csi_loggernet_yyyy_mm.dat output file
19
20 """

REAL_RE_STR = '\\s*(-?\\d(\\.\\d+|)[Ee][+\\-]\\d\\d?|-?(\\d+\\.\\d*|\\d*\\.\\d+)|-?\\d+)\\s*'
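# REAL_RE_STR matches one real number (integer, decimal, or scientific
# notation) with optional surrounding whitespace.  It is not referenced in
# this module; it appears to be kept for consistency with other raw2proc
# parsers.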

import sys
import os
import re
from procutil import *
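# scanf_datetime() and this_month(), used below, are assumed to come from
# procutil (part of the raw2proc package); they are not defined in this file.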

def parse_csi_loggernet(fn, lines):
    """

    From FSL (CSI datalogger program files):
    Example data:

    TOA5,CR1000_CBC,CR1000,5498,CR1000.Std.11,CPU:UNC_CrowBranch.CR1,1554,Data15Min
    TIMESTAMP,RECORD,RainIn_Tot,WaterLevelFt,Flow
    TS,RN,,,
    ,,Tot,Smp,Smp
    2009-01-22 15:30:00,0,0,0,0
    2009-01-22 15:45:00,1,0,0,0
    2009-01-22 16:00:00,2,0.01,0,0
    2009-01-22 16:15:00,3,0,0,0

    TOA5,CR1000_CBC,CR1000,5498,CR1000.Std.11,CPU:UNC_CrowBranch.CR1,1554,DataHourly
    TIMESTAMP,RECORD,SondeTempC,SpCond,DOSat,DOmg,pH,Turb,BattVolt_Min
    TS,RN,,,,,,,
    ,,Smp,Smp,Smp,Smp,Smp,Smp,Min
    2009-01-22 16:00:00,0,2.68,0.533,7.63,-46.8,-1.4,0,11.99
    2009-01-22 17:00:00,1,3.07,0.553,7.62,-46.6,-1.4,0,11.96
    2009-01-22 18:00:00,2,3.45,0.548,7.62,-46.5,-1.4,0,11.91
    2009-01-22 19:00:00,3,3.53,0.546,7.62,-46.3,-1.4,0,11.89
    2009-01-22 20:00:00,4,3.59,0.547,7.62,-46.3,-1.4,0,11.86
    2009-01-22 21:00:00,5,3.55,0.545,7.61,-46.2,-0.7,0,11.84
    2009-01-22 22:00:00,6,3.47,0.545,7.62,-46.3,4.2,0,11.81
    2009-01-22 23:00:00,7,3.37,0.545,7.62,-46.4,-0.7,0,11.8
    2009-01-23 00:00:00,8,3.28,0.545,7.62,-46.5,4.2,0,11.78
    2009-01-23 01:00:00,9,3.17,0.546,7.62,-46.7,-0.9,0,11.76
    2009-01-23 02:00:00,10,3,0.549,7.63,-46.8,-1.3,0,11.74
    2009-01-23 03:00:00,11,2.95,0.55,7.64,-47.3,-1.4,0,11.73
    2009-01-23 04:00:00,12,2.89,0.552,7.63,-47.2,-1.4,0,11.71
    2009-01-23 05:00:00,13,2.8,0.554,7.64,-47.3,-1.4,0,11.69
    2009-01-23 06:00:00,14,2.72,0.554,7.64,-47.6,-1.3,0,11.68


    """

    import numpy
    from datetime import datetime
    from time import strptime
    import math

    p = os.path.split(fn)
    (loggertype, id, datatype) = p[1].split('_')
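    # e.g. 'CR1000_CBC_Data15Min.dat' -> loggertype='CR1000', id='CBC',
    # datatype='Data15Min.dat'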

    #  set this_month to now
    this_month_str = this_month()

    # skip first 4 lines but write these four lines to top of monthly files
    print lines[0:4]
    for line in lines[4:]:
        # split line
        sw = re.split(',', line)

        if len(sw)>=1:
            # print line
            # get sample datetime from sw[0]
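            # note: the format string includes literal double-quotes,
            # presumably because timestamps in the raw LoggerNet (TOA5)
            # output are quoted, e.g. "2009-01-22 15:30:00"; the example
            # data in the docstring above shows them unquoted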
            sample_dt = scanf_datetime(sw[0], fmt='"%Y-%m-%d %H:%M:%S"')
            month_str = '%4d_%02d' % sample_dt.timetuple()[0:2]
        else:
            # not a well-formed line, so skip to next line
            print 'ill-formed time, line not to be copied: ' + line
            continue

        if datatype=='Data15Min.dat':
            data_dir = os.path.join(p[0],'flow',month_str)
            ofn_prefix = '%s_%s' % (id.lower(), 'flow')
        elif datatype=='DataHourly.dat':
            data_dir = os.path.join(p[0],'wq',month_str)
            ofn_prefix = '%s_%s' % (id.lower(), 'wq')
        else:
            # unrecognized data table -- skip the line rather than fail
            # below with data_dir undefined
            print 'unrecognized datatype, line not to be copied: ' + datatype
            continue

        if not os.path.isdir(data_dir):
            print 'Creating directory: '+data_dir
            # makedirs also creates the flow/ or wq/ parent if it is missing
            os.makedirs(data_dir)

        ofn = os.path.join(data_dir, ofn_prefix)
        ofn = '_'.join([ofn, month_str])
        ofn = '.'.join([ofn, 'dat'])

        # delete previous existing month file so start fresh
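        # (a month's file is deleted and rebuilt the first time that month is
        # encountered in this input, except when the very first record already
        # falls in the current month, this_month(), which is appended to
        # instead; this_month_str is reset to month_str after every line)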
        if os.path.exists(ofn) and (month_str != this_month_str):
            print 'Deleting file: '+ofn
            os.remove(ofn)

        if os.path.exists(ofn):
            f = open(ofn, 'a')
            f.write(line)
            f.close()
        else:
            print 'Creating file: '+ofn
            f = open(ofn, 'w')
            # write first four header lines to each new month
            for l in lines[0:4]:
                f.write(l)
            f.write(line)
            f.close()

        this_month_str = month_str

    # for line
    return


def load_data(inFile):
    lines=None
    if os.path.exists(inFile):
        f = open(inFile, 'r')
        lines = f.readlines()
        f.close()
        if len(lines)<=0:
            print 'Empty file: '+ inFile
    else:
        print 'File does not exist: '+ inFile
    return lines

from raw2proc import *

def test1(fn):
    lines = load_data(fn)
    return parse_csi_loggernet(fn, lines)

def spin():
    fns = [
        '/seacoos/data/nccoos/level0/crow/CR1000_CBC_Data15Min.dat',
        '/seacoos/data/nccoos/level0/crow/CR1000_CBC_DataHourly.dat',
        '/seacoos/data/nccoos/level0/meet/CR1000_MOW_Data15Min.dat',
        '/seacoos/data/nccoos/level0/meet/CR1000_MOW_DataHourly.dat',
        ]
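    # CBC (under crow/) and MOW (under meet/) are the two station codes,
    # presumably Crow Branch and Meeting of the Waters creeks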

    for fn in fns:
        test1(fn)


if __name__ == '__main__':
    pass
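    # presumably this module is imported and driven via spin(); the
    # commented-out lines below show single-file use from the command line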
    # fn = '/seacoos/data/nccoos/level0/crow/cbc_loggernet_20050325-20070726.dat'

    #
    # fn = sys.argv[1]
    # try:
    #     test1(fn)
    # except:
    #     pass