1 |
""" |
---|
2 |
Expand zipped sponge data files. |
---|
3 |
|
---|
4 |
Usage: |
---|
5 |
|
---|
6 |
> python expand.py path/to/config/file |
---|
7 |
|
---|
8 |
>>> import expand |
---|
9 |
""" |
---|
10 |
|
---|
11 |
__author__ = "Chris Calloway" |
---|
12 |
__email__ = "cbc@chriscalloway.org" |
---|
13 |
__copyright__ = "Copyright 2010 UNC-CH Department of Marine Science" |
---|
14 |
__license__ = "GPL2" |
---|
15 |
|
---|
16 |
import sys |
---|
17 |
import os |
---|
18 |
import glob |
---|
19 |
import zipfile |
---|
20 |
import shutil |
---|
21 |
import fileinput |
---|
22 |
from StringIO import StringIO |
---|
23 |
|
---|
24 |
# The usage message is lifted from the module docstring: the "Usage:"
# heading, the blank line after it and the command line example
# (docstring lines 3 through 5, counting from 0).
_doc_lines = __doc__.splitlines()
usage = "\n".join(_doc_lines[3:6])
del _doc_lines

# Location of the doctest fixtures; the doctests join this onto the
# directory that contains this module.
test_path = "tests/expand"
---|
26 |
|
---|
27 |
|
---|
28 |
def config_path():
    """
    Return the configuration file path from the command line.

    Exactly one command line argument is expected and it must name an
    existing regular file. In every other case the usage message is
    printed and None is returned, so the result can be tested for truth
    before it is handed to config().

    Supply too few arguments on command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = []
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply too many arguments on the command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = ["", "", "",]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply non-file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     test_path)
    >>> sys.argv = ["", _config_path]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply nonexistent file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     test_path, "xxxxx")
    >>> sys.argv = ["", _config_path]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply valid config path argument.

    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     test_path, "config.py")
    >>> sys.argv = ["", _config_path]
    >>> _config_path == config_path()
    True
    """

    # os.path.isfile() is False for a missing path as well as for a
    # directory, so one check covers both "does not exist" and "is not
    # a file".
    if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]):
        return sys.argv[1]

    # Never hand back a path that failed validation: the caller tests
    # the result for truth before going on to load the configuration.
    print(usage)
    return None
---|
110 |
|
---|
111 |
|
---|
112 |
def config(path):
    """
    Return the configuration from a file.

    The file at path is executed as Python code with this module's
    globals as its globals. The values it assigns to zipdir, xmldir,
    zipdir_pattern, zipfile_pattern and xmlfile_pattern are returned as
    a tuple in that order; any name the file does not assign is
    returned as None. If the file cannot be read or does not compile,
    the usage message is printed and every value is None.

    Execute empty configuration.

    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     test_path, "empty_config.py")
    >>> (zipdir, xmldir, zipdir_pattern, zipfile_pattern,
    ...  xmlfile_pattern) = config(_config_path)
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute nonexistent configuration.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     test_path, "xxxxx")
    >>> (zipdir, xmldir, zipdir_pattern, zipfile_pattern,
    ...  xmlfile_pattern) = config(_config_path)
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute bad configuration.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     test_path, "bad_config.py")
    >>> (zipdir, xmldir, zipdir_pattern, zipfile_pattern,
    ...  xmlfile_pattern) = config(_config_path)
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute valid configuration.

    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     test_path, "config.py")
    >>> (zipdir, xmldir, zipdir_pattern, zipfile_pattern,
    ...  xmlfile_pattern) = config(_config_path)
    >>> zipdir == os.path.join(os.path.dirname(os.path.abspath(__file__)),
    ...                        test_path, "zip")
    True
    >>> xmldir == os.path.join(os.path.dirname(os.path.abspath(__file__)),
    ...                        test_path, "xml")
    True
    >>> zipdir_pattern == "[0-9][0-9][0-9][0-9]_[0-9][0-9]"
    True
    >>> zipfile_pattern == ("[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T"
    ...                     "[0-9][0-9][0-9][0-9][0-9][0-9]-"
    ...                     "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T"
    ...                     "[0-9][0-9][0-9][0-9][0-9][0-9].zip")
    True
    >>> xmlfile_pattern == ("[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T"
    ...                     "[0-9][0-9][0-9][0-9][0-9][0-9].xml")
    True
    """

    names = ("zipdir",
             "xmldir",
             "zipdir_pattern",
             "zipfile_pattern",
             "xmlfile_pattern")

    # Every setting starts out as None so that a name the file never
    # assigns is still present in the result.
    namespace = dict.fromkeys(names)

    try:
        with open(path) as handle:
            source = handle.read()
        # Normalise line endings and guarantee a trailing newline so
        # that compile() accepts the same files on older interpreters.
        source = source.replace("\r\n", "\n") + "\n"
        # NOTE(review): the configuration file is run as arbitrary
        # Python code with access to this module's globals. Only point
        # this at files you trust.
        exec(compile(source, path, "exec"), globals(), namespace)
    except (IOError, SyntaxError):
        print(usage)

    return tuple([namespace[name] for name in names])
---|
214 |
|
---|
215 |
|
---|
216 |
def combine(xml_subdir, xmlfile_pattern):
    """
    Combine the xml sponge data files from a subdirectory.

    Every regular file in xml_subdir whose name matches the glob
    pattern xmlfile_pattern is merged, in sorted name order, into one
    file named xml_subdir plus the extension "xml". The first line of
    the first file is kept as the header, the first line of every
    later file is dropped, and the remaining lines are wrapped in a
    <root> element. xml_subdir is removed afterwards. If nothing
    matches, an empty <root> element is written.

    Combine test subdirectory.

    >>> xmlref_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     test_path, "xmlref")
    >>> xmlref = glob.glob(os.path.join(xmlref_path, "*"))
    >>> xmlref_path = [path for path in xmlref if os.path.isdir(path)][0]
    >>> xmltest_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     test_path, "xmltest")
    >>> if os.path.exists(xmltest_path):
    ...     shutil.rmtree(xmltest_path)
    >>> shutil.copytree(xmlref_path, xmltest_path)
    >>> xmltest = glob.glob(os.path.join(xmltest_path, "*"))
    >>> xml_subdir = [path for path in xmltest if os.path.isdir(path)][0]
    >>> xmlfile_pattern = ("[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T"
    ...                    "[0-9][0-9][0-9][0-9][0-9][0-9].xml")
    >>> combine(xml_subdir, xmlfile_pattern)
    >>> ref_path = xml_subdir + os.extsep + "ref"
    >>> xml_path = xml_subdir + os.extsep + "xml"
    >>> ref_handle = open(ref_path)
    >>> xml_handle = open(xml_path)
    >>> reffile = ref_handle.read()
    >>> xmlfile = xml_handle.read()
    >>> reffile == xmlfile
    True
    >>> ref_handle.close()
    >>> xml_handle.close()
    >>> os.path.exists(xml_subdir)
    False
    """

    # glob returns names in arbitrary order; sort so the combined file
    # is the same on every run and every filesystem.
    xmlfiles = sorted(
        xmlfile
        for xmlfile in glob.glob(os.path.join(xml_subdir, xmlfile_pattern))
        if os.path.isfile(xmlfile))

    header = ""
    lines = []
    # FileInput falls back to reading standard input when it is given
    # an empty list of files, so only use it when there is something
    # to read.
    if xmlfiles:
        files = fileinput.FileInput(xmlfiles)
        try:
            # The first line of the first file becomes the header ...
            header = files.readline()
            # ... and the first line of each later file is skipped.
            lines = [line for line in files if not files.isfirstline()]
        finally:
            files.close()

    path = xml_subdir + os.extsep + "xml"
    with open(path, "w") as handle:
        handle.write(header)
        handle.write("<root>\r\n")
        handle.writelines(lines)
        handle.write("\r\n</root>")

    # The individual files have been merged; drop the work directory.
    shutil.rmtree(xml_subdir)

    return
---|
278 |
|
---|
279 |
|
---|
280 |
def expand(zipdir, xmldir, zipdir_pattern, zipfile_pattern, xmlfile_pattern):
    """
    Expand zipped sponge data files.

    For every directory in zipdir matching zipdir_pattern, a directory
    of the same name is created under xmldir. Every file in it
    matching zipfile_pattern is extracted into a subdirectory named
    after the archive (without its extension), and that subdirectory
    is then passed to combine() along with xmlfile_pattern.

    Raise IOError if xmldir, a month directory or an archive
    subdirectory already exists as something other than a directory.

    Expand valid tree.

    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     test_path, "config.py")
    >>> (zipdir, xmldir, zipdir_pattern, zipfile_pattern,
    ...  xmlfile_pattern) = config(_config_path)
    >>> if os.path.exists(xmldir):
    ...     shutil.rmtree(xmldir)
    >>> expand(zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern)
    """

    zip_months = [zip_month
                  for zip_month
                  in glob.glob(os.path.join(zipdir, zipdir_pattern))
                  if os.path.isdir(zip_month)]

    _ensure_directory(xmldir, "XML directory name ")

    for zip_month in zip_months:

        xml_month = os.path.join(xmldir, os.path.basename(zip_month))
        _ensure_directory(xml_month, "XML month subdirectory name ")

        zipfiles = [zip_file
                    for zip_file
                    in glob.glob(os.path.join(zip_month, zipfile_pattern))
                    if os.path.isfile(zip_file)]

        for zip_file in zipfiles:

            stem = os.path.splitext(os.path.basename(zip_file))[0]
            xml_subdir = os.path.join(xml_month, stem)

            # Only a freshly created subdirectory is extracted into.
            # NOTE(review): combine() removes xml_subdir once it has
            # merged the files, so a directory already present here
            # was presumably left behind by an interrupted run and is
            # skipped rather than re-extracted -- confirm intended.
            if _ensure_directory(xml_subdir, "XML file subdirectory name "):
                archive = zipfile.ZipFile(zip_file, "r")
                try:
                    archive.extractall(xml_subdir)
                finally:
                    # Release the archive handle even if extraction
                    # fails part way through.
                    archive.close()
                combine(xml_subdir, xmlfile_pattern)

    return


def _ensure_directory(path, label):
    """
    Create the directory path with mode 0755 unless it already exists.

    Return True if the directory was created and False if it was
    already there. Raise IOError, with a message starting with label,
    if path exists but is not a directory.
    """

    if not os.path.exists(path):
        os.mkdir(path, 0o755)
        return True
    if not os.path.isdir(path):
        raise IOError(label + path + " exists and is not a directory.")
    return False
---|
343 |
|
---|
344 |
if __name__ == "__main__":
    # Script entry point: read the configuration named on the command
    # line and expand the archives it describes.
    _config_path = config_path()
    _config = config(_config_path) if _config_path else None
    # Expand only when a configuration was loaded and every one of its
    # settings is truthy.
    if _config and all(_config):
        expand(*_config)
---|