1 |
|
---|
2 |
|
---|
3 |
""" |
---|
4 |
Expand zipped sponge data files. |
---|
5 |
|
---|
6 |
Usage: |
---|
7 |
|
---|
8 |
> python expand.py path/to/config/file |
---|
9 |
> python expand.py -t |
---|
10 |
> python expand.py --test |
---|
11 |
|
---|
12 |
Test silent import. |
---|
13 |
|
---|
14 |
>>> import expand |
---|
15 |
""" |
---|
16 |
|
---|
17 |
__author__ = "Chris Calloway" |
---|
18 |
__email__ = "cbc@chriscalloway.org" |
---|
19 |
__copyright__ = "Copyright 2010 UNC-CH Department of Marine Science" |
---|
20 |
__license__ = "GPL2" |
---|
21 |
|
---|
22 |
import sys |
---|
23 |
import os |
---|
24 |
import glob |
---|
25 |
import zipfile |
---|
26 |
import shutil |
---|
27 |
import fileinput |
---|
28 |
import doctest |
---|
29 |
import unittest |
---|
30 |
from StringIO import StringIO |
---|
31 |
|
---|
32 |
# Usage text printed for bad command lines or unreadable configurations.
# It is lines 3 through 7 of the module docstring (the "Usage:" heading
# through the last example invocation), so changing the layout of the
# module docstring changes what this slice picks up.
USAGE = "\n".join(__doc__.splitlines()[3:8])
# Location of the doctest fixtures.  Most doctests join it onto the
# directory containing this module.
TEST_PATH = "tests/expand"
---|
34 |
|
---|
35 |
|
---|
36 |
def _test():
    """
    Gather this module's doctests and run them with the unittest runner.

    Test silent import

    >>> from expand import _test
    """

    # DocTestSuite() with no argument inspects the module of its direct
    # caller, so it has to be invoked from inside this module.
    doctests = doctest.DocTestSuite()
    runner = unittest.TextTestRunner()
    runner.run(unittest.TestSuite([doctests]))
---|
51 |
|
---|
52 |
|
---|
53 |
def config_path():
    """
    Return the configuration file path from the command line.

    Reads sys.argv, which must hold exactly one argument after the
    program name: either the path of an existing configuration file, or
    "-t" / "--test" to run the doctests.

    Returns the argument when it names an existing regular file, and None
    in every other case.  A wrong argument count, a nonexistent path and
    a path that is not a regular file each print USAGE; the test flag
    runs _test() and prints no usage text.

    Supply too few arguments on command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = []
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply too many arguments on the command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = ["", "", "",]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply non-file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH)
    >>> sys.argv = ["", _config_path]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply nonexistent file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "xxxxx")
    >>> sys.argv = ["", _config_path]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply valid config path argument.

    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "config.py")
    >>> sys.argv = ["", _config_path]
    >>> _config_path == config_path()
    True
    """

    if len(sys.argv) != 2:
        print(USAGE)
        return None

    argument = sys.argv[1]
    if argument in ("-t", "--test"):
        _test()
        return None

    # Only hand back a path the caller can actually open.  A missing path
    # or a directory used to be reported and then returned anyway, which
    # made the caller fail on it and print the usage text a second time.
    # os.path.isfile() is False for nonexistent paths as well.
    if not os.path.isfile(argument):
        print(USAGE)
        return None

    return argument
---|
138 |
|
---|
139 |
|
---|
140 |
def config(path):
    """
    Return the configuration from a file.

    The file at path is executed as Python source.  The values it assigns
    at top level to zipdir, xmldir, zipdir_pattern, zipfile_pattern and
    xmlfile_pattern are returned as a 5-tuple in that order; any of those
    names the file does not assign comes back as None.

    If the file cannot be read or is not valid Python (IOError or
    SyntaxError), USAGE is printed and the names that were never assigned
    are returned as None.  Other exceptions raised by the configuration
    code propagate to the caller.

    Execute empty configuration.

    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "empty_config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute nonexistent configuration.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "xxxxx")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute bad configuration.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "bad_config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute valid configuration.

    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> zipdir == os.path.join(os.path.dirname(os.path.abspath(__file__)),
    ...                        TEST_PATH, "zip")
    True
    >>> xmldir == os.path.join(os.path.dirname(os.path.abspath(__file__)),
    ...                        TEST_PATH, "xml")
    True
    >>> zipdir_pattern == "[0-9][0-9][0-9][0-9]_[0-9][0-9]"
    True
    >>> zipfile_pattern == "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9]-" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9].zip"
    True
    >>> xmlfile_pattern == "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9].xml"
    True
    """

    # Every expected setting starts out as None so that a configuration
    # which omits one (or fails before assigning it) still yields a
    # complete tuple.
    namespace = {
        "zipdir": None,
        "xmldir": None,
        "zipdir_pattern": None,
        "zipfile_pattern": None,
        "xmlfile_pattern": None,
    }
    try:
        with open(path) as handle:
            source = handle.read()
        # NOTE(review): the configuration file is run as arbitrary Python
        # with this module's globals visible to it; only point this at
        # trusted files.
        # The trailing newline keeps compile() happy on interpreters that
        # require "exec" input to end with one.
        code = compile(source + "\n", path, "exec")
        # Top-level assignments in the file land in `namespace`.
        exec(code, globals(), namespace)
    except (IOError, SyntaxError):
        print(USAGE)
    return (namespace["zipdir"],
            namespace["xmldir"],
            namespace["zipdir_pattern"],
            namespace["zipfile_pattern"],
            namespace["xmlfile_pattern"],)
---|
242 |
|
---|
243 |
|
---|
244 |
def combine(xml_subdir, xmlfile_pattern):
    """
    Combine the XML sponge data files from a subdirectory.

    The regular files in xml_subdir whose names match the glob
    xmlfile_pattern are concatenated, in sorted path order, into one file
    named xml_subdir + os.extsep + "xml" next to the subdirectory.  The
    first line read becomes the header of the combined file, the first
    line of every later file is dropped, and all remaining lines are
    wrapped in a <root> element.  Trailing whitespace is stripped from
    every line.  Afterwards xml_subdir is deleted.

    If nothing matches, the function returns without writing a file and
    leaves xml_subdir in place.

    Combine test subdirectory.

    >>> xmlref_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "xmlref")
    >>> xmlref = glob.glob(os.path.join(xmlref_path, "*"))
    >>> xmlref_path = [path for path in xmlref if os.path.isdir(path)][0]
    >>> xmltest_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "xmltest")
    >>> if os.path.exists(xmltest_path):
    ...     shutil.rmtree(xmltest_path)
    >>> _ = shutil.copytree(xmlref_path, xmltest_path)
    >>> xmltest = glob.glob(os.path.join(xmltest_path, "*"))
    >>> xml_subdir = [path for path in xmltest if os.path.isdir(path)][0]
    >>> xmlfile_pattern = "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                          "[0-9][0-9][0-9][0-9][0-9][0-9].xml"
    >>> combine(xml_subdir, xmlfile_pattern)
    >>> ref_path = xml_subdir + os.extsep + "ref"
    >>> xml_path = xml_subdir + os.extsep + "xml"
    >>> with open(ref_path) as ref_handle:
    ...     reffile = ref_handle.readlines()
    >>> with open(xml_path) as xml_handle:
    ...     xmlfile = xml_handle.readlines()
    >>> reffile == xmlfile
    True
    >>> os.path.exists(xml_subdir)
    False
    """

    # glob returns matches in arbitrary order; sort so the combined file
    # is the same on every run and platform.
    matches = glob.glob(os.path.join(xml_subdir, xmlfile_pattern))
    xmlfiles = sorted([match for match in matches if os.path.isfile(match)])

    # fileinput treats an empty file list as "use sys.argv[1:], else
    # stdin", which would block or read unrelated input, so stop here.
    if not xmlfiles:
        return

    files = fileinput.FileInput(xmlfiles)
    try:
        # The first line read is kept once as the header of the result.
        header = files.readline().rstrip() + "\n"
        # The first line of each remaining file is skipped.
        lines = [line.rstrip() + "\n"
                 for line in files
                 if not files.isfirstline()]
    finally:
        files.close()

    path = xml_subdir + os.extsep + "xml"
    with open(path, "w") as handle:
        handle.write(header)
        handle.write("<root>\n")
        handle.writelines(lines)
        handle.write("</root>\n")

    # The individual files are no longer needed once combined.
    shutil.rmtree(xml_subdir)
---|
303 |
|
---|
304 |
|
---|
305 |
def _ensure_dir(path, description):
    """
    Create directory path if nothing exists there; report whether it did.

    Returns True when the directory was created by this call and False
    when it already existed.  Raises IOError, with a message starting
    with description, when path exists but is not a directory.
    """

    if not os.path.exists(path):
        os.mkdir(path, 0o755)
        return True
    if not os.path.isdir(path):
        raise IOError(description + path + " exists and is not a directory.")
    return False


def expand(zipdir, xmldir, zipdir_pattern, zipfile_pattern, xmlfile_pattern):
    """
    Expand zipped sponge data files.

    For every directory under zipdir matching the glob zipdir_pattern, a
    directory of the same name is created under xmldir (xmldir itself is
    created if needed).  Every regular file in the source directory that
    matches zipfile_pattern is extracted into a new subdirectory named
    after the archive without its extension, and that subdirectory is
    then passed to combine() together with xmlfile_pattern.  An archive
    whose subdirectory already exists is skipped.

    Raises IOError when xmldir, a month directory or an archive
    subdirectory exists but is not a directory.

    Expand valid tree.

    >>> _config_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> if os.path.exists(xmldir):
    ...     shutil.rmtree(xmldir)
    >>> expand(zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern)
    >>> xmlref_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "xmlref")
    >>> ref_paths = sorted(glob.glob(os.path.join(xmlref_path,
    ...                                           "*", "*.ref")))
    >>> xml_paths = sorted(glob.glob(os.path.join(xmldir, "*", "*.xml")))
    >>> paths = list(zip(ref_paths, xml_paths))
    >>> truths = []
    >>> for ref_path,xml_path in paths:
    ...     with open(ref_path) as ref_handle:
    ...         reffile = ref_handle.readlines()
    ...     with open(xml_path) as xml_handle:
    ...         xmlfile = xml_handle.readlines()
    ...     truths.extend((reffile == xmlfile,
    ...                    os.path.exists(os.path.splitext(xml_path)[0]),))
    >>> truths == [True, False,] * len(paths)
    True
    """

    # Source directories are collected before anything is created.
    zip_months = [zip_month
                  for zip_month
                  in glob.glob(os.path.join(zipdir, zipdir_pattern))
                  if os.path.isdir(zip_month)]

    _ensure_dir(xmldir, "XML directory name ")

    for zip_month in zip_months:

        # Mirror the source directory name under the XML directory.
        xml_month = os.path.join(xmldir, os.path.split(zip_month)[1])
        _ensure_dir(xml_month, "XML month subdirectory name ")

        zipfiles = [zip_file
                    for zip_file
                    in glob.glob(os.path.join(zip_month, zipfile_pattern))
                    if os.path.isfile(zip_file)]

        for zip_file in zipfiles:

            # The archive name without its extension names the
            # subdirectory that receives the extracted files.
            xml_subdir = os.path.splitext(os.path.split(zip_file)[1])[0]
            xml_subdir = os.path.join(xml_month, xml_subdir)

            # Extract only into a subdirectory created just now; one that
            # already exists is left untouched.
            if _ensure_dir(xml_subdir, "XML file subdirectory name "):
                archive = zipfile.ZipFile(zip_file, "r")
                try:
                    archive.extractall(xml_subdir)
                finally:
                    # Release the archive handle even if extraction fails.
                    archive.close()
                combine(xml_subdir, xmlfile_pattern)
---|
387 |
|
---|
388 |
|
---|
389 |
def _main():
    """
    Script entry point: locate the configuration, load it, expand the data.

    Nothing is expanded unless a configuration path was obtained from the
    command line and every setting loaded from it is truthy.

    Test silent import.

    >>> from expand import _main
    """

    chosen_path = config_path()
    if not chosen_path:
        return

    settings = config(chosen_path)
    if not all(settings):
        return

    expand(*settings)
---|
404 |
|
---|
405 |
# Run the command-line entry point only when this file is executed as a
# script, so that importing the module stays silent.
if __name__ == "__main__":
    _main()
---|