NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/spongenet/trunk/spongenet/expand.py

Revision 344 (checked in by cbc, 14 years ago)

Add more package management and she-bang lines.

Line 
1 #!/usr/bin/env python
2
3 """
4 Expand zipped sponge data files.
5
6 Usage:
7
8    > python expand.py path/to/config/file
9    > python expand.py -t
10    > python expand.py --test
11
12 Test silent import.
13
14 >>> import expand
15 """
16
17 __author__ = "Chris Calloway"
18 __email__ = "cbc@chriscalloway.org"
19 __copyright__ = "Copyright 2010 UNC-CH Department of Marine Science"
20 __license__ = "GPL2"
21
22 import sys
23 import os
24 import glob
25 import zipfile
26 import shutil
27 import fileinput
28 import doctest
29 import unittest
30 from StringIO import StringIO
31
# Usage text printed on a bad invocation.  It is sliced positionally out of
# the module docstring: lines 3-7 are the "Usage:" heading through the last
# example command, so the slice must be adjusted if that docstring is edited.
USAGE = "\n".join(__doc__.splitlines()[3:8])
# Fixture directory used by the doctests, which join it onto the directory
# containing this module.
TEST_PATH = "tests/expand"
34
35
36 def _test():
37     """
38     Run doctests as unittest suite.
39
40     Test silent import
41
42     >>> from expand import _test
43     """
44
45     suite = []
46     suite.append(doctest.DocTestSuite())
47     suite = unittest.TestSuite(suite)
48     unittest.TextTestRunner().run(suite)
49
50     return
51
52
def config_path():
    """
    Return the configuration file path from the command line.

    Exactly one command line argument is expected.  If it is "-t" or
    "--test" the doctest suite is run and None is returned.  Otherwise the
    argument is returned when it names an existing regular file.  In every
    other case (wrong argument count, nonexistent path, path that is not a
    file) the usage message is printed to stdout and None is returned, so
    callers never receive a path that was just rejected.

    Supply too few arguments on command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = []
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply too many arguments on the command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = ["", "", "",]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply non-file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH)
    >>> sys.argv = ["", _config_path]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply nonexistent file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "xxxxx")
    >>> sys.argv = ["", _config_path]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply valid config path argument.

    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "config.py")
    >>> sys.argv = ["", _config_path]
    >>> _config_path == config_path()
    True
    """

    arguments = sys.argv[1:]
    if len(arguments) == 1:
        argument = arguments[0]
        if argument in ("-t", "--test"):
            _test()
            return None
        # isfile() is False for nonexistent paths and for directories alike.
        if os.path.isfile(argument):
            return argument

    # Parenthesised single argument: prints identically whether print is a
    # statement (Python 2) or a function (Python 3).
    print(USAGE)
    return None
138
139
def config(path):
    """
    Return the configuration from a file.

    The file at ``path`` is executed as Python source with this module's
    globals as its global namespace, and the names it assigns are collected.
    The result is the 5-tuple ``(zipdir, xmldir, zipdir_pattern,
    zipfile_pattern, xmlfile_pattern)``; any name the file does not define
    is None.  If the file cannot be read or does not compile (IOError or
    SyntaxError) the usage message is printed to stdout instead of raising.

    NOTE: the configuration file is run as code, so it must come from a
    trusted source.

    Execute empty configuration.

    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "empty_config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute nonexistent configuration.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "xxxxx")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute bad configuration.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "bad_config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute valid configuration.

    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> zipdir == os.path.join(os.path.dirname(os.path.abspath(__file__)),
    ...                        TEST_PATH, "zip")
    True
    >>> xmldir == os.path.join(os.path.dirname(os.path.abspath(__file__)),
    ...                        TEST_PATH, "xml")
    True
    >>> zipdir_pattern == "[0-9][0-9][0-9][0-9]_[0-9][0-9]"
    True
    >>> zipfile_pattern == "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9]-" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9].zip"
    True
    >>> xmlfile_pattern == "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9].xml"
    True
    """

    names = ("zipdir",
             "xmldir",
             "zipdir_pattern",
             "zipfile_pattern",
             "xmlfile_pattern",)
    # Every setting starts out as None so a partial config still unpacks.
    namespace = dict.fromkeys(names)
    try:
        # Read bytes so compile() can honour a source-encoding declaration.
        with open(path, "rb") as handle:
            source = handle.read()
        # Portable spelling of execfile(path, globals(), namespace): the
        # parenthesised form is accepted by both the Python 2 exec statement
        # and the Python 3 exec function.
        exec(compile(source, path, "exec"), globals(), namespace)
    except (IOError, SyntaxError):
        print(USAGE)
    # dict.get keeps the result well-formed even if the config deleted a name.
    return tuple(map(namespace.get, names))
242
243
def combine(xml_subdir, xmlfile_pattern):
    """
    Combine the xml sponge data files from a subdirectory.

    Regular files in ``xml_subdir`` matching the glob ``xmlfile_pattern``
    are merged, in sorted filename order, into a single file named
    ``xml_subdir`` + ".xml".  The first line of the first file is kept as
    the header, the first line of every later file is replaced by a CRLF,
    and everything after the header is wrapped in a ``<root>`` element.
    ``xml_subdir`` is then removed.  Files are read and written as bytes so
    line endings pass through unchanged.  When nothing matches, a file
    holding only the empty ``<root>`` wrapper is written.

    Combine test subdirectory.

    >>> xmlref_path = os.path.join(
    ...                   os.path.dirname(
    ...                       os.path.abspath(__file__)),
    ...                   TEST_PATH, "xmlref")
    >>> xmlref = glob.glob(os.path.join(xmlref_path, "*"))
    >>> xmlref_path = [path for path in xmlref if os.path.isdir(path)][0]
    >>> xmltest_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "xmltest")
    >>> if os.path.exists(xmltest_path):
    ...     shutil.rmtree(xmltest_path)
    >>> _ = shutil.copytree(xmlref_path, xmltest_path)
    >>> xmltest = glob.glob(os.path.join(xmltest_path, "*"))
    >>> xml_subdir = [path for path in xmltest if os.path.isdir(path)][0]
    >>> xmlfile_pattern = "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                          "[0-9][0-9][0-9][0-9][0-9][0-9].xml"
    >>> combine(xml_subdir, xmlfile_pattern)
    >>> ref_path = xml_subdir + os.extsep + "ref"
    >>> xml_path = xml_subdir + os.extsep + "xml"
    >>> ref_handle = open(ref_path)
    >>> xml_handle = open(xml_path)
    >>> reffile = ref_handle.read()
    >>> xmlfile = xml_handle.read()
    >>> ref_handle.close()
    >>> xml_handle.close()
    >>> reffile == xmlfile
    True
    >>> os.path.exists(xml_subdir)
    False
    """

    # Find all the xml sponge data files.  glob order is arbitrary, so sort
    # to make the merge order deterministic.
    matches = glob.glob(os.path.join(xml_subdir, xmlfile_pattern))
    xmlfiles = sorted(match for match in matches if os.path.isfile(match))

    # Read all the xml sponge data files.  Each file is opened explicitly:
    # fileinput.FileInput falls back to stdin when given an empty list.
    header = b""
    body = []
    for xmlfile in xmlfiles:
        with open(xmlfile, "rb") as handle:
            file_lines = handle.readlines()
        if not file_lines:
            continue
        if not header:
            # First line seen overall becomes the header of the result.
            header = file_lines[0]
        else:
            # Later files have their own first line replaced by a separator.
            body.append(b"\r\n")
        body.extend(file_lines[1:])

    # Write a combined xml sponge data file.
    path = xml_subdir + os.extsep + "xml"
    with open(path, "wb") as handle:
        handle.write(header)
        handle.write(b"<root>\r\n")
        handle.writelines(body)
        handle.write(b"\r\n</root>")

    # Remove the xml sponge data file subdirectory.
    shutil.rmtree(xml_subdir)
306
307
def _make_dir(path, label):
    """Create directory ``path`` if absent; return True only if created."""
    if not os.path.exists(path):
        os.mkdir(path, 0o755)
        return True
    if not os.path.isdir(path):
        raise IOError(label + path + " exists and is not a directory.")
    return False


def expand(zipdir, xmldir, zipdir_pattern, zipfile_pattern, xmlfile_pattern):
    """
    Expand zipped sponge data files.

    For every directory in ``zipdir`` matching the glob ``zipdir_pattern``,
    a directory of the same name is created under ``xmldir``.  Every file in
    it matching ``zipfile_pattern`` is extracted into a subdirectory named
    after the archive (extension dropped), which combine() then merges into
    a single XML file.  An archive whose subdirectory already exists is
    skipped.  Directories and archives are processed in sorted order.

    Raises IOError if ``xmldir``, a month subdirectory or an archive
    subdirectory exists but is not a directory.  If extracting or combining
    an archive fails, its partly filled subdirectory is removed before the
    error propagates, so a later run retries that archive.

    Expand valid tree.

    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> if os.path.exists(xmldir):
    ...     shutil.rmtree(xmldir)
    >>> expand(zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern)
    >>> xmlref_path = os.path.join(
    ...                   os.path.dirname(
    ...                       os.path.abspath(__file__)),
    ...                   TEST_PATH, "xmlref")
    >>> ref_paths = sorted(glob.glob(os.path.join(xmlref_path,
    ...                                           "*", "*.ref")))
    >>> xml_paths = sorted(glob.glob(os.path.join(xmldir, "*", "*.xml")))
    >>> paths = list(zip(ref_paths, xml_paths))
    >>> truths = []
    >>> for ref_path,xml_path in paths:
    ...     ref_handle = open(ref_path)
    ...     xml_handle = open(xml_path)
    ...     reffile = ref_handle.read()
    ...     xmlfile = xml_handle.read()
    ...     ref_handle.close()
    ...     xml_handle.close()
    ...     truths.extend((reffile == xmlfile,
    ...                    os.path.exists(os.path.splitext(xml_path)[0]),))
    >>> truths == [True, False,] * len(paths)
    True
    """

    # Find all the monthly directories of zipped files.
    zip_months = sorted(
        match
        for match in glob.glob(os.path.join(zipdir, zipdir_pattern))
        if os.path.isdir(match))

    # Create a directory for monthly XML subdirectories.
    _make_dir(xmldir, "XML directory name ")

    for zip_month in zip_months:
        # Create each monthly XML subdirectory.
        xml_month = os.path.join(xmldir, os.path.basename(zip_month))
        _make_dir(xml_month, "XML month subdirectory name ")

        # Find all the zip files for each month.
        zipfiles = sorted(
            match
            for match in glob.glob(os.path.join(zip_month, zipfile_pattern))
            if os.path.isfile(match))

        for zip_file in zipfiles:
            # Create an XML subdirectory for each zip file.
            stem = os.path.splitext(os.path.basename(zip_file))[0]
            xml_subdir = os.path.join(xml_month, stem)
            if not _make_dir(xml_subdir, "XML file subdirectory name "):
                # Subdirectory already present: leave it alone.
                continue

            try:
                # Extract all the xml files in the zip file.
                archive = zipfile.ZipFile(zip_file, "r")
                try:
                    archive.extractall(xml_subdir)
                finally:
                    archive.close()
                combine(xml_subdir, xmlfile_pattern)
            except BaseException:
                # Do not leave a half-extracted directory behind: it would
                # make every later run skip this archive.
                shutil.rmtree(xml_subdir, ignore_errors=True)
                raise
391
392
def _main():
    """
    Entry point used when the module is executed as a script.

    Obtains the configuration path from the command line, loads the
    configuration, and runs expand() only when every setting is truthy.

    Test silent import.

    >>> from expand import _main
    """

    path = config_path()
    if not path:
        return

    settings = config(path)
    if not all(settings):
        return

    expand(*settings)


if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.