NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/spongenet/trunk/spongenet/expand.py

Revision 343 (checked in by cbc, 14 years ago)

Rearrange file closure to flush buffers.

Line 
1 """
2 Expand zipped sponge data files.
3
4 Usage:
5
6    > python expand.py path/to/config/file
7    > python expand.py -t
8    > python expand.py --test
9
10 Test silent import.
11
12 >>> import expand
13 """
14
15 __author__ = "Chris Calloway"
16 __email__ = "cbc@chriscalloway.org"
17 __copyright__ = "Copyright 2010 UNC-CH Department of Marine Science"
18 __license__ = "GPL2"
19
20 import sys
21 import os
22 import glob
23 import zipfile
24 import shutil
25 import fileinput
26 import doctest
27 import unittest
28 from StringIO import StringIO
29
# The usage text is lifted straight from the module docstring: entries 3-7
# of __doc__.splitlines() are the "Usage:" heading through the last example
# command line.  Editing the top of the module docstring therefore requires
# adjusting this slice.
usage = "\n".join(__doc__.splitlines()[3:8])
# Location of the doctest fixtures; the doctests join it onto the directory
# that contains this module.
test_path = "tests/expand"
32
33
34 def _test():
35     """
36     Run doctests as unittest suite.
37
38     Test silent import
39
40     >>> from expand import _test
41     """
42
43     suite = []
44     suite.append(doctest.DocTestSuite())
45     suite = unittest.TestSuite(suite)
46     unittest.TextTestRunner().run(suite)
47
48     return
49
50
def config_path():
    """
    Return the configuration file path from the command line.

    Exactly one command line argument is expected. If it is "-t" or
    "--test" the module's doctests are run and None is returned.
    Otherwise the argument must name an existing regular file, and that
    path is returned.

    On any other input (wrong argument count, nonexistent path, path
    that is not a file) the reason is written to stderr, the usage
    message is printed to stdout, and None is returned so that callers
    never receive a path that has just been rejected.

    Supply too few arguments on command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = []
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True

    Supply too many arguments on the command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = ["", "", "",]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True

    Supply non-file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    test_path)
    >>> sys.argv = ["", _config_path]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply nonexistent file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    test_path, "xxxxx")
    >>> sys.argv = ["", _config_path]
    >>> _config_path = config_path()
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True
    >>> _config_path is None
    True

    Supply valid config path argument.

    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    test_path, "config.py")
    >>> sys.argv = ["", _config_path]
    >>> _config_path == config_path()
    True
    """

    try:
        if len(sys.argv) != 2:
            raise IOError("Incorrect number of arguments supplied.")
        argument = sys.argv[1]
        if argument in ("-t", "--test"):
            _test()
            return None
        if not os.path.exists(argument):
            raise IOError(argument + " does not exist.")
        if not os.path.isfile(argument):
            raise IOError(argument + " is not a file.")
    except IOError as error:
        # The diagnostic goes to stderr so that stdout carries exactly the
        # usage text, which is what the doctests above compare against.
        sys.stderr.write("%s\n" % error)
        print(usage)
        # Do not hand the rejected path back to the caller.
        return None
    return argument
136
137
def config(path):
    """
    Return the configuration from a file.

    The file at path is executed as Python source. The values it binds
    to the names zipdir, xmldir, zipdir_pattern, zipfile_pattern and
    xmlfile_pattern are returned as a 5-tuple in that order; any name
    the file does not bind is returned as None.

    If the file cannot be read (IOError) or does not compile
    (SyntaxError), the reason is written to stderr, the usage message
    is printed to stdout, and whatever values were bound so far
    (None by default) are returned. Other exceptions raised while the
    configuration executes propagate to the caller.

    NOTE(review): the configuration file is run as arbitrary Python
    code, so it must only ever come from a trusted source.

    Execute empty configuration.

    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    test_path, "empty_config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute nonexistent configuration.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    test_path, "xxxxx")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute bad configuration.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    test_path, "bad_config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> sys.stdout = save_stdout
    >>> usage == temp_stdout.getvalue()[:-1]
    True
    >>> zipdir
    >>> xmldir
    >>> zipdir_pattern
    >>> zipfile_pattern
    >>> xmlfile_pattern

    Execute valid configuration.

    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    test_path, "config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> zipdir == os.path.join(os.path.dirname(os.path.abspath(__file__)),
    ...                        test_path, "zip")
    True
    >>> xmldir == os.path.join(os.path.dirname(os.path.abspath(__file__)),
    ...                        test_path, "xml")
    True
    >>> zipdir_pattern == "[0-9][0-9][0-9][0-9]_[0-9][0-9]"
    True
    >>> zipfile_pattern == "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9]-" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9].zip"
    True
    >>> xmlfile_pattern == "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                           "[0-9][0-9][0-9][0-9][0-9][0-9].xml"
    True
    """

    keys = ("zipdir",
            "xmldir",
            "zipdir_pattern",
            "zipfile_pattern",
            "xmlfile_pattern",)
    # Every expected setting starts out as None so that a configuration
    # which omits one still yields a full 5-tuple.
    namespace = dict.fromkeys(keys)
    try:
        # Read the raw bytes and let compile() deal with the source
        # encoding; the trailing newline keeps older interpreters happy
        # when the file does not end with one.
        with open(path, "rb") as handle:
            source = handle.read()
        code = compile(source + b"\n", path, "exec")
        # Module globals serve as the globals and the settings dictionary
        # as the locals, so top-level assignments land in namespace.
        exec(code, globals(), namespace)
    except (IOError, SyntaxError) as error:
        # Diagnostic on stderr; stdout carries only the usage text, which
        # is what the doctests above compare against.
        sys.stderr.write("%s\n" % error)
        print(usage)
    return tuple(namespace[key] for key in keys)
240
241
def combine(xml_subdir, xmlfile_pattern):
    """
    Combine the xml sponge data files from a subdirectory.

    Every regular file in xml_subdir whose name matches the glob
    pattern xmlfile_pattern is concatenated, in sorted path order, into
    a single file named xml_subdir plus the extension "xml". The first
    line of the first file is written as the header, followed by an
    opening "<root>" line, the remaining lines of all files, and a
    closing "</root>". The first line of each subsequent file is
    replaced by an empty line. When no file matches, the result holds
    only the empty root element. Finally xml_subdir is removed together
    with everything in it. Nothing is returned.

    All files are read and written in binary mode so that the line
    endings pass through unchanged.

    Combine test subdirectory.

    >>> xmlref_path = os.path.join(
    ...                   os.path.dirname(
    ...                       os.path.abspath(__file__)),
    ...                   test_path, "xmlref")
    >>> xmlref = glob.glob(os.path.join(xmlref_path, "*"))
    >>> xmlref_path = [path for path in xmlref if os.path.isdir(path)][0]
    >>> xmltest_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    test_path, "xmltest")
    >>> if os.path.exists(xmltest_path):
    ...     shutil.rmtree(xmltest_path)
    >>> _copied = shutil.copytree(xmlref_path, xmltest_path)
    >>> xmltest = glob.glob(os.path.join(xmltest_path, "*"))
    >>> xml_subdir = [path for path in xmltest if os.path.isdir(path)][0]
    >>> xmlfile_pattern = "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]T" \
                          "[0-9][0-9][0-9][0-9][0-9][0-9].xml"
    >>> combine(xml_subdir, xmlfile_pattern)
    >>> ref_path = xml_subdir + os.extsep + "ref"
    >>> xml_path = xml_subdir + os.extsep + "xml"
    >>> ref_handle = open(ref_path)
    >>> xml_handle = open(xml_path)
    >>> reffile = ref_handle.read()
    >>> xmlfile = xml_handle.read()
    >>> ref_handle.close()
    >>> xml_handle.close()
    >>> reffile == xmlfile
    True
    >>> os.path.exists(xml_subdir)
    False
    """

    # Find all the xml sponge data files.  glob returns matches in an
    # arbitrary, filesystem-dependent order, so sort them to make the
    # combined file reproducible.
    candidates = glob.glob(os.path.join(xml_subdir, xmlfile_pattern))
    xmlfiles = sorted(xmlfile
                      for xmlfile in candidates
                      if os.path.isfile(xmlfile))

    # Read all the xml sponge data files.  FileInput falls back to reading
    # standard input when given an empty list, so only use it when there
    # really is something to read.
    header = b""
    lines = []
    if xmlfiles:
        files = fileinput.FileInput(xmlfiles, mode="rb")
        try:
            header = files.readline()
            # The first line of every later file is dropped in favor of
            # an empty line; only the header of the first file is kept.
            lines = [b"\r\n" if files.isfirstline() else line
                     for line in files]
        finally:
            files.close()

    # Write a combined xml sponge data file.
    path = xml_subdir + os.extsep + "xml"
    with open(path, "wb") as handle:
        handle.write(header)
        handle.write(b"<root>\r\n")
        handle.writelines(lines)
        handle.write(b"\r\n</root>")

    # Remove the xml sponge data file subdirectory.
    shutil.rmtree(xml_subdir)
304
305
def _ensure_directory(path, description):
    """
    Create the directory at path unless something already exists there.

    Return True when the directory was created by this call and False
    when a directory was already present. Raise IOError, with a message
    starting with description followed by the path, when path exists
    but is not a directory.
    """

    if not os.path.exists(path):
        # 0o755 is the octal spelling accepted by Python 2.6+ and 3.
        os.mkdir(path, 0o755)
        return True
    if not os.path.isdir(path):
        raise IOError(description +
                      path +
                      " exists and is not a directory.")
    return False


def expand(zipdir, xmldir, zipdir_pattern, zipfile_pattern, xmlfile_pattern):
    """
    Expand zipped sponge data files.

    Directories directly under zipdir whose names match zipdir_pattern
    are treated as monthly directories. For each one, a directory of
    the same name is created under xmldir (xmldir itself is created
    first if it does not exist). Every regular file in the monthly
    directory matching zipfile_pattern is extracted into a subdirectory
    named after the zip file without its extension, and that
    subdirectory is then passed to combine together with
    xmlfile_pattern. A zip file whose subdirectory already exists is
    left alone. Nothing is returned.

    Raises IOError when xmldir, a monthly subdirectory or a per-zip
    subdirectory exists but is not a directory.

    Expand valid tree.

    >>> _config_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    test_path, "config.py")
    >>> zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern = \
            config(_config_path)
    >>> if os.path.exists(xmldir):
    ...     shutil.rmtree(xmldir)
    >>> expand(zipdir,xmldir,zipdir_pattern,zipfile_pattern,xmlfile_pattern)
    >>> xmlref_path = os.path.join(
    ...                   os.path.dirname(
    ...                       os.path.abspath(__file__)),
    ...                   test_path, "xmlref")
    >>> ref_paths = sorted(glob.glob(os.path.join(xmlref_path,
    ...                                           "*", "*.ref")))
    >>> xml_paths = sorted(glob.glob(os.path.join(xmldir, "*", "*.xml")))
    >>> paths = list(zip(ref_paths, xml_paths))
    >>> truths = []
    >>> for ref_path,xml_path in paths:
    ...     ref_handle = open(ref_path)
    ...     xml_handle = open(xml_path)
    ...     reffile = ref_handle.read()
    ...     xmlfile = xml_handle.read()
    ...     ref_handle.close()
    ...     xml_handle.close()
    ...     truths.extend((reffile == xmlfile,
    ...                    os.path.exists(os.path.splitext(xml_path)[0]),))
    >>> truths == [True, False,] * len(paths)
    True
    """

    # Find all the monthly directories of zipped files.
    zip_months = [zip_month
                  for zip_month in glob.glob(os.path.join(zipdir,
                                                          zipdir_pattern))
                  if os.path.isdir(zip_month)]

    # Create a directory for monthly XML subdirectories.
    _ensure_directory(xmldir, "XML directory name ")

    for zip_month in zip_months:
        # Create each monthly XML subdirectory.
        xml_month = os.path.join(xmldir, os.path.basename(zip_month))
        _ensure_directory(xml_month, "XML month subdirectory name ")

        # Find all the zip files for each month.
        zipfiles = [zip_file
                    for zip_file in glob.glob(os.path.join(zip_month,
                                                           zipfile_pattern))
                    if os.path.isfile(zip_file)]

        for zip_file in zipfiles:
            # Create an XML subdirectory for each zip file.
            stem = os.path.splitext(os.path.basename(zip_file))[0]
            xml_subdir = os.path.join(xml_month, stem)
            # Only a freshly created subdirectory is filled; one that is
            # already present is skipped.
            if _ensure_directory(xml_subdir, "XML file subdirectory name "):
                # Extract all the xml files in the zip file, making sure
                # the archive is closed even when extraction fails.
                archive = zipfile.ZipFile(zip_file, "r")
                try:
                    archive.extractall(xml_subdir)
                finally:
                    archive.close()
                combine(xml_subdir, xmlfile_pattern)
389
390
def _main():
    """
    Drive the expansion when the module is used as a script.

    The configuration path is taken from the command line, the
    configuration is loaded from it, and the expansion is performed.
    The function stops quietly when no path is obtained or when any
    configuration value is missing or empty.

    Test silent import.

    >>> from expand import _main
    """

    location = config_path()
    if not location:
        return
    settings = config(location)
    # Every one of the settings must be truthy before anything is expanded.
    if not all(settings):
        return
    expand(*settings)
406
# Run the command-line entry point only when this file is executed as a
# script; a plain import defines the functions without running anything.
if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.