1 |
|
---|
2 |
|
---|
3 |
""" |
---|
4 |
Parse combined sponge data XML files. |
---|
5 |
|
---|
6 |
Usage: |
---|
7 |
|
---|
8 |
> python parse.py path/to/xml/file |
---|
9 |
> python parse.py -t |
---|
10 |
> python parse.py --test |
---|
11 |
|
---|
12 |
Test silent import. |
---|
13 |
|
---|
14 |
>>> import parse |
---|
15 |
""" |
---|
16 |
|
---|
17 |
__author__ = "Chris Calloway" |
---|
18 |
__email__ = "cbc@chriscalloway.org" |
---|
19 |
__copyright__ = "Copyright 2010 UNC-CH Department of Marine Science" |
---|
20 |
__license__ = "GPL2" |
---|
21 |
|
---|
22 |
import sys |
---|
23 |
import os |
---|
24 |
import glob |
---|
25 |
import hashlib |
---|
26 |
import doctest |
---|
27 |
import unittest |
---|
28 |
from StringIO import StringIO |
---|
29 |
import xml.etree.ElementTree as ET |
---|
30 |
|
---|
31 |
# Usage text printed for bad command lines. It is lifted from lines 3-7 of
# the module docstring (the "Usage:" heading through the "--test" example),
# so the slice must be kept in sync with the docstring layout.
USAGE = "\n".join(__doc__.splitlines()[3:8])
# Location of the doctest fixtures; the doctests join it onto the directory
# that contains this module.
TEST_PATH = os.path.join("tests", "parse")
---|
33 |
|
---|
34 |
|
---|
35 |
def _test(): |
---|
36 |
""" |
---|
37 |
Run doctests as unittest suite. |
---|
38 |
|
---|
39 |
Test silent import |
---|
40 |
|
---|
41 |
>>> from parse import _test |
---|
42 |
""" |
---|
43 |
|
---|
44 |
suite = [] |
---|
45 |
suite.append(doctest.DocTestSuite()) |
---|
46 |
suite = unittest.TestSuite(suite) |
---|
47 |
unittest.TextTestRunner().run(suite) |
---|
48 |
|
---|
49 |
return |
---|
50 |
|
---|
51 |
|
---|
52 |
def xmldoc_path():
    """
    Return the XML document file path from the command line.

    Exactly one command line argument is expected. If it is "-t" or
    "--test" the doctest suite is run and None is returned. Otherwise the
    argument is returned when it names an existing regular file. In every
    other case (wrong argument count, missing path, path that is not a
    file) USAGE is printed to stdout and None is returned, so callers never
    receive a path that cannot be opened.

    Supply too few arguments on command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = []
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply too many arguments on the command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = ["", "", "",]
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply non-file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _xmldoc_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH)
    >>> sys.argv = ["", _xmldoc_path]
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply nonexistent file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _xmldoc_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "xxxxx")
    >>> sys.argv = ["", _xmldoc_path]
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply valid XML document path argument.

    >>> _xmldoc_path = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "xml","*","*.xml")
    >>> _xmldoc_path = glob.glob(_xmldoc_path)[0]
    >>> sys.argv = ["", _xmldoc_path]
    >>> _xmldoc_path == xmldoc_path()
    True
    """

    if len(sys.argv) == 2:
        argument = sys.argv[1]
        if argument in ("-t", "--test"):
            _test()
            return None
        # isfile() is False for both missing paths and directories, so a
        # single check covers "does not exist" and "is not a file".
        if os.path.isfile(argument):
            return argument

    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(USAGE)
    # Never hand back an unusable path: the caller opens whatever it gets.
    return None
---|
138 |
|
---|
139 |
|
---|
140 |
def xmldoc(path):
    """
    Read the file at path and return its lines as a list of strings.

    Each returned string keeps its line ending, exactly as the file
    object's readlines() produces it.

    Load the reference lengths and checksums.

    >>> xmlref = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "xmlref.py")
    >>> namespace = {}
    >>> execfile(xmlref, globals(), namespace)
    >>> xml_doc_lens = namespace["XML_DOC_LENS"]
    >>> xml_doc_md5s = namespace["XML_DOC_MD5S"]

    Choose a document to read.

    >>> xmldoc_glob = os.path.join(
    ...     os.path.dirname(
    ...         os.path.abspath(__file__)),
    ...     TEST_PATH, "xml","*","*.xml")
    >>> _xmldoc_path = glob.glob(xmldoc_glob)[0]
    >>> _xmldoc = xmldoc(_xmldoc_path)

    Compare what was read against the reference data.

    >>> _xmldoc[0] == '<?xml version="1.0" encoding="utf-8"?>\\n'
    True
    >>> _xmldoc[-1] == '</root>\\n'
    True
    >>> len(_xmldoc) == xml_doc_lens[os.path.basename(_xmldoc_path)]
    True
    >>> doc_hash = hashlib.md5()
    >>> doc_hash.update("".join(_xmldoc))
    >>> doc_hash.hexdigest() == xml_doc_md5s[os.path.basename(_xmldoc_path)]
    True
    """

    # The context manager closes the file even though we return from inside.
    with open(path) as source:
        return source.readlines()
---|
183 |
|
---|
184 |
|
---|
185 |
def _main():
    """
    Run module as script.

    Obtain the XML document path from the command line and, when a path
    is returned, read the document and print its number of lines.

    Test silent import.

    >>> from parse import _main
    """

    _xmldoc_path = xmldoc_path()
    if _xmldoc_path:
        _xmldoc = xmldoc(_xmldoc_path)
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(len(_xmldoc))
---|
200 |
|
---|
201 |
# Run the command line entry point only when executed as a script, so that
# importing the module produces no output.
if __name__ == "__main__":
    _main()
---|