NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/spongenet/trunk/spongenet/parse.py

Revision 351 (checked in by cbc, 14 years ago)

Add some functions and tests to parse module.

Line 
1 #!/usr/bin/env python
2
3 """
4 Parse combined sponge data XML files.
5
6 Usage:
7
8    > python parse.py path/to/xml/file
9    > python parse.py -t
10    > python parse.py --test
11
12 Test silent import.
13
14 >>> import parse
15 """
16
17 __author__ = "Chris Calloway"
18 __email__ = "cbc@chriscalloway.org"
19 __copyright__ = "Copyright 2010 UNC-CH Department of Marine Science"
20 __license__ = "GPL2"
21
22 import sys
23 import os
24 import glob
25 import hashlib
26 import doctest
27 import unittest
28 from StringIO import StringIO
29 import xml.etree.ElementTree as ET
30
# Usage text printed for bad command lines: the "Usage:" paragraph of the
# module docstring, i.e. docstring lines 3 through 7.
_USAGE_LINES = slice(3, 8)
USAGE = "\n".join(__doc__.splitlines()[_USAGE_LINES])

# Test fixture directory; the doctests join it onto the directory that
# holds this module.
TEST_PATH = os.path.join("tests", "parse")
33
34
35 def _test():
36     """
37     Run doctests as unittest suite.
38
39     Test silent import
40
41     >>> from parse import _test
42     """
43
44     suite = []
45     suite.append(doctest.DocTestSuite())
46     suite = unittest.TestSuite(suite)
47     unittest.TextTestRunner().run(suite)
48
49     return
50
51
def xmldoc_path():
    """
    Return the XML document file path from the command line.

    Exactly one command line argument is expected. If it is "-t" or
    "--test", the doctests are run and None is returned. Otherwise the
    argument must name an existing regular file, and it is returned
    unchanged. On a wrong argument count, a nonexistent path, or a path
    that is not a file, the usage text is printed and None is returned.

    Supply too few arguments on command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = []
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply too many arguments on the command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = ["", "", "",]
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply non-file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _xmldoc_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH)
    >>> sys.argv = ["", _xmldoc_path]
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply nonexistent file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _xmldoc_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "xxxxx")
    >>> sys.argv = ["", _xmldoc_path]
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply valid XML document path argument.

    >>> _xmldoc_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "xml","*","*.xml")
    >>> _xmldoc_path = glob.glob(_xmldoc_path)[0]
    >>> sys.argv = ["", _xmldoc_path]
    >>> _xmldoc_path == xmldoc_path()
    True
    """

    path = None
    try:
        if len(sys.argv) != 2:
            raise IOError("Incorrect number of arguments supplied.")

        argument = sys.argv[1]
        if argument in ("-t", "--test"):
            _test()
        elif not os.path.exists(argument):
            raise IOError(argument + " does not exist.")
        elif not os.path.isfile(argument):
            raise IOError(argument + " is not a file.")
        else:
            # Bind the result only after validation succeeds, so that a
            # rejected argument is never handed back to the caller.
            path = argument
    except IOError:
        # A single parenthesised argument prints identically whether
        # print is a statement or a function.
        print(USAGE)
    return path
138
139
def xmldoc(path):
    """
    Read the file at path and return its lines as a list of strings.

    Load the reference lengths and checksums used for comparison.

    >>> xmlref = os.path.join(
    ...              os.path.dirname(
    ...                  os.path.abspath(__file__)),
    ...               TEST_PATH, "xmlref.py")
    >>> namespace = {}
    >>> execfile(xmlref, globals(), namespace)
    >>> xml_doc_lens = namespace["XML_DOC_LENS"]
    >>> xml_doc_md5s = namespace["XML_DOC_MD5S"]

    Read the first XML test document found.

    >>> xmldoc_glob = os.path.join(
    ...                   os.path.dirname(
    ...                       os.path.abspath(__file__)),
    ...                   TEST_PATH, "xml","*","*.xml")
    >>> _xmldoc_path = glob.glob(xmldoc_glob)[0]
    >>> _xmldoc = xmldoc(_xmldoc_path)

    Check first line, last line, line count and checksum.

    >>> _xmldoc[0] == '<?xml version="1.0" encoding="utf-8"?>\\n'
    True
    >>> _xmldoc[-1] == '</root>\\n'
    True
    >>> len(_xmldoc) == xml_doc_lens[os.path.basename(_xmldoc_path)]
    True
    >>> doc_hash = hashlib.md5()
    >>> doc_hash.update("".join(_xmldoc))
    >>> doc_hash.hexdigest() == xml_doc_md5s[os.path.basename(_xmldoc_path)]
    True
    """

    # The file is closed on leaving the with block, including on return.
    with open(path) as source:
        return source.readlines()
183
184
def _main():
    """
    Script entry point.

    Resolve the XML document path from the command line and, when one
    is returned, print the number of lines in that document.

    Test silent import.

    >>> from parse import _main
    """

    source_path = xmldoc_path()
    if not source_path:
        # No path came back, so there is nothing to read.
        return

    print(len(xmldoc(source_path)))


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    _main()
Note: See TracBrowser for help on using the browser.