NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/spongenet/trunk/spongenet/parse.py

Revision 374 (checked in by cbc, 14 years ago)

Remove check for missing Value tag in parse module and defer to handling KeyErrors.

Line 
1 #!/usr/bin/env python
2
3 """
4 Parse combined sponge data XML files.
5
6 Usage:
7
8    > python parse.py path/to/xml/file
9    > python parse.py -t
10    > python parse.py --test
11
12 Test silent import.
13
14 >>> import parse
15 """
16
17 __author__ = "Chris Calloway"
18 __email__ = "cbc@chriscalloway.org"
19 __copyright__ = "Copyright 2010 UNC-CH Department of Marine Science"
20 __license__ = "GPL2"
21
22 import sys
23 import os
24 import re
25 import glob
26 import hashlib
27 import doctest
28 import unittest
29 from StringIO import StringIO
30 import xml.etree.cElementTree as ET
31
# Usage text printed for bad command lines: entries 3 through 7 of the
# module docstring's lines (the "Usage:" heading through the "--test"
# example).  Editing the module docstring shifts what this slice selects.
USAGE = "\n".join(__doc__.splitlines()[3:8])
# Relative location of the test fixtures; the doctests join it onto the
# directory that contains this module.
TEST_PATH = os.path.join("tests", "parse")
# Splits a tag of the form "{...}rest": group 1 is the braced prefix
# (braces included) and group 2 is everything after it.
XMLNS_PATTERN = re.compile(r"(\{.*\})(.*)")
# Not referenced anywhere else in this module; presumably a sentinel for
# absent readings -- TODO confirm with callers.
MISSING_VALUE = -9999
36
37
def _test():
    """
    Collect this module's doctests and run them with a unittest runner.

    Test silent import

    >>> from parse import _test
    """

    doctests = doctest.DocTestSuite()
    runner = unittest.TextTestRunner()
    runner.run(unittest.TestSuite([doctests]))
53
54
def xmldoc_path():
    """
    Return the XML document file path from the command line.

    Exactly one argument is expected after the program name in
    sys.argv.  If it is "-t" or "--test" the doctests are run and None
    is returned.  Otherwise it must name an existing regular file, and
    that path is returned unchanged.  For any other command line the
    usage message is printed and None is returned, so a caller never
    receives a path that failed validation.

    Supply too few arguments on command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = []
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply too many arguments on the command line.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> sys.argv = ["", "", "",]
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply non-file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _xmldoc_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH)
    >>> sys.argv = ["", _xmldoc_path]
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply nonexistent file argument.

    >>> save_stdout = sys.stdout
    >>> temp_stdout = StringIO()
    >>> sys.stdout = temp_stdout
    >>> _xmldoc_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "xxxxx")
    >>> sys.argv = ["", _xmldoc_path]
    >>> _xmldoc_path = xmldoc_path()
    >>> sys.stdout = save_stdout
    >>> USAGE == temp_stdout.getvalue()[:-1]
    True
    >>> _xmldoc_path is None
    True

    Supply valid XML document path argument.

    >>> _xmldoc_path = os.path.join(
    ...                    os.path.dirname(
    ...                        os.path.abspath(__file__)),
    ...                    TEST_PATH, "xml","*","*.xml")
    >>> _xmldoc_path = glob.glob(_xmldoc_path)[0]
    >>> sys.argv = ["", _xmldoc_path]
    >>> _xmldoc_path == xmldoc_path()
    True
    """

    path = None
    try:
        if len(sys.argv) != 2:
            raise IOError("Incorrect number of arguments supplied.")
        argument = sys.argv[1]
        if argument in ("-t", "--test"):
            _test()
        elif not os.path.exists(argument):
            raise IOError(argument + " does not exist.")
        elif not os.path.isfile(argument):
            raise IOError(argument + " is not a file.")
        else:
            # Only a fully validated argument is ever handed back.
            path = argument
    except IOError:
        # The specific message is not displayed; every failure shows the
        # usage text.  The parenthesised single-argument form prints the
        # same text as a print statement or a print function call.
        print(USAGE)
    return path
141
142
def xmldoc(path):
    """
    Read the file at path and return its entire content as one string.

    Get the test reference data.

    >>> xmlref = os.path.join(
    ...              os.path.dirname(
    ...                  os.path.abspath(__file__)),
    ...               TEST_PATH, "xmlref.py")
    >>> namespace = {}
    >>> execfile(xmlref, globals(), namespace)
    >>> xml_doc_lens = namespace["XML_DOC_LENS"]
    >>> xml_doc_md5s = namespace["XML_DOC_MD5S"]

    Pick a test document.

    >>> xmldoc_glob = os.path.join(
    ...                   os.path.dirname(
    ...                       os.path.abspath(__file__)),
    ...                   TEST_PATH, "xml","*","*.xml")
    >>> _xmldoc_path = glob.glob(xmldoc_glob)[0]
    >>> _xmldoc = xmldoc(_xmldoc_path)

    Verify the test document matches the reference data.

    >>> xmldoc_lines = _xmldoc.splitlines()
    >>> xmldoc_lines[0] == '<?xml version="1.0" encoding="utf-8"?>'
    True
    >>> xmldoc_lines[-1] == '</root>'
    True
    >>> len(xmldoc_lines) == xml_doc_lens[os.path.basename(_xmldoc_path)]
    True
    >>> doc_hash = hashlib.md5()
    >>> doc_hash.update(_xmldoc)
    >>> doc_hash.hexdigest() == xml_doc_md5s[os.path.basename(_xmldoc_path)]
    True
    """

    with open(path) as source:
        return source.read()
187
188
class Point(dict):
    """
    A data point for a sponge sensor sample.

    The dictionary holds one entry per XML attribute of the point
    element (key: lower-cased attribute name, value: attribute value)
    and one entry per child element (key: lower-cased tag name with any
    "{namespace}" prefix removed, value: the child's text).

    Instantiate a valid Point object.

    >>> xmldoc_glob = os.path.join(
    ...                   os.path.dirname(
    ...                       os.path.abspath(__file__)),
    ...                   TEST_PATH, "xml","*","*.xml")
    >>> _xmldoc_path = glob.glob(xmldoc_glob)[0]
    >>> _xmldoc = xmldoc(_xmldoc_path)
    >>> _data = Data(_xmldoc)
    >>> point = _data.devices[0].sensors["SN100-0"].points["Battery Voltage"]
    >>> len(point.keys()) == 8
    True
    >>> point["id"] is not None
    True
    >>> point["descr"] is not None
    True
    >>> point["type"] is not None
    True
    >>> point["format"] is not None
    True
    >>> point["unit"] is not None
    True
    >>> point["rangemin"] is not None
    True
    >>> point["rangemax"] is not None
    True
    >>> point["value"] is not None
    True
    """

    def __init__(self, point):
        """
        Populate the mapping from an ElementTree element.

        point -- the element for one data point; its attributes and
                 direct children are copied into this dictionary.
        """
        super(Point, self).__init__()
        for key, value in point.attrib.items():
            self[key.lower()] = value
        # Iterate the element itself: Element.getchildren() is deprecated
        # and no longer exists in current ElementTree releases.
        for elem in point:
            # Qualified tags are spelled "{namespace}local"; taking the
            # text after the last "}" also copes with unqualified tags.
            self[elem.tag.rpartition("}")[2].lower()] = elem.text
231
232
class Sensor(dict):
    """
    A collection of data points for a sponge sensor sample.

    The dictionary holds the sensor element's attributes and the text
    of its child elements, keyed by lower-cased name.  The "points"
    attribute maps each point's "descr" value to its Point object; it
    is an empty dictionary when the element has no Parameters child.

    Instantiate a valid Sensor object.

    >>> xmldoc_glob = os.path.join(
    ...                   os.path.dirname(
    ...                       os.path.abspath(__file__)),
    ...                   TEST_PATH, "xml","*","*.xml")
    >>> _xmldoc_path = glob.glob(xmldoc_glob)[0]
    >>> _xmldoc = xmldoc(_xmldoc_path)
    >>> _data = Data(_xmldoc)
    >>> sensor = _data.devices[0].sensors["SN100-0"]
    >>> len(sensor.points.items())
    3
    >>> len(sensor.keys()) == 9
    True
    >>> sensor["id"] is not None
    True
    >>> sensor["serialno"] is not None
    True
    >>> sensor["prodno"] is not None
    True
    >>> sensor["prodname"] is not None
    True
    >>> sensor["descr"] is not None
    True
    >>> sensor["adr"] is not None
    True
    >>> sensor["protocolver"] is not None
    True
    >>> sensor["verticalposition"] is not None
    True
    >>> sensor["status"] is not None
    True
    """

    def __init__(self, sensor, xmlns):
        """
        Populate the mapping and the points table from an element.

        sensor -- the ElementTree element for one sensor.
        xmlns  -- namespace prefix in "{uri}" form (or "") that is
                  prepended to "Point" when searching Parameters.

        Raises KeyError if a Point has no "descr" entry.
        """
        super(Sensor, self).__init__()
        # Defined up front so the attribute exists even when the element
        # carries no Parameters child.
        self.points = {}
        for key, value in sensor.attrib.items():
            self[key.lower()] = value
        # Iterate the element itself: Element.getchildren() is deprecated
        # and no longer exists in current ElementTree releases.
        for elem in sensor:
            # Text after the last "}" is the local tag name, whether or
            # not the tag is namespace-qualified.
            tag = elem.tag.rpartition("}")[2].lower()
            if tag == "parameters":
                points = [Point(point)
                          for point in elem.findall(xmlns + "Point")]
                self.points = dict((point["descr"], point)
                                   for point in points)
            else:
                self[tag] = elem.text
285
286
class Device(dict):
    """
    Data from a collection of sponge sensors for a single time sample.

    The dictionary holds the device element's attributes, the children
    of its SiteInfo element, the attributes of its Data element and the
    text of any other child, all keyed by lower-cased name.  The
    "sensors" attribute maps each sensor's "id" to its Sensor object;
    it is an empty dictionary when the element has no Data child.

    Instantiate a valid Device object.

    >>> xmldoc_glob = os.path.join(
    ...                   os.path.dirname(
    ...                       os.path.abspath(__file__)),
    ...                   TEST_PATH, "xml","*","*.xml")
    >>> _xmldoc_path = glob.glob(xmldoc_glob)[0]
    >>> _xmldoc = xmldoc(_xmldoc_path)
    >>> _data = Data(_xmldoc)
    >>> device = _data.devices[0]
    >>> len(device.sensors.items())
    15
    >>> len(device.keys())
    16
    >>> device["id"] is not None
    True
    >>> device["sessionid"] is not None
    True
    >>> device["descr"] is not None
    True
    >>> device["serialno"] is not None
    True
    >>> device["prodno"] is not None
    True
    >>> device["prodname"] is not None
    True
    >>> device["devicetype"] is not None
    True
    >>> device["protocolver"] is not None
    True
    >>> device["time"] is not None
    True
    >>> device["status"] is not None
    True
    >>> device["location"] is not None
    True
    >>> device["verticalposition"] is not None
    True
    >>> device["owner"] is not None
    True
    >>> device["recordnumber"] is not None
    True
    >>> device["data_time"] is not None
    True
    >>> device["data_sessionid"] is not None
    True
    """

    def __init__(self, device, xmlns):
        """
        Populate the mapping and the sensors table from an element.

        device -- the ElementTree element for one device sample.
        xmlns  -- namespace prefix in "{uri}" form (or "") that is
                  prepended to "SensorData" when searching Data.

        Raises KeyError if a sensor has no "id" entry.
        """
        super(Device, self).__init__()
        # Defined up front so the attribute exists even when the element
        # carries no Data child.
        self.sensors = {}
        for key, value in device.attrib.items():
            self[key.lower()] = value
        # Iterate elements directly: Element.getchildren() is deprecated
        # and no longer exists in current ElementTree releases.
        for elem in device:
            # Text after the last "}" is the local tag name, whether or
            # not the tag is namespace-qualified.
            tag = elem.tag.rpartition("}")[2].lower()
            if tag == "siteinfo":
                # SiteInfo children are flattened into this mapping.
                for subelem in elem:
                    subtag = subelem.tag.rpartition("}")[2].lower()
                    self[subtag] = subelem.text
            elif tag == "data":
                for key, value in elem.attrib.items():
                    key = key.lower()
                    # Stored under distinct names so they do not replace
                    # the "time" and "sessionid" entries of the device.
                    if key == "time":
                        key = "data_time"
                    elif key == "sessionid":
                        key = "data_sessionid"
                    self[key] = value
                sensors = [Sensor(sensor, xmlns)
                           for sensor in elem.findall(xmlns + "SensorData")]
                self.sensors = dict((sensor["id"], sensor)
                                    for sensor in sensors)
            else:
                self[tag] = elem.text
364
365
class Data(object):
    """
    A collection of sponge data samples from a collection of sensors.

    Attributes set by the constructor:

    xmlns   -- the "{uri}" namespace prefix taken from the first child
               of the document root, or "" when the root has no
               children or that child is not namespace-qualified.
    devices -- a list with one Device per Device child of the root.

    Instantiate a valid Data object.

    >>> xmldoc_glob = os.path.join(
    ...                   os.path.dirname(
    ...                       os.path.abspath(__file__)),
    ...                   TEST_PATH, "xml","*","*.xml")
    >>> _xmldoc_path = glob.glob(xmldoc_glob)[0]
    >>> _xmldoc = xmldoc(_xmldoc_path)
    >>> _data = Data(_xmldoc)
    >>> _data.xmlns
    '{http://www.aadi.no/RTOutSchema}'
    >>> len(_data.devices)
    50
    """

    def __init__(self, _xmldoc):
        """
        Initialize a new sponge data tree.

        _xmldoc -- the XML document as a string; parse errors raised by
                   ElementTree propagate to the caller.
        """

        tree = ET.XML(_xmldoc)
        # Iterate the element itself: Element.getchildren() is deprecated
        # and no longer exists in current ElementTree releases.
        children = list(tree)
        first_tag = children[0].tag if children else ""
        # Keep everything up to and including the last "}"; rfind gives
        # -1 for an unqualified tag, which yields the empty prefix.
        self.xmlns = first_tag[:first_tag.rfind("}") + 1]
        self.devices = [Device(device, self.xmlns) for device
                        in tree.findall(self.xmlns + "Device")]
395
396
def _main():
    """
    Parse the XML document named on the command line.

    Returns the Data object built from that document, or None when
    xmldoc_path() does not yield a path.

    Test silent import.

    >>> from parse import _main
    """

    path = xmldoc_path()
    if not path:
        return None
    return Data(xmldoc(path))
414
if __name__ == "__main__":
    DATA = _main()
    # One pre-formatted argument in parentheses prints the same text
    # ("DATA = " followed by str(DATA)) whether print is a statement or
    # a function; the two-argument statement form is a syntax error on
    # Python 3.
    print("DATA = %s" % (DATA,))
Note: See TracBrowser for help on using the browser.