NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/utils/findMissing.py

Revision 152 (checked in by cbc, 16 years ago)

Added sodar plot catalog to plotSodar.py and utils/findMissing.py.

Line 
1 """
2 Find raw sodar data files in need of processing.
3
4 Files are presumed organized according to NCCOOS conventions.
5 """
6
7 import os
8
def computeSourceWalk(source):
    """
    Walk the raw data tree below source.

    computeSourceWalk(source) -> sourceWalk

    source - path to raw data directory in NCCOOS format.
    sourceWalk - list of (fullPath, subDirs, files) tuples as produced
                 by os.walk, one per directory below source; the entry
                 for source itself is not included.
    """

    entries = list(os.walk(source))
    # os.walk reports the top directory first; discard that entry so
    # only the directories beneath source remain.
    del entries[:1]
    return entries
24
def computeDestinationWalk(destination, force):
    """
    Compute destination file tree.

    computeDestinationWalk(destination, force) -> destinationWalk

    destination - path to plot images directory in NCCOOS format.
    force - update all destination plots for which raw data sources exist.
    destinationWalk - sorted list of all pre-existing destination paths
                      (directories below destination that contain no
                      subdirectories); always an empty list when force
                      is true.
    """

    if force:
        # Pretend nothing has been produced yet so that every source
        # is reported as missing.  Always return a list, never None,
        # so callers can slice and test membership unconditionally.
        return []

    # os.walk reports the top directory first; skip it so that only
    # directories beneath destination are considered.
    walk = list(os.walk(destination))[1:]
    return sorted([fullPath
                   for fullPath, subDirs, files
                   in walk
                   if not subDirs])
49
def findMissing(source, destination, force=False):
    """
    Find raw sodar data files in need of processing.

    findMissing(source, destination, force) -> differenceWalk

    source - path to raw data directory in NCCOOS format.
    destination - path to plot images directory in NCCOOS format.
    force - update all destination plots for which raw data sources exist
            (optional, defaults to False).
    differenceWalk - list of (source, destination) full path tuples,
                     sorted by destination path.

    Compute source file tree.
    Compute destination file tree.
    Compute ideal destination path for each source file.
    Keep the pairs whose ideal destination does not already exist.
    """

    sourceWalk = computeSourceWalk(source)

    # Treat a missing (None) result the same as "no destinations exist".
    destinationWalk = computeDestinationWalk(destination, force) or []
    # always mark most recent destination as missing
    # to keep it updated as source is updated during the day
    # (a set makes the membership tests below constant time)
    existing = set(destinationWalk[:-1])

    prefix = source + os.path.sep
    suffix = '.dat'

    differenceWalk = []
    for fullPath, subDirs, files in sourceWalk:
        # Strip only the leading source prefix; a blanket str.replace
        # would also remove matching text deeper inside the path.
        if fullPath.startswith(prefix):
            relativeDir = fullPath[len(prefix):]
        else:
            relativeDir = fullPath
        for fileName in files:
            # Strip '.dat' only when it is the file's suffix.
            if fileName.endswith(suffix):
                baseName = fileName[:-len(suffix)]
            else:
                baseName = fileName
            idealPath = os.path.join(destination, relativeDir, baseName)
            if idealPath not in existing:
                # Pair the destination with the real source file rather
                # than reconstructing the source path from the
                # destination string.
                differenceWalk.append((os.path.join(fullPath, fileName),
                                       idealPath))

    differenceWalk.sort(key=lambda pair: pair[1])

    return differenceWalk
98
if __name__ == '__main__':
    # Command-line use: print the (source, destination) pairs that
    # still need processing.
    import optparse
    from pprint import pprint

    parser = optparse.OptionParser(
        usage='%prog [options] source destination')
    parser.add_option('-f', '--force',
                      action='store_true', dest='force', default=False,
                      help='report every source file as missing, '
                           'regardless of existing destinations')
    (values, args) = parser.parse_args()
    if len(args) != 2:
        # parser.error prints the usage message and exits.
        parser.error('expected exactly two arguments: source destination')
    (source, destination) = args
    # Drop one trailing separator so that path prefixes built from
    # these values line up with the paths reported by os.walk.
    # endswith is safe on an empty string, unlike indexing [-1].
    if source.endswith(os.path.sep):
        source = source[:-1]
    if destination.endswith(os.path.sep):
        destination = destination[:-1]

    # findMissing needs the force flag; it was previously omitted.
    pprint(findMissing(source, destination, values.force))
Note: See TracBrowser for help on using the browser.