NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/utils/findMissing.py

Revision 163 (checked in by cbc, 16 years ago)

Fix computation of differenceWalk when force-update in findMissing.py.

Line 
1 """
2 Find raw sodar data files in need of processing.
3
4 Files are presumed organized according to NCCOOS conventions.
5 """
6
7 import os
8
def computeSourceWalk(source):
    """
    Compute source file tree.

    computeSourceWalk(source) -> sourceWalk

    source - path to raw data directory in NCCOOS format.
    sourceWalk - list of (fullPath, subDirs, files) tuples as produced by
                 os.walk, one per directory below source.  The entry for
                 source itself is omitted, so files sitting directly in
                 source are not reported.  Empty if source does not exist.
    """

    # os.walk yields the top directory first; slice it off so only the
    # directories beneath source remain.
    return list(os.walk(source))[1:]
24
def computeDestinationWalk(destination, force):
    """
    Compute destination file tree.

    computeDestinationWalk(destination, force) -> destinationWalk

    destination - path to plot images directory in NCCOOS format.
    force - update all destination plots for which raw data sources exist.
    destinationWalk - sorted list of all pre-existing destination paths.
                      These are the leaf directories (those without
                      subdirectories) below destination.  Always a list:
                      empty when force is true or nothing exists yet.
    """

    # When forcing an update, pretend nothing has been produced yet so
    # every source is reported as missing.
    if force:
        return []

    # os.walk yields the top directory first; skip it, then keep only the
    # directories that have no subdirectories of their own.
    walk = list(os.walk(destination))[1:]
    return sorted([fullPath
                   for fullPath, subDirs, files
                   in walk
                   if not subDirs])
49
def findMissing(source, destination, force):
    """
    Find raw sodar data files in need of processing.

    findMissing(source, destination, force) -> differenceWalk

    source - path to raw data directory in NCCOOS format.
    destination - path to plot images directory in NCCOOS format.
    force - update all destination plots for which raw data sources exist.
    differenceWalk - list of (source, destination) full path tuples,
                     sorted by destination path.

    Compute source file tree.
    Compute destination file tree.
    Compute ideal destination path for each source file: the file's
    directory relative to source, re-rooted under destination, plus the
    file name with a trailing '.dat' removed.
    Keep every (source file, ideal destination) pair whose ideal
    destination is not among the pre-existing destinations.
    """

    sourceWalk = computeSourceWalk(source)

    # Treat a None result the same as an empty list of destinations.
    destinationWalk = computeDestinationWalk(destination, force) or []
    # always mark most recent destination as missing
    # to keep it updated as source is updated during the day
    existing = set(destinationWalk[:-1])

    suffix = '.dat'
    differenceWalk = []
    for fullPath, subDirs, files in sourceWalk:
        # Strip only the leading source directory, never a later
        # occurrence of the same text deeper in the path.
        relativeDir = os.path.relpath(fullPath, source)
        for fileName in files:
            # Strip only a trailing '.dat', never one inside the name.
            if fileName.endswith(suffix):
                baseName = fileName[:-len(suffix)]
            else:
                baseName = fileName
            idealPath = os.path.join(destination, relativeDir, baseName)
            if idealPath not in existing:
                # Pair with the real source file rather than rebuilding
                # its name from the destination path.
                differenceWalk.append((os.path.join(fullPath, fileName),
                                       idealPath))

    differenceWalk.sort(key=lambda pair: pair[1])

    return differenceWalk
101
if __name__ == '__main__':
    # Command-line use: print the (source, destination) pairs still in
    # need of processing for the given source and destination directories.
    import optparse
    from pprint import pprint

    parser = optparse.OptionParser(
        usage='%prog [options] source destination')
    parser.add_option('-f', '--force',
                      action='store_true', dest='force', default=False,
                      help='update all destination plots for which '
                           'raw data sources exist')
    (values, args) = parser.parse_args()
    if len(args) != 2:
        parser.error('expected exactly two arguments: source destination')

    # Drop trailing path separators, but leave a bare root path (or an
    # empty argument) untouched instead of indexing into it.
    (source, destination) = [path.rstrip(os.path.sep) or path
                             for path in args]

    pprint(findMissing(source, destination, values.force))
Note: See TracBrowser for help on using the browser.