NCCOOS Trac Projects: Top | Web | Platforms | Processing | Viz | Sprints | Sandbox | (Wind)

root/sodar/trunk/sodar/utils/findMissing.py

Revision 140 (checked in by cbc, 16 years ago)

Always mark the most recent destination as missing in findMissing.py, to keep it updated as the source is updated during the day.

Line 
1 """
2 Find raw sodar data files in need of processing.
3
4 Files are presumed organized according to NCCOOS conventions.
5 """
6
7 import os
8
def findMissing(source, destination):
    """
    Find raw sodar data files in need of processing.

    Walk the source file tree and, for every file found in a
    subdirectory of source, construct the ideal destination path:
    destination/<path relative to source>/<file name without '.dat'>.
    A file needs processing when its ideal destination path is not among
    the existing leaf directories (directories without subdirectories)
    found below destination.

    The existing leaf directory that sorts last is always treated as
    missing, so that it is reprocessed while its source file is still
    being updated during the day.

    Files located directly in source (not in a subdirectory) are ignored.

    Return a list of tuples (source file path, destination path),
    sorted by destination path.
    """

    # os.walk yields the top directory first; drop it so that only
    # subdirectories of source contribute files.
    sourceWalk = list(os.walk(source))[1:]

    # Existing destinations are the leaf directories below destination
    # (the top directory itself is dropped here as well).
    destinationWalk = sorted(fullPath
                             for fullPath, subDirs, files
                             in list(os.walk(destination))[1:]
                             if not subDirs)
    # always mark most recent destination as missing
    # to keep it updated as source is updated during the day
    # (a set gives constant-time membership tests below)
    processed = set(destinationWalk[:-1])

    suffix = '.dat'
    differenceWalk = []
    for fullPath, subDirs, files in sourceWalk:
        # relpath strips only the leading source prefix and tolerates a
        # trailing separator on source, unlike a blanket str.replace.
        relativePath = os.path.relpath(fullPath, source)
        for fileName in files:
            # Strip the extension only when it is a true suffix.
            if fileName.endswith(suffix):
                baseName = fileName[:-len(suffix)]
            else:
                baseName = fileName
            idealPath = os.path.join(destination, relativePath, baseName)
            if idealPath not in processed:
                # Pair the destination with the real source file rather
                # than reconstructing the source path from the destination.
                differenceWalk.append((os.path.join(fullPath, fileName),
                                       idealPath))

    # Order by destination path; the source path breaks ties so that the
    # result does not depend on directory listing order.
    return sorted(differenceWalk, key=lambda pair: (pair[1], pair[0]))
62
if __name__ == '__main__':
    # Command-line entry point: print the (source, destination) pairs
    # that findMissing reports for the two directories given as arguments.
    import optparse
    from pprint import pprint

    parser = optparse.OptionParser(usage='%prog source destination')
    (values, args) = parser.parse_args()
    # Report a wrong argument count with a usage message instead of a
    # bare unpacking error.
    if len(args) != 2:
        parser.error('expected exactly two arguments: source destination')
    (source, destination) = args
    # Drop a single trailing path separator; endswith is safe on an
    # empty string, where indexing [-1] would raise.
    if source.endswith(os.path.sep):
        source = source[:-1]
    if destination.endswith(os.path.sep):
        destination = destination[:-1]

    pprint(findMissing(source, destination))
Note: See TracBrowser for help on using the browser.