1 |
""" |
---|
2 |
Find raw sodar data files in need of processing. |
---|
3 |
|
---|
4 |
Files are presumed organized according to NCCOOS conventions. |
---|
5 |
""" |
---|
6 |
|
---|
7 |
import os |
---|
8 |
|
---|
9 |
def findMissing(source, destination):
    """
    Find raw sodar data files in need of processing.

    Every file found in a subdirectory of source is mapped to the
    destination directory it should have been processed into: the same
    relative directory under destination, named after the file with a
    trailing '.dat' removed.  A file is reported as missing when that
    directory is not among the existing leaf directories of destination.

    Files sitting directly in source (not in a subdirectory) are not
    considered.  source and destination may be given with or without a
    trailing path separator.

    Returns a list of (sourcePath, destinationPath) tuples, sorted by
    destinationPath.
    """
    suffix = '.dat'

    # The first entry yielded by os.walk is the top directory itself;
    # only what lies below it is of interest, on both sides.
    sourceWalk = list(os.walk(source))[1:]
    destinationWalk = list(os.walk(destination))[1:]

    # Existing destinations are the leaf directories (no subdirectories).
    processed = sorted(fullPath
                       for fullPath, subDirs, files
                       in destinationWalk
                       if not subDirs)
    # always mark most recent destination as missing
    # to keep it updated as source is updated during the day
    # (most recent == last in sort order; presumably names are date-based)
    processed = set(processed[:-1])

    missing = []
    for fullPath, subDirs, files in sourceWalk:
        # relpath is immune to trailing separators on source and cannot
        # strip a matching fragment from the middle of the path.
        relativeDir = os.path.relpath(fullPath, source)
        for fileName in files:
            # Only strip the extension, never a '.dat' inside the name.
            if fileName.endswith(suffix):
                baseName = fileName[:-len(suffix)]
            else:
                baseName = fileName
            idealPath = os.path.join(destination, relativeDir, baseName)
            if idealPath not in processed:
                # Keep the real source path next to its destination
                # rather than reconstructing it afterwards.
                missing.append((idealPath,
                                os.path.join(fullPath, fileName)))

    # Destination path first in the tuple, so this sorts by destination.
    missing.sort()

    return [(sourcePath, idealPath)
            for idealPath, sourcePath
            in missing]
---|
62 |
|
---|
63 |
if __name__ == '__main__':
    # Command-line entry point: print the (source, destination) pairs
    # that findMissing reports for the two directories given as arguments.
    import optparse
    from pprint import pprint

    parser = optparse.OptionParser(usage='%prog [options] SOURCE DESTINATION')
    (values, args) = parser.parse_args()
    if len(args) != 2:
        # Exit with a usage message instead of an unpacking traceback.
        parser.error('expected exactly two arguments: SOURCE DESTINATION')
    (source, destination) = args

    # Drop one trailing separator so paths built from these roots do not
    # contain a doubled separator.  endswith() is safe on an empty
    # argument, and a bare root directory is left untouched.
    if len(source) > 1 and source.endswith(os.path.sep):
        source = source[:-1]
    if len(destination) > 1 and destination.endswith(os.path.sep):
        destination = destination[:-1]

    pprint(findMissing(source, destination))
---|