"""
Find raw sodar data files in need of processing.

Files are presumed organized according to NCCOOS conventions.
"""
import os
def computeSourceWalk(source):
    """
    Compute source file tree.

    computeSourceWalk(source) -> sourceWalk

    source - path to raw data directory in NCCOOS format.
    sourceWalk - list of (fullPath, subDirs, files) tuples as produced by
                 os.walk, one per directory below source.  The entry for
                 source itself is omitted, so files sitting directly in
                 source are not reported.
    """
    # os.walk is top-down by default, so its first entry is always source
    # itself; slicing it off leaves only the directories below it.  A
    # source that does not exist yields no entries, hence an empty list.
    return list(os.walk(source))[1:]
def computeDestinationWalk(destination, force):
    """
    Compute destination file tree.

    computeDestinationWalk(destination, force) -> destinationWalk

    destination - path to plot images directory in NCCOOS format.
    force - update all destination plots for which raw data sources exist.
            When true, no pre-existing destinations are reported at all.
    destinationWalk - sorted list of all pre-existing destination paths,
                      i.e. the directories below destination that have no
                      subdirectories of their own.
    """
    if force:
        # Pretend nothing has been produced yet.
        return []

    # os.walk is top-down by default, so the first entry is destination
    # itself; it is skipped and never reported, even when it is a leaf.
    walk = list(os.walk(destination))[1:]
    return sorted([fullPath
                   for fullPath, subDirs, files in walk
                   if not subDirs])
def findMissing(source, destination, force):
    """
    Find raw sodar data files in need of processing.

    findMissing(source, destination, force) -> differenceWalk

    source - path to raw data directory in NCCOOS format.
    destination - path to plot images directory in NCCOOS format.
    force - update all destination plots for which raw data sources exist.
    differenceWalk - list of (source, destination) full path tuples,
                     sorted by destination path.

    Compute source file tree.
    Compute destination file tree.
    Compute ideal destination file tree from source.
    Compare ideal destination file tree to destination file tree.
    Compute difference file tree as list of tuples (source, destination).

    The ideal destination for a source file is
    destination/<directory relative to source>/<file name without '.dat'>.
    """
    sourceWalk = computeSourceWalk(source)

    destinationWalk = computeDestinationWalk(destination, force)
    # always mark most recent destination as missing
    # to keep it updated as source is updated during the day
    # (slicing an empty list is safe, so no emptiness check is needed;
    # a set keeps each membership test below constant time)
    existing = set(destinationWalk[:-1])

    suffix = '.dat'
    differenceWalk = []
    for fullPath, subDirs, files in sourceWalk:
        # relpath removes only the leading source prefix, unlike
        # str.replace which would also remove later occurrences and
        # would fail to match when source ends with a path separator.
        relativeDir = os.path.relpath(fullPath, source)
        for fileName in files:
            # Drop only a trailing '.dat', never one inside the name.
            if fileName.endswith(suffix):
                baseName = fileName[:-len(suffix)]
            else:
                baseName = fileName
            idealPath = os.path.join(destination, relativeDir, baseName)
            if idealPath not in existing:
                # Pair with the real source file rather than rebuilding
                # its path from the destination, which invented a
                # non-existent '<name>.dat' for files lacking the suffix.
                differenceWalk.append((os.path.join(fullPath, fileName),
                                       idealPath))

    differenceWalk.sort(key=lambda pair: pair[1])
    return differenceWalk
if __name__ == '__main__':
    # Command line use: print the (source, destination) pairs that still
    # need processing.
    import optparse
    from pprint import pprint

    parser = optparse.OptionParser(
        usage='%prog [options] source destination')
    parser.add_option('-f', '--force',
                      action='store_true', dest='force', default=False,
                      help='update all destination plots for which '
                           'raw data sources exist')
    (values, args) = parser.parse_args()
    if len(args) != 2:
        # parser.error prints the usage message and exits with status 2.
        parser.error('expected exactly two arguments: source destination')
    (source, destination) = args

    # Drop one trailing path separator; endswith is safe on empty strings.
    if source.endswith(os.path.sep):
        source = source[:-1]
    if destination.endswith(os.path.sep):
        destination = destination[:-1]

    # findMissing requires force; omitting it raised TypeError.
    pprint(findMissing(source, destination, values.force))