""" Find raw sodar data files in need of processing. Files are presumed organized according to NCCOOS conventions. """ import os def findMissing(source, destination): """ Find raw sodar data files in need of processing. Get source filetree. Get destination file tree. Construct ideal destination file tree from source Compare ideal destination file tree to destination file tree Construct difference file tree as list of tuples (source, destination) """ sourceWalk = os.walk(source) sourceWalk = [(fullPath, subDirs, files) for fullPath, subDirs, files in sourceWalk] sourceWalk = sourceWalk[1:] destinationWalk = os.walk(destination) destinationWalk = [(fullPath, subDirs, files) for fullPath, subDirs, files in destinationWalk] destinationWalk = destinationWalk[1:] destinationWalk = [fullPath for fullPath, subDirs, files in destinationWalk if not subDirs] # always mark most recent destination as missing # to keep it updated as source is updated during the day destinationWalk = sorted(destinationWalk) destinationWalk = destinationWalk[:-1] idealWalk = [os.path.join(destination, fullPath.replace(source + os.path.sep, ''), fileName.replace('.dat', '')) for fullPath, subDirs, files in sourceWalk for fileName in files] differenceWalk = [path for path in idealWalk if path not in destinationWalk] differenceWalk = sorted(differenceWalk) differenceWalk = [(os.path.join(source, path.replace(destination + os.path.sep, '') + '.dat'), path) for path in differenceWalk] return differenceWalk if __name__ == '__main__': import optparse from pprint import pprint parser = optparse.OptionParser() (values, args) = parser.parse_args() (source, destination) = tuple(args) if source[-1] == os.path.sep: source = source[:-1] if destination[-1] == os.path.sep: destination = destination[:-1] pprint(findMissing(source, destination))