11 | | import re |
---|
12 | | dataPattern = re.compile('\$') # samples in file are terminated by $ |
---|
13 | | samples = dataPattern.split(data) |
---|
14 | | self.samples = [sample for sample in samples if sample] |
---|
| 13 | object.__init__(self) |
---|
| 14 | # samples in file are terminated by $ |
---|
| 15 | self.samples = [Sample(sample) for sample in [sample.strip() for sample in data.split('$')] if sample] |
---|
| 16 | |
---|
class Sample(object):
    """One sample of daily sodar file data, split into a header and a body."""

    def __init__(self,sample):
        """Split *sample* text into ``self.header`` and ``self.body``.

        Surrounding whitespace is stripped from *sample* before matching.
        ``self.body`` keeps the matched body text; ``self.header`` is replaced
        by a ``Header`` built from the matched header text.
        """
        object.__init__(self)
        # The header is the first three groups of lines, each closed by a
        # blank line; everything after them is the body.
        splitter = re.compile(r'''(?P<header>.*?\n\n.*?\n\n.*?\n\n)
                                  (?P<body>.*$)
                               ''',re.DOTALL | re.VERBOSE)
        parts = splitter.match(sample.strip())
        # Expose every named group ('header', 'body') as an attribute.
        for name,text in parts.groupdict().items():
            setattr(self,name,text)
        # fix for missing keys
        # NOTE(review): the comment above is carried over verbatim; it reads
        # like a pending TODO rather than a description -- confirm.
        self.header = Header(self.header)
---|
| 28 | |
---|
class Header(object):
    """A sodar data sample header (a collection of sample-wide parameters).

    The header text is read as alternating non-blank lines: a line of
    whitespace-separated parameter names, then a line holding the matching
    values.  Each parameter becomes a string attribute of the instance, named
    after the parameter with any leading/trailing '#' removed.
    """

    def __init__(self,header):
        """Parse *header* text and expose every parameter as an attribute.

        Raises:
            IndexError: a parameter-name line has no value line after it.
            AttributeError: a value line has fewer fields than its name line
                (the generated pattern does not match).
        """
        object.__init__(self)
        # Strip first, then filter: a whitespace-only line (stray '\r',
        # trailing blanks on a separator line) must not survive as '' and
        # shift the name/value alternation.
        headerLines = [headerLine for headerLine
                       in (rawLine.strip() for rawLine in header.split('\n'))
                       if headerLine]
        if len(headerLines) % 2:
            raise IndexError('header has a parameter-name line without a value line')
        # Even positions hold parameter names, odd positions hold their values.
        # Pair by position instead of list.index(), which returns the first
        # equal line and so picks the wrong values when a line's text repeats.
        parametersPairs = zip(headerLines[::2],headerLines[1::2])
        for parameterNames,parameterValues in parametersPairs:
            # parameter names must be valid Python identifiers for named groups matching
            parameterNames = [parameterName.strip('#') for parameterName in parameterNames.split()]
            # One \S+ group per name; the last name captures the rest of the line.
            parameterPattern = re.compile(r'(?P<'+r'>\S+)\s+(?P<'.join(parameterNames)+r'>.*$)')
            self.__dict__.update(parameterPattern.match(parameterValues).groupdict())
---|