Mercurial > repos > jjohnson > query_tabular
comparison query_tabular.py @ 11:fd16243931d6 draft
Uploaded
author | jjohnson |
---|---|
date | Fri, 12 May 2017 10:18:42 -0400 |
parents | e84d1c3bf4fe |
children | 5b4f6cf857cf |
comparison
equal
deleted
inserted
replaced
10:e84d1c3bf4fe | 11:fd16243931d6 |
---|---|
class LineFilter(object):
    """Iterator that applies a single filter to each line read from ``source``.

    ``source`` is any iterable of text lines (for example an open file or
    another ``LineFilter``).  ``filter_dict`` selects the filter through its
    ``'filter'`` key:

    * ``'regex'``: keep or drop lines by ``'pattern'``; ``'action'`` is one
      of ``exclude_match``, ``include_match``, ``exclude_find`` or
      ``include_find`` (``match`` anchors at the start of the line,
      ``find`` searches anywhere in it).
    * ``'replace'``: apply ``re.sub`` with ``'pattern'``/``'replace'`` to the
      1-based tab-separated ``'column'``.
    * ``'prepend_line_num'`` / ``'append_line_num'``: add the 1-based source
      line number as a new first / last tab-separated field.
    * ``'skip'``: drop the first ``'count'`` source lines.
    * ``'normalize'``: split the 1-based ``'columns'`` on ``'separator'`` and
      emit one output line per split value.

    A falsy ``filter_dict`` (or an unrecognised filter/action) leaves the
    default behaviour in place: trailing CR/LF characters are stripped.
    Whatever the filter, a result that is falsy (``None`` or an empty
    string) is not emitted.
    """

    def __init__(self, source, filter_dict):
        self.source = source
        self.filter_dict = filter_dict
        # Default filter: strip the line terminator.
        self.func = lambda i, l: l.rstrip('\r\n') if l else None
        # Output lines produced by the filter but not yet handed out.
        self.src_lines = []
        # Number of lines consumed from the source so far (1-based for func).
        self.src_line_cnt = 0
        # Created lazily from self.source on first read, see get_lines().
        self._source_iter = None
        if not filter_dict:
            return
        filter_type = filter_dict['filter']
        if filter_type == 'regex':
            rgx = re.compile(filter_dict['pattern'])
            action = filter_dict['action']
            if action == 'exclude_match':
                self.func = lambda i, l: l if not rgx.match(l) else None
            elif action == 'include_match':
                self.func = lambda i, l: l if rgx.match(l) else None
            elif action == 'exclude_find':
                self.func = lambda i, l: l if not rgx.search(l) else None
            elif action == 'include_find':
                self.func = lambda i, l: l if rgx.search(l) else None
        elif filter_type == 'replace':
            # Compile once instead of resolving the pattern for every field.
            sub_rgx = re.compile(filter_dict['pattern'])
            repl = filter_dict['replace']
            col = int(filter_dict['column']) - 1
            # 'n' is the field index; kept distinct from the line number 'i'.
            self.func = lambda i, l: '\t'.join(
                [x if n != col else sub_rgx.sub(repl, x)
                 for n, x in enumerate(l.split('\t'))])
        elif filter_type == 'prepend_line_num':
            self.func = lambda i, l: '%d\t%s' % (i, l)
        elif filter_type == 'append_line_num':
            self.func = lambda i, l: '%s\t%d' % (l.rstrip('\r\n'), i)
        elif filter_type == 'skip':
            # Coerce to int: a string count would compare incorrectly
            # against the integer line number.
            cnt = int(filter_dict['count'])
            self.func = lambda i, l: l if i > cnt else None
        elif filter_type == 'normalize':
            cols = [int(c) - 1 for c in filter_dict['columns']]
            sep = filter_dict['separator']
            self.func = lambda i, l: self.normalize(l, cols, sep)

    def __iter__(self):
        return self

    def normalize(self, line, split_cols, sep):
        """Expand ``line`` into one output line per value of the split columns.

        ``split_cols`` holds 0-based field indexes; each such field is split
        on ``sep``.  Output row *n* carries the *n*-th value of every split
        field (or an empty string once a field has run out of values) and
        the unchanged content of all other fields.  Returns a list of
        tab-joined lines without line terminators.
        """
        lines = []
        fields = line.rstrip('\r\n').split('\t')
        split_fields = dict()
        cnt = 0
        for c in split_cols:
            # Columns beyond the end of this line are ignored.
            if c < len(fields):
                split_fields[c] = fields[c].split(sep)
                cnt = max(cnt, len(split_fields[c]))
        if cnt == 0:
            lines.append('\t'.join(fields))
        else:
            for n in range(0, cnt):
                flds = [x if c not in split_cols
                        else split_fields[c][n] if n < len(split_fields[c])
                        else ''
                        for (c, x) in enumerate(fields)]
                lines.append('\t'.join(flds))
        return lines

    def get_lines(self):
        """Read source lines until one yields output, then buffer that output.

        Every consumed source line increments ``src_line_cnt``, including
        lines the filter rejects.  Returns ``None``; results are appended to
        ``src_lines``.  Leaves ``src_lines`` untouched if the source is
        exhausted first.
        """
        if self._source_iter is None:
            # Keep one iterator for the lifetime of the filter so that a
            # re-iterable source (e.g. a list) is not restarted on each call.
            self._source_iter = iter(self.source)
        for next_line in self._source_iter:
            self.src_line_cnt += 1
            line = self.func(self.src_line_cnt, next_line)
            if line:
                # normalize() returns a list of lines; other filters a str.
                if isinstance(line, list):
                    self.src_lines.extend(line)
                else:
                    self.src_lines.append(line)
                return

    def next(self):
        """Return the next filtered line; raise StopIteration when done."""
        if not self.src_lines:
            self.get_lines()
        if self.src_lines:
            return self.src_lines.pop(0)
        raise StopIteration

    # Python 3 spelling of the iterator protocol method.
    __next__ = next
93 | 133 |
94 | 134 |
95 class TabularReader: | 135 class TabularReader: |
96 """ | 136 """ |