diff query_tabular.py @ 11:fd16243931d6 draft

Uploaded
author jjohnson
date Fri, 12 May 2017 10:18:42 -0400
parents e84d1c3bf4fe
children 5b4f6cf857cf
line wrap: on
line diff
--- a/query_tabular.py	Thu Mar 02 13:43:25 2017 -0500
+++ b/query_tabular.py	Fri May 12 10:18:42 2017 -0400
@@ -64,31 +64,71 @@
         self.source = source
         self.filter_dict = filter_dict
         # print >> sys.stderr, 'LineFilter %s' % filter_dict if filter_dict else 'NONE'
-        self.func = lambda l: l.rstrip('\r\n') if l else None
+        self.func = lambda i,l: l.rstrip('\r\n') if l else None
+        self.src_lines = []
+        self.src_line_cnt = 0
         if not filter_dict:
             return
         if filter_dict['filter'] == 'regex':
             rgx = re.compile(filter_dict['pattern'])
             if filter_dict['action'] == 'exclude_match':
-                self.func = lambda l: l if not rgx.match(l) else None
+                self.func = lambda i,l: l if not rgx.match(l) else None
             elif filter_dict['action'] == 'include_match':
-                self.func = lambda l: l if rgx.match(l) else None
+                self.func = lambda i,l: l if rgx.match(l) else None
             elif filter_dict['action'] == 'exclude_find':
-                self.func = lambda l: l if not rgx.search(l) else None
+                self.func = lambda i,l: l if not rgx.search(l) else None
             elif filter_dict['action'] == 'include_find':
-                self.func = lambda l: l if rgx.search(l) else None
+                self.func = lambda i,l: l if rgx.search(l) else None
         elif filter_dict['filter'] == 'replace':
             p = filter_dict['pattern']
             r = filter_dict['replace']
             c = int(filter_dict['column']) - 1
-            self.func = lambda l: '\t'.join([x if i != c else re.sub(p,r,x) for i,x in enumerate(l.split('\t'))])
+            self.func = lambda i,l: '\t'.join([x if i != c else re.sub(p,r,x) for i,x in enumerate(l.split('\t'))])
+        elif filter_dict['filter'] == 'prepend_line_num':
+            self.func = lambda i,l: '%d\t%s' % (i,l) 
+        elif filter_dict['filter'] == 'append_line_num':
+            self.func = lambda i,l: '%s\t%d' % (l.rstrip('\r\n'),i) 
+        elif filter_dict['filter'] == 'skip':
+            cnt = filter_dict['count']
+            self.func = lambda i,l: l if i > cnt else None
+        elif filter_dict['filter'] == 'normalize':
+            cols = [int(c) - 1 for c in filter_dict['columns']]
+            sep = filter_dict['separator']
+            self.func = lambda i,l: self.normalize(l,cols,sep)
     def __iter__(self):
         return self
-    def next(self):
+    def normalize(self,line,split_cols,sep):
+        lines = []
+        fields = line.rstrip('\r\n').split('\t')
+        split_fields = dict()
+        cnt = 0
+        for c in split_cols:
+            if c < len(fields):
+                split_fields[c] = fields[c].split(sep)
+                cnt = max(cnt, len(split_fields[c]))
+        if cnt == 0:
+            lines.append('\t'.join(fields))
+        else:
+            for n in range(0, cnt):
+                flds = [x if c not in split_cols else split_fields[c][n] if n < len(split_fields[c]) else ''  for (c, x) in enumerate(fields)]
+                lines.append('\t'.join(flds))
+        return lines
+    def get_lines(self):
         for i,next_line in enumerate(self.source):
-            line = self.func(next_line)
+            self.src_line_cnt += 1
+            line = self.func(self.src_line_cnt,next_line)
+            # print >> sys.stderr, 'LineFilter %s: %d %s' % (str(self.filter_dict),self.src_line_cnt,line)
             if line:
-                return line
+               if isinstance(line,list):
+                   self.src_lines.extend(line)
+               else:
+                   self.src_lines.append(line)
+               return
+    def next(self):
+        if not self.src_lines:
+            self.get_lines()
+        if self.src_lines:
+                return self.src_lines.pop(0)
         raise StopIteration