comparison query_tabular.py @ 16:5b4f6cf857cf draft

Uploaded
author jjohnson
date Thu, 22 Jun 2017 09:14:56 -0400
parents fd16243931d6
children b9f797bf4f38
comparison
equal deleted inserted replaced
15:67082f49f047 16:5b4f6cf857cf
77 self.func = lambda i,l: l if rgx.match(l) else None 77 self.func = lambda i,l: l if rgx.match(l) else None
78 elif filter_dict['action'] == 'exclude_find': 78 elif filter_dict['action'] == 'exclude_find':
79 self.func = lambda i,l: l if not rgx.search(l) else None 79 self.func = lambda i,l: l if not rgx.search(l) else None
80 elif filter_dict['action'] == 'include_find': 80 elif filter_dict['action'] == 'include_find':
81 self.func = lambda i,l: l if rgx.search(l) else None 81 self.func = lambda i,l: l if rgx.search(l) else None
82 elif filter_dict['filter'] == 'select_columns':
83 cols = [int(c) - 1 for c in filter_dict['columns']]
84 self.func = lambda i,l: self.select_columns(l,cols)
82 elif filter_dict['filter'] == 'replace': 85 elif filter_dict['filter'] == 'replace':
83 p = filter_dict['pattern'] 86 p = filter_dict['pattern']
84 r = filter_dict['replace'] 87 r = filter_dict['replace']
85 c = int(filter_dict['column']) - 1 88 c = int(filter_dict['column']) - 1
86 self.func = lambda i,l: '\t'.join([x if i != c else re.sub(p,r,x) for i,x in enumerate(l.split('\t'))]) 89 self.func = lambda i,l: '\t'.join([x if i != c else re.sub(p,r,x) for i,x in enumerate(l.split('\t'))])
95 cols = [int(c) - 1 for c in filter_dict['columns']] 98 cols = [int(c) - 1 for c in filter_dict['columns']]
96 sep = filter_dict['separator'] 99 sep = filter_dict['separator']
97 self.func = lambda i,l: self.normalize(l,cols,sep) 100 self.func = lambda i,l: self.normalize(l,cols,sep)
98 def __iter__(self): 101 def __iter__(self):
99 return self 102 return self
103 def select_columns(self,line,cols):
104 fields = line.split('\t')
105 return '\t'.join([fields[x] for x in cols])
100 def normalize(self,line,split_cols,sep): 106 def normalize(self,line,split_cols,sep):
101 lines = [] 107 lines = []
102 fields = line.rstrip('\r\n').split('\t') 108 fields = line.rstrip('\r\n').split('\t')
103 split_fields = dict() 109 split_fields = dict()
104 cnt = 0 110 cnt = 0
307 parser.add_option('-Q', '--query_file', dest='query_file', default=None, help='SQL query file') 313 parser.add_option('-Q', '--query_file', dest='query_file', default=None, help='SQL query file')
308 parser.add_option('-n', '--no_header', dest='no_header', action='store_true', default=False, help='Include a column headers line') 314 parser.add_option('-n', '--no_header', dest='no_header', action='store_true', default=False, help='Include a column headers line')
309 parser.add_option('-o', '--output', dest='output', default=None, help='Output file for query results') 315 parser.add_option('-o', '--output', dest='output', default=None, help='Output file for query results')
310 (options, args) = parser.parse_args() 316 (options, args) = parser.parse_args()
311 317
318 def run_query(query,outputFile):
319 conn = get_connection(options.sqlitedb, addfunctions=True)
320 cur = conn.cursor()
321 results = cur.execute(query)
322 if not options.no_header:
323 outputFile.write("#%s\n" % '\t'.join([str(col[0]) for col in cur.description]))
324 # yield [col[0] for col in cur.description]
325 for i, row in enumerate(results):
326 # yield [val for val in row]
327 outputFile.write("%s\n" % '\t'.join([str(val) if val is not None else '' for val in row]))
328
312 # open sqlite connection 329 # open sqlite connection
313 conn = get_connection(options.sqlitedb) 330 conn = get_connection(options.sqlitedb)
314 # determine output destination 331 # determine output destination
315 if options.output is not None: 332 if options.output is not None:
316 try: 333 try:
392 exit(0) 409 exit(0)
393 # if not sqlite.is_read_only_query(query): 410 # if not sqlite.is_read_only_query(query):
394 # print >> sys.stderr, "Error: Must be a read only query" 411 # print >> sys.stderr, "Error: Must be a read only query"
395 # exit(2) 412 # exit(2)
396 try: 413 try:
397 conn = get_connection(options.sqlitedb, addfunctions=True) 414 run_query(query,outputFile)
398 cur = conn.cursor()
399 results = cur.execute(query)
400 if not options.no_header:
401 outputFile.write("#%s\n" % '\t'.join([str(col[0]) for col in cur.description]))
402 # yield [col[0] for col in cur.description]
403 for i, row in enumerate(results):
404 # yield [val for val in row]
405 outputFile.write("%s\n" % '\t'.join([str(val) if val is not None else '' for val in row]))
406 except Exception, exc: 415 except Exception, exc:
407 print >> sys.stderr, "Error: %s" % exc 416 print >> sys.stderr, "Error: %s" % exc
408 exit(1) 417 exit(1)
409 418
410 if __name__ == "__main__": 419 if __name__ == "__main__":