Mercurial > repos > jjohnson > query_tabular
comparison query_tabular.py @ 3:3e3b3c883bec draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 60d1a49c09f87c1c1ec6fecbe54aa226bdc695a7-dirty
author | jjohnson |
---|---|
date | Tue, 16 Feb 2016 09:34:30 -0500 |
parents | 926c62f7fa09 |
children | 9d73dca48178 |
comparison
equal
deleted
inserted
replaced
2:f5df461a6b90 | 3:3e3b3c883bec |
---|---|
26 table_name : 't1', | 26 table_name : 't1', |
27 column_names : ['c1', 'c2', 'c3'], | 27 column_names : ['c1', 'c2', 'c3'], |
28 comment_lines : 1 | 28 comment_lines : 1 |
29 }, | 29 }, |
30 { file_path : '/home/galaxy/dataset_102.dat', | 30 { file_path : '/home/galaxy/dataset_102.dat', |
31 table_name : 't2', | 31 table_name : 'gff', |
32 column_names : ['c1', 'c2', 'c3'] | 32 column_names : ['seqname',,,'start','end'] |
33 comment_lines : 1 | |
34 load_named_columns : True | |
33 }, | 35 }, |
34 { file_path : '/home/galaxy/dataset_103.dat', | 36 { file_path : '/home/galaxy/dataset_103.dat', |
35 table_name : 'test', | 37 table_name : 'test', |
36 column_names : ['c1', 'c2', 'c3'] | 38 column_names : ['c1', 'c2', 'c3'] |
37 } | 39 } |
56 return 'TEXT' | 58 return 'TEXT' |
57 return None | 59 return None |
58 | 60 |
59 | 61 |
60 def get_column_def(file_path, table_name, skip=0, comment_char='#', | 62 def get_column_def(file_path, table_name, skip=0, comment_char='#', |
61 column_names=None, max_lines=100): | 63 column_names=None, max_lines=100,load_named_columns=False): |
62 col_pref = ['TEXT', 'REAL', 'INTEGER', None] | 64 col_pref = ['TEXT', 'REAL', 'INTEGER', None] |
63 col_types = [] | 65 col_types = [] |
66 col_idx = None | |
64 data_lines = 0 | 67 data_lines = 0 |
68 | |
65 try: | 69 try: |
66 with open(file_path, "r") as fh: | 70 with open(file_path, "r") as fh: |
67 for linenum, line in enumerate(fh): | 71 for linenum, line in enumerate(fh): |
68 if linenum < skip: | 72 if linenum < skip: |
69 continue | 73 continue |
80 col_types[i] = colType | 84 col_types[i] = colType |
81 except Exception, e: | 85 except Exception, e: |
82 print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e) | 86 print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e) |
83 except Exception, e: | 87 except Exception, e: |
84 print >> sys.stderr, 'Failed: %s' % (e) | 88 print >> sys.stderr, 'Failed: %s' % (e) |
85 for i, col_type in enumerate(col_types): | 89 for i,col_type in enumerate(col_types): |
86 if not col_type: | 90 if not col_type: |
87 col_types[i] = 'TEXT' | 91 col_types[i] = 'TEXT' |
88 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] | 92 if column_names: |
89 if column_names: | 93 col_names = [] |
90 for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): | 94 if load_named_columns: |
91 if cname and i < len(col_names): | 95 col_idx = [] |
92 col_names[i] = cname | 96 for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): |
97 if cname != '': | |
98 col_idx.append(i) | |
99 col_names.append(cname) | |
100 col_types = [col_types[i] for i in col_idx] | |
101 else: | |
102 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] | |
103 for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): | |
104 if cname and i < len(col_names): | |
105 col_names[i] = cname | |
106 else: | |
107 col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] | |
93 col_def = [] | 108 col_def = [] |
94 for i, col_name in enumerate(col_names): | 109 for i, col_name in enumerate(col_names): |
95 col_def.append('%s %s' % (col_names[i], col_types[i])) | 110 col_def.append('%s %s' % (col_names[i], col_types[i])) |
96 return col_names, col_types, col_def | 111 return col_names, col_types, col_def, col_idx |
97 | 112 |
98 | 113 |
99 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None): | 114 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False): |
100 col_names, col_types, col_def = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names) | 115 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) |
101 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] | 116 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] |
102 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) | 117 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) |
103 # print >> sys.stdout, table_def | 118 # print >> sys.stdout, table_def |
104 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) | 119 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) |
105 # print >> sys.stdout, insert_stmt | 120 # print >> sys.stdout, insert_stmt |
112 if linenum < skip or line.startswith(comment_char): | 127 if linenum < skip or line.startswith(comment_char): |
113 continue | 128 continue |
114 data_lines += 1 | 129 data_lines += 1 |
115 try: | 130 try: |
116 fields = line.rstrip('\r\n').split('\t') | 131 fields = line.rstrip('\r\n').split('\t') |
132 if col_idx: | |
133 fields = [fields[i] for i in col_idx] | |
117 vals = [col_func[i](x) if x else None for i, x in enumerate(fields)] | 134 vals = [col_func[i](x) if x else None for i, x in enumerate(fields)] |
118 c.execute(insert_stmt, vals) | 135 c.execute(insert_stmt, vals) |
119 except Exception, e: | 136 except Exception, e: |
120 print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e) | 137 print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e) |
121 conn.commit() | 138 conn.commit() |
191 tdef = json.load(fh) | 208 tdef = json.load(fh) |
192 if 'tables' in tdef: | 209 if 'tables' in tdef: |
193 for ti, table in enumerate(tdef['tables']): | 210 for ti, table in enumerate(tdef['tables']): |
194 path = table['file_path'] | 211 path = table['file_path'] |
195 table_name = table['table_name'] if 'table_name' in table else 't%d' % (ti + 1) | 212 table_name = table['table_name'] if 'table_name' in table else 't%d' % (ti + 1) |
213 comment_lines = table['comment_lines'] if 'comment_lines' in table else 0 | |
196 column_names = table['column_names'] if 'column_names' in table else None | 214 column_names = table['column_names'] if 'column_names' in table else None |
197 comment_lines = table['comment_lines'] if 'comment_lines' in table else 0 | 215 if column_names: |
198 create_table(conn, path, table_name, column_names=column_names, skip=comment_lines) | 216 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False |
217 else: | |
218 load_named_columns = False | |
219 create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns) | |
199 except Exception, exc: | 220 except Exception, exc: |
200 print >> sys.stderr, "Error: %s" % exc | 221 print >> sys.stderr, "Error: %s" % exc |
201 conn.close() | 222 conn.close() |
202 | 223 |
203 query = None | 224 query = None |