comparison query_tabular.py @ 4:9d73dca48178 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 9893037a046e4f4c1e7ba859e05f49378c398cb1-dirty
author jjohnson
date Tue, 19 Apr 2016 16:46:18 -0400
parents 3e3b3c883bec
children 19ae309ec53c
comparison
equal deleted inserted replaced
3:3e3b3c883bec 4:9d73dca48178
12 """ 12 """
13 TODO: 13 TODO:
14 - could read column names from comment lines, but issues with legal names 14 - could read column names from comment lines, but issues with legal names
15 - could add some transformations on tabular columns, 15 - could add some transformations on tabular columns,
16 e.g. a regex to format date/time strings 16 e.g. a regex to format date/time strings
17 c2 : re.sub('pat', 'sub', c2) 17 index: ['c2','c4,c5']
18 c3 : 18 unique: ['c1']
19 format: {
20 c2 : re.sub('pat', 'sub', c2)
21 c3 : len(c3)
22 }
23 def format(colname,val, expr):
24
25 - allow optional autoincrement id column - user supplied name?
26 autoincrement : 'id'
19 - column_defs dict of columns to create from tabular input 27 - column_defs dict of columns to create from tabular input
20 column_defs : { 'name1' : 'expr', 'name2' : 'expr'} 28 column_defs : { 'name1' : 'expr', 'name2' : 'expr'}
21 - allow multiple queries and outputs 29 - allow multiple queries and outputs
22 - add a --json input for table definitions (or yaml) 30 - add a --json input for table definitions (or yaml)
23 JSON config: 31 JSON config:
109 for i, col_name in enumerate(col_names): 117 for i, col_name in enumerate(col_names):
110 col_def.append('%s %s' % (col_names[i], col_types[i])) 118 col_def.append('%s %s' % (col_names[i], col_types[i]))
111 return col_names, col_types, col_def, col_idx 119 return col_names, col_types, col_def, col_idx
112 120
113 121
114 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False): 122 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]):
115 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) 123 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns)
116 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] 124 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types]
117 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) 125 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def))
118 # print >> sys.stdout, table_def 126 # print >> sys.stdout, table_def
119 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) 127 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names]))
120 # print >> sys.stdout, insert_stmt 128 # print >> sys.stdout, insert_stmt
121 data_lines = 0 129 data_lines = 0
122 try: 130 try:
123 c = conn.cursor() 131 c = conn.cursor()
124 c.execute(table_def) 132 c.execute(table_def)
133 conn.commit()
134 c.close()
135 for i,index in enumerate(unique_indexes):
136 index_name='idx_uniq_%s_%d' % (table_name,i)
137 index_columns = index.split(',')
138 create_index(conn, table_name, index_name, index_columns,unique=True)
139 for i,index in enumerate(indexes):
140 index_name='idx_%s_%d' % (table_name,i)
141 index_columns = index.split(',')
142 create_index(conn, table_name, index_name, index_columns)
143 c = conn.cursor()
125 with open(file_path, "r") as fh: 144 with open(file_path, "r") as fh:
126 for linenum, line in enumerate(fh): 145 for linenum, line in enumerate(fh):
127 if linenum < skip or line.startswith(comment_char): 146 if linenum < skip or line.startswith(comment_char):
128 continue 147 continue
129 data_lines += 1 148 data_lines += 1
139 c.close() 158 c.close()
140 except Exception, e: 159 except Exception, e:
141 print >> sys.stderr, 'Failed: %s' % (e) 160 print >> sys.stderr, 'Failed: %s' % (e)
142 exit(1) 161 exit(1)
143 162
163 def create_index(conn, table_name, index_name, index_columns, unique=False):
164 index_def = "CREATE %s INDEX %s on %s(%s)" % ('UNIQUE' if unique else '', index_name, table_name, ','.join(index_columns))
165 c = conn.cursor()
166 c.execute(index_def)
167 conn.commit()
168 c.close()
144 169
145 def regex_match(expr, item): 170 def regex_match(expr, item):
146 return re.match(expr, item) is not None 171 return re.match(expr, item) is not None
147 172
148 173
214 column_names = table['column_names'] if 'column_names' in table else None 239 column_names = table['column_names'] if 'column_names' in table else None
215 if column_names: 240 if column_names:
216 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False 241 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False
217 else: 242 else:
218 load_named_columns = False 243 load_named_columns = False
219 create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns) 244 unique_indexes = table['unique'] if 'unique' in table else []
245 indexes = table['index'] if 'index' in table else []
246 create_table(conn, path, table_name, column_names=column_names,
247 skip=comment_lines, load_named_columns=load_named_columns,
248 unique_indexes=unique_indexes, indexes=indexes)
220 except Exception, exc: 249 except Exception, exc:
221 print >> sys.stderr, "Error: %s" % exc 250 print >> sys.stderr, "Error: %s" % exc
222 conn.close() 251 conn.close()
223 252
224 query = None 253 query = None