Mercurial > repos > jjohnson > query_tabular
diff query_tabular.py @ 4:9d73dca48178 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 9893037a046e4f4c1e7ba859e05f49378c398cb1-dirty
author | jjohnson |
---|---|
date | Tue, 19 Apr 2016 16:46:18 -0400 |
parents | 3e3b3c883bec |
children | 19ae309ec53c |
line wrap: on
line diff
--- a/query_tabular.py Tue Feb 16 09:34:30 2016 -0500 +++ b/query_tabular.py Tue Apr 19 16:46:18 2016 -0400 @@ -14,8 +14,16 @@ - could read column names from comment lines, but issues with legal names - could add some transformations on tabular columns, e.g. a regex to format date/time strings - c2 : re.sub('pat', 'sub', c2) - c3 : + index: ['c2','c4,c5'] + unique: ['c1'] + format: { + c2 : re.sub('pat', 'sub', c2) + c3 : len(c3) + } + def format(colname,val, expr): + +- allow optional autoincrement id column - user supplied name? + autoincrement : 'id' - column_defs dict of columns to create from tabular input column_defs : { 'name1' : 'expr', 'name2' : 'expr'} - allow multiple queries and outputs @@ -111,7 +119,7 @@ return col_names, col_types, col_def, col_idx -def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False): +def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]): col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) @@ -122,6 +130,17 @@ try: c = conn.cursor() c.execute(table_def) + conn.commit() + c.close() + for i,index in enumerate(unique_indexes): + index_name='idx_uniq_%s_%d' % (table_name,i) + index_columns = index.split(',') + create_index(conn, table_name, index_name, index_columns,unique=True) + for i,index in enumerate(indexes): + index_name='idx_%s_%d' % (table_name,i) + index_columns = index.split(',') + create_index(conn, table_name, index_name, index_columns) + c = conn.cursor() with open(file_path, "r") as fh: for linenum, line in enumerate(fh): if linenum < skip or line.startswith(comment_char): @@ -141,6 +160,12 @@ print >> sys.stderr, 'Failed: %s' % (e) exit(1) +def create_index(conn, table_name, index_name, index_columns, unique=False): + index_def = "CREATE %s INDEX %s on %s(%s)" % ('UNIQUE' if unique else '', index_name, table_name, ','.join(index_columns)) + c = conn.cursor() + c.execute(index_def) + conn.commit() + c.close() def regex_match(expr, item): return re.match(expr, item) is not None @@ -216,7 +241,11 @@ load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False else: load_named_columns = False - create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns) + unique_indexes = table['unique'] if 'unique' in table else [] + indexes = table['index'] if 'index' in table else [] + create_table(conn, path, table_name, column_names=column_names, + skip=comment_lines, load_named_columns=load_named_columns, + unique_indexes=unique_indexes, indexes=indexes) except Exception, exc: print >> sys.stderr, "Error: %s" % exc conn.close()