Mercurial > repos > jjohnson > query_tabular
diff query_tabular.py @ 3:3e3b3c883bec draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 60d1a49c09f87c1c1ec6fecbe54aa226bdc695a7-dirty
author | jjohnson |
---|---|
date | Tue, 16 Feb 2016 09:34:30 -0500 |
parents | 926c62f7fa09 |
children | 9d73dca48178 |
line wrap: on
line diff
--- a/query_tabular.py Fri Feb 12 08:37:07 2016 -0500 +++ b/query_tabular.py Tue Feb 16 09:34:30 2016 -0500 @@ -28,8 +28,10 @@ comment_lines : 1 }, { file_path : '/home/galaxy/dataset_102.dat', - table_name : 't2', - column_names : ['c1', 'c2', 'c3'] + table_name : 'gff', + column_names : ['seqname',,,'start','end'] + comment_lines : 1 + load_named_columns : True }, { file_path : '/home/galaxy/dataset_103.dat', table_name : 'test', @@ -58,10 +60,12 @@ def get_column_def(file_path, table_name, skip=0, comment_char='#', - column_names=None, max_lines=100): + column_names=None, max_lines=100,load_named_columns=False): col_pref = ['TEXT', 'REAL', 'INTEGER', None] col_types = [] + col_idx = None data_lines = 0 + try: with open(file_path, "r") as fh: for linenum, line in enumerate(fh): @@ -82,22 +86,33 @@ print >> sys.stderr, 'Failed at line: %d err: %s' % (linenum, e) except Exception, e: print >> sys.stderr, 'Failed: %s' % (e) - for i, col_type in enumerate(col_types): + for i,col_type in enumerate(col_types): if not col_type: col_types[i] = 'TEXT' - col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] - if column_names: - for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): - if cname and i < len(col_names): - col_names[i] = cname + if column_names: + col_names = [] + if load_named_columns: + col_idx = [] + for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): + if cname != '': + col_idx.append(i) + col_names.append(cname) + col_types = [col_types[i] for i in col_idx] + else: + col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] + for i, cname in enumerate([cn.strip() for cn in column_names.split(',')]): + if cname and i < len(col_names): + col_names[i] = cname + else: + col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] col_def = [] for i, col_name in enumerate(col_names): col_def.append('%s %s' % (col_names[i], col_types[i])) - return col_names, col_types, col_def + return col_names, col_types, col_def, col_idx -def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None): - col_names, col_types, col_def = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names) +def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False): + col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) # print >> sys.stdout, table_def @@ -114,6 +129,8 @@ data_lines += 1 try: fields = line.rstrip('\r\n').split('\t') + if col_idx: + fields = [fields[i] for i in col_idx] vals = [col_func[i](x) if x else None for i, x in enumerate(fields)] c.execute(insert_stmt, vals) except Exception, e: @@ -193,9 +210,13 @@ for ti, table in enumerate(tdef['tables']): path = table['file_path'] table_name = table['table_name'] if 'table_name' in table else 't%d' % (ti + 1) + comment_lines = table['comment_lines'] if 'comment_lines' in table else 0 column_names = table['column_names'] if 'column_names' in table else None - comment_lines = table['comment_lines'] if 'comment_lines' in table else 0 - create_table(conn, path, table_name, column_names=column_names, skip=comment_lines) + if column_names: + load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False + else: + load_named_columns = False + create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns) except Exception, exc: print >> sys.stderr, "Error: %s" % exc conn.close()