diff query_tabular.py @ 4:9d73dca48178 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 9893037a046e4f4c1e7ba859e05f49378c398cb1-dirty
author jjohnson
date Tue, 19 Apr 2016 16:46:18 -0400
parents 3e3b3c883bec
children 19ae309ec53c
line wrap: on
line diff
--- a/query_tabular.py	Tue Feb 16 09:34:30 2016 -0500
+++ b/query_tabular.py	Tue Apr 19 16:46:18 2016 -0400
@@ -14,8 +14,16 @@
 - could read column names from comment lines, but issues with legal names
 - could add some transformations on tabular columns,
   e.g. a regex to format date/time strings
-    c2 : re.sub('pat', 'sub', c2)
-    c3 :
+    index: ['c2','c4,c5']
+    unique: ['c1']
+    format: {
+      c2 : re.sub('pat', 'sub', c2)
+      c3 : len(c3)
+   }
+   def format(colname,val, expr):
+     
+- allow optional autoincrement id column - user supplied name?
+    autoincrement : 'id'
 - column_defs dict of columns to create from tabular input
     column_defs : { 'name1' : 'expr', 'name2' : 'expr'}
 - allow multiple queries and outputs
@@ -111,7 +119,7 @@
     return col_names, col_types, col_def, col_idx
 
 
-def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False):
+def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]):
     col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns)
     col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types]
     table_def = 'CREATE TABLE %s (\n    %s\n);' % (table_name, ', \n    '.join(col_def))
@@ -122,6 +130,17 @@
     try:
         c = conn.cursor()
         c.execute(table_def)
+        conn.commit()
+        c.close()
+        for i,index in enumerate(unique_indexes):
+            index_name='idx_uniq_%s_%d' % (table_name,i)
+            index_columns = index.split(',')
+            create_index(conn, table_name, index_name, index_columns,unique=True)
+        for i,index in enumerate(indexes):
+            index_name='idx_%s_%d' % (table_name,i)
+            index_columns = index.split(',')
+            create_index(conn, table_name, index_name, index_columns)
+        c = conn.cursor()
         with open(file_path, "r") as fh:
             for linenum, line in enumerate(fh):
                 if linenum < skip or line.startswith(comment_char):
@@ -141,6 +160,12 @@
         print >> sys.stderr, 'Failed: %s' % (e)
         exit(1)
 
+def create_index(conn, table_name, index_name, index_columns, unique=False):
+    index_def = "CREATE %s INDEX %s on %s(%s)" % ('UNIQUE' if unique else '', index_name, table_name, ','.join(index_columns))
+    c = conn.cursor()
+    c.execute(index_def)
+    conn.commit()
+    c.close()
 
 def regex_match(expr, item):
     return re.match(expr, item) is not None
@@ -216,7 +241,11 @@
                         load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False
                     else:   
                         load_named_columns = False
-                    create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns)
+                    unique_indexes = table['unique'] if 'unique' in table else []
+                    indexes = table['index'] if 'index' in table else []
+                    create_table(conn, path, table_name, column_names=column_names, 
+                                 skip=comment_lines, load_named_columns=load_named_columns, 
+                                 unique_indexes=unique_indexes, indexes=indexes)
         except Exception, exc:
             print >> sys.stderr, "Error: %s" % exc
     conn.close()