changeset 16:5b4f6cf857cf draft

Uploaded
author jjohnson
date Thu, 22 Jun 2017 09:14:56 -0400
parents 67082f49f047
children c6e6e224509c
files ._query_tabular.py query_tabular.py query_tabular.xml
diffstat 3 files changed, 52 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
Binary file ._query_tabular.py has changed
--- a/query_tabular.py	Fri Jun 09 11:25:46 2017 -0400
+++ b/query_tabular.py	Thu Jun 22 09:14:56 2017 -0400
@@ -79,6 +79,9 @@
                 self.func = lambda i,l: l if not rgx.search(l) else None
             elif filter_dict['action'] == 'include_find':
                 self.func = lambda i,l: l if rgx.search(l) else None
+        elif filter_dict['filter'] == 'select_columns':
+            cols = [int(c) - 1 for c in filter_dict['columns']]
+            self.func = lambda i,l: self.select_columns(l,cols)
         elif filter_dict['filter'] == 'replace':
             p = filter_dict['pattern']
             r = filter_dict['replace']
@@ -97,6 +100,9 @@
             self.func = lambda i,l: self.normalize(l,cols,sep)
     def __iter__(self):
         return self
+    def select_columns(self,line,cols):
+        fields = line.split('\t')
+        return '\t'.join([fields[x] for x in cols])
     def normalize(self,line,split_cols,sep):
         lines = []
         fields = line.rstrip('\r\n').split('\t')
@@ -309,6 +315,17 @@
     parser.add_option('-o', '--output', dest='output', default=None, help='Output file for query results')
     (options, args) = parser.parse_args()
 
+    def run_query(query,outputFile):
+        conn = get_connection(options.sqlitedb, addfunctions=True)
+        cur = conn.cursor()
+        results = cur.execute(query)
+        if not options.no_header:
+            outputFile.write("#%s\n" % '\t'.join([str(col[0]) for col in cur.description]))
+            # yield [col[0] for col in cur.description]
+        for i, row in enumerate(results):
+            # yield [val for val in row]
+            outputFile.write("%s\n" % '\t'.join([str(val) if val is not None else '' for val in row]))
+
     # open sqlite connection
     conn = get_connection(options.sqlitedb)
     # determine output destination
@@ -394,15 +411,7 @@
     #    print >> sys.stderr, "Error: Must be a read only query"
     #    exit(2)
     try:
-        conn = get_connection(options.sqlitedb, addfunctions=True)
-        cur = conn.cursor()
-        results = cur.execute(query)
-        if not options.no_header:
-            outputFile.write("#%s\n" % '\t'.join([str(col[0]) for col in cur.description]))
-            # yield [col[0] for col in cur.description]
-        for i, row in enumerate(results):
-            # yield [val for val in row]
-            outputFile.write("%s\n" % '\t'.join([str(val) if val is not None else '' for val in row]))
+        run_query(query,outputFile)
     except Exception, exc:
         print >> sys.stderr, "Error: %s" % exc
         exit(1)
--- a/query_tabular.xml	Fri Jun 09 11:25:46 2017 -0400
+++ b/query_tabular.xml	Thu Jun 22 09:14:56 2017 -0400
@@ -1,4 +1,4 @@
-<tool id="query_tabular" name="Query Tabular" version="3.0.0">
+<tool id="query_tabular" name="Query Tabular" version="3.1.0">
     <description>using sqlite sql</description>
 
     <requirements>
@@ -103,6 +103,11 @@
       #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
       #set $filter_dict['action'] = str($fi.filter.regex_action)
       #silent $input_filters.append($filter_dict)
+    #elif $fi.filter.filter_type == 'select_columns':
+      #set $filter_dict = dict()
+      #set $filter_dict['filter'] = str($fi.filter.filter_type)
+      #set $filter_dict['columns'] = [int(str($ci)) for $ci in str($fi.filter.columns).split(',')]
+      #silent $input_filters.append($filter_dict)
     #elif $fi.filter.filter_type == 'replace':
       #set $filter_dict = dict()
       #set $filter_dict['filter'] = str($fi.filter.filter_type)
@@ -145,6 +150,7 @@
                             <option value="skip">skip leading lines</option>
                             <option value="comment">comment char</option>
                             <option value="regex">by regex expression matching</option>
+                            <option value="select_columns">select columns</option>
                             <option value="replace">regex replace value in column</option>
                             <option value="prepend_line_num">prepend a line number column</option>
                             <option value="append_line_num">append a line number column</option>
@@ -172,6 +178,12 @@
                                 <option value="include_find">include line if pattern found</option>
                             </param>
                         </when>
+                        <when value="select_columns">
+                            <param name="columns" type="text" value="" label="enter column numbers to keep"
+                                help="example: 1,4,2 (selects the first,fourth, and second columns)">
+                                <validator type="regex" message="Column ordinal positions separated by commas">^([1-9]\d*)(,[1-9]\d*)*$</validator>
+                            </param>
+                        </when>
                         <when value="replace">
                             <param name="column" type="data_column" data_ref="table" label="Column to replace text"
                                    help=""/>
@@ -310,6 +322,7 @@
   - skip leading lines              skip the first *number* of lines
   - comment char                    omit any lines that start with the specified comment character 
   - by regex expression matching    *include/exclude* lines the match the regex expression 
+  - select columns                  choose to include only selected columns in the order specified 
   - regex replace value in column   replace a field in a column using a regex substitution (good for date reformatting)
   - prepend a line number column    each line has the ordinal value of the line read by this filter as the first column
   - append a line number column     each line has the ordinal value of the line read by this filter as the last column
@@ -541,18 +554,19 @@
         Filter 1 - by regex expression matching [include]: '^\d+' (include lines that start with a number) 
         Filter 2 - append a line number column:
         Filter 3 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format) 
+        Filter 4 - select columns 7,2,3,4,1
 
       Table: People
-      Columns: Pets,FirstName,LastName,DOB,,,id
+      Columns: id,FirstName,LastName,DOB,Pets
 
-      ====  =========  ========   ==========  ==
-      Pets  FirstName  LastName   DOB         id
-      ====  =========  ========   ==========  ==
-      2     Paula      Brown      1978-05-24  1
-      1     Steven     Jones      1974-04-04  2
-      0     Jane       Doe        1978-05-24  3
-      1     James      Smith      1980-10-20  4
-      ====  =========  ========   ==========  ==
+      ==  =========  ========   ==========  ====
+      id  FirstName  LastName   DOB         Pets
+      ==  =========  ========   ==========  ====
+      1     Paula      Brown    1978-05-24  2
+      2     Steven     Jones    1974-04-04  1
+      3     Jane       Doe      1978-05-24  0
+      4     James      Smith    1980-10-20  1
+      ==  =========  ========   ==========  ====
 
 
     *Pet Table*
@@ -563,18 +577,19 @@
         Filter 2 - append a line number column:
         Filter 3 - by regex expression matching [exclude]: '^0\t' (exclude lines with no pets)
         Filter 4 - normalize list columns[5,6]:
+        Filter 5 - select columns 7,5,6
 
       Table: Pet
-      Columns: ,,,,PetName,PetType,id
+      Columns: id,PetName,PetType
 
-      ========  ========  ==
-      PetName   PetType   id
-      ========  ========  ==
-      Rex       dog       1
-      Fluff     cat       1
-      Allie     cat       2
-      Spot                4
-      ========  ========  ==
+      ==  ========  ========
+      id  PetName   PetType 
+      ==  ========  ========
+      1   Rex       dog     
+      1   Fluff     cat     
+      2   Allie     cat     
+      4   Spot              
+      ==  ========  ========
 
 
     Query: SELECT FirstName,LastName,PetName FROM People join Pet on People.id = Pet.id WHERE PetType = 'cat';