diff query_tabular.xml @ 1:8a33b442ecd9 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 81f69ad5f39223059c40501e55ac777d3feca845
author iuc
date Fri, 18 Aug 2017 16:48:09 -0400
parents 3708ff0198b7
children 973f03d82c86
line wrap: on
line diff
--- a/query_tabular.xml	Tue Jul 18 09:07:07 2017 -0400
+++ b/query_tabular.xml	Fri Aug 18 16:48:09 2017 -0400
@@ -1,4 +1,4 @@
-<tool id="query_tabular" name="Query Tabular" version="1.0.0">
+<tool id="query_tabular" name="Query Tabular" version="2.0.0">
     <description>using sqlite sql</description>
 
     <macros>
@@ -26,7 +26,7 @@
         -j '$table_json'
         #if $sqlquery:
           -Q '$query_file' 
-          $no_header
+          @RESULT_HEADER@
           -o '$output'
         #end if
     ]]></command>
@@ -54,6 +54,9 @@
   #if $tbl.tbl_opts.pkey_autoincr:
     #set $jtbl['pkey_autoincr'] = str($tbl.tbl_opts.pkey_autoincr)
   #end if
+  #if $tbl.tbl_opts.column_names_from_first_line:
+    #set $jtbl['firstlinenames'] = True
+  #end if
   #if $tbl.tbl_opts.col_names:
   #set $col_names = str($tbl.tbl_opts.col_names)
     #if $tbl.tbl_opts.load_named_columns:
@@ -104,6 +107,9 @@
                     <help>By default, tables will be named: t1,t2,...,tn (table names must be unique)</help>
                     <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator>
                 </param>
+                <param name="column_names_from_first_line" type="boolean" truevalue="True" falsevalue="False" checked="false" 
+                    label="Use first line as column names"
+                    help="The names will be quoted if they are not valid SQLite column names."/>
                 <param name="col_names" type="text" value="" optional="true" label="Specify Column Names (comma-separated list)">
                     <help>By default, table columns will be named: c1,c2,c3,...,cn  (column names for a table must be unique)
                           You can override the default names by entering a comma -separated list of names, e.g. ',name1,,,name2' would rename the second and fifth columns.
@@ -119,8 +125,9 @@
                 <repeat name="indexes" title="Table Index">
                     <param name="unique" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="This is a unique index"/>
                     <param name="index_columns" type="text" value="" label="Index on Columns">
-                        <help>Create an index on the column names: e,g, c1  or c2,c4</help>
+                        <help>Create an index on the column names: e.g. for default column names:  c1  or c2,c4 ( use the names you gave for columns)</help>
                         <validator type="regex" message="Column name, separated by commes if more than one">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator>
+                        <sanitizer sanitize="False"/>
                     </param>
                 </repeat>
             </section>
@@ -132,7 +139,7 @@
                 <sanitizer sanitize="False"/>
                 <validator type="regex" message="">^(?ims)\s*select\s+.*\s+from\s+.*$</validator>
         </param>
-        <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers from tabular output"/>
+        <expand macro="result_results_header_line" />
     </inputs>
     <outputs>
         <data format="sqlite" name="sqlitedb" label="sqlite db of ${on_string}">
@@ -391,6 +398,56 @@
             <output name="output" file="pet_normalized_query_results.tsv"/>
         </test>
 
+        <test><!-- Loads psm_report.tsv as table PSMs: the select_columns line filter keeps columns 1,3,2,6,14,19, column names come from the first line, and col_names then names the first column Scan. SELECT * output (header=yes, empty prefix) is compared with psm_report_out1.tsv. -->
+            <repeat name="tables">
+                <param name="table" ftype="tabular" value="psm_report.tsv"/>
+                <section name="input_opts">
+                    <repeat name="linefilters">
+                        <conditional name="filter">
+                            <param name="filter_type" value="select_columns"/>
+                            <param name="columns" value="1,3,2,6,14,19"/>
+                        </conditional>
+                    </repeat>
+                </section>
+                <section name="tbl_opts">
+                    <param name="table_name" value="PSMs"/>
+                    <param name="column_names_from_first_line" value="True"/>
+                    <param name="col_names" value="Scan"/>
+                </section>
+            </repeat>
+            <param name="sqlquery" value="SELECT * from PSMs"/>
+            <conditional name="query_result">
+                <param name="header" value="yes"/>
+                <param name="header_prefix" value=""/>
+            </conditional>
+            <output name="output" file="psm_report_out1.tsv"/>
+        </test>
+
+        <test><!-- Loads psm_report.tsv as table PSMs with first-line column names (first column named Scan via col_names). The query references header-derived names that need double quotes (m/z, Precursor m/z Error [ppm], Protein(s)) and filters with NOT re_search(', ', ...), presumably a regex helper supplied by the tool; output (header=yes) is compared with psm_report_out2.tsv. -->
+            <repeat name="tables">
+                <param name="table" ftype="tabular" value="psm_report.tsv"/>
+                <section name="input_opts">
+                    <repeat name="linefilters">
+                        <conditional name="filter">
+                            <param name="filter_type" value="select_columns"/>
+                            <param name="columns" value="1,3,2,6,14,19"/>
+                        </conditional>
+                    </repeat>
+                </section>
+                <section name="tbl_opts">
+                    <param name="table_name" value="PSMs"/>
+                    <param name="column_names_from_first_line" value="True"/>
+                    <param name="col_names" value="Scan"/>
+                </section>
+            </repeat>
+            <param name="sqlquery" value="SELECT Scan,&quot;m/z&quot;, &quot;Precursor m/z Error [ppm]&quot;, Sequence, &quot;Protein(s)&quot; FROM PSMs WHERE NOT re_search(', ',&quot;Protein(s)&quot;)"/>
+            <conditional name="query_result">
+                <param name="header" value="yes"/>
+                <param name="header_prefix" value=""/>
+            </conditional>
+            <output name="output" file="psm_report_out2.tsv"/>
+        </test>
+
     </tests>
     <help><![CDATA[
 =============
@@ -406,6 +463,45 @@
 
 @LINEFILTERS_HELP@
 
+**Table Options**
+
+  Specify Name for Table:
+
+    - By default tables will be named: t1, t2, t3, etc.
+ 
+    - **Specify Name for Table** 
+      You can provide your own name for a database table; the name should begin with a letter and contain only letters, digits, or underscores. 
+      The name should not be a SQLite keyword: https://sqlite.org/lang_keywords.html
+
+
+  There are several ways to name columns in a table:  
+
+    - By default table columns will be named: c1, c2, c3, etc.
+
+    - If **Use first line as column names** is selected, the first line is interpreted as column names, and not loaded into the table as a data row.  
+      Any missing column name will be assigned cn where *n* is the ordinal position of the column, 
+      e.g. a blank header for the second column would be named: c2.
+      The column names will be quoted if they are not valid identifiers, 
+      e.g. if they are SQLite keywords, or start with a non letter character, or contain a character other than a letter, digit, or underscore.
+      The order of preference for quoting is to enclose the identifier in double quotes, 
+      else if it contains a double quote enclose it in square brackets,
+      else if it contains a square bracket enclose it in grave accents.
+      **NOTE:** This is the first line after line filtering has been applied. 
+      If the line filter *prepend a line number column* had been used, the name of the first column would be "1".
+      You could rename that column using **Specify Column Names**.
+
+    - **Specify Column Names** (comma-separated list)
+      This will override any previously assigned column names. 
+      You can also choose to only load those columns for which you provided a name, 
+      but that is better accomplished with the line filter: *select columns*. 
+
+
+  **Table Index**
+
+    Queries on larger tables can be much faster if indexes are specified.  
+    In general, specify an index for table columns used in joins with other tables, 
+    or on columns used in SQL query WHERE clauses or in GROUP BY clauses.
+
 
 **Outputs**