query_tabular: query_tabular.xml comparison

comparison query_tabular.xml @ 20:ab27c4bd14b9 draft

Uploaded

author	jjohnson
date	Fri, 14 Jul 2017 11:39:27 -0400
parents	b9f797bf4f38
children	357fe86f245d

comparison

equal deleted inserted replaced

-:9d9ab2c69014
+:ab27c4bd14b9
-<tool id="query_tabular" name="Query Tabular" version="4.0.0">
+<tool id="query_tabular" name="Query Tabular" version="5.0.0">
 <description>using sqlite sql</description>
+<macros>
+<import>macros.xml</import>
+</macros>
 <requirements>
 </requirements>
 <stdio>
 <exit_code range="1:" />
 #set $jtbl['unique'] = $idx_unique
 #end if
 #if len($idx_non) > 0:
 #set $jtbl['index'] = $idx_non
 #end if
-#set $input_filters = []
+#set $linefilters = $tbl.input_opts.linefilters
-#for $fi in $tbl.input_opts.linefilters:
+@LINEFILTERS@
-#if $fi.filter.filter_type == 'skip':
-#set $skip_lines = None
-#if str($fi.filter.skip_lines) != '':
-#set $skip_lines = int($fi.filter.skip_lines)
-#elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:
-#set $skip_lines = int($tbl.table.metadata.comment_lines)
-#end if
-#if $skip_lines is not None:
-#set $filter_dict = dict()
-#set $filter_dict['filter'] = str($fi.filter.filter_type)
-#set $filter_dict['count'] = $skip_lines
-#silent $input_filters.append($filter_dict)
-#end if
-#elif $fi.filter.filter_type == 'comment':
-#set $filter_dict = dict()
-#set $filter_dict['filter'] = 'regex'
-#set $filter_dict['pattern'] = '^(%s).*$' % '|'.join([chr(int(x)).replace('|','[|]') for x in (str($fi.filter.comment_char)).split(',')])
-#set $filter_dict['action'] = 'exclude_match'
-#silent $input_filters.append($filter_dict)
-#elif $fi.filter.filter_type == 'regex':
-#set $filter_dict = dict()
-#set $filter_dict['filter'] = str($fi.filter.filter_type)
-#set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
-#set $filter_dict['action'] = str($fi.filter.regex_action)
-#silent $input_filters.append($filter_dict)
-#elif $fi.filter.filter_type == 'select_columns':
-#set $filter_dict = dict()
-#set $filter_dict['filter'] = str($fi.filter.filter_type)
-#set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
-#silent $input_filters.append($filter_dict)
-#elif $fi.filter.filter_type == 'replace':
-#set $filter_dict = dict()
-#set $filter_dict['filter'] = str($fi.filter.filter_type)
-#set $filter_dict['column'] = int(str($fi.filter.column).replace('c',''))
-#set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
-#set $filter_dict['replace'] = str($fi.filter.regex_replace)
-#silent $input_filters.append($filter_dict)
-#elif str($fi.filter.filter_type).endswith('pend_line_num'):
-#set $filter_dict = dict()
-#set $filter_dict['filter'] = str($fi.filter.filter_type)
-#silent $input_filters.append($filter_dict)
-#elif str($fi.filter.filter_type).endswith('pend_text'):
-#set $filter_dict = dict()
-#set $filter_dict['filter'] = str($fi.filter.filter_type)
-#set $filter_dict['column_text'] = str($fi.filter.column_text)
-#silent $input_filters.append($filter_dict)
-#elif $fi.filter.filter_type == 'normalize':
-#set $filter_dict = dict()
-#set $filter_dict['filter'] = str($fi.filter.filter_type)
-#set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
-#set $filter_dict['separator'] = str($fi.filter.separator)
-#silent $input_filters.append($filter_dict)
-#end if
-#end for
 #if $input_filters:
 #set $jtbl['filters'] = $input_filters
 #end if
 #set $jtbls += [$jtbl]
 #end for
 help="Make sure your added table names are not already in this database"/>
 </section>
 <repeat name="tables" title="Database Table" min="0">
 <param name="table" type="data" format="tabular" label="Tabular Dataset for Table"/>
 <section name="input_opts" expanded="false" title="Filter Dataset Input">
-<repeat name="linefilters" title="Filter Tabular Input Lines">
+<expand macro="macro_line_filters" />
-<conditional name="filter">
-<param name="filter_type" type="select" label="Filter By">
-<option value="skip">skip leading lines</option>
-<option value="comment">comment char</option>
-<option value="regex">by regex expression matching</option>
-<option value="select_columns">select columns</option>
-<option value="replace">regex replace value in column</option>
-<option value="prepend_line_num">prepend a line number column</option>
-<option value="append_line_num">append a line number column</option>
-<option value="prepend_text">prepend a column with the given text</option>
-<option value="append_text">append a column with the given text</option>
-<option value="normalize">normalize list columns, replicates row for each item in list</option>
-</param>
-<when value="skip">
-<param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines"
-help="Leave blank to use the comment lines metadata for this dataset" />
-</when>
-<when value="comment">
-<param name="comment_char" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are skipped">
-<option value="62">&gt;</option>
-<option value="64">@</option>
-<option value="43">+</option>
-<option value="60">&lt;</option>
-<option value="42">*</option>
-<option value="45">-</option>
-<option value="61">=</option>
-<option value="124">|</option>
-<option value="63">?</option>
-<option value="36">$</option>
-<option value="46">.</option>
-<option value="58">:</option>
-<option value="38">&amp;</option>
-<option value="37">%</option>
-<option value="94">^</option>
-<option value="35">&#35;</option>
-<option value="33">!</option>
-</param>
-</when>
-<when value="prepend_line_num"/>
-<when value="append_line_num"/>
-<when value="prepend_text">
-<param name="column_text" type="text" value="" label="text for column">
-</param>
-</when>
-<when value="append_text">
-<param name="column_text" type="text" value="" label="text for column">
-</param>
-</when>
-<when value="regex">
-<param name="regex_pattern" type="text" value="" label="regex pattern">
-<sanitizer sanitize="False"/>
-</param>
-<param name="regex_action" type="select" label="action for regex match">
-<option value="exclude_match">exclude line on pattern match</option>
-<option value="include_match">include line on pattern match</option>
-<option value="exclude_find">exclude line if pattern found</option>
-<option value="include_find">include line if pattern found</option>
-</param>
-</when>
-<when value="select_columns">
-<param name="columns" type="text" value="" label="enter column numbers to keep"
-help="example: 1,4,2 or c1,c4,c2(selects the first,fourth, and second columns)">
-<validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator>
-</param>
-</when>
-<when value="replace">
-<param name="column" type="text" value="" label="enter column number to replace"
-help="example: 1 or c1 (selects the first column)">
-<validator type="regex" message="Column ordinal position separated by commas">^(c?[1-9]\d*)$</validator>
-</param>
-<param name="regex_pattern" type="text" value="" label="regex pattern">
-<sanitizer sanitize="False"/>
-</param>
-<param name="regex_replace" type="text" value="" label="replacement expression">
-<sanitizer sanitize="False"/>
-</param>
-</when>
-<when value="normalize">
-<param name="columns" type="text" value="" label="enter column numbers to normalize">
-<help><![CDATA[
-example: 2,4 or c2,c4 (selects the second, and fourth columns)
-If multiple columns are selected, they should have the same length and separator on each line
-]]></help>
-<validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator>
-</param>
-<param name="separator" type="text" value="," label="List item delimiter in column">
-<sanitizer sanitize="False"/>
-<validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator>
-</param>
-</when>
-</conditional>
-</repeat>
 </section>
 <section name="tbl_opts" expanded="false" title="Table Options">
 <param name="table_name" type="text" value="" optional="true" label="Specify Name for Table">
 <help>By default, tables will be named: t1,t2,...,tn (table names must be unique)</help>
 <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator>
 <validator type="regex" message="Column name, separated by commes if more than one">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator>
 </param>
 </repeat>
 </section>
 </repeat>
-<param name="save_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Save the sqlite database in your history"/>
+<param name="save_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Save the sqlite database in your history"
+help="SQLite to tabular tool can run additional queries on this database"/>
 <param name="sqlquery" type="text" area="true" size="20x80" value="" optional="true" label="SQL Query to generate tabular output">
 <help>By default: tables are named: t1,t2,...,tn and columns in each table: c1,c2,...,cn</help>
 <sanitizer sanitize="False"/>
 <validator type="regex" message="">^(?ims)\s*select\s+.*\s+from\s+.*$</validator>
 </param>
 <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers from tabular output"/>
 </inputs>
 <outputs>
 <data format="sqlite" name="sqlitedb" label="sqlite db of ${on_string}">
-<filter>save_db or not (sqlquery and len(sqlquery) > 0)</filter>
+<filter>save_db</filter>
 </data>
 <data format="tabular" name="output" label="query results on ${on_string}">
-<filter>sqlquery and len(sqlquery) > 0</filter>
+<filter>not save_db or (sqlquery and len(sqlquery.strip()) > 0)</filter>
 </data>
 </outputs>
 <tests>
 <test>
 Loads tabular datasets into a SQLite_ data base.
 An existing SQLite_ data base can be used as input, and any selected tabular datasets will be added as new tables in that data base.
-**Input Line Filters**
+@LINEFILTERS_HELP@
-As a tabular file is being read, line filters may be applied.
-::
-- skip leading lines              skip the first *number* of lines
-- comment char                    omit any lines that start with the specified comment character
-- by regex expression matching    *include/exclude* lines the match the regex expression
-- select columns                  choose to include only selected columns in the order specified
-- regex replace value in column   replace a field in a column using a regex substitution (good for date reformatting)
-- prepend a line number column    each line has the ordinal value of the line read by this filter as the first column
-- append a line number column     each line has the ordinal value of the line read by this filter as the last column
-- normalize list columns          replicates the line for each item in the specified list *columns*
 **Outputs**
 The results of a SQL query are output to the history as a tabular file.
 The SQLite_ data base can also be saved and output as a dataset in the history.
 *(The* **SQLite to tabular** *tool can run additional queries on this database.)*
-For help in using SQLite_ see:  http://www.sqlite.org/docs.html
+@QUERY_HELP@
-**NOTE:** input for SQLite dates input field must be in the format: *YYYY-MM-DD* for example: 2015-09-30
+@LINEFILTERS_HELP_EXAMPLE@
-See: http://www.sqlite.org/lang_datefunc.html
-**Example**
-Given 2 tabular datasets: *customers* and *sales*
-Dataset *customers*
-Table name: "customers"
-Column names: "CustomerID,FirstName,LastName,Email,DOB,Phone"
-=========== ========== ========== ===================== ========== ============
-#CustomerID FirstName  LastName   Email                 DOB        Phone
-=========== ========== ========== ===================== ========== ============
-1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
-2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
-3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
-4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
-=========== ========== ========== ===================== ========== ============
-Dataset *sales*
-Table name: "sales"
-Column names: "CustomerID,Date,SaleAmount"
-=============  ============  ============
-#CustomerID    Date          SaleAmount
-=============  ============  ============
-2    2004-05-06         100.22
-1    2004-05-07          99.95
-3    2004-05-07         122.95
-3    2004-05-13         100.00
-4    2004-05-22         555.55
-=============  ============  ============
-The query
-::
-SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales"
-FROM customers join sales on customers.CustomerID = sales.CustomerID
-GROUP BY customers.CustomerID ORDER BY TotalSales DESC;
-Produces this tabular output:
-========== ======== ==========
-#FirstName LastName TotalSales
-========== ======== ==========
-James      Smith    555.55
-Paula      Brown    222.95
-Steven     Goldfish 100.22
-John       Smith    99.95
-========== ======== ==========
-If the optional Table name and Column names inputs are not used, the query would be:
-::
-SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales"
-FROM t1 join t2 on t1.c1 = t2.c1
-GROUP BY t1.c1 ORDER BY TotalSales DESC;
-You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5:
-Column names: ,FirstName,LastName,,BirthDate
-Results in the following data base table
-=========== ========== ========== ===================== ========== ============
-#c1         FirstName  LastName   c4                    BirthDate  c6
-=========== ========== ========== ===================== ========== ============
-1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
-2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
-3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
-4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
-=========== ========== ========== ===================== ========== ============
-Regular_expression_ functions are included for:
-::
-matching:      re_match('pattern',column)
-SELECT t1.FirstName, t1.LastName
-FROM t1
-WHERE re_match('^.*\.(net|org)$',c4)
-Results:
-=========== ==========
-#FirstName  LastName
-=========== ==========
-Steven      Goldfish
-Paula       Brown
-=========== ==========
-::
-searching:     re_search('pattern',column)
-substituting:  re_sub('pattern','replacement,column)
-SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB"
-FROM t1
-WHERE re_search('[hp]er',c4)
-Results:
-=========== ========== ==========
-#FirstName  LastName   DOB
-=========== ========== ==========
-Steven      Goldfish   04/04/74
-Paula       Brown      24/05/78
-James       Smith      20/10/80
-=========== ========== ==========
-**Line Filtering Example**
-*(Six filters are applied as the following file is read)*
-::
-Input Tabular File:
-#People with pets
-Pets FirstName           LastName   DOB       PetNames  PetType
-2    Paula               Brown      24/05/78  Rex,Fluff dog,cat
-1    Steven              Jones      04/04/74  Allie     cat
-0    Jane                Doe        24/05/78
-1    James               Smith      20/10/80  Spot
-Filter 1 - append a line number column:
-#People with pets                                                 1
-Pets FirstName           LastName   DOB       PetNames  PetType   2
-2    Paula               Brown      24/05/78  Rex,Fluff dog,cat   3
-1    Steven              Jones      04/04/74  Allie     cat       4
-0    Jane                Doe        24/05/78                      5
-1    James               Smith      20/10/80  Spot                6
-Filter 2 - by regex expression matching [include]: '^\d+' (include lines that start with a number)
-2    Paula               Brown      24/05/78  Rex,Fluff dog,cat   3
-1    Steven              Jones      04/04/74  Allie     cat       4
-0    Jane                Doe        24/05/78                      5
-1    James               Smith      20/10/80  Spot                6
-Filter 3 - append a line number column:
-2    Paula               Brown      24/05/78  Rex,Fluff dog,cat   3  1
-1    Steven              Jones      04/04/74  Allie     cat       4  2
-0    Jane                Doe        24/05/78                      5  3
-1    James               Smith      20/10/80  Spot                6  4
-Filter 4 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format)
-2    Paula               Brown      1978-05-24  Rex,Fluff dog,cat   3  1
-1    Steven              Jones      1974-04-04  Allie     cat       4  2
-0    Jane                Doe        1978-05-24                      5  3
-1    James               Smith      1980-10-20  Spot                6  4
-Filter 5 - normalize list columns[5,6]:
-2    Paula               Brown      1978-05-24  Rex       dog       3  1
-2    Paula               Brown      1978-05-24  Fluff     cat       3  1
-1    Steven              Jones      1974-04-04  Allie     cat       4  2
-0    Jane                Doe        1978-05-24                      5  3
-1    James               Smith      1980-10-20  Spot                6  4
-Filter 6 - append a line number column:
-2    Paula               Brown      1978-05-24  Rex       dog       3  1  1
-2    Paula               Brown      1978-05-24  Fluff     cat       3  1  2
-1    Steven              Jones      1974-04-04  Allie     cat       4  2  3
-0    Jane                Doe        1978-05-24                      5  3  4
-1    James               Smith      1980-10-20  Spot                6  4  5
 Table name: pets
 Table columns: Pets,FirstName,LastName,Birthdate,PetNames,PetType,line_num,entry_num,row_num
 ======  ==========  ========  ==========  =========  ========  =========  ==========  ========
 **Normalizing by Line Filtering into 2 Tables**
+*Relational database operations work with single-valued column entries.
+To apply relational operations to tabular files that contain fields with lists of values,
+we need to "normalize" those fields, duplicating lines for each item in the list.
+In this example we create 2 tables, one for single-valued fields and a second with list-valued fields normalized.
+Because we add a line number first for each table, we can join the 2 tables on the line number column.*
+https://en.wikipedia.org/wiki/First_normal_form
 *People Table*
 ::
 Filter 1 - by regex expression matching [include]: '^\d+' (include lines that start with a number)
 2   Allie     cat
 4   Spot
 ==  ========  ========
-Query: SELECT FirstName,LastName,PetName FROM People join Pet on People.id = Pet.id WHERE PetType = 'cat';
+Query: SELECT FirstName,LastName,PetName FROM People JOIN Pet ON People.id = Pet.id WHERE PetType = 'cat';
 Result:
 =========  ========  ========
 FirstName  LastName  PetName
 =========  ========  ========
 Paula      Brown     Fluff
 Steven     Jones     Allie
 =========  ========  ========
-.. _Regular_expression: https://docs.python.org/release/2.7/library/re.html
-.. _SQLite: http://www.sqlite.org/index.html
 ]]></help>
 </tool>

Mercurial > repos > jjohnson > query_tabular

comparison query_tabular.xml @ 20:ab27c4bd14b9 draft