iuc / sqlite_to_tabular: macros.xml @ 14:f768f6183386 (draft)
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit eb95b169fd23c0c5d75abd6e54042d2c9a882539
| author   | iuc                             |
|----------|---------------------------------|
| date     | Wed, 13 Sep 2023 12:15:59 +0000 |
| parents  | c29d2f80a066                    |
| children | c4d18aa4ec4a                    |
<macros>
    <token name="@LINEFILTERS@">
        <![CDATA[
        ## set linefilters to the
        #set $input_filters = []
        #for $fi in $linefilters:
            #if $fi.filter.filter_type == 'skip':
                #set $skip_lines = None
                #if str($fi.filter.skip_lines) != '':
                    #set $skip_lines = int($fi.filter.skip_lines)
                #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:
                    #set $skip_lines = int($tbl.table.metadata.comment_lines)
                #end if
                #if $skip_lines is not None:
                    #set $filter_dict = dict()
                    #set $filter_dict['filter'] = str($fi.filter.filter_type)
                    #set $filter_dict['count'] = $skip_lines
                    #silent $input_filters.append($filter_dict)
                #end if
            #elif $fi.filter.filter_type == 'comment':
                #set $filter_dict = dict()
                #set $filter_dict['filter'] = 'regex'
                #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join([chr(int(x)).replace('|','[|]') for x in (str($fi.filter.comment_char)).split(',')])
                #set $filter_dict['action'] = 'exclude_match'
                #silent $input_filters.append($filter_dict)
            #elif $fi.filter.filter_type == 'regex':
                #set $filter_dict = dict()
                #set $filter_dict['filter'] = str($fi.filter.filter_type)
                #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
                #set $filter_dict['action'] = str($fi.filter.regex_action)
                #silent $input_filters.append($filter_dict)
            #elif $fi.filter.filter_type == 'select_columns':
                #set $filter_dict = dict()
                #set $filter_dict['filter'] = str($fi.filter.filter_type)
                #set $filter_dict['columns'] = [int($c) for $c in str($fi.filter.columns).replace('c','').split(',')]
                #silent $input_filters.append($filter_dict)
            #elif $fi.filter.filter_type == 'select_column_slices':
                #set $filter_dict = dict()
                #set $filter_dict['filter'] = str($fi.filter.filter_type)
                #set $filter_dict['columns'] = [$c for $c in str($fi.filter.columns).split(',')]
                #silent $input_filters.append($filter_dict)
            #elif $fi.filter.filter_type == 'replace':
                #set $filter_dict = dict()
                #set $filter_dict['filter'] = str($fi.filter.filter_type)
                #set $filter_dict['column'] = int(str($fi.filter.column).replace('c',''))
                #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
                #set $filter_dict['replace'] = str($fi.filter.regex_replace)
                #if $fi.filter.add:
                    #set $filter_dict['add'] = str($fi.filter.add)
                #end if
                #silent $input_filters.append($filter_dict)
            #elif str($fi.filter.filter_type).endswith('pend_line_num'):
                #set $filter_dict = dict()
                #set $filter_dict['filter'] = str($fi.filter.filter_type)
                #silent $input_filters.append($filter_dict)
            #elif str($fi.filter.filter_type).endswith('pend_text'):
                #set $filter_dict = dict()
                #set $filter_dict['filter'] = str($fi.filter.filter_type)
                #set $filter_dict['column_text'] = str($fi.filter.column_text)
                #silent $input_filters.append($filter_dict)
            #elif str($fi.filter.filter_type).endswith('pend_dataset_name'):
                #set $filter_dict = dict()
                #set $filter_dict['filter'] = str($fi.filter.filter_type).replace('dataset_name', 'text')
                #set $filter_dict['column_text'] = $dataset_name
                #silent $input_filters.append($filter_dict)
            #elif $fi.filter.filter_type == 'normalize':
                #set $filter_dict = dict()
                #set $filter_dict['filter'] = str($fi.filter.filter_type)
                #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
                #set $filter_dict['separator'] = str($fi.filter.separator)
                #silent $input_filters.append($filter_dict)
            #end if
        #end for
        ]]>
    </token>
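    <!--
        Illustration only (not part of the macro): the @LINEFILTERS@ token above assembles
        $input_filters as a Python list of dicts whose keys mirror the #set statements. For a run
        that selects the comment char filter (character codes 35 and 64, i.e. '#' and '@') followed
        by a select columns filter on c1,c4,c2, the assembled list would look roughly like this
        hypothetical Python literal:

        input_filters = [
            {"filter": "regex", "pattern": "^(#|@).*$", "action": "exclude_match"},
            {"filter": "select_columns", "columns": [1, 4, 2]},
        ]
    -->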
    <token name="@RESULT_HEADER@">
        <![CDATA[
        #if $query_result.header == 'yes':
            #if $query_result.header_prefix:
                #set $header_prefix = chr(int(str($query_result.header_prefix)))
                --comment_char='$header_prefix'
            #end if
        #else
            --no_header
        #end if
        ]]>
    </token>
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="3.7">python</requirement>
        </requirements>
    </xml>
    <xml name="comment_char_options">
        <option value="62">&gt;</option>
        <option value="64">@</option>
        <option value="43">+</option>
        <option value="60">&lt;</option>
        <option value="42">*</option>
        <option value="45">-</option>
        <option value="61">=</option>
        <option value="124">|</option>
        <option value="63">?</option>
        <option value="36">$</option>
        <option value="46">.</option>
        <option value="58">:</option>
        <option value="38">&amp;</option>
        <option value="37">%</option>
        <option value="94">^</option>
        <option value="35" selected="true">#</option>
        <option value="33">!</option>
    </xml>
    <xml name="result_results_header_line">
        <conditional name="query_result">
            <param name="header" type="select" label="include query result column headers">
                <option value="yes">Yes</option>
                <option value="no">No</option>
            </param>
            <when value="yes">
                <param name="header_prefix" type="select" optional="true" label="Prefix character for column_header line">
                    <option value="">no comment character prefix</option>
                    <expand macro="comment_char_options" />
                </param>
            </when>
            <when value="no"/>
        </conditional>
    </xml>
    <xml name="sql_query_input">
        <param name="sqlquery" type="text" area="true" size="20x80" value="" optional="true" label="SQL Query to generate tabular output">
            <help>By default: tables are named: t1,t2,...,tn and columns in each table: c1,c2,...,cn</help>
            <sanitizer sanitize="False"/>
            <validator type="regex" message="">(?ims)^\s*select\s+.*\s+from\s+.*$</validator>
        </param>
    </xml>
    <xml name="macro_line_filters">
        <repeat name="linefilters" title="Filter Tabular Input Lines">
            <conditional name="filter">
                <param name="filter_type" type="select" label="Filter By">
                    <option value="skip">skip leading lines</option>
                    <option value="comment">comment char</option>
                    <option value="regex">by regex expression matching</option>
                    <option value="select_columns">select columns</option>
                    <option value="select_column_slices">select columns by indices/slices</option>
                    <option value="replace">regex replace value in column</option>
                    <option value="prepend_line_num">prepend a line number column</option>
                    <option value="append_line_num">append a line number column</option>
                    <option value="prepend_dataset_name">prepend a column with the dataset name</option>
                    <option value="append_dataset_name">append a column with the dataset name</option>
                    <option value="prepend_text">prepend a column with the given text</option>
                    <option value="append_text">append a column with the given text</option>
                    <option value="normalize">normalize list columns, replicates row for each item in list</option>
                </param>
                <when value="skip">
                    <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" help="Leave blank to use the comment lines metadata for this dataset" />
                </when>
                <when value="comment">
                    <param name="comment_char" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are skipped">
                        <expand macro="comment_char_options" />
                    </param>
                </when>
                <when value="prepend_line_num"/>
                <when value="append_line_num"/>
                <when value="prepend_dataset_name"/>
                <when value="append_dataset_name"/>
                <when value="prepend_text">
                    <param name="column_text" type="text" value="" label="text for column"/>
                </when>
                <when value="append_text">
                    <param name="column_text" type="text" value="" label="text for column"/>
                </when>
                <when value="regex">
<param name="regex_pattern" type="text" value="" label="regex pattern"> <sanitizer sanitize="False"/> </param> <param name="regex_action" type="select" label="action for regex match"> <option value="exclude_match">exclude line on pattern match</option> <option value="include_match">include line on pattern match</option> <option value="exclude_find">exclude line if pattern found</option> <option value="include_find">include line if pattern found</option> </param> </when> <when value="select_columns"> <param name="columns" type="text" value="" label="enter column numbers to keep" help="example: 1,4,2 or c1,c4,c2(selects the first,fourth, and second columns)"> <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator> </param> </when> <when value="select_column_slices"> <param name="columns" type="text" value="" label="enter indices or slices of the columns to keep"> <help><![CDATA[ Python offset indexes or slices. Examples: <ul> <li>Column offset indexes: 0,3,1 (selects the first,fourth, and second columns)</li> <li>Negative column numbers: -1,-2 (selects the last, and second last columns)</li> <li>python slices ( slice(start, stop[, step]) select a range of columns): <li> <ul> <li>0:3 or :3 (selects the first 3 columns)</li> <li>3:5 (selects the fourth and fifth columns)</li> <li>2: (selects all columns after the second)</li> <li>-2: (selects the last 2 columns)</li> <li>2::-1 (selects the first 3 columns n reverse order: third,second,first)</li> </ul> </ul> ]]></help> <validator type="regex" message="Column ordinal positions separated by commas">^(-?[1-9]\d*|((-?\d+)?:(-?\d*(:-?\d*)?)))(,(-?[1-9]\d*|((-?\d+)?:(-?\d*(:-?\d*)?))))*$</validator> </param> </when> <when value="replace"> <param name="column" type="text" value="" label="enter column number to replace" help="example: 1 or c1 (selects the first column)"> <validator type="regex" message="Column ordinal position">^(c?[1-9]\d*)$</validator> </param> <param name="regex_pattern" type="text" value="" label="regex pattern"> <sanitizer sanitize="False"/> </param> <param name="regex_replace" type="text" value="" label="replacement expression"> <sanitizer sanitize="False"/> </param> <param name="add" type="select" optional="true" label="Instead of replacing, Add as new column:"> <option value="prepend">prepend to beginning of row</option> <option value="append">append to the end of row</option> <option value="before">insert before column field</option> <option value="after">insert after column field</option> </param> </when> <when value="normalize"> <param name="columns" type="text" value="" label="enter column numbers to normalize"> <help><![CDATA[ example: 2,4 or c2,c4 (selects the second, and fourth columns) If multiple columns are selected, they should have the same length and separator on each line ]]></help> <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator> </param> <param name="separator" type="text" value="," label="List item delimiter in column"> <sanitizer sanitize="False"/> <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator> </param> </when> </conditional> </repeat> </xml> <token name="@LINEFILTERS_HELP@"> <![CDATA[ **Input Line Filters** As a tabular file is being read, line filters may be applied: - skip leading lines - skip the first *number* of lines - comment char - omit any lines that start with the specified comment character - by regex expression matching 
    <token name="@LINEFILTERS_HELP@">
        <![CDATA[
**Input Line Filters**

As a tabular file is being read, line filters may be applied:

- skip leading lines

  - skip the first *number* of lines

- comment char

  - omit any lines that start with the specified comment character

- by regex expression matching

  - *include/exclude* lines that match the regex expression

- select columns

  - choose to include only selected columns in the order specified

- select columns by indices/slices

  - *indices or slices* of the columns to keep (python_list_ indexing)

- regex replace value in column

  - replace a field in a column using a regex substitution (good for date reformatting)

- regex replace value in column

  - add a new column using a regex substitution of a column value

- prepend a line number column

  - each line has the *ordinal* value of the line read by this filter as the first column

- append a line number column

  - each line has the *ordinal* value of the line read by this filter as the last column

- prepend a text column

  - each line has the text string as the first column

- append a text column

  - each line has the text string as the last column

- prepend the dataset name

  - each line has the *dataset name* as the first column

- append the dataset name

  - each line has the *dataset name* as the last column

- normalize list columns

  - replicates the line for each item in the specified list *columns*

.. _python_list: https://docs.python.org/3/library/stdtypes.html#common-sequence-operations
        ]]>
    </token>
    <token name="@LINEFILTERS_HELP_EXAMPLE@">
        <![CDATA[
**Line Filtering Example**

*(Six filters are applied as the following file is read)*

::

    Input Tabular File:

    #People with pets
    Pets  FirstName  LastName  DOB       PetNames   PetType
    2     Paula      Brown     24/05/78  Rex,Fluff  dog,cat
    1     Steven     Jones     04/04/74  Allie      cat
    0     Jane       Doe       24/05/78
    1     James      Smith     20/10/80  Spot

    Filter 1 - append a line number column:

    #People with pets                                         1
    Pets  FirstName  LastName  DOB       PetNames   PetType   2
    2     Paula      Brown     24/05/78  Rex,Fluff  dog,cat   3
    1     Steven     Jones     04/04/74  Allie      cat       4
    0     Jane       Doe       24/05/78                       5
    1     James      Smith     20/10/80  Spot                 6

    Filter 2 - by regex expression matching [include]: '^\d+' (include lines that start with a number)

    2     Paula      Brown     24/05/78  Rex,Fluff  dog,cat   3
    1     Steven     Jones     04/04/74  Allie      cat       4
    0     Jane       Doe       24/05/78                       5
    1     James      Smith     20/10/80  Spot                 6

    Filter 3 - append a line number column:

    2     Paula      Brown     24/05/78  Rex,Fluff  dog,cat   3  1
    1     Steven     Jones     04/04/74  Allie      cat       4  2
    0     Jane       Doe       24/05/78                       5  3
    1     James      Smith     20/10/80  Spot                 6  4

    Filter 4 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format)

    2     Paula      Brown     1978-05-24  Rex,Fluff  dog,cat   3  1
    1     Steven     Jones     1974-04-04  Allie      cat       4  2
    0     Jane       Doe       1978-05-24                       5  3
    1     James      Smith     1980-10-20  Spot                 6  4

    Filter 5 - normalize list columns[5,6]:

    2     Paula      Brown     1978-05-24  Rex    dog   3  1
    2     Paula      Brown     1978-05-24  Fluff  cat   3  1
    1     Steven     Jones     1974-04-04  Allie  cat   4  2
    0     Jane       Doe       1978-05-24                5  3
    1     James      Smith     1980-10-20  Spot          6  4

    Filter 6 - select columns by indices/slices: '1:6'

    Paula   Brown   1978-05-24  Rex    dog
    Paula   Brown   1978-05-24  Fluff  cat
    Steven  Jones   1974-04-04  Allie  cat
    Jane    Doe     1978-05-24
    James   Smith   1980-10-20  Spot
        ]]>
    </token>
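    <!--
        Illustration only: Filter 4 in the example above is an ordinary regex substitution.
        In plain Python terms (standard re module, illustrative values):

        import re
        re.sub(r'(\d+)/(\d+)/(\d+)', r'19\3-\2-\1', '24/05/78')   # returns '1978-05-24'
    -->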
    <token name="@QUERY_HELP@">
        <![CDATA[
For help in using SQLite_ see: http://www.sqlite.org/docs.html

**NOTE:** SQLite date fields must be input in the format *YYYY-MM-DD*, for example: 2015-09-30

See: http://www.sqlite.org/lang_datefunc.html

**Example**

Given 2 tabular datasets: *customers* and *sales*

Dataset *customers*

  Table name: "customers"

  Column names: "CustomerID,FirstName,LastName,Email,DOB,Phone"

  =========== ========== ========== ===================== ========== ============
  #CustomerID FirstName  LastName   Email                 DOB        Phone
  =========== ========== ========== ===================== ========== ============
  1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
  2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
  3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
  4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
  =========== ========== ========== ===================== ========== ============

Dataset *sales*

  Table name: "sales"

  Column names: "CustomerID,Date,SaleAmount"

  ============= ============ ============
  #CustomerID   Date         SaleAmount
  ============= ============ ============
  2             2004-05-06   100.22
  1             2004-05-07   99.95
  3             2004-05-07   122.95
  3             2004-05-13   100.00
  4             2004-05-22   555.55
  ============= ============ ============

The query

::

    SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales"
    FROM customers join sales on customers.CustomerID = sales.CustomerID
    GROUP BY customers.CustomerID
    ORDER BY TotalSales DESC;

Produces this tabular output:

  ========== ======== ==========
  #FirstName LastName TotalSales
  ========== ======== ==========
  James      Smith    555.55
  Paula      Brown    222.95
  Steven     Goldfish 100.22
  John       Smith    99.95
  ========== ======== ==========

If the optional Table name and Column names inputs are not used, the query would be:

::

    SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales"
    FROM t1 join t2 on t1.c1 = t2.c1
    GROUP BY t1.c1
    ORDER BY TotalSales DESC;

You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5:

  Column names: ,FirstName,LastName,,BirthDate

  Results in the following database table

  =========== ========== ========== ===================== ========== ============
  #c1         FirstName  LastName   c4                    BirthDate  c6
  =========== ========== ========== ===================== ========== ============
  1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
  2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
  3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
  4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
  =========== ========== ========== ===================== ========== ============

Regular_expression_ functions are included for:

::

    matching:     re_match('pattern',column)

    SELECT t1.FirstName, t1.LastName
    FROM t1
    WHERE re_match('^.*\.(net|org)$',c4)

Results:

  =========== ==========
  #FirstName  LastName
  =========== ==========
  Steven      Goldfish
  Paula       Brown
  =========== ==========

::

    searching:    re_search('pattern',column)
    substituting: re_sub('pattern','replacement',column)

    SELECT t1.FirstName, t1.LastName,
           re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB"
    FROM t1
    WHERE re_search('[hp]er',c4)

Results:

  =========== ========== ==========
  #FirstName  LastName   DOB
  =========== ========== ==========
  Steven      Goldfish   04/04/74
  Paula       Brown      24/05/78
  James       Smith      20/10/80
  =========== ========== ==========

Math_ functions *( python math library: https://docs.python.org/3.6/library/math.html )*:

  acos(x), acosh(x), asin(x), asinh(x), atan(x), atanh(x), atan2(x, y), ceil(x), cos(x), cosh(x),
  degrees(x), exp(x), expm1(x), fabs(x), floor(x), fmod(x, y), log(b,x), log(x), log10(x), log1p(x),
  log2(x), mod(x, y), pow(x, y), radians(x), sin(x), sinh(x), sqrt(x), tan(x), tanh(x), trunc(x)

::

    Query:

    SELECT SaleAmount, floor(SaleAmount) as "dollars" FROM sales

Results:

  ============ ========
  SaleAmount   dollars
  ============ ========
  100.22       100
  99.95        99
  122.95       122
  100.00       100
  555.55       555
  ============ ========

.. _Regular_expression: https://docs.python.org/3.9/library/re.html
.. _Math: https://docs.python.org/3.9/library/math.html
.. _SQLite: http://www.sqlite.org/index.html
.. _SQLite_functions: http://www.sqlite.org/docs.html
        ]]>
    </token>
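    <!--
        Illustration only (not the tool's actual code): the re_match/re_search/re_sub functions
        documented in @QUERY_HELP@ are Python backed SQLite user functions. A minimal sketch of how
        functions with those signatures can be registered with the standard sqlite3 module; table
        and values are made up for the example.

        import re
        import sqlite3

        conn = sqlite3.connect(':memory:')
        conn.create_function('re_match', 2, lambda p, s: re.match(p, s or '') is not None)
        conn.create_function('re_search', 2, lambda p, s: re.search(p, s or '') is not None)
        conn.create_function('re_sub', 3, lambda p, r, s: re.sub(p, r, s or ''))

        conn.execute('CREATE TABLE t1 (c1 TEXT)')
        conn.executemany('INSERT INTO t1 VALUES (?)', [('goldfish@fishhere.net',), ('jim@supergig.co.uk',)])
        print(conn.execute("SELECT c1 FROM t1 WHERE re_match('^.*[.](net|org)$', c1)").fetchall())
    -->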
    <xml name="citations">
        <citations>
            <citation type="doi">10.12688/f1000research.16450.1</citation>
            <yield />
        </citations>
    </xml>
</macros>