Mercurial > repos > jjohnson > query_tabular
view macros.xml @ 23:5bb148ee644a draft default tip
Deleted selected files
author | jjohnson |
---|---|
date | Mon, 17 Jul 2017 15:22:38 -0400 |
parents | ab27c4bd14b9 |
children |
line wrap: on
line source
<macros>
    <!--
      @LINEFILTERS@
      Cheetah fragment that walks the "linefilters" repeat (see the
      macro_line_filters macro below) and builds $input_filters: a list holding
      one dict per configured filter. Every dict has a 'filter' key naming the
      filter; the remaining keys depend on the filter type.
      The fragment reads $linefilters and, for the 'skip' fallback,
      $tbl.table.metadata.comment_lines, so both must be in scope where the
      token is expanded.
    -->
    <token name="@LINEFILTERS@">
<![CDATA[
  ## Build input_filters: one dict per entry of the linefilters repeat
  #set $input_filters = []
  #for $fi in $linefilters:
    #if $fi.filter.filter_type == 'skip':
      ## An explicit count wins; otherwise fall back to the comment_lines metadata of the dataset
      #set $skip_lines = None
      #if str($fi.filter.skip_lines) != '':
        #set $skip_lines = int($fi.filter.skip_lines)
      #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:
        #set $skip_lines = int($tbl.table.metadata.comment_lines)
      #end if
      #if $skip_lines is not None:
        #set $filter_dict = dict()
        #set $filter_dict['filter'] = str($fi.filter.filter_type)
        #set $filter_dict['count'] = $skip_lines
        #silent $input_filters.append($filter_dict)
      #end if
    #elif $fi.filter.filter_type == 'comment':
      ## Option values are decimal character codes; anything non-numeric (e.g. an empty selection) is ignored
      #set $comment_codes = [x for x in str($fi.filter.comment_char).split(',') if x.isdigit()]
      #if $comment_codes:
        #set $filter_dict = dict()
        #set $filter_dict['filter'] = 'regex'
        ## Backslash-escape every character so that regex metacharacters match literally
        ## NOTE(review): assumes the consumer of the pattern uses Python re syntax - confirm
        #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join(['\\' + chr(int(x)) for x in $comment_codes])
        #set $filter_dict['action'] = 'exclude_match'
        #silent $input_filters.append($filter_dict)
      #end if
    #elif $fi.filter.filter_type == 'regex':
      #set $filter_dict = dict()
      #set $filter_dict['filter'] = str($fi.filter.filter_type)
      #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
      #set $filter_dict['action'] = str($fi.filter.regex_action)
      #silent $input_filters.append($filter_dict)
    #elif $fi.filter.filter_type == 'select_columns':
      ## Columns are entered as 1,4,2 or c1,c4,c2; strip the optional c prefix
      #set $filter_dict = dict()
      #set $filter_dict['filter'] = str($fi.filter.filter_type)
      #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
      #silent $input_filters.append($filter_dict)
    #elif $fi.filter.filter_type == 'replace':
      #set $filter_dict = dict()
      #set $filter_dict['filter'] = str($fi.filter.filter_type)
      #set $filter_dict['column'] = int(str($fi.filter.column).replace('c',''))
      #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
      #set $filter_dict['replace'] = str($fi.filter.regex_replace)
      #silent $input_filters.append($filter_dict)
    #elif str($fi.filter.filter_type).endswith('pend_line_num'):
      ## Covers both prepend_line_num and append_line_num
      #set $filter_dict = dict()
      #set $filter_dict['filter'] = str($fi.filter.filter_type)
      #silent $input_filters.append($filter_dict)
    #elif str($fi.filter.filter_type).endswith('pend_text'):
      ## Covers both prepend_text and append_text
      #set $filter_dict = dict()
      #set $filter_dict['filter'] = str($fi.filter.filter_type)
      #set $filter_dict['column_text'] = str($fi.filter.column_text)
      #silent $input_filters.append($filter_dict)
    #elif $fi.filter.filter_type == 'normalize':
      #set $filter_dict = dict()
      #set $filter_dict['filter'] = str($fi.filter.filter_type)
      #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
      #set $filter_dict['separator'] = str($fi.filter.separator)
      #silent $input_filters.append($filter_dict)
    #end if
  #end for
]]>
    </token>

    <!--
      macro_line_filters
      Input form for the line filters: a repeat named "linefilters" whose
      "filter" conditional exposes the parameters read by @LINEFILTERS@.
    -->
    <xml name="macro_line_filters">
        <repeat name="linefilters" title="Filter Tabular Input Lines">
            <conditional name="filter">
                <param name="filter_type" type="select" label="Filter By">
                    <option value="skip">skip leading lines</option>
                    <option value="comment">comment char</option>
                    <option value="regex">by regex expression matching</option>
                    <option value="select_columns">select columns</option>
                    <option value="replace">regex replace value in column</option>
                    <option value="prepend_line_num">prepend a line number column</option>
                    <option value="append_line_num">append a line number column</option>
                    <option value="prepend_text">prepend a column with the given text</option>
                    <option value="append_text">append a column with the given text</option>
                    <option value="normalize">normalize list columns, replicates row for each item in list</option>
                </param>
                <when value="skip">
                    <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines"
                           help="Leave blank to use the comment lines metadata for this dataset" />
                </when>
                <when value="comment">
                    <!-- Each option value is the decimal character code of its label; @LINEFILTERS@ converts it back with chr() -->
                    <param name="comment_char" type="select" display="checkboxes" multiple="True"
                           label="Ignore lines beginning with these characters" help="lines beginning with these are skipped">
                        <option value="62">&gt;</option>
                        <option value="64">@</option>
                        <option value="43">+</option>
                        <option value="60">&lt;</option>
                        <option value="42">*</option>
                        <option value="45">-</option>
                        <option value="61">=</option>
                        <option value="124">|</option>
                        <option value="63">?</option>
                        <option value="36">$</option>
                        <option value="46">.</option>
                        <option value="58">:</option>
                        <option value="38">&amp;</option>
                        <option value="37">%</option>
                        <option value="94">^</option>
                        <option value="35">#</option>
                        <option value="33">!</option>
                    </param>
                </when>
                <when value="prepend_line_num"/>
                <when value="append_line_num"/>
                <when value="prepend_text">
                    <param name="column_text" type="text" value="" label="text for column">
                    </param>
                </when>
                <when value="append_text">
                    <param name="column_text" type="text" value="" label="text for column">
                    </param>
                </when>
                <when value="regex">
                    <param name="regex_pattern" type="text" value="" label="regex pattern">
                        <sanitizer sanitize="False"/>
                    </param>
                    <param name="regex_action" type="select" label="action for regex match">
                        <option value="exclude_match">exclude line on pattern match</option>
                        <option value="include_match">include line on pattern match</option>
                        <option value="exclude_find">exclude line if pattern found</option>
                        <option value="include_find">include line if pattern found</option>
                    </param>
                </when>
                <when value="select_columns">
                    <param name="columns" type="text" value="" label="enter column numbers to keep"
                           help="example: 1,4,2 or c1,c4,c2(selects the first,fourth, and second columns)">
                        <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator>
                    </param>
                </when>
                <when value="replace">
                    <param name="column" type="text" value="" label="enter column number to replace"
                           help="example: 1 or c1 (selects the first column)">
                        <!-- The pattern accepts exactly one column, so the message must not mention commas -->
                        <validator type="regex" message="A single column ordinal position, e.g. 1 or c1">^(c?[1-9]\d*)$</validator>
                    </param>
                    <param name="regex_pattern" type="text" value="" label="regex pattern">
                        <sanitizer sanitize="False"/>
                    </param>
                    <param name="regex_replace" type="text" value="" label="replacement expression">
                        <sanitizer sanitize="False"/>
                    </param>
                </when>
                <when value="normalize">
                    <param name="columns" type="text" value="" label="enter column numbers to normalize">
                        <help><![CDATA[
example: 2,4 or c2,c4 (selects the second, and fourth columns)
If multiple columns are selected, they should have the same length and separator on each line
]]></help>
                        <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator>
                    </param>
                    <param name="separator" type="text" value="," label="List item delimiter in column">
                        <sanitizer sanitize="False"/>
                        <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator>
                    </param>
                </when>
            </conditional>
        </repeat>
    </xml>

    <!-- @LINEFILTERS_HELP@: reStructuredText summary of the available line filters -->
    <token name="@LINEFILTERS_HELP@">
<![CDATA[
**Input Line Filters**

As a tabular file is being read, line filters may be applied.

::

  - skip leading lines              skip the first *number* of lines
  - comment char                    omit any lines that start with the specified comment character
  - by regex expression matching    *include/exclude* lines that match the regex expression
  - select columns                  choose to include only selected columns in the order specified
  - regex replace value in column   replace a field in a column using a regex substitution (good for date reformatting)
  - prepend a line number column    each line has the ordinal value of the line read by this filter as the first column
  - append a line number column     each line has the ordinal value of the line read by this filter as the last column
  - prepend a text column           each line has the text string as the first column
  - append a text column            each line has the text string as the last column
  - normalize list columns          replicates the line for each item in the specified list *columns*

]]>
    </token>

    <!-- @LINEFILTERS_HELP_EXAMPLE@: reStructuredText walk-through of six chained line filters -->
    <token name="@LINEFILTERS_HELP_EXAMPLE@">
<![CDATA[
**Line Filtering Example**

*(Six filters are applied as the following file is read)*

::

  Input Tabular File:

  #People with pets
  Pets FirstName LastName DOB        PetNames  PetType
  2    Paula     Brown    24/05/78   Rex,Fluff dog,cat
  1    Steven    Jones    04/04/74   Allie     cat
  0    Jane      Doe      24/05/78
  1    James     Smith    20/10/80   Spot


  Filter 1 - append a line number column:

  #People with pets                                    1
  Pets FirstName LastName DOB        PetNames  PetType 2
  2    Paula     Brown    24/05/78   Rex,Fluff dog,cat 3
  1    Steven    Jones    04/04/74   Allie     cat     4
  0    Jane      Doe      24/05/78                     5
  1    James     Smith    20/10/80   Spot              6

  Filter 2 - by regex expression matching [include]: '^\d+' (include lines that start with a number)

  2    Paula     Brown    24/05/78   Rex,Fluff dog,cat 3
  1    Steven    Jones    04/04/74   Allie     cat     4
  0    Jane      Doe      24/05/78                     5
  1    James     Smith    20/10/80   Spot              6

  Filter 3 - append a line number column:

  2    Paula     Brown    24/05/78   Rex,Fluff dog,cat 3 1
  1    Steven    Jones    04/04/74   Allie     cat     4 2
  0    Jane      Doe      24/05/78                     5 3
  1    James     Smith    20/10/80   Spot              6 4

  Filter 4 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format)

  2    Paula     Brown    1978-05-24 Rex,Fluff dog,cat 3 1
  1    Steven    Jones    1974-04-04 Allie     cat     4 2
  0    Jane      Doe      1978-05-24                   5 3
  1    James     Smith    1980-10-20 Spot              6 4

  Filter 5 - normalize list columns[5,6]:

  2    Paula     Brown    1978-05-24 Rex       dog     3 1
  2    Paula     Brown    1978-05-24 Fluff     cat     3 1
  1    Steven    Jones    1974-04-04 Allie     cat     4 2
  0    Jane      Doe      1978-05-24                   5 3
  1    James     Smith    1980-10-20 Spot              6 4

  Filter 6 - append a line number column:

  2    Paula     Brown    1978-05-24 Rex       dog     3 1 1
  2    Paula     Brown    1978-05-24 Fluff     cat     3 1 2
  1    Steven    Jones    1974-04-04 Allie     cat     4 2 3
  0    Jane      Doe      1978-05-24                   5 3 4
  1    James     Smith    1980-10-20 Spot              6 4 5

]]>
    </token>

    <!-- @QUERY_HELP@: reStructuredText help for writing SQLite queries, including the regex helper functions -->
    <token name="@QUERY_HELP@">
<![CDATA[
For help in using SQLite_ see:  http://www.sqlite.org/docs.html

**NOTE:** input for SQLite dates input field must be in the format: *YYYY-MM-DD* for example: 2015-09-30

See: http://www.sqlite.org/lang_datefunc.html

**Example**

Given 2 tabular datasets: *customers* and *sales*

Dataset *customers*

Table name: "customers"

Column names: "CustomerID,FirstName,LastName,Email,DOB,Phone"

=========== ========== ========== ===================== ========== ============
#CustomerID FirstName  LastName   Email                 DOB        Phone
=========== ========== ========== ===================== ========== ============
1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
=========== ========== ========== ===================== ========== ============

Dataset *sales*

Table name: "sales"

Column names: "CustomerID,Date,SaleAmount"

============= ============ ============
#CustomerID   Date         SaleAmount
============= ============ ============
2             2004-05-06   100.22
1             2004-05-07   99.95
3             2004-05-07   122.95
3             2004-05-13   100.00
4             2004-05-22   555.55
============= ============ ============

The query

::

  SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales"
  FROM customers join sales on customers.CustomerID = sales.CustomerID
  GROUP BY customers.CustomerID ORDER BY TotalSales DESC;

Produces this tabular output:

========== ======== ==========
#FirstName LastName TotalSales
========== ======== ==========
James      Smith    555.55
Paula      Brown    222.95
Steven     Goldfish 100.22
John       Smith    99.95
========== ======== ==========

If the optional Table name and Column names inputs are not used, the query would be:

::

  SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales"
  FROM t1 join t2 on t1.c1 = t2.c1
  GROUP BY t1.c1 ORDER BY TotalSales DESC;

You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5:

Column names: ,FirstName,LastName,,BirthDate

Results in the following data base table

=========== ========== ========== ===================== ========== ============
#c1         FirstName  LastName   c4                    BirthDate  c6
=========== ========== ========== ===================== ========== ============
1           John       Smith      John.Smith@yahoo.com  1968-02-04 626 222-2222
2           Steven     Goldfish   goldfish@fishhere.net 1974-04-04 323 455-4545
3           Paula      Brown      pb@herowndomain.org   1978-05-24 416 323-3232
4           James      Smith      jim@supergig.co.uk    1980-10-20 416 323-8888
=========== ========== ========== ===================== ========== ============

Regular_expression_ functions are included for:

::

  matching:      re_match('pattern',column)

  SELECT t1.FirstName, t1.LastName
  FROM t1
  WHERE re_match('^.*\.(net|org)$',c4)

Results:

=========== ==========
#FirstName  LastName
=========== ==========
Steven      Goldfish
Paula       Brown
=========== ==========

::

  searching:     re_search('pattern',column)
  substituting:  re_sub('pattern','replacement',column)

  SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB"
  FROM t1
  WHERE re_search('[hp]er',c4)

Results:

=========== ========== ==========
#FirstName  LastName   DOB
=========== ========== ==========
Steven      Goldfish   04/04/74
Paula       Brown      24/05/78
James       Smith      20/10/80
=========== ========== ==========

.. _Regular_expression: https://docs.python.org/release/2.7/library/re.html
.. _SQLite: http://www.sqlite.org/index.html
.. _SQLite_functions: http://www.sqlite.org/docs.html

]]>
    </token>
</macros>