annotate macros.xml @ 21:357fe86f245d draft

Uploaded
author jjohnson
date Fri, 14 Jul 2017 17:34:22 -0400
parents ab27c4bd14b9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
20
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
1 <macros>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
2 <token name="@LINEFILTERS@">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
3 <![CDATA[
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
4 ## translate the linefilters repeat into a list of filter dicts: input_filters
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
5 #set $input_filters = []
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
6 #for $fi in $linefilters:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
7 #if $fi.filter.filter_type == 'skip':
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
8 #set $skip_lines = None
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
9 #if str($fi.filter.skip_lines) != '':
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
10 #set $skip_lines = int($fi.filter.skip_lines)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
11 #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
12 #set $skip_lines = int($tbl.table.metadata.comment_lines)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
13 #end if
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
14 #if $skip_lines is not None:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
15 #set $filter_dict = dict()
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
16 #set $filter_dict['filter'] = str($fi.filter.filter_type)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
17 #set $filter_dict['count'] = $skip_lines
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
18 #silent $input_filters.append($filter_dict)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
19 #end if
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
20 #elif $fi.filter.filter_type == 'comment':
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
21 #set $filter_dict = dict()
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
22 #set $filter_dict['filter'] = 'regex'
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
## Build a regex that matches lines starting with any selected comment character.
## Every character is backslash-escaped so that regex metacharacters (+ * ? . ^ | and the dollar sign) match literally.
23 #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join(['\\' + chr(int(x)) for x in (str($fi.filter.comment_char)).split(',')])
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
24 #set $filter_dict['action'] = 'exclude_match'
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
25 #silent $input_filters.append($filter_dict)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
26 #elif $fi.filter.filter_type == 'regex':
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
27 #set $filter_dict = dict()
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
28 #set $filter_dict['filter'] = str($fi.filter.filter_type)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
29 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
30 #set $filter_dict['action'] = str($fi.filter.regex_action)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
31 #silent $input_filters.append($filter_dict)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
32 #elif $fi.filter.filter_type == 'select_columns':
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
33 #set $filter_dict = dict()
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
34 #set $filter_dict['filter'] = str($fi.filter.filter_type)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
35 #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
36 #silent $input_filters.append($filter_dict)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
37 #elif $fi.filter.filter_type == 'replace':
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
38 #set $filter_dict = dict()
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
39 #set $filter_dict['filter'] = str($fi.filter.filter_type)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
40 #set $filter_dict['column'] = int(str($fi.filter.column).replace('c',''))
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
41 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
42 #set $filter_dict['replace'] = str($fi.filter.regex_replace)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
43 #silent $input_filters.append($filter_dict)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
44 #elif str($fi.filter.filter_type).endswith('pend_line_num'):
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
45 #set $filter_dict = dict()
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
46 #set $filter_dict['filter'] = str($fi.filter.filter_type)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
47 #silent $input_filters.append($filter_dict)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
48 #elif str($fi.filter.filter_type).endswith('pend_text'):
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
49 #set $filter_dict = dict()
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
50 #set $filter_dict['filter'] = str($fi.filter.filter_type)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
51 #set $filter_dict['column_text'] = str($fi.filter.column_text)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
52 #silent $input_filters.append($filter_dict)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
53 #elif $fi.filter.filter_type == 'normalize':
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
54 #set $filter_dict = dict()
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
55 #set $filter_dict['filter'] = str($fi.filter.filter_type)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
56 #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
57 #set $filter_dict['separator'] = str($fi.filter.separator)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
58 #silent $input_filters.append($filter_dict)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
59 #end if
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
60 #end for
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
61 ]]>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
62 </token>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
63 <xml name="macro_line_filters">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
64 <repeat name="linefilters" title="Filter Tabular Input Lines">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
65 <conditional name="filter">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
66 <param name="filter_type" type="select" label="Filter By">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
67 <option value="skip">skip leading lines</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
68 <option value="comment">comment char</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
69 <option value="regex">by regex expression matching</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
70 <option value="select_columns">select columns</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
71 <option value="replace">regex replace value in column</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
72 <option value="prepend_line_num">prepend a line number column</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
73 <option value="append_line_num">append a line number column</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
74 <option value="prepend_text">prepend a column with the given text</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
75 <option value="append_text">append a column with the given text</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
76 <option value="normalize">normalize list columns, replicates row for each item in list</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
77 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
78 <when value="skip">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
79 <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines"
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
80 help="Leave blank to use the comment lines metadata for this dataset" />
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
81 </when>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
82 <when value="comment">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
83 <param name="comment_char" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are skipped">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
84 <option value="62">&gt;</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
85 <option value="64">@</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
86 <option value="43">+</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
87 <option value="60">&lt;</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
88 <option value="42">*</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
89 <option value="45">-</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
90 <option value="61">=</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
91 <option value="124">|</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
92 <option value="63">?</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
93 <option value="36">$</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
94 <option value="46">.</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
95 <option value="58">:</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
96 <option value="38">&amp;</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
97 <option value="37">%</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
98 <option value="94">^</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
99 <option value="35">&#35;</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
100 <option value="33">!</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
101 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
102 </when>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
103 <when value="prepend_line_num"/>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
104 <when value="append_line_num"/>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
105 <when value="prepend_text">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
106 <param name="column_text" type="text" value="" label="text for column">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
107 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
108 </when>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
109 <when value="append_text">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
110 <param name="column_text" type="text" value="" label="text for column">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
111 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
112 </when>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
113 <when value="regex">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
114 <param name="regex_pattern" type="text" value="" label="regex pattern">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
115 <sanitizer sanitize="False"/>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
116 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
117 <param name="regex_action" type="select" label="action for regex match">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
118 <option value="exclude_match">exclude line on pattern match</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
119 <option value="include_match">include line on pattern match</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
120 <option value="exclude_find">exclude line if pattern found</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
121 <option value="include_find">include line if pattern found</option>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
122 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
123 </when>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
124 <when value="select_columns">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
125 <param name="columns" type="text" value="" label="enter column numbers to keep"
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
126 help="example: 1,4,2 or c1,c4,c2 (selects the first, fourth, and second columns)">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
127 <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
128 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
129 </when>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
130 <when value="replace">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
131 <param name="column" type="text" value="" label="enter column number to replace"
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
132 help="example: 1 or c1 (selects the first column)">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
133 <validator type="regex" message="A single column ordinal position, e.g. 1 or c1">^(c?[1-9]\d*)$</validator>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
134 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
135 <param name="regex_pattern" type="text" value="" label="regex pattern">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
136 <sanitizer sanitize="False"/>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
137 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
138 <param name="regex_replace" type="text" value="" label="replacement expression">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
139 <sanitizer sanitize="False"/>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
140 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
141 </when>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
142 <when value="normalize">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
143 <param name="columns" type="text" value="" label="enter column numbers to normalize">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
144 <help><![CDATA[
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
145 example: 2,4 or c2,c4 (selects the second and fourth columns)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
146 If multiple columns are selected, they should have the same length and separator on each line
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
147 ]]></help>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
148 <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
149 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
150 <param name="separator" type="text" value="," label="List item delimiter in column">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
151 <sanitizer sanitize="False"/>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
152 <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
153 </param>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
154 </when>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
155 </conditional>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
156 </repeat>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
157 </xml>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
158
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
159 <token name="@LINEFILTERS_HELP@">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
160 <![CDATA[
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
161 **Input Line Filters**
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
162
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
163 As a tabular file is being read, line filters may be applied.
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
164
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
165 ::
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
166
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
167 - skip leading lines skip the first *number* of lines
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
168 - comment char omit any lines that start with the specified comment character
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
169 - by regex expression matching *include/exclude* lines that match the regex expression
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
170 - select columns choose to include only selected columns in the order specified
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
171 - regex replace value in column replace a field in a column using a regex substitution (good for date reformatting)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
172 - prepend a line number column each line has the ordinal value of the line read by this filter as the first column
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
173 - append a line number column each line has the ordinal value of the line read by this filter as the last column
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
174 - prepend a text column each line has the text string as the first column
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
175 - append a text column each line has the text string as the last column
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
176 - normalize list columns replicates the line for each item in the specified list *columns*
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
177 ]]>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
178 </token>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
179
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
180 <token name="@LINEFILTERS_HELP_EXAMPLE@">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
181 <![CDATA[
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
182 **Line Filtering Example**
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
183 *(Six filters are applied as the following file is read)*
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
184
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
185 ::
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
186
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
187 Input Tabular File:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
188
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
189 #People with pets
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
190 Pets FirstName LastName DOB PetNames PetType
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
191 2 Paula Brown 24/05/78 Rex,Fluff dog,cat
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
192 1 Steven Jones 04/04/74 Allie cat
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
193 0 Jane Doe 24/05/78
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
194 1 James Smith 20/10/80 Spot
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
195
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
196
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
197 Filter 1 - append a line number column:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
198
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
199 #People with pets 1
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
200 Pets FirstName LastName DOB PetNames PetType 2
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
201 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
202 1 Steven Jones 04/04/74 Allie cat 4
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
203 0 Jane Doe 24/05/78 5
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
204 1 James Smith 20/10/80 Spot 6
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
205
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
206 Filter 2 - by regex expression matching [include]: '^\d+' (include lines that start with a number)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
207
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
208 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
209 1 Steven Jones 04/04/74 Allie cat 4
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
210 0 Jane Doe 24/05/78 5
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
211 1 James Smith 20/10/80 Spot 6
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
212
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
213 Filter 3 - append a line number column:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
214
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
215 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 1
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
216 1 Steven Jones 04/04/74 Allie cat 4 2
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
217 0 Jane Doe 24/05/78 5 3
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
218 1 James Smith 20/10/80 Spot 6 4
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
219
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
220 Filter 4 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
221
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
222 2 Paula Brown 1978-05-24 Rex,Fluff dog,cat 3 1
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
223 1 Steven Jones 1974-04-04 Allie cat 4 2
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
224 0 Jane Doe 1978-05-24 5 3
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
225 1 James Smith 1980-10-20 Spot 6 4
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
226
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
227 Filter 5 - normalize list columns[5,6]:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
228
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
229 2 Paula Brown 1978-05-24 Rex dog 3 1
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
230 2 Paula Brown 1978-05-24 Fluff cat 3 1
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
231 1 Steven Jones 1974-04-04 Allie cat 4 2
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
232 0 Jane Doe 1978-05-24 5 3
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
233 1 James Smith 1980-10-20 Spot 6 4
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
234
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
235 Filter 6 - append a line number column:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
236
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
237 2 Paula Brown 1978-05-24 Rex dog 3 1 1
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
238 2 Paula Brown 1978-05-24 Fluff cat 3 1 2
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
239 1 Steven Jones 1974-04-04 Allie cat 4 2 3
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
240 0 Jane Doe 1978-05-24 5 3 4
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
241 1 James Smith 1980-10-20 Spot 6 4 5
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
242
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
243 ]]>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
244 </token>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
245
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
246 <token name="@QUERY_HELP@">
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
247 <![CDATA[
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
248
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
249 For help in using SQLite_ see: http://www.sqlite.org/docs.html
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
250
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
251 **NOTE:** input for SQLite date fields must be in the format: *YYYY-MM-DD*, for example: 2015-09-30
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
252
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
253 See: http://www.sqlite.org/lang_datefunc.html
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
254
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
255 **Example**
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
256
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
257 Given 2 tabular datasets: *customers* and *sales*
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
258
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
259 Dataset *customers*
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
260
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
261 Table name: "customers"
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
262
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
263 Column names: "CustomerID,FirstName,LastName,Email,DOB,Phone"
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
264
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
265 =========== ========== ========== ===================== ========== ============
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
266 #CustomerID FirstName LastName Email DOB Phone
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
267 =========== ========== ========== ===================== ========== ============
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
268 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
269 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
270 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
271 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
272 =========== ========== ========== ===================== ========== ============
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
273
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
274 Dataset *sales*
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
275
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
276 Table name: "sales"
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
277
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
278 Column names: "CustomerID,Date,SaleAmount"
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
279
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
280 ============= ============ ============
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
281 #CustomerID Date SaleAmount
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
282 ============= ============ ============
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
283 2 2004-05-06 100.22
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
284 1 2004-05-07 99.95
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
285 3 2004-05-07 122.95
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
286 3 2004-05-13 100.00
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
287 4 2004-05-22 555.55
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
288 ============= ============ ============
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
289
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
290 The query
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
291
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
292 ::
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
293
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
294 SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales"
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
295 FROM customers join sales on customers.CustomerID = sales.CustomerID
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
296 GROUP BY customers.CustomerID ORDER BY TotalSales DESC;
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
297
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
298 Produces this tabular output:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
299
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
300 ========== ======== ==========
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
301 #FirstName LastName TotalSales
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
302 ========== ======== ==========
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
303 James Smith 555.55
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
304 Paula Brown 222.95
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
305 Steven Goldfish 100.22
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
306 John Smith 99.95
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
307 ========== ======== ==========
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
308
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
309
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
310 If the optional Table name and Column names inputs are not used, the query would be:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
311
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
312 ::
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
313
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
314 SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales"
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
315 FROM t1 join t2 on t1.c1 = t2.c1
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
316 GROUP BY t1.c1 ORDER BY TotalSales DESC;
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
317
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
318 You can selectively name columns, e.g. on the customers input you could just name columns 2, 3, and 5:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
319
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
320 Column names: ,FirstName,LastName,,BirthDate
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
321
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
322 Results in the following database table
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
323
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
324 =========== ========== ========== ===================== ========== ============
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
325 #c1 FirstName LastName c4 BirthDate c6
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
326 =========== ========== ========== ===================== ========== ============
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
327 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
328 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
329 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
330 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
331 =========== ========== ========== ===================== ========== ============
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
332
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
333
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
334 Regular_expression_ functions are included for:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
335
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
336 ::
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
337
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
338 matching: re_match('pattern',column)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
339
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
340 SELECT t1.FirstName, t1.LastName
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
341 FROM t1
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
342 WHERE re_match('^.*\.(net|org)$',c4)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
343
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
344 Results:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
345
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
346 =========== ==========
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
347 #FirstName LastName
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
348 =========== ==========
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
349 Steven Goldfish
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
350 Paula Brown
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
351 =========== ==========
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
352
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
353
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
354 ::
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
355
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
356 searching: re_search('pattern',column)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
357 substituting: re_sub('pattern','replacement',column)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
358
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
359 SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB"
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
360 FROM t1
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
361 WHERE re_search('[hp]er',c4)
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
362
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
363 Results:
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
364
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
365
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
366 =========== ========== ==========
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
367 #FirstName LastName DOB
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
368 =========== ========== ==========
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
369 Steven Goldfish 04/04/74
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
370 Paula Brown 24/05/78
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
371 James Smith 20/10/80
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
372 =========== ========== ==========
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
373
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
374 .. _Regular_expression: https://docs.python.org/release/2.7/library/re.html
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
375 .. _SQLite: http://www.sqlite.org/index.html
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
376 .. _SQLite_functions: http://www.sqlite.org/docs.html
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
377
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
378
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
379 ]]>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
380 </token>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
381
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
382 </macros>
ab27c4bd14b9 Uploaded
jjohnson
parents:
diff changeset
383