Mercurial > repos > jjohnson > query_tabular
comparison query_tabular.xml @ 20:ab27c4bd14b9 draft
Uploaded
| author | jjohnson |
|---|---|
| date | Fri, 14 Jul 2017 11:39:27 -0400 |
| parents | b9f797bf4f38 |
| children | 357fe86f245d |
comparison
equal
deleted
inserted
replaced
| 19:9d9ab2c69014 | 20:ab27c4bd14b9 |
|---|---|
| 1 <tool id="query_tabular" name="Query Tabular" version="4.0.0"> | 1 <tool id="query_tabular" name="Query Tabular" version="5.0.0"> |
| 2 <description>using sqlite sql</description> | 2 <description>using sqlite sql</description> |
| 3 | |
| 4 <macros> | |
| 5 <import>macros.xml</import> | |
| 6 </macros> | |
| 3 | 7 |
| 4 <requirements> | 8 <requirements> |
| 5 </requirements> | 9 </requirements> |
| 6 <stdio> | 10 <stdio> |
| 7 <exit_code range="1:" /> | 11 <exit_code range="1:" /> |
| 74 #set $jtbl['unique'] = $idx_unique | 78 #set $jtbl['unique'] = $idx_unique |
| 75 #end if | 79 #end if |
| 76 #if len($idx_non) > 0: | 80 #if len($idx_non) > 0: |
| 77 #set $jtbl['index'] = $idx_non | 81 #set $jtbl['index'] = $idx_non |
| 78 #end if | 82 #end if |
| 79 #set $input_filters = [] | 83 #set $linefilters = $tbl.input_opts.linefilters |
| 80 #for $fi in $tbl.input_opts.linefilters: | 84 @LINEFILTERS@ |
| 81 #if $fi.filter.filter_type == 'skip': | |
| 82 #set $skip_lines = None | |
| 83 #if str($fi.filter.skip_lines) != '': | |
| 84 #set $skip_lines = int($fi.filter.skip_lines) | |
| 85 #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0: | |
| 86 #set $skip_lines = int($tbl.table.metadata.comment_lines) | |
| 87 #end if | |
| 88 #if $skip_lines is not None: | |
| 89 #set $filter_dict = dict() | |
| 90 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 91 #set $filter_dict['count'] = $skip_lines | |
| 92 #silent $input_filters.append($filter_dict) | |
| 93 #end if | |
| 94 #elif $fi.filter.filter_type == 'comment': | |
| 95 #set $filter_dict = dict() | |
| 96 #set $filter_dict['filter'] = 'regex' | |
| 97 #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join([chr(int(x)).replace('|','[|]') for x in (str($fi.filter.comment_char)).split(',')]) | |
| 98 #set $filter_dict['action'] = 'exclude_match' | |
| 99 #silent $input_filters.append($filter_dict) | |
| 100 #elif $fi.filter.filter_type == 'regex': | |
| 101 #set $filter_dict = dict() | |
| 102 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 103 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) | |
| 104 #set $filter_dict['action'] = str($fi.filter.regex_action) | |
| 105 #silent $input_filters.append($filter_dict) | |
| 106 #elif $fi.filter.filter_type == 'select_columns': | |
| 107 #set $filter_dict = dict() | |
| 108 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 109 #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')] | |
| 110 #silent $input_filters.append($filter_dict) | |
| 111 #elif $fi.filter.filter_type == 'replace': | |
| 112 #set $filter_dict = dict() | |
| 113 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 114 #set $filter_dict['column'] = int(str($fi.filter.column).replace('c','')) | |
| 115 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) | |
| 116 #set $filter_dict['replace'] = str($fi.filter.regex_replace) | |
| 117 #silent $input_filters.append($filter_dict) | |
| 118 #elif str($fi.filter.filter_type).endswith('pend_line_num'): | |
| 119 #set $filter_dict = dict() | |
| 120 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 121 #silent $input_filters.append($filter_dict) | |
| 122 #elif str($fi.filter.filter_type).endswith('pend_text'): | |
| 123 #set $filter_dict = dict() | |
| 124 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 125 #set $filter_dict['column_text'] = str($fi.filter.column_text) | |
| 126 #silent $input_filters.append($filter_dict) | |
| 127 #elif $fi.filter.filter_type == 'normalize': | |
| 128 #set $filter_dict = dict() | |
| 129 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 130 #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')] | |
| 131 #set $filter_dict['separator'] = str($fi.filter.separator) | |
| 132 #silent $input_filters.append($filter_dict) | |
| 133 #end if | |
| 134 #end for | |
| 135 #if $input_filters: | 85 #if $input_filters: |
| 136 #set $jtbl['filters'] = $input_filters | 86 #set $jtbl['filters'] = $input_filters |
| 137 #end if | 87 #end if |
| 138 #set $jtbls += [$jtbl] | 88 #set $jtbls += [$jtbl] |
| 139 #end for | 89 #end for |
| 147 help="Make sure your added table names are not already in this database"/> | 97 help="Make sure your added table names are not already in this database"/> |
| 148 </section> | 98 </section> |
| 149 <repeat name="tables" title="Database Table" min="0"> | 99 <repeat name="tables" title="Database Table" min="0"> |
| 150 <param name="table" type="data" format="tabular" label="Tabular Dataset for Table"/> | 100 <param name="table" type="data" format="tabular" label="Tabular Dataset for Table"/> |
| 151 <section name="input_opts" expanded="false" title="Filter Dataset Input"> | 101 <section name="input_opts" expanded="false" title="Filter Dataset Input"> |
| 152 <repeat name="linefilters" title="Filter Tabular Input Lines"> | 102 <expand macro="macro_line_filters" /> |
| 153 <conditional name="filter"> | |
| 154 <param name="filter_type" type="select" label="Filter By"> | |
| 155 <option value="skip">skip leading lines</option> | |
| 156 <option value="comment">comment char</option> | |
| 157 <option value="regex">by regex expression matching</option> | |
| 158 <option value="select_columns">select columns</option> | |
| 159 <option value="replace">regex replace value in column</option> | |
| 160 <option value="prepend_line_num">prepend a line number column</option> | |
| 161 <option value="append_line_num">append a line number column</option> | |
| 162 <option value="prepend_text">prepend a column with the given text</option> | |
| 163 <option value="append_text">append a column with the given text</option> | |
| 164 <option value="normalize">normalize list columns, replicates row for each item in list</option> | |
| 165 </param> | |
| 166 <when value="skip"> | |
| 167 <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" | |
| 168 help="Leave blank to use the comment lines metadata for this dataset" /> | |
| 169 </when> | |
| 170 <when value="comment"> | |
| 171 <param name="comment_char" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are skipped"> | |
| 172 <option value="62">></option> | |
| 173 <option value="64">@</option> | |
| 174 <option value="43">+</option> | |
| 175 <option value="60"><</option> | |
| 176 <option value="42">*</option> | |
| 177 <option value="45">-</option> | |
| 178 <option value="61">=</option> | |
| 179 <option value="124">|</option> | |
| 180 <option value="63">?</option> | |
| 181 <option value="36">$</option> | |
| 182 <option value="46">.</option> | |
| 183 <option value="58">:</option> | |
| 184 <option value="38">&</option> | |
| 185 <option value="37">%</option> | |
| 186 <option value="94">^</option> | |
| 187 <option value="35">#</option> | |
| 188 <option value="33">!</option> | |
| 189 </param> | |
| 190 </when> | |
| 191 <when value="prepend_line_num"/> | |
| 192 <when value="append_line_num"/> | |
| 193 <when value="prepend_text"> | |
| 194 <param name="column_text" type="text" value="" label="text for column"> | |
| 195 </param> | |
| 196 </when> | |
| 197 <when value="append_text"> | |
| 198 <param name="column_text" type="text" value="" label="text for column"> | |
| 199 </param> | |
| 200 </when> | |
| 201 <when value="regex"> | |
| 202 <param name="regex_pattern" type="text" value="" label="regex pattern"> | |
| 203 <sanitizer sanitize="False"/> | |
| 204 </param> | |
| 205 <param name="regex_action" type="select" label="action for regex match"> | |
| 206 <option value="exclude_match">exclude line on pattern match</option> | |
| 207 <option value="include_match">include line on pattern match</option> | |
| 208 <option value="exclude_find">exclude line if pattern found</option> | |
| 209 <option value="include_find">include line if pattern found</option> | |
| 210 </param> | |
| 211 </when> | |
| 212 <when value="select_columns"> | |
| 213 <param name="columns" type="text" value="" label="enter column numbers to keep" | |
| 214 help="example: 1,4,2 or c1,c4,c2(selects the first,fourth, and second columns)"> | |
| 215 <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator> | |
| 216 </param> | |
| 217 </when> | |
| 218 <when value="replace"> | |
| 219 <param name="column" type="text" value="" label="enter column number to replace" | |
| 220 help="example: 1 or c1 (selects the first column)"> | |
| 221 <validator type="regex" message="Column ordinal position separated by commas">^(c?[1-9]\d*)$</validator> | |
| 222 </param> | |
| 223 <param name="regex_pattern" type="text" value="" label="regex pattern"> | |
| 224 <sanitizer sanitize="False"/> | |
| 225 </param> | |
| 226 <param name="regex_replace" type="text" value="" label="replacement expression"> | |
| 227 <sanitizer sanitize="False"/> | |
| 228 </param> | |
| 229 </when> | |
| 230 <when value="normalize"> | |
| 231 <param name="columns" type="text" value="" label="enter column numbers to normalize"> | |
| 232 <help><![CDATA[ | |
| 233 example: 2,4 or c2,c4 (selects the second, and fourth columns) | |
| 234 If multiple columns are selected, they should have the same length and separator on each line | |
| 235 ]]></help> | |
| 236 <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator> | |
| 237 </param> | |
| 238 <param name="separator" type="text" value="," label="List item delimiter in column"> | |
| 239 <sanitizer sanitize="False"/> | |
| 240 <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator> | |
| 241 </param> | |
| 242 </when> | |
| 243 </conditional> | |
| 244 </repeat> | |
| 245 </section> | 103 </section> |
| 246 <section name="tbl_opts" expanded="false" title="Table Options"> | 104 <section name="tbl_opts" expanded="false" title="Table Options"> |
| 247 <param name="table_name" type="text" value="" optional="true" label="Specify Name for Table"> | 105 <param name="table_name" type="text" value="" optional="true" label="Specify Name for Table"> |
| 248 <help>By default, tables will be named: t1,t2,...,tn (table names must be unique)</help> | 106 <help>By default, tables will be named: t1,t2,...,tn (table names must be unique)</help> |
| 249 <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator> | 107 <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator> |
| 267 <validator type="regex" message="Column name, separated by commes if more than one">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator> | 125 <validator type="regex" message="Column name, separated by commes if more than one">^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$</validator> |
| 268 </param> | 126 </param> |
| 269 </repeat> | 127 </repeat> |
| 270 </section> | 128 </section> |
| 271 </repeat> | 129 </repeat> |
| 272 <param name="save_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Save the sqlite database in your history"/> | 130 <param name="save_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Save the sqlite database in your history" |
| 131 help="SQLite to tabular tool can run additional queries on this database"/> | |
| 273 <param name="sqlquery" type="text" area="true" size="20x80" value="" optional="true" label="SQL Query to generate tabular output"> | 132 <param name="sqlquery" type="text" area="true" size="20x80" value="" optional="true" label="SQL Query to generate tabular output"> |
| 274 <help>By default: tables are named: t1,t2,...,tn and columns in each table: c1,c2,...,cn</help> | 133 <help>By default: tables are named: t1,t2,...,tn and columns in each table: c1,c2,...,cn</help> |
| 275 <sanitizer sanitize="False"/> | 134 <sanitizer sanitize="False"/> |
| 276 <validator type="regex" message="">^(?ims)\s*select\s+.*\s+from\s+.*$</validator> | 135 <validator type="regex" message="">^(?ims)\s*select\s+.*\s+from\s+.*$</validator> |
| 277 </param> | 136 </param> |
| 278 <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers from tabular output"/> | 137 <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers from tabular output"/> |
| 279 </inputs> | 138 </inputs> |
| 280 <outputs> | 139 <outputs> |
| 281 <data format="sqlite" name="sqlitedb" label="sqlite db of ${on_string}"> | 140 <data format="sqlite" name="sqlitedb" label="sqlite db of ${on_string}"> |
| 282 <filter>save_db or not (sqlquery and len(sqlquery) > 0)</filter> | 141 <filter>save_db</filter> |
| 283 </data> | 142 </data> |
| 284 <data format="tabular" name="output" label="query results on ${on_string}"> | 143 <data format="tabular" name="output" label="query results on ${on_string}"> |
| 285 <filter>sqlquery and len(sqlquery) > 0</filter> | 144 <filter>not save_db or (sqlquery and len(sqlquery.strip()) > 0)</filter> |
| 286 </data> | 145 </data> |
| 287 </outputs> | 146 </outputs> |
| 288 <tests> | 147 <tests> |
| 289 | 148 |
| 290 <test> | 149 <test> |
| 398 Loads tabular datasets into a SQLite_ data base. | 257 Loads tabular datasets into a SQLite_ data base. |
| 399 | 258 |
| 400 An existing SQLite_ data base can be used as input, and any selected tabular datasets will be added as new tables in that data base. | 259 An existing SQLite_ data base can be used as input, and any selected tabular datasets will be added as new tables in that data base. |
| 401 | 260 |
| 402 | 261 |
| 403 **Input Line Filters** | 262 @LINEFILTERS_HELP@ |
| 404 | |
| 405 As a tabular file is being read, line filters may be applied. | |
| 406 | |
| 407 :: | |
| 408 | |
| 409 - skip leading lines skip the first *number* of lines | |
| 410 - comment char omit any lines that start with the specified comment character | |
| 411 - by regex expression matching *include/exclude* lines that match the regex expression | |
| 412 - select columns choose to include only selected columns in the order specified | |
| 413 - regex replace value in column replace a field in a column using a regex substitution (good for date reformatting) | |
| 414 - prepend a line number column each line has the ordinal value of the line read by this filter as the first column | |
| 415 - append a line number column each line has the ordinal value of the line read by this filter as the last column | |
| 416 - normalize list columns replicates the line for each item in the specified list *columns* | |
| 417 | 263 |
| 418 | 264 |
| 419 **Outputs** | 265 **Outputs** |
| 420 | 266 |
| 421 The results of a SQL query are output to the history as a tabular file. | 267 The results of a SQL query are output to the history as a tabular file. |
| 423 The SQLite_ data base can also be saved and output as a dataset in the history. | 269 The SQLite_ data base can also be saved and output as a dataset in the history. |
| 424 | 270 |
| 425 *(The* **SQLite to tabular** *tool can run additional queries on this database.)* | 271 *(The* **SQLite to tabular** *tool can run additional queries on this database.)* |
| 426 | 272 |
| 427 | 273 |
| 428 For help in using SQLite_ see: http://www.sqlite.org/docs.html | 274 @QUERY_HELP@ |
| 429 | 275 |
| 430 **NOTE:** input for SQLite dates input field must be in the format: *YYYY-MM-DD* for example: 2015-09-30 | 276 @LINEFILTERS_HELP_EXAMPLE@ |
| 431 | |
| 432 See: http://www.sqlite.org/lang_datefunc.html | |
| 433 | |
| 434 **Example** | |
| 435 | |
| 436 Given 2 tabular datasets: *customers* and *sales* | |
| 437 | |
| 438 Dataset *customers* | |
| 439 | |
| 440 Table name: "customers" | |
| 441 | |
| 442 Column names: "CustomerID,FirstName,LastName,Email,DOB,Phone" | |
| 443 | |
| 444 =========== ========== ========== ===================== ========== ============ | |
| 445 #CustomerID FirstName LastName Email DOB Phone | |
| 446 =========== ========== ========== ===================== ========== ============ | |
| 447 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222 | |
| 448 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545 | |
| 449 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232 | |
| 450 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888 | |
| 451 =========== ========== ========== ===================== ========== ============ | |
| 452 | |
| 453 Dataset *sales* | |
| 454 | |
| 455 Table name: "sales" | |
| 456 | |
| 457 Column names: "CustomerID,Date,SaleAmount" | |
| 458 | |
| 459 ============= ============ ============ | |
| 460 #CustomerID Date SaleAmount | |
| 461 ============= ============ ============ | |
| 462 2 2004-05-06 100.22 | |
| 463 1 2004-05-07 99.95 | |
| 464 3 2004-05-07 122.95 | |
| 465 3 2004-05-13 100.00 | |
| 466 4 2004-05-22 555.55 | |
| 467 ============= ============ ============ | |
| 468 | |
| 469 The query | |
| 470 | |
| 471 :: | |
| 472 | |
| 473 SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales" | |
| 474 FROM customers join sales on customers.CustomerID = sales.CustomerID | |
| 475 GROUP BY customers.CustomerID ORDER BY TotalSales DESC; | |
| 476 | |
| 477 Produces this tabular output: | |
| 478 | |
| 479 ========== ======== ========== | |
| 480 #FirstName LastName TotalSales | |
| 481 ========== ======== ========== | |
| 482 James Smith 555.55 | |
| 483 Paula Brown 222.95 | |
| 484 Steven Goldfish 100.22 | |
| 485 John Smith 99.95 | |
| 486 ========== ======== ========== | |
| 487 | |
| 488 | |
| 489 If the optional Table name and Column names inputs are not used, the query would be: | |
| 490 | |
| 491 :: | |
| 492 | |
| 493 SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales" | |
| 494 FROM t1 join t2 on t1.c1 = t2.c1 | |
| 495 GROUP BY t1.c1 ORDER BY TotalSales DESC; | |
| 496 | |
| 497 You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5: | |
| 498 | |
| 499 Column names: ,FirstName,LastName,,BirthDate | |
| 500 | |
| 501 Results in the following data base table | |
| 502 | |
| 503 =========== ========== ========== ===================== ========== ============ | |
| 504 #c1 FirstName LastName c4 BirthDate c6 | |
| 505 =========== ========== ========== ===================== ========== ============ | |
| 506 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222 | |
| 507 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545 | |
| 508 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232 | |
| 509 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888 | |
| 510 =========== ========== ========== ===================== ========== ============ | |
| 511 | |
| 512 | |
| 513 Regular_expression_ functions are included for: | |
| 514 | |
| 515 :: | |
| 516 | |
| 517 matching: re_match('pattern',column) | |
| 518 | |
| 519 SELECT t1.FirstName, t1.LastName | |
| 520 FROM t1 | |
| 521 WHERE re_match('^.*\.(net|org)$',c4) | |
| 522 | |
| 523 Results: | |
| 524 | |
| 525 =========== ========== | |
| 526 #FirstName LastName | |
| 527 =========== ========== | |
| 528 Steven Goldfish | |
| 529 Paula Brown | |
| 530 =========== ========== | |
| 531 | |
| 532 | |
| 533 :: | |
| 534 | |
| 535 searching: re_search('pattern',column) | |
| 536 substituting: re_sub('pattern','replacement',column) | |
| 537 | |
| 538 SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB" | |
| 539 FROM t1 | |
| 540 WHERE re_search('[hp]er',c4) | |
| 541 | |
| 542 Results: | |
| 543 | |
| 544 | |
| 545 =========== ========== ========== | |
| 546 #FirstName LastName DOB | |
| 547 =========== ========== ========== | |
| 548 Steven Goldfish 04/04/74 | |
| 549 Paula Brown 24/05/78 | |
| 550 James Smith 20/10/80 | |
| 551 =========== ========== ========== | |
| 552 | |
| 553 | |
| 554 **Line Filtering Example** | |
| 555 *(Six filters are applied as the following file is read)* | |
| 556 | |
| 557 :: | |
| 558 | |
| 559 Input Tabular File: | |
| 560 | |
| 561 #People with pets | |
| 562 Pets FirstName LastName DOB PetNames PetType | |
| 563 2 Paula Brown 24/05/78 Rex,Fluff dog,cat | |
| 564 1 Steven Jones 04/04/74 Allie cat | |
| 565 0 Jane Doe 24/05/78 | |
| 566 1 James Smith 20/10/80 Spot | |
| 567 | |
| 568 | |
| 569 Filter 1 - append a line number column: | |
| 570 | |
| 571 #People with pets 1 | |
| 572 Pets FirstName LastName DOB PetNames PetType 2 | |
| 573 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 | |
| 574 1 Steven Jones 04/04/74 Allie cat 4 | |
| 575 0 Jane Doe 24/05/78 5 | |
| 576 1 James Smith 20/10/80 Spot 6 | |
| 577 | |
| 578 Filter 2 - by regex expression matching [include]: '^\d+' (include lines that start with a number) | |
| 579 | |
| 580 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 | |
| 581 1 Steven Jones 04/04/74 Allie cat 4 | |
| 582 0 Jane Doe 24/05/78 5 | |
| 583 1 James Smith 20/10/80 Spot 6 | |
| 584 | |
| 585 Filter 3 - append a line number column: | |
| 586 | |
| 587 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 1 | |
| 588 1 Steven Jones 04/04/74 Allie cat 4 2 | |
| 589 0 Jane Doe 24/05/78 5 3 | |
| 590 1 James Smith 20/10/80 Spot 6 4 | |
| 591 | |
| 592 Filter 4 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format) | |
| 593 | |
| 594 2 Paula Brown 1978-05-24 Rex,Fluff dog,cat 3 1 | |
| 595 1 Steven Jones 1974-04-04 Allie cat 4 2 | |
| 596 0 Jane Doe 1978-05-24 5 3 | |
| 597 1 James Smith 1980-10-20 Spot 6 4 | |
| 598 | |
| 599 Filter 5 - normalize list columns[5,6]: | |
| 600 | |
| 601 2 Paula Brown 1978-05-24 Rex dog 3 1 | |
| 602 2 Paula Brown 1978-05-24 Fluff cat 3 1 | |
| 603 1 Steven Jones 1974-04-04 Allie cat 4 2 | |
| 604 0 Jane Doe 1978-05-24 5 3 | |
| 605 1 James Smith 1980-10-20 Spot 6 4 | |
| 606 | |
| 607 Filter 6 - append a line number column: | |
| 608 | |
| 609 2 Paula Brown 1978-05-24 Rex dog 3 1 1 | |
| 610 2 Paula Brown 1978-05-24 Fluff cat 3 1 2 | |
| 611 1 Steven Jones 1974-04-04 Allie cat 4 2 3 | |
| 612 0 Jane Doe 1978-05-24 5 3 4 | |
| 613 1 James Smith 1980-10-20 Spot 6 4 5 | |
| 614 | 277 |
| 615 | 278 |
| 616 Table name: pets | 279 Table name: pets |
| 617 | 280 |
| 618 Table columns: Pets,FirstName,LastName,Birthdate,PetNames,PetType,line_num,entry_num,row_num | 281 Table columns: Pets,FirstName,LastName,Birthdate,PetNames,PetType,line_num,entry_num,row_num |
| 632 ====== ========== ======== ========== ========= ======== ========= ========== ======== | 295 ====== ========== ======== ========== ========= ======== ========= ========== ======== |
| 633 | 296 |
| 634 | 297 |
| 635 **Normalizing by Line Filtering into 2 Tables** | 298 **Normalizing by Line Filtering into 2 Tables** |
| 636 | 299 |
| 300 *Relational database operations work with single-valued column entries. | |
| 301 To apply relational operations to tabular files that contain fields with lists of values, | |
| 302 we need to "normalize" those fields, duplicating lines for each item in the list. | |
| 303 In this example we create 2 tables, one for single-valued fields and a second with list-valued fields normalized. | |
| 304 Because we add a line number first for each table, we can join the 2 tables on the line number column.* | |
| 305 https://en.wikipedia.org/wiki/First_normal_form | |
| 306 | |
| 637 *People Table* | 307 *People Table* |
| 638 | 308 |
| 639 :: | 309 :: |
| 640 | 310 |
| 641 Filter 1 - by regex expression matching [include]: '^\d+' (include lines that start with a number) | 311 Filter 1 - by regex expression matching [include]: '^\d+' (include lines that start with a number) |
| 677 2 Allie cat | 347 2 Allie cat |
| 678 4 Spot | 348 4 Spot |
| 679 == ======== ======== | 349 == ======== ======== |
| 680 | 350 |
| 681 | 351 |
| 682 Query: SELECT FirstName,LastName,PetName FROM People join Pet on People.id = Pet.id WHERE PetType = 'cat'; | 352 Query: SELECT FirstName,LastName,PetName FROM People JOIN Pet ON People.id = Pet.id WHERE PetType = 'cat'; |
| 683 | 353 |
| 684 Result: | 354 Result: |
| 685 | 355 |
| 686 ========= ======== ======== | 356 ========= ======== ======== |
| 687 FirstName LastName PetName | 357 FirstName LastName PetName |
| 688 ========= ======== ======== | 358 ========= ======== ======== |
| 689 Paula Brown Fluff | 359 Paula Brown Fluff |
| 690 Steven Jones Allie | 360 Steven Jones Allie |
| 691 ========= ======== ======== | 361 ========= ======== ======== |
| 692 | 362 |
| 693 .. _Regular_expression: https://docs.python.org/release/2.7/library/re.html | |
| 694 .. _SQLite: http://www.sqlite.org/index.html | |
| 695 | 363 |
| 696 ]]></help> | 364 ]]></help> |
| 697 </tool> | 365 </tool> |
