comparison query_tabular.xml @ 0:926c62f7fa09 draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 9ae87502ea7c3da33ecc453872c4eb2f41ecea4a-dirty
author jjohnson
date Thu, 21 Jan 2016 08:23:45 -0500
parents
children c7a1a686e42b
comparison
equal deleted inserted replaced
-1:000000000000 0:926c62f7fa09
1 <tool id="query_tabular" name="Query Tabular" version="0.1.0">
2 <description>using sqlite sql</description>
3
4 <requirements>
5 </requirements>
6 <stdio>
7 <exit_code range="1:" />
8 </stdio>
9 <!-- Cheetah command template: loads the tables described by $table_json into the database given by -s, then optionally runs the query in $query_file and writes the result to $output. The -s target is the sqlitedb dataset whenever the outputs filter keeps that dataset (save_db set, or no query given); otherwise it is the scratch file named by $workdb. --> <command interpreter="python"><![CDATA[
10 query_tabular.py
11 #if $save_db or not $sqlquery
12 -s "$sqlitedb"
13 #else
14 -s "$workdb"
15 #end if
16 -j "$table_json"
17 #*
18 ## #for $i,$tbl in enumerate($tables):
19 ## #if $tbl.table_name
20 ## #set $tname = $tbl.table_name
21 ## #else
22 ## #set $tname = 't' + str($i + 1)
23 ## #end if
24 ## #if $tbl.col_names:
25 ## #set $col_names = ':' + str($tbl.col_names)
26 ## #else
27 ## #set $col_names = ''
28 ## #end if
29 ## -t ${tbl.table}=${tname}${$col_names}
30 ## #end for
31 *#
32 #if $sqlquery:
33 -Q "$query_file"
34 $no_header
35 -o "$output"
36 #end if
37 ]]></command>
38 <configfiles>
39 <configfile name="query_file">
40 $sqlquery
41 </configfile>
42 <!-- Cheetah template that writes a JSON document {"tables": [...]} with one entry per repeat instance: file_path, table_name (default t1, t2, ...), column_names and, when known, comment_lines. Values are converted to plain str/int before json.dumps because parameter wrapper objects are not JSON-serializable. --> <configfile name="table_json">
43 #import json
44 #set $jtbldef = dict()
45 #set $jtbls = []
46 #set $jtbldef['tables'] = $jtbls
47 #for $i,$tbl in enumerate($tables):
48 #set $jtbl = dict()
49 #set $jtbl['file_path'] = str($tbl.table)
50 #if $tbl.table_name
51 #set $tname = str($tbl.table_name)
52 #else
53 #set $tname = 't' + str($i + 1)
54 #end if
55 #set $jtbl['table_name'] = $tname
56 #if $tbl.col_names:
57 #set $col_names = str($tbl.col_names)
58 #else
59 #set $col_names = ''
60 #end if
61 #set $jtbl['column_names'] = $col_names
62 #if str($tbl.skip_lines) != '':
63 #set $jtbl['comment_lines'] = int(str($tbl.skip_lines))
64 #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0:
65 #set $jtbl['comment_lines'] = int($tbl.table.metadata.comment_lines)
66 #end if
67 #set $jtbls += [$jtbl]
68 #end for
69 #echo $json.dumps($jtbldef)
70 </configfile>
71 </configfiles>
72 <inputs>
73 <param name="workdb" type="hidden" value="workdb.sqlite" label=""/> <!-- file name the command template passes to -s when the database is not written to the sqlitedb output -->
74 <repeat name="tables" title="Add tables" min="1"> <!-- one instance per input dataset; table_name, col_names and skip_lines are optional per-table overrides read by the table_json configfile -->
75 <param name="table" type="data" format="tabular" label="Dataset"/>
76 <param name="table_name" type="text" value="" optional="true" label="Table name">
77 <help>By default, tables will be named: t1,t2,...,tn</help>
78 <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator>
79 </param>
80 <!--
81 <param name="sel_cols" label="Include columns" type="data_column" multiple="true" data_ref="table" />
82 -->
83 <param name="col_names" type="text" value="" optional="true" label="Column names">
84 <help>By default, table columns will be named: c1,c2,c3,...,cn</help>
85 <validator type="regex" message="A comma-separated list of column names: each name should start with a letter and may contain additional letters, digits, and underscores; a name may be left empty to keep its default">^([A-Za-z]\w*)?(,([A-Za-z]\w*)?)*$</validator>
86 </param>
87 <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" help="Leave blank to use the datatype comment lines metadata" />
88 </repeat>
89 <param name="sqlquery" type="text" area="true" size="10x80" value="" optional="true" label="SQL Query">
90 <help>By default, tables will be named: t1,t2,...,tn</help>
91 <sanitizer sanitize="False"/>
92 <!-- (?is) must lead the pattern: i = case-insensitive keywords, s = '.' also matches newlines so multi-line queries validate --> <validator type="regex" message="The query must be a SELECT statement of the form: SELECT ... FROM ...">(?is)^\s*select\s+.*\s+from\s+.*$</validator>
93 </param>
94 <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers"/>
95
96 <param name="save_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Save the sqlite database"/>
97 </inputs>
98 <outputs> <!-- filters: the sqlitedb dataset is kept when save_db is set or when no SQL query was entered; the tabular output dataset is kept only when a non-empty SQL query was entered -->
99 <data format="sqlite" name="sqlitedb" label="sqlite db of ${on_string}">
100 <filter>save_db or not (sqlquery and len(sqlquery) > 0)</filter>
101 </data>
102 <data format="tabular" name="output" label="query results on ${on_string}">
103 <filter>sqlquery and len(sqlquery) > 0</filter>
104 </data>
105 </outputs>
106 <tests>
107
108 <test> <!-- explicit table and column names: joins customers to sales and totals SaleAmount per customer -->
109 <repeat name="tables">
110 <param name="table" ftype="tabular" value="customers.tsv"/>
111 <param name="table_name" value="customers"/>
112 <param name="col_names" value="CustomerID,FirstName,LastName,Email,DOB,Phone"/>
113 </repeat>
114 <repeat name="tables">
115 <param name="table" ftype="tabular" value="sales.tsv"/>
116 <param name="table_name" value="sales"/>
117 <param name="col_names" value="CustomerID,Date,SaleAmount"/>
118 </repeat>
119 <param name="sqlquery" value="SELECT FirstName,LastName,sum(SaleAmount) as &quot;TotalSales&quot; FROM customers join sales on customers.CustomerID = sales.CustomerID GROUP BY customers.CustomerID ORDER BY TotalSales DESC"/>
120 <output name="output" file="sales_results.tsv"/>
121 </test>
122
123 <test> <!-- default table names (t1, t2) with only some columns named; the query mixes given names with default cN names and expects the same result file as the first test -->
124 <repeat name="tables">
125 <param name="table" ftype="tabular" value="customers.tsv"/>
126 <param name="col_names" value=",FirstName,LastName,,DOB,"/>
127 </repeat>
128 <repeat name="tables">
129 <param name="table" ftype="tabular" value="sales.tsv"/>
130 </repeat>
131 <param name="sqlquery" value="SELECT FirstName,LastName,sum(t2.c3) as &quot;TotalSales&quot; FROM t1 join t2 on t1.c1 = t2.c1 GROUP BY t1.c1 ORDER BY TotalSales DESC;"/>
132 <output name="output" file="sales_results.tsv"/>
133 </test>
134
135 <test> <!-- exercises the re_sub and re_search SQL functions on a single table with partially named columns -->
136 <repeat name="tables">
137 <param name="table" ftype="tabular" value="customers.tsv"/>
138 <param name="col_names" value=",FirstName,LastName,,BirthDate,"/>
139 </repeat>
140 <param name="sqlquery" value="select FirstName,LastName,re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as &quot;DOB&quot; from t1 WHERE re_search('[hp]er',c4)"/>
141 <output name="output" file="regex_results.tsv"/>
142 </test>
143
144 <test> <!-- left outer join of the iedb table to mhc_summary on the peptide column, ordered by affinity and Bind_Level -->
145 <repeat name="tables">
146 <param name="table" ftype="tabular" value="IEDB.tsv"/>
147 <param name="table_name" value="iedb"/>
148 <param name="col_names" value="ID,allele,seq_num,start,end,length,peptide,method,percentile_rank,ann_ic50,ann_rank,smm_ic50,smm_rank,comblib_sidney2008_score,comblib_sidney2008_rank,netmhcpan_ic50,netmhcpan_rank"/>
149 </repeat>
150 <repeat name="tables">
151 <param name="table" ftype="tabular" value="netMHC_summary.tsv"/>
152 <param name="table_name" value="mhc_summary"/>
153 <param name="col_names" value="pos,peptide,logscore,affinity,Bind_Level,Protein,Allele"/>
154 </repeat>
155 <param name="sqlquery" value="select iedb.ID,iedb.peptide,iedb.start,iedb.end,iedb.percentile_rank,mhc_summary.logscore,mhc_summary.affinity,mhc_summary.Bind_Level from iedb left outer join mhc_summary on iedb.peptide = mhc_summary.peptide order by affinity,Bind_Level"/>
156 <output name="output" file="query_results.tsv"/>
157 </test>
158
159 </tests>
160 <help><![CDATA[
161 =============
162 Query Tabular
163 =============
164
165 **Inputs**
166
167 Loads tabular datasets into a SQLite_ data base.
168
169 **Outputs**
170
171 The results of a SQL query are output to the history as a tabular file.
172
173 The SQLite_ data base can also be saved and output as a dataset in the history.
174
175
176 For help in using SQLite_ see: http://www.sqlite.org/docs.html
177
178 **NOTE:** values in date fields must be in the format *YYYY-MM-DD* to be usable with the SQLite date functions, for example: 2015-09-30
179
180 See: http://www.sqlite.org/lang_datefunc.html
181
182 **Example**
183
184 Given 2 tabular datasets: *customers* and *sales*
185
186 Dataset *customers*
187
188 Table name: "customers"
189
190 Column names: "CustomerID,FirstName,LastName,Email,DOB,Phone"
191
192 =========== ========== ========== ===================== ========== ============
193 #CustomerID FirstName LastName Email DOB Phone
194 =========== ========== ========== ===================== ========== ============
195 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222
196 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545
197 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232
198 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888
199 =========== ========== ========== ===================== ========== ============
200
201 Dataset *sales*
202
203 Table name: "sales"
204
205 Column names: "CustomerID,Date,SaleAmount"
206
207 ============= ============ ============
208 #CustomerID Date SaleAmount
209 ============= ============ ============
210 2 2004-05-06 100.22
211 1 2004-05-07 99.95
212 3 2004-05-07 122.95
213 3 2004-05-13 100.00
214 4 2004-05-22 555.55
215 ============= ============ ============
216
217 The query
218
219 ::
220
221 SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales"
222 FROM customers join sales on customers.CustomerID = sales.CustomerID
223 GROUP BY customers.CustomerID ORDER BY TotalSales DESC;
224
225 Produces this tabular output:
226
227 ========== ======== ==========
228 #FirstName LastName TotalSales
229 ========== ======== ==========
230 James Smith 555.55
231 Paula Brown 222.95
232 Steven Goldfish 100.22
233 John Smith 99.95
234 ========== ======== ==========
235
236
237 If the optional Table name and Column names inputs are not used, the query would be:
238
239 ::
240
241 SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales"
242 FROM t1 join t2 on t1.c1 = t2.c1
243 GROUP BY t1.c1 ORDER BY TotalSales DESC;
244
245 You can selectively name columns, e.g. on the customers input you could just name columns 2, 3, and 5:
246
247 Column names: ,FirstName,LastName,,BirthDate
248
249 Results in the following data base table
250
251 =========== ========== ========== ===================== ========== ============
252 #c1 FirstName LastName c4 BirthDate c6
253 =========== ========== ========== ===================== ========== ============
254 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222
255 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545
256 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232
257 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888
258 =========== ========== ========== ===================== ========== ============
259
260 Regular_expression_ functions are included for:
261
262 ::
263
264 matching: re_match('pattern',column)
265
266 SELECT t1.FirstName, t1.LastName
267 FROM t1
268 WHERE re_match('^.*\.(net|org)$',c4)
269
270 Results:
271
272 =========== ==========
273 #FirstName LastName
274 =========== ==========
275 Steven Goldfish
276 Paula Brown
277 =========== ==========
278
279
280 ::
281
282 searching: re_search('pattern',column)
283 substituting: re_sub('pattern','replacement',column)
284
285 SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB"
286 FROM t1
287 WHERE re_search('[hp]er',c4)
288
289 Results:
290
291 =========== ========== ==========
292 #FirstName LastName DOB
293 =========== ========== ==========
294 Steven Goldfish 04/04/74
295 Paula Brown 24/05/78
296 James Smith 20/10/80
297 =========== ========== ==========
298
299 .. _Regular_expression: https://docs.python.org/release/2.7/library/re.html
300 .. _SQLite: http://www.sqlite.org/index.html
301
302 ]]></help>
303 </tool>