Mercurial > repos > jjohnson > query_tabular
comparison query_tabular.xml @ 0:926c62f7fa09 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 9ae87502ea7c3da33ecc453872c4eb2f41ecea4a-dirty
author | jjohnson |
---|---|
date | Thu, 21 Jan 2016 08:23:45 -0500 |
parents | |
children | c7a1a686e42b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:926c62f7fa09 |
---|---|
1 <tool id="query_tabular" name="Query Tabular" version="0.1.0"> | |
2 <description>using sqlite sql</description> | |
3 | |
4 <requirements> | |
5 </requirements> | |
6 <stdio> | |
7 <exit_code range="1:" /> | |
8 </stdio> | |
9 <command interpreter="python"><![CDATA[ | |
10 query_tabular.py | |
## Send the database to the sqlitedb history output whenever that output is created. | |
## Its filter is "save_db or not sqlquery", so the same condition is tested here; | |
## otherwise the database is only a scratch file in the job working directory. | |
11 #if $save_db or not $sqlquery | |
12 -s $sqlitedb | |
13 #else | |
14 -s $workdb | |
15 #end if | |
## Table definitions are passed as a JSON config file (see the table_json configfile). | |
16 -j $table_json | |
17 #* | |
18 ## #for $i,$tbl in enumerate($tables): | |
19 ## #if $tbl.table_name | |
20 ## #set $tname = $tbl.table_name | |
21 ## #else | |
22 ## #set $tname = 't' + str($i + 1) | |
23 ## #end if | |
24 ## #if $tbl.col_names: | |
25 ## #set $col_names = ':' + str($tbl.col_names) | |
26 ## #else | |
27 ## #set $col_names = '' | |
28 ## #end if | |
29 ## -t ${tbl.table}=${tname}${$col_names} | |
30 ## #end for | |
31 *# | |
## The query options are only added when a query was entered. | |
32 #if $sqlquery: | |
33 -Q "$query_file" | |
34 $no_header | |
35 -o $output | |
36 #end if | |
37 ]]></command> | |
38 <configfiles> | |
39 <configfile name="query_file"> | |
40 $sqlquery | |
41 </configfile> | |
42 <configfile name="table_json"> | |
## Builds a JSON document {"tables": [...]} with one entry per repeat of the tables input. | |
43 #import json | |
44 #set $jtbldef = dict() | |
45 #set $jtbls = [] | |
46 #set $jtbldef['tables'] = $jtbls | |
47 #for $i,$tbl in enumerate($tables): | |
48 #set $jtbl = dict() | |
49 #set $jtbl['file_path'] = str($tbl.table) | |
## Unnamed tables fall back to t1, t2, ... by position. | |
50 #if $tbl.table_name | |
51 #set $tname = str($tbl.table_name) | |
52 #else | |
53 #set $tname = 't' + str($i + 1) | |
54 #end if | |
55 #set $jtbl['table_name'] = $tname | |
56 #if $tbl.col_names: | |
57 #set $col_names = str($tbl.col_names) | |
58 #else | |
59 #set $col_names = '' | |
60 #end if | |
61 #set $jtbl['column_names'] = $col_names | |
## An explicit skip_lines value takes precedence over the datatype metadata. | |
## It is cast to int, like the metadata branch, so json.dumps receives a plain integer. | |
62 #if str($tbl.skip_lines) != '': | |
63 #set $jtbl['comment_lines'] = int($tbl.skip_lines) | |
## Guard against unset metadata before comparing it with 0. | |
64 #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0: | |
65 #set $jtbl['comment_lines'] = int($tbl.table.metadata.comment_lines) | |
66 #end if | |
67 #set $jtbls += [$jtbl] | |
68 #end for | |
69 #echo $json.dumps($jtbldef) | |
70 </configfile> | |
71 </configfiles> | |
72 <inputs> | |
73 <param name="workdb" type="hidden" value="workdb.sqlite" label=""/> | |
74 <repeat name="tables" title="Add tables" min="1"> | |
75 <param name="table" type="data" format="tabular" label="Dataset"/> | |
76 <param name="table_name" type="text" value="" optional="true" label="Table name"> | |
77 <help>By default, tables will be named: t1,t2,...,tn</help> | |
78 <validator type="regex" message="Table name should start with a letter and may contain additional letters, digits, and underscores">^[A-Za-z]\w*$</validator> | |
79 </param> | |
80 <!-- | |
81 <param name="sel_cols" label="Include columns" type="data_column" multiple="true" data_ref="table" /> | |
82 --> | |
83 <param name="col_names" type="text" value="" optional="true" label="Column names"> | |
84 <help>By default, table columns will be named: c1,c2,c3,...,cn</help> | |
<!-- The pattern lets any position stay empty, so a column can keep its default name. --> | |
85 <validator type="regex" message="A comma-separated list of column names: each name should start with a letter and may contain additional letters, digits, and underscores; a name may be left empty to keep the default">^([A-Za-z]\w*)?(,([A-Za-z]\w*)?)*$</validator> | |
86 </param> | |
87 <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" help="Leave blank to use the datatype comment lines metadata" /> | |
88 </repeat> | |
89 <param name="sqlquery" type="text" area="true" size="10x80" value="" optional="true" label="SQL Query"> | |
90 <help>By default, tables will be named: t1,t2,...,tn</help> | |
91 <sanitizer sanitize="False"/> | |
<!-- Inline flags must lead the pattern (Python 3.11+ rejects them anywhere else); the "s" flag lets "." span the line breaks of a multi-line query. --> | |
92 <validator type="regex" message="The SQL Query must be a SELECT ... FROM ... statement">(?is)^\s*select\s+.*\s+from\s+.*$</validator> | |
93 </param> | |
94 <param name="no_header" type="boolean" truevalue="-n" falsevalue="" checked="False" label="Omit column headers"/> | |
95 | |
96 <param name="save_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Save the sqlite database"/> | |
97 </inputs> | |
98 <outputs> | |
99 <data format="sqlite" name="sqlitedb" label="sqlite db of ${on_string}"> | |
100 <filter>save_db or not (sqlquery and len(sqlquery) > 0)</filter> | |
101 </data> | |
102 <data format="tabular" name="output" label="query results on ${on_string}"> | |
103 <filter>sqlquery and len(sqlquery) > 0</filter> | |
104 </data> | |
105 </outputs> | |
106 <tests> | |
107 | |
108 <test> | |
109 <repeat name="tables"> | |
110 <param name="table" ftype="tabular" value="customers.tsv"/> | |
111 <param name="table_name" value="customers"/> | |
112 <param name="col_names" value="CustomerID,FirstName,LastName,Email,DOB,Phone"/> | |
113 </repeat> | |
114 <repeat name="tables"> | |
115 <param name="table" ftype="tabular" value="sales.tsv"/> | |
116 <param name="table_name" value="sales"/> | |
117 <param name="col_names" value="CustomerID,Date,SaleAmount"/> | |
118 </repeat> | |
119 <param name="sqlquery" value="SELECT FirstName,LastName,sum(SaleAmount) as &quot;TotalSales&quot; FROM customers join sales on customers.CustomerID = sales.CustomerID GROUP BY customers.CustomerID ORDER BY TotalSales DESC"/> | |
120 <output name="output" file="sales_results.tsv"/> | |
121 </test> | |
122 | |
123 <test> | |
124 <repeat name="tables"> | |
125 <param name="table" ftype="tabular" value="customers.tsv"/> | |
126 <param name="col_names" value=",FirstName,LastName,,DOB,"/> | |
127 </repeat> | |
128 <repeat name="tables"> | |
129 <param name="table" ftype="tabular" value="sales.tsv"/> | |
130 </repeat> | |
131 <param name="sqlquery" value="SELECT FirstName,LastName,sum(t2.c3) as &quot;TotalSales&quot; FROM t1 join t2 on t1.c1 = t2.c1 GROUP BY t1.c1 ORDER BY TotalSales DESC;"/> | |
132 <output name="output" file="sales_results.tsv"/> | |
133 </test> | |
134 | |
135 <test> | |
136 <repeat name="tables"> | |
137 <param name="table" ftype="tabular" value="customers.tsv"/> | |
138 <param name="col_names" value=",FirstName,LastName,,BirthDate,"/> | |
139 </repeat> | |
140 <param name="sqlquery" value="select FirstName,LastName,re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as &quot;DOB&quot; from t1 WHERE re_search('[hp]er',c4)"/> | |
141 <output name="output" file="regex_results.tsv"/> | |
142 </test> | |
143 | |
144 <test> | |
145 <repeat name="tables"> | |
146 <param name="table" ftype="tabular" value="IEDB.tsv"/> | |
147 <param name="table_name" value="iedb"/> | |
148 <param name="col_names" value="ID,allele,seq_num,start,end,length,peptide,method,percentile_rank,ann_ic50,ann_rank,smm_ic50,smm_rank,comblib_sidney2008_score,comblib_sidney2008_rank,netmhcpan_ic50,netmhcpan_rank"/> | |
149 </repeat> | |
150 <repeat name="tables"> | |
151 <param name="table" ftype="tabular" value="netMHC_summary.tsv"/> | |
152 <param name="table_name" value="mhc_summary"/> | |
153 <param name="col_names" value="pos,peptide,logscore,affinity,Bind_Level,Protein,Allele"/> | |
154 </repeat> | |
155 <param name="sqlquery" value="select iedb.ID,iedb.peptide,iedb.start,iedb.end,iedb.percentile_rank,mhc_summary.logscore,mhc_summary.affinity,mhc_summary.Bind_Level from iedb left outer join mhc_summary on iedb.peptide = mhc_summary.peptide order by affinity,Bind_Level"/> | |
156 <output name="output" file="query_results.tsv"/> | |
157 </test> | |
158 | |
159 </tests> | |
160 <help><![CDATA[ | |
161 ============= | |
162 Query Tabular | |
163 ============= | |
164 | |
165 **Inputs** | |
166 | |
167 Loads tabular datasets into a SQLite_ data base. | |
168 | |
169 **Outputs** | |
170 | |
171 The results of a SQL query are output to the history as a tabular file. | |
172 | |
173 The SQLite_ data base can also be saved and output as a dataset in the history. | |
174 | |
175 | |
176 For help in using SQLite_ see: http://www.sqlite.org/docs.html | |
177 | |
178 **NOTE:** input values that SQLite should treat as dates must be in the format *YYYY-MM-DD*, for example: 2015-09-30 | |
179 | |
180 See: http://www.sqlite.org/lang_datefunc.html | |
181 | |
182 **Example** | |
183 | |
184 Given 2 tabular datasets: *customers* and *sales* | |
185 | |
186 Dataset *customers* | |
187 | |
188 Table name: "customers" | |
189 | |
190 Column names: "CustomerID,FirstName,LastName,Email,DOB,Phone" | |
191 | |
192 =========== ========== ========== ===================== ========== ============ | |
193 #CustomerID FirstName LastName Email DOB Phone | |
194 =========== ========== ========== ===================== ========== ============ | |
195 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222 | |
196 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545 | |
197 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232 | |
198 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888 | |
199 =========== ========== ========== ===================== ========== ============ | |
200 | |
201 Dataset *sales* | |
202 | |
203 Table name: "sales" | |
204 | |
205 Column names: "CustomerID,Date,SaleAmount" | |
206 | |
207 ============= ============ ============ | |
208 #CustomerID Date SaleAmount | |
209 ============= ============ ============ | |
210 2 2004-05-06 100.22 | |
211 1 2004-05-07 99.95 | |
212 3 2004-05-07 122.95 | |
213 3 2004-05-13 100.00 | |
214 4 2004-05-22 555.55 | |
215 ============= ============ ============ | |
216 | |
217 The query | |
218 | |
219 :: | |
220 | |
221 SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales" | |
222 FROM customers join sales on customers.CustomerID = sales.CustomerID | |
223 GROUP BY customers.CustomerID ORDER BY TotalSales DESC; | |
224 | |
225 Produces this tabular output: | |
226 | |
227 ========== ======== ========== | |
228 #FirstName LastName TotalSales | |
229 ========== ======== ========== | |
230 James Smith 555.55 | |
231 Paula Brown 222.95 | |
232 Steven Goldfish 100.22 | |
233 John Smith 99.95 | |
234 ========== ======== ========== | |
235 | |
236 | |
237 If the optional Table name and Column names inputs are not used, the query would be: | |
238 | |
239 :: | |
240 | |
241 SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales" | |
242 FROM t1 join t2 on t1.c1 = t2.c1 | |
243 GROUP BY t1.c1 ORDER BY TotalSales DESC; | |
244 | |
245 You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5: | |
246 | |
247 Column names: ,FirstName,LastName,,BirthDate | |
248 | |
249 Results in the following data base table | |
250 | |
251 =========== ========== ========== ===================== ========== ============ | |
252 #c1 FirstName LastName c4 BirthDate c6 | |
253 =========== ========== ========== ===================== ========== ============ | |
254 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222 | |
255 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545 | |
256 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232 | |
257 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888 | |
258 =========== ========== ========== ===================== ========== ============ | |
259 | |
260 Regular_expression_ functions are included for: | |
261 | |
262 :: | |
263 | |
264 matching: re_match('pattern',column) | |
265 | |
266 SELECT t1.FirstName, t1.LastName | |
267 FROM t1 | |
268 WHERE re_match('^.*\.(net|org)$',c4) | |
269 | |
270 Results: | |
271 | |
272 =========== ========== | |
273 #FirstName LastName | |
274 =========== ========== | |
275 Steven Goldfish | |
276 Paula Brown | |
277 =========== ========== | |
278 | |
279 | |
280 :: | |
281 | |
282 searching: re_search('pattern',column) | |
283 substituting: re_sub('pattern','replacement',column) | |
284 | |
285 SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB" | |
286 FROM t1 | |
287 WHERE re_search('[hp]er',c4) | |
288 | |
289 Results: | |
290 | |
291 =========== ========== ========== | |
292 #FirstName LastName DOB | |
293 =========== ========== ========== | |
294 Steven Goldfish 04/04/74 | |
295 Paula Brown 24/05/78 | |
296 James Smith 20/10/80 | |
297 =========== ========== ========== | |
298 | |
299 .. _Regular_expression: https://docs.python.org/release/2.7/library/re.html | |
300 .. _SQLite: http://www.sqlite.org/index.html | |
301 | |
302 ]]></help> | |
303 </tool> |