Mercurial > repos > iuc > gemini_query
comparison gemini_query.xml @ 5:cd00221d67cb draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59
author | iuc |
---|---|
date | Fri, 11 Jan 2019 17:47:02 -0500 |
parents | 7ca6716748c2 |
children | da74170c55c7 |
comparison
equal
deleted
inserted
replaced
4:7ca6716748c2 | 5:cd00221d67cb |
---|---|
1 <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.1"> | 1 <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@"> |
2 <description>Querying the GEMINI database</description> | 2 <description>Querying the GEMINI database</description> |
3 <macros> | 3 <macros> |
4 <import>gemini_macros.xml</import> | 4 <import>gemini_macros.xml</import> |
5 <token name="@BINARY@">query</token> | 5 <token name="@BINARY@">query</token> |
6 | |
7 <xml name="sorting"> | |
8 <param name="order_by" type="text" | |
9 label="Sort the output by the following column(s)" | |
10 help="" /> | |
11 <param name="sort_order" type="select" label="Sort order"> | |
12 <option value=" ASC">Ascending</option> | |
13 <option value=" DESC">Descending</option> | |
14 </param> | |
15 </xml> | |
16 <xml name="pheno_strat"> | |
17 <param name="phenotype" type="text" | |
18 label="Phenotype to stratify samples across" | |
19 help="Leave blank to stratify across the default phenotype column" /> | |
20 </xml> | |
21 <xml name="sample_delimiter" token_applied_to="samples"> | |
22 <param argument="--sample-delim" name="sample_delim" type="text" value="," | |
23 label="Delimiter to use in the list of affected @APPLIED_TO@" | |
24 help="" /> | |
25 </xml> | |
26 <xml name="dgidb_query"> | |
27 <param argument="--dgidb" name="dgidb" type="boolean" truevalue="--dgidb" falsevalue="" checked="False" | |
28 label="Request drug-gene interaction info from DGIdb" help="" /> | |
29 </xml> | |
6 </macros> | 30 </macros> |
7 <expand macro="requirements" /> | 31 <expand macro="requirements" /> |
8 <expand macro="stdio" /> | 32 <expand macro="stdio" /> |
9 <expand macro="version_command" /> | 33 <expand macro="version_command" /> |
10 <command> | 34 <command> |
11 <![CDATA[ | 35 <![CDATA[ |
12 gemini @BINARY@ | 36 gemini @BINARY@ |
13 | 37 ${query.oformat.report.header} |
14 --in "${in}" | 38 ${query.oformat.report.dgidb} |
15 | 39 |
16 #set $multiline_sql_expr = $gt_filter | 40 #for $i in $query.filter_by_genotype: |
17 #set $cmdln_param = "--gt-filter" | 41 #set $multiline_sql_expr = str($i.gt_filter) |
18 @MULTILN_SQL_EXPR_TO_CMDLN@ | 42 #set $cmdln_param = "--gt-filter" |
19 | 43 @MULTILN_SQL_EXPR_TO_CMDLN@ |
20 #set $multiline_sql_expr = $sample_filter | 44 #end for |
21 #set $cmdln_param = "--sample-filter" | 45 |
22 @MULTILN_SQL_EXPR_TO_CMDLN@ | 46 #for $i in $query.filter_by_sample: |
23 | 47 $i.family_wise |
24 $show_samples | 48 #if int($i.min_kindreds) > 0: |
25 $show_families | 49 --min-kindreds ${i.min_kindreds} |
26 $family_wise | 50 #end if |
27 $header | 51 ${i.in} |
28 $dgidb | 52 #set $multiline_sql_expr = str($i.sample_filter) |
29 #if $region.strip(): | 53 #set $cmdln_param = "--sample-filter" |
30 --region "${region}" | 54 @MULTILN_SQL_EXPR_TO_CMDLN@ |
55 #end for | |
56 | |
57 #if str($query.oformat.report.format) == 'with_samples': | |
58 #set $sample_delim = str($query.oformat.report.sample_delim) or ',' | |
59 --show-samples --sample-delim '$sample_delim' | |
60 #elif str($query.oformat.report.format) == 'with_samples_flattened': | |
61 --show-samples --format sampledetail | |
62 #elif str($query.oformat.report.format) == 'with_families': | |
63 #set $sample_delim = str($query.oformat.report.sample_delim) or ',' | |
64 --show-families --sample-delim '$sample_delim' | |
65 #elif str($query.oformat.report.format) == 'carrier_summary': | |
66 --carrier-summary-by-phenotype | |
67 #if str($query.oformat.report.phenotype).strip(): | |
68 '${query.oformat.report.phenotype}' | |
69 #else: | |
70 affected | |
71 #end if | |
72 #else: | |
73 --format ${query.oformat.report.format} | |
31 #end if | 74 #end if |
32 #if int($min_kindreds) > 0: | 75 |
33 --min-kindreds $min_kindreds | 76 #if str($query.interface) == 'basic': |
77 ## build the SQL query string from its components | |
78 #if str($query.oformat.report.format) in ('vcf', 'tped'): | |
79 #set $cols = "*" | |
80 #else: | |
81 #set $report = $query.oformat.report.report | |
82 @SET_COLS@ | |
83 #end if | |
84 #set $q = "SELECT %s FROM variants" % $cols | |
85 #set $where_clause_elements = [] | |
86 #if str($query.filter).strip(): | |
87 #silent $where_clause_elements.append(str($query.filter).strip()) | |
88 #end if | |
89 | |
90 #set $regions = $query.regions | |
91 @PARSE_REGION_ELEMENTS@ | |
92 #if $region_elements: | |
93 #silent $where_clause_elements.append(" OR ".join($region_elements)) | |
94 #end if | |
95 #if $where_clause_elements: | |
96 #set $q = $q + " WHERE " + " AND ".join($where_clause_elements) | |
97 #end if | |
98 #if str($query.oformat.report.order_by).strip(): | |
99 #set $q = $q + " ORDER BY " + str($query.oformat.report.order_by).strip() + str($query.oformat.report.sort_order) | |
100 #end if | |
101 #else | |
102 ## The user entered the SQL query string directly. | |
103 #set $q = str($query.q) | |
34 #end if | 104 #end if |
35 ##--format FORMAT Format of output (JSON, TPED or default) # we will take default for the time being | |
36 ## --sample-delim STRING The delimiter to be used with the --show-samples option. | |
37 | 105 |
38 #set $multiline_sql_expr = $q | 106 #set $multiline_sql_expr = $q |
39 #set $cmdln_param = "-q" | 107 #set $cmdln_param = "-q" |
40 @MULTILN_SQL_EXPR_TO_CMDLN@ | 108 @MULTILN_SQL_EXPR_TO_CMDLN@ |
41 | 109 |
42 "${ infile }" | 110 '$infile' |
43 > "${ outfile }" | 111 > '$outfile' |
44 ]]> | 112 ]]> |
45 </command> | 113 </command> |
46 <!-- | |
47 ##TODO: | |
48 - -carrier-summary-by-phenotype CARRIER_SUMMARY | |
49 Output columns of counts of carriers and non-carriers | |
50 stratified by the given sample phenotype column--> | |
51 <inputs> | 114 <inputs> |
52 <expand macro="infile" /> | 115 <expand macro="infile" /> |
53 | 116 <conditional name="query"> |
54 <param name="q" type="text" area="True" size="5x50" label="The query to be issued to the database" help="(-q)"> | 117 <param name="interface" type="select" |
55 <expand macro="sanitize_query" /> | 118 label="Build GEMINI query using" |
56 </param> | 119 help=""> |
57 <param name="gt_filter" type="text" area="True" size="5x50" label="Restrictions to apply to genotype values" help="(--gt-filer)"> | 120 <option value="basic">Basic variant query constructor</option> |
58 <expand macro="sanitize_query" /> | 121 <option value="advanced">Advanced query constructor</option> |
59 </param> | 122 </param> |
60 <param name="sample_filter" type="text" area="True" size="5x50" label="SQL filter to use to filter the sample table" help="(--sample-filter)"> | 123 <when value="basic"> |
61 <expand macro="sanitize_query" /> | 124 <expand macro="gt_filter" /> |
62 </param> | 125 <expand macro="sample_filter" /> |
63 | 126 <expand macro="region_filter" /> |
64 <param name="show_samples" type="boolean" truevalue="--show-samples" falsevalue="" checked="False" | 127 <expand macro="filter" argument="" /> |
65 label="Add a column of all sample names with a variant to each variant" help="(--show-samples)"/> | 128 <section name="oformat" title="Output format options" expanded="true"> |
66 | 129 <conditional name="report"> |
67 <param name="show_families" type="boolean" truevalue="--show-families" falsevalue="" checked="False" | 130 <param name="format" type="select" |
68 label="Add a column listing all of the families with a variant to each variant" help="(--show-families)"/> | 131 label="Type of report to generate"> |
69 | 132 <option value="default">tabular (GEMINI default)</option> |
70 <param name="family_wise" type="boolean" truevalue="--family-wise" falsevalue="" checked="False" | 133 <option value="with_samples">tabular with affected samples</option> |
71 label="Perform the sample-filter on a family-wise basis" help="(--family-wise)"/> | 134 <option value="with_samples_flattened">tabular with affected samples flattened</option> |
72 | 135 <option value="with_families">tabular with affected families</option> |
73 <expand macro="add_header_column" /> | 136 <option value="carrier_summary">tabular with carrier summary</option> |
74 <expand macro="min_kindreds" /> | 137 <option value="vcf">VCF (simplified)</option> |
75 | 138 <option value="json">JSON</option> |
76 <param name="dgidb" type="boolean" truevalue="--dgidb" falsevalue="" checked="False" | 139 <option value="tped">TPED</option> |
77 label="Request drug-gene interaction info from DGIdb" help="(--dgidb)"/> | 140 </param> |
78 | 141 <when value="default"> |
79 <param name="in" type="select" label="A variant must be in either all, none or any samples passing the sample-query filter" help="(--in)"> | 142 <expand macro="add_header_column" /> |
80 <option value="all">Return a variant if all samples matching the query have the variant. (all)</option> | 143 <expand macro="column_filter" |
81 <option value="none">Return a variant if the variant does not appear in any of the matching samples. (none)</option> | 144 minimalset="chrom, start, end, ref, alt, gene, impact" |
82 <option value="any">Return all of the variant which are in all of the matching samples and not in any of the non-matching samples. (any)</option> | 145 help=""/> |
83 <option value="only">Return a variant if the variant is only in the matching samples and not in any of the non-matching samples. (only)</option> | 146 <expand macro="dgidb_query" /> |
84 </param> | 147 <expand macro="sorting" /> |
85 | 148 </when> |
86 <param name="region" type="text" value="" label="Restrict query to this region" help="e.g. chr1:10-20 (--region)"/> | 149 <when value="with_samples"> |
87 | 150 <expand macro="add_header_column" /> |
88 | 151 <expand macro="sample_delimiter" /> |
152 <expand macro="column_filter" | |
153 minimalset="chrom, start, end, ref, alt, gene, impact" | |
154 help=""/> | |
155 <expand macro="dgidb_query" /> | |
156 <expand macro="sorting" /> | |
157 </when> | |
158 <when value="with_samples_flattened"> | |
159 <expand macro="add_header_column" /> | |
160 <expand macro="column_filter" | |
161 minimalset="chrom, start, end, ref, alt, gene, impact" | |
162 help=""/> | |
163 <param name="dgidb" type="hidden" value="" /> | |
164 <expand macro="sorting" /> | |
165 </when> | |
166 <when value="with_families"> | |
167 <expand macro="add_header_column" /> | |
168 <expand macro="sample_delimiter" applied_to="families"/> | |
169 <expand macro="column_filter" | |
170 minimalset="chrom, start, end, ref, alt, gene, impact" | |
171 help=""/> | |
172 <expand macro="dgidb_query" /> | |
173 <expand macro="sorting" /> | |
174 </when> | |
175 <when value="carrier_summary"> | |
176 <expand macro="add_header_column" /> | |
177 <expand macro="pheno_strat" /> | |
178 <expand macro="column_filter" | |
179 minimalset="chrom, start, end, ref, alt, gene, impact" | |
180 help=""/> | |
181 <expand macro="dgidb_query" /> | |
182 <expand macro="sorting" /> | |
183 </when> | |
184 <when value="vcf"> | |
185 <expand macro="add_header_column" /> | |
186 <param name="order_by" type="hidden" value="" /> | |
187 <param name="dgidb" type="hidden" value="" /> | |
188 </when> | |
189 <when value="json"> | |
190 <param name="header" type="hidden" value="" /> | |
191 <expand macro="column_filter" | |
192 minimalset="chrom, start, end, ref, alt, gene, impact" | |
193 help=""/> | |
194 <param name="dgidb" type="hidden" value="" /> | |
195 <expand macro="sorting" /> | |
196 </when> | |
197 <when value="tped"> | |
198 <param name="header" type="hidden" value="" /> | |
199 <param name="dgidb" type="hidden" value="" /> | |
200 <expand macro="sorting" /> | |
201 </when> | |
202 </conditional> | |
203 </section> | |
204 </when> | |
205 <when value="advanced"> | |
206 <param argument="-q" name="q" type="text" area="True" size="5x50" | |
207 label="The query to be issued to the database" | |
208 help="Formulate your query using SQL syntax."> | |
209 <expand macro="sanitize_query" /> | |
210 <validator type="expression" message="Query cannot be empty">value.strip()</validator> | |
211 </param> | |
212 <expand macro="gt_filter" /> | |
213 <expand macro="sample_filter" /> | |
214 <section name="oformat" title="Output format options" expanded="true"> | |
215 <conditional name="report"> | |
216 <param name="format" type="select" | |
217 label="Type of report to generate"> | |
218 <option value="default">tabular (GEMINI default)</option> | |
219 <option value="with_samples">tabular with affected samples</option> | |
220 <option value="with_samples_flattened">tabular with affected samples flattened</option> | |
221 <option value="with_families">tabular with affected families</option> | |
222 <option value="carrier_summary">tabular with carrier summary</option> | |
223 <option value="vcf">VCF (simplified)</option> | |
224 <option value="json">JSON</option> | |
225 <option value="tped">TPED</option> | |
226 </param> | |
227 <when value="default"> | |
228 <expand macro="add_header_column" /> | |
229 <expand macro="dgidb_query" /> | |
230 </when> | |
231 <when value="with_samples"> | |
232 <expand macro="add_header_column" /> | |
233 <expand macro="sample_delimiter" /> | |
234 <expand macro="dgidb_query" /> | |
235 </when> | |
236 <when value="with_samples_flattened"> | |
237 <expand macro="add_header_column" /> | |
238 <param name="dgidb" type="hidden" value="" /> | |
239 </when> | |
240 <when value="with_families"> | |
241 <expand macro="add_header_column" /> | |
242 <expand macro="sample_delimiter" /> | |
243 <expand macro="dgidb_query" /> | |
244 </when> | |
245 <when value="carrier_summary"> | |
246 <expand macro="pheno_strat" /> | |
247 <expand macro="add_header_column" /> | |
248 <expand macro="dgidb_query" /> | |
249 </when> | |
250 <when value="vcf"> | |
251 <expand macro="add_header_column" /> | |
252 <param name="dgidb" type="hidden" value="" /> | |
253 </when> | |
254 <when value="json"> | |
255 <param name="header" type="hidden" value="" /> | |
256 <param name="dgidb" type="hidden" value="" /> | |
257 </when> | |
258 <when value="tped"> | |
259 <param name="header" type="hidden" value="" /> | |
260 <param name="dgidb" type="hidden" value="" /> | |
261 </when> | |
262 </conditional> | |
263 </section> | |
264 </when> | |
265 </conditional> | |
89 </inputs> | 266 </inputs> |
90 <outputs> | 267 <outputs> |
91 <data name="outfile" format="tabular" /> | 268 <data name="outfile" format="tabular"> |
269 <change_format> | |
270 <when input="query.oformat.report.format" value="json" format="json" /> | |
271 <when input="query.oformat.report.format" value="vcf" format="vcf" /> | |
272 </change_format> | |
273 </data> | |
92 </outputs> | 274 </outputs> |
93 <tests> | 275 <tests> |
94 <test> | 276 <test> |
95 <param name="infile" value="gemini_load_result1.db" ftype="gemini.sqlite" /> | 277 <param name="infile" value="gemini_load_result1.db" ftype="gemini.sqlite" /> |
96 <param name="q" value="select chrom,start from variants limit 10" /> | 278 <conditional name="query"> |
97 <param name="header" value="True" /> | 279 <param name="interface" value="advanced" /> |
280 <param name="q" value="select chrom,start from variants limit 10" /> | |
281 </conditional> | |
98 <output name="outfile"> | 282 <output name="outfile"> |
99 <assert_contents> | 283 <assert_contents> |
100 <has_line_matching expression="chrom	start" /> | 284 <has_line_matching expression="chrom	start" /> |
101 </assert_contents> | 285 </assert_contents> |
102 </output> | 286 </output> |
104 </tests> | 288 </tests> |
105 <help> | 289 <help> |
106 <![CDATA[ | 290 <![CDATA[ |
107 **What it does** | 291 **What it does** |
108 | 292 |
109 The real power in the GEMINI framework lies in the fact that all of your genetic variants have been stored in a convenient database in the context of a wealth of genome annotations that facilitate variant interpretation. | 293 The real power in the GEMINI framework lies in the fact that all of your |
110 The expressive power of SQL allows one to pose intricate questions of one’s variation data. This tool offers you an easy way to query your variants! | 294 genetic variants have been stored in a convenient database in the context of a |
111 | 295 wealth of genome annotations that facilitate variant interpretation. |
112 http://gemini.readthedocs.org/en/latest/content/querying.html | 296 The expressive power of SQL allows one to pose intricate questions of one’s |
297 variation data. This tool offers you a flexible, yet relatively easy way | |
298 to query your variants! | |
299 | |
300 ----- | |
301 | |
302 *Building your variant query with the Basic variant query constructor* | |
303 | |
304 This mode tries to break down the complexity of formulating GEMINI queries | |
305 into more easily digestible parts. In this mode, the tool also prevents you | |
306 from combining options that are incompatible or not meaningful. | |
307 | |
308 *Genotype filters* | |
309 | |
310 These are discussed `here | |
311 <https://gemini.readthedocs.io/en/latest/content/querying.html#gt-filter-filtering-on-genotypes>`__ | |
312 in the GEMINI documentation. | |
313 | |
314 The tool supports regular genotype filters like:: | |
315 | |
316 gt.sample1 == HET and gt_depths.sample1 >= 15 | |
317 | |
318 , which would keep only variants for which sample1 is a heterozygous carrier | |
319 and for which the genomic position in sample1 is covered by at least 15 sequencing | |
320 reads, as well as GEMINI wildcard filters of the general form | |
321 *(COLUMN).(SAMPLE_FILTER).(RULE).(RULE_ENFORCEMENT)* like:: | |
322 | |
323 (gt_types).(phenotype==2).(!=HOM_REF).(all) | |
324 | |
325 , which keeps only variants for which all phenotypically affected samples have a genotype other than homozygous reference. | |
326 | |
327 *Sample filters* | |
328 | |
329 Sample filters have the same format as the second component of the genotype | |
330 wildcard filters above, so:: | |
331 | |
332 phenotype == 2 | |
333 | |
334 would filter for phenotypically affected samples. In this case, however, the | |
335 filter determines from which samples variants should be reported, i.e., here, | |
336 only variants found in phenotypically affected samples are reported. You can | |
337 use the ``--in`` filter to adjust the exact meaning of the sample filter. | |
338 | |
339 *Region filters* | |
340 | |
341 They let you restrict your analysis to parts of the genome, which can be useful | |
342 if you have prior knowledge of the approximate location of a variant of | |
343 interest. | |
344 | |
345 If you specify more than one region filter, they get combined with a logical | |
346 *OR*, meaning variants and genes falling in *any* of the regions are reported. | |
347 | |
348 *Additional constraints on variants* | |
349 | |
350 These get translated directly into the WHERE clause of an SQL query and, thus, | |
351 have to be expressed in valid SQL syntax. As an example you could use:: | |
352 | |
353 is_exonic = 1 and impact_severity != 'LOW' | |
354 | |
355 to indicate that you are only interested in exonic variants that are not of | |
356 *LOW* impact severity, *i.e.*, not silent mutations. | |
357 | |
358 Note that in SQL syntax tests for equality use a single ``=``, while genotype | |
359 filters (discussed above) are following Python syntax and use ``==`` for the | |
360 same purpose. Also note that non-numerical values need to be enclosed in | |
361 single-quotes, *e.g.* ``'LOW'``, but numerical values must *NOT* be. | |
362 | |
363 ----- | |
364 | |
365 *Building your query with the Advanced query constructor* | |
366 | |
367 For the sake of simplicity, the basic mode of the tool limits your queries to | |
368 the variants table of the underlying database. While this still allows many | |
369 useful queries to be formulated, it prevents you from joining information from | |
370 other tables (in particular, the gene_detailed table) or from querying a different | |
371 table directly. | |
372 | |
373 In advanced mode, you take responsibility for formulating the complete SQL | |
374 query in correct syntax, which allows you to do anything you could do with the | |
375 command line tool. Beyond querying other tables, this includes changing output | |
376 column names, deriving simple statistics on columns using the SQL Min, Max, | |
377 Count, Avg and Sum functions, and more. | |
378 | |
379 The price you pay for this extra flexibility is that you will have to make sure | |
380 that any other tool options you set are compatible with the result of your | |
381 particular query. For example, most output formats except the tabular default | |
382 output of GEMINI are incompatible with non-standard queries. Choosing | |
383 non-compatible options can result in them getting ignored silently, but also | |
384 in tool errors, or in problems with downstream tools. | |
385 | |
386 The chapter `Querying the GEMINI database | |
387 <http://gemini.readthedocs.org/en/latest/content/querying.html>`__ of the | |
388 GEMINI documentation can get you started with formulating your own queries. | |
389 | |
390 Note that genotype filters and sample filters cannot be expressed as genuine | |
391 SQL queries, so even the Advanced query constructor offers them. Region | |
392 filters and sort order of rows and columns on the other hand can be controlled | |
393 through SQL queries, like in this example:: | |
394 | |
395 SELECT gene, chrom, start, end, ref, alt FROM variants WHERE chrom = 'chr1' | |
396 AND start >= 10000000 and end <= 20000000 and is_lof = 1 ORDER BY chrom, | |
397 start | |
398 | |
399 , which would report all loss-of-function variants between 10,000,000 and | |
400 20,000,000 on chr1 and report the selected columns sorted on chromosome, then | |
401 position. | |
402 | |
113 ]]> | 403 ]]> |
114 </help> | 404 </help> |
115 <expand macro="citations"/> | 405 <expand macro="citations"/> |
116 </tool> | 406 </tool> |