comparison snpSift_filter.xml @ 7:2e497a770bca draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift commit 200c7d062259a94a28c6a224586f59d1a5e08309"
author iuc
date Sun, 29 Nov 2020 21:14:56 +0000
parents 09d6806c609e
children
comparison
equal deleted inserted replaced
6:2b3e65a4252f 7:2e497a770bca
1 <tool id="snpSift_filter" name="SnpSift Filter" version="@WRAPPER_VERSION@.galaxy0"> 1 <tool id="snpSift_filter" name="SnpSift Filter" version="@WRAPPER_VERSION@.galaxy1">
2 <description>Filter variants using arbitrary expressions</description> 2 <description>Filter variants using arbitrary expressions</description>
3 <macros> 3 <macros>
4 <import>snpSift_macros.xml</import> 4 <import>snpSift_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <expand macro="stdio" /> 7 <expand macro="stdio" />
8 <expand macro="version_command" /> 8 <expand macro="version_command" />
9 <command><![CDATA[ 9 <command><![CDATA[
10 SnpSift -Xmx6G filter -f '$input' -e '$exprFile' $inverse 10 SnpSift -Xmx6G filter -f '$input' -e '$exprFile' $inverse
11 #if $filtering.mode == 'field': 11 #if str($filter_expression.type) == 'complex':
12 #if $filtering.replace.pass: 12 #for $set_file in $filter_expression.set:
13 --pass 13 --set $set_file
14 #if $filtering.replace.filterId.strip(): 14 #end for
15 --filterId '$filtering.replace.filterId' 15 #end if
16 #end if 16 #if $filtering.mode == 'set_filter':
17 #end if 17 --filterID $filtering.filter_id
18 #if $filtering.addFilter.strip(): 18 #elif $filtering.mode == 'remove_filter':
19 --addFilter '$filtering.addFilter' 19 --rmFilter $filtering.rm_filter
20 #end if 20 #elif $filtering.mode == 'add_filter':
21 #if $filtering.rmFilter.strip(): 21 --addFilter $filtering.add_filter
22 --rmFilter '$filtering.rmFilter'
23 #end if
24 #end if 22 #end if
25 > '$output' 23 > '$output'
26 ]]></command> 24 ]]></command>
27 <configfiles> 25 <configfiles>
28 <configfile name="exprFile"> 26 <configfile name="exprFile">
29 $expr#slurp 27 $filter_expression.expr#slurp
30 </configfile> 28 </configfile>
31 </configfiles> 29 </configfiles>
32 <inputs> 30 <inputs>
33 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> 31 <param name="input" type="data" format="vcf" label="Input variant list in VCF format"/>
34 <param name="expr" type="text" label="Filter criteria" help="Need help? See below a few examples"> 32 <conditional name="filter_expression">
35 <sanitizer sanitize="False"/> 33 <param name="type" type="select"
36 </param> 34 label="Type of filter expression">
37 <param name="inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false" label="Inverse filter" help="Show lines that do not match filter expression" /> 35 <option value="simple">Simple expression</option>
36 <option value="complex">Expression using value set(s)</option>
37 </param>
38 <when value="simple">
39 <param name="expr" type="text" label="Filter criteria"
40 help="Need help? See the tool help below for some examples.">
41 <sanitizer sanitize="False"/>
42 </param>
43 </when>
44 <when value="complex">
45 <param name="expr" type="text" label="Filter criteria"
46 help="Need help? See the tool help below for some examples.">
47 <sanitizer sanitize="False"/>
48 </param>
49 <param name="set" type="data" format="txt" multiple="true" optional="False"
50 label="Set value source"
51 help="Select one or more datasets for construction of value sets. The datasets are supposed to specify one value per line. See also: the help section on Sets below." />
52 </when>
53 </conditional>
54 <param argument="--inverse" type="boolean" truevalue="--inverse" falsevalue="" checked="false" label="Invert filter" help="Select variants that do not match the filter expression." />
38 <conditional name="filtering"> 55 <conditional name="filtering">
39 <param name="mode" type="select" label="Filter mode"> 56 <param name="mode" type="select" label="Filter mode">
40 <option value="entries" selected="true">Retain entries that pass filter, remove other entries</option> 57 <option value="entries" selected="true">Retain selected variants, remove others</option>
41 <option value="field">Change the FILTER field, but retain all entries</option> 58 <option value="set_filter">Add a value to FILTER field of non-selected variants</option>
59 <option value="remove_filter">Remove a value from FILTER field of selected variants</option>
60 <option value="add_filter">Add a value to the FILTER field of selected variants</option>
42 </param> 61 </param>
43 <when value="entries"/> 62 <when value="entries" />
44 <when value="field"> 63 <when value="set_filter">
45 <conditional name="replace"> 64 <param argument="--filterID" name="filter_id" type="text" optional="false"
46 <param name="pass" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Set matching entry FILTER to 'PASS'" 65 label="Value to add to FILTER field of non-selected variants"
47 help="appends an ID tag to non-matching entry FILTER" /> 66 help="The value provided here will be added to the FILTER field of variants that are NOT selected by the expression above. Selected variants, in contrast, will have their FILTER field cleared and set to PASS." />
48 <when value="no"/> 67 </when>
49 <when value="yes"> 68 <when value="remove_filter">
50 <param name="filterId" type="text" value="" label="ID appended to non-matching (##FILTER tag in header and FILTER VCF field)" help="Default ID is 'SnpSift'"/> 69 <param argument="--rmFilter" name="rm_filter" type="text" optional="false"
51 </when> 70 label="Value to remove from FILTER field of selected variants"
52 </conditional> 71 help="The FILTER field of variants that are selected by the expression above will be checked for existence of the specified value. When the value is found it will be removed, but other values will be preserved. If the removed value was the only value in a FILTER field, that field will be set to the '.' (missing) value." />
53 <param name="addFilter" type="text" value="" label="Add a string to FILTER VCF field if 'expression' is true" /> 72 </when>
54 <param name="rmFilter" type="text" value="" label="Remove a string from FILTER VCF field if 'expression' is true (and 'str' is in the field)" /> 73 <when value="add_filter">
74 <param argument="--addFilter" name="add_filter" type="text" optional="false"
75 label="Value to add to FILTER field of selected variants"
76 help="The value provided here will be added to the FILTER field of variants that are selected by the expression above. USE WITH CARE: The tool will NOT add a corresponding FILTER entry to the VCF header of the output, i.e. will produce output that violates the VCF format specification. To avoid issues with downstream tools it may be necessary to add such a header line using standard text processing tools before proceeding." />
55 </when> 77 </when>
56 </conditional> 78 </conditional>
57 </inputs> 79 </inputs>
58
59 <outputs> 80 <outputs>
60 <data name="output" format="vcf" /> 81 <data name="output" format="vcf" />
61 </outputs> 82 </outputs>
62 <tests> 83 <tests>
63 <test> 84 <test>
69 <has_text text="28837706" /> 90 <has_text text="28837706" />
70 <not_has_text text="NT_166464" /> 91 <not_has_text text="NT_166464" />
71 </assert_contents> 92 </assert_contents>
72 </output> 93 </output>
73 </test> 94 </test>
74
75 <test> 95 <test>
76 <param name="input" ftype="vcf" value="test01.vcf"/> 96 <param name="input" ftype="vcf" value="test01.vcf"/>
77 <param name="expr" value="(CHROM = '19')"/> 97 <param name="expr" value="(CHROM = '19')"/>
78 <param name="mode" value="entries"/> 98 <param name="mode" value="entries"/>
79 <output name="output"> 99 <output name="output">
81 <has_text text="3205820" /> 101 <has_text text="3205820" />
82 <not_has_text text="NT_16" /> 102 <not_has_text text="NT_16" />
83 </assert_contents> 103 </assert_contents>
84 </output> 104 </output>
85 </test> 105 </test>
86
87 <test> 106 <test>
88 <param name="input" ftype="vcf" value="test01.vcf"/> 107 <param name="input" ftype="vcf" value="test01.vcf"/>
89 <param name="expr" value="(POS &gt;= 20175) &amp; (POS &lt;= 35549)"/> 108 <param name="expr" value="(POS &gt;= 20175) &amp; (POS &lt;= 35549)"/>
90 <param name="mode" value="entries"/> 109 <param name="mode" value="entries"/>
91 <output name="output"> 110 <output name="output">
96 <not_has_text text="18933" /> 115 <not_has_text text="18933" />
97 <not_has_text text="37567" /> 116 <not_has_text text="37567" />
98 </assert_contents> 117 </assert_contents>
99 </output> 118 </output>
100 </test> 119 </test>
101
102 <test> 120 <test>
103 <param name="input" ftype="vcf" value="test01.vcf"/> 121 <param name="input" ftype="vcf" value="test01.vcf"/>
104 <param name="expr" value="( DP &gt;= 5 )"/> 122 <param name="expr" value="( DP &gt;= 5 )"/>
105 <param name="mode" value="entries"/> 123 <param name="mode" value="entries"/>
106 <output name="output"> 124 <output name="output">
109 <has_text text="DP=6;" /> 127 <has_text text="DP=6;" />
110 <not_has_text text="DP=1;" /> 128 <not_has_text text="DP=1;" />
111 </assert_contents> 129 </assert_contents>
112 </output> 130 </output>
113 </test> 131 </test>
132 <test>
133 <param name="input" ftype="vcf" value="test01.vcf"/>
134 <conditional name="filter_expression">
135 <param name="type" value="complex" />
136 <param name="expr" value="( POS in SET[0] ) | ( POS in SET[1] )" />
137 <param name="set" ftype="txt" value="test_set1.txt,test_set2.txt" />
138 </conditional>
139 <param name="mode" value="entries"/>
140 <output name="output">
141 <assert_contents>
142 <has_text text="NT_166464&#009;7268&#009;" />
143 <has_text text="NT_166464&#009;7283&#009;" />
144 <has_text text="NT_166464&#009;7335&#009;" />
145 <has_text text="NT_166480&#009;12474&#009;" />
146 <has_text text="NT_166480&#009;12483&#009;" />
147 <not_has_text text="NT_166464&#009;7258&#009;" />
148 <not_has_text text="NT_166452&#009;16693&#009;" />
149 </assert_contents>
150 </output>
151 </test>
114 </tests> 152 </tests>
115 <help><![CDATA[ 153 <help><![CDATA[
116 **SnpSift filter** 154 **SnpSift filter**
117 155
118 You can filter a VCF file using arbitrary expressions, for instance "(QUAL > 30) | (exists INDEL) | ( countHet() > 2 )". The actual expressions can be quite complex, so it allows for a lot of flexibility. 156 This tool provides a flexible solution for filtering the variants in a VCF input dataset through the use of arbitrary, possibly rather complex expressions.
119 157
120 Some examples: 158 Some examples:
121 159
122 - *I want just the variants from the second million bases of chr1*:: 160 - *I want just the variants from the second million bases of chr1*::
123 161
125 163
126 - *Filter value is either 'PASS' or it is missing*:: 164 - *Filter value is either 'PASS' or it is missing*::
127 165
128 (FILTER = 'PASS') | ( na FILTER ) 166 (FILTER = 'PASS') | ( na FILTER )
129 167
168 - *Variants that have either a QUAL score above 30, or are indel variants, or for which at least two samples have a heterozygous genotype called*::
169
170 (QUAL > 30) | (exists INDEL) | ( countHet() > 2 )
171
172 - *Variants that are supported by at least 10 reads (as calculated from the DP4 attribute in the INFO field through zero-based index-access to the multiple values)*::
173
174 (DP4[2] + DP4[3] >= 10)
175
176 ----
177
178 Sets:
179
180 The tool can construct sets of values for use in expressions from text files listing one value per line. Variants can then be filtered based on whether a given field in the variant record has a value that's contained in a set. For example, the expression::
181
182 ( ID in SET[2] )
183
184 would filter variants based on whether their ID field value appears in the set parsed from the third dataset used for set construction (the first set can be addressed with index ``[0]``, the second with index ``[1]``, and so on).
185
186 ----
187
188 Genotype-based filtering:
189
190 Genotypes of specific samples can be accessed via zero-based indexing or via sample names.
191
192 - *I want to keep samples where the genotype for the first sample is homozygous variant and the genotype for the second sample is reference*::
193
194 (isHom( GEN[0] ) & isVariant( GEN[0] ) & isRef( GEN[1] ))
195
196 ----
197
198 Filtering based on SnpEff annotations (``ANN`` or ``EFF`` fields):
199
130 - *I want to filter lines with an ANN annotation EFFECT of 'frameshift_variant' ( for vcf files using Sequence Ontology terms )*:: 200 - *I want to filter lines with an ANN annotation EFFECT of 'frameshift_variant' ( for vcf files using Sequence Ontology terms )*::
131 201
132 ( ANN[*].EFFECT has 'frameshift_variant' ) 202 ( ANN[*].EFFECT has 'frameshift_variant' )
133 203
134 **Important** According to the specification, there can be more than one EFFECT separated by & (e.g. 'missense_variant&splice_region_variant', thus using has operator is better than using equality operator (=). For instance 'missense_variant&splice_region_variant' = 'missense_variant' is false, whereas 'missense_variant&splice_region_variant' has 'missense_variant' is true. 204 .. class:: infomark
205
206 According to the specification, there can be more than one EFFECT separated by ``&`` (e.g. ``'missense_variant&splice_region_variant'``), thus using the ``has`` operator is better than using the equality operator (``=``). For instance, ``'missense_variant&splice_region_variant' = 'missense_variant'`` is false, whereas ``'missense_variant&splice_region_variant' has 'missense_variant'`` is true.
135 207
136 - *I want to filter lines with an EFF of 'FRAME_SHIFT' ( for vcf files using Classic Effect names )*:: 208 - *I want to filter lines with an EFF of 'FRAME_SHIFT' ( for vcf files using Classic Effect names )*::
137 209
138 ( EFF[*].EFFECT = 'FRAME_SHIFT' ) 210 ( EFF[*].EFFECT = 'FRAME_SHIFT' )
139 211
140 - *I want to filter out samples with quality less than 30*:: 212
141 213 .. class:: infomark
142 ( QUAL > 30 ) 214
143 215 For information regarding HGVS and Sequence Ontology terms versus classic names:
144 - *...but we also want InDels that have quality 20 or more*:: 216
145 217 - https://pcingola.github.io/SnpEff/se_commandline/ for the options: ``-classic``, ``-hgvs``, and ``-sequenceOntology``
146 (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 ) 218 - https://pcingola.github.io/SnpEff/se_inputoutput/#effect-prediction-details for the table containing the classic name and sequence ontology term for each effect
147
148 - *...or any homozygous variant present in more than 3 samples*::
149
150 (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 )
151
152 - *...or any heterozygous sample with coverage 25 or more*::
153
154 ((countHet() > 0) & (DP >= 25)) | (countHom() > 3) | (( exists INDEL ) & (QUAL >= 20)) | (QUAL >= 30 )
155
156 - *I want to keep samples where the genotype for the first sample is homozygous variant and the genotype for the second sample is reference*::
157
158 (isHom( GEN[0] ) & isVariant( GEN[0] ) & isRef( GEN[1] ))
159
160 **For information regarding HGVS and Sequence Ontology terms versus classic names**:
161
162 - http://snpeff.sourceforge.net/SnpEff_manual.html#cmdline for the options: -classic, -hgvs, and -sequenceOntology
163 - http://snpeff.sourceforge.net/SnpEff_manual.html#input for the table containing the classic name and sequence ontology term for each effect
164 219
165 @EXTERNAL_DOCUMENTATION@ 220 @EXTERNAL_DOCUMENTATION@
166 - http://snpeff.sourceforge.net/SnpSift.html#filter 221 - https://pcingola.github.io/SnpEff/ss_filter/
222
223 The second link in particular has further details and more examples about the tool's expression syntax.
167 ]]></help> 224 ]]></help>
168 <expand macro="citations" /> 225 <expand macro="citations" />
169 </tool> 226 </tool>