comparison VCFToolFilter/vcfToolsFilter.xml @ 24:21d878747ac6 draft default tip

Uploaded
author dereeper
date Mon, 23 Mar 2015 05:53:20 -0400
parents 50bd37c444ac
children
comparison
equal deleted inserted replaced
23:a1ab979f4551 24:21d878747ac6
1 <tool id="sniplay_vcftoolsfilter" name="VCF tools filter" version="1.1.1">
2
3 <!-- [REQUIRED] Tool description displayed after the tool name -->
4 <description> Filter VCF using VCFtools</description>
5
6 <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work: the perl binary and the VCFtools package (version 0.1.12b) -->
7 <requirements>
8 <requirement type="binary">perl</requirement>
9 <requirement type="package" version="0.1.12b">VCFtools</requirement>
10 </requirements>
11
12 <!-- [OPTIONAL] Command to be executed to get the tool's version string -->
13 <version_command>
14 <!--
15 Commented-out placeholder, so this element currently contains no command: tool_binary -v
16 -->
17 </version_command>
18
19 <!-- [REQUIRED] The command to execute. The wrapper script has a .sh extension, so it is launched with bash (NOTE(review): confirm vcfToolsFilter.sh is a shell script). Every substituted value is single-quoted so that a value containing spaces, e.g. the output basename, stays one positional argument. The three trailing arguments are the sample list, the chromosome list and the plink map output; the lists fall back to 'None' when empty and the map falls back to '' when the export type is not plink. -->
20 <command interpreter="bash">
21 vcfToolsFilter.sh '$filein' '$fileout_label' '$fileout' '$filelog' '$export' '$frequency' '$max_freq' '$allow_missing' '$nb_alleles_min' '$nb_alleles_max' '$type_p' '$bound_start' '$bound_end'
22 #if str( $samples ) == "":
23 'None'
24 #else
25 '$samples'
26 #end if
27 #if str( $chromosomes ) == "":
28 'None'
29 #else
30 '$chromosomes'
31 #end if
32 #if str( $export ) == "plink":
33 '$fileout_map'
34 #else
35 ''
36 #end if
37 </command>
38
39 <!-- [REQUIRED] Input files and tool parameters. The two MAF thresholds and the missing data proportion are proportions, so all three are constrained to the range 0 to 1. -->
40 <inputs>
41 <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />
42 <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>
43 <param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">
44 <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
45 </param>
46 <param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. Comma separated list">
47 <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
48 </param>
49 <param name="export" type="select" label="Output format" >
50 <option value="VCF" selected="true">VCF</option>
51 <option value="freq">freq</option>
52 <option value="plink">plink</option>
53 </param>
54 <param name="frequency" type="float" value="0.001" min="0" max="1" label="Minimum MAF." help="Minimum frequency." />
55 <param name="max_freq" type="float" value="0.5" min="0" max="1" label="Maximum MAF." help="Maximum frequency." />
56 <param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />
57 <param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />
58 <param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />
59 <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >
60 <option value="ALL" selected="true">All</option>
61 <option value="SNP">SNP</option>
62 <option value="INDEL">Indel</option>
63 </param>
64 <param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />
65 <param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." />
66 </inputs>
67
68 <!-- [REQUIRED] Output files: the main result (its label extension is ped, frq or vcf and its format is txt, tabular or vcf depending on the export type), the plink map file (only kept when the export type is plink) and the log file. NOTE(review): the literal spaces between the three inline #if directives in the first label may end up in the displayed dataset name; verify. -->
69 <outputs>
70 <data name="fileout" format="vcf" label="${fileout_label}.#if str($export)=='plink' then 'ped' else '' # #if str($export)=='freq' then 'frq' else '' # #if str($export)=='VCF' then 'vcf' else '' #" >
71 <change_format>
72 <when input="export" value="freq" format="tabular" />
73 <when input="export" value="plink" format="txt" />
74 </change_format>
75 </data>
76 <data name="fileout_map" format="txt" label="${fileout_label}.map">
77 <filter>(export == 'plink')</filter>
78 </data>
79 <data name="filelog" format="txt" label="${fileout_label}.log" />
80 </outputs>
81
82 <!-- [STRONGLY RECOMMENDED] Exit code rules -->
83 <stdio>
84 <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR. The rule below marks the job as failed for any exit code of 1 or above. -->
85 <exit_code range="1:" level="fatal" />
86 </stdio>
87
88 <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
89 <tests>
90 <!-- [HELP] Test files have to be in the ~/test-data directory. This test filters sample.vcf on chr1 with VCF export and compares the two outputs with result.vcf and result.log. -->
91 <test>
92 <param name="filein" value="sample.vcf" />
93 <param name="chromosomes" value="chr1" />
94 <param name="export" value="VCF" />
95 <param name="frequency" value="0.001" />
96 <param name="max_freq" value="0.5" />
97 <param name="allow_missing" value="0" />
98 <param name="nb_alleles_min" value="2" />
99 <param name="nb_alleles_max" value="4" />
100 <param name="type_p" value="ALL" />
101 <param name="bound_start" value="1" />
102 <param name="bound_end" value="100000000" />
103 <output name="fileout" file="result.vcf" />
104 <output name="filelog" file="result.log" />
105 </test>
106 </tests>
107
108 <!-- [OPTIONAL] Help displayed in Galaxy -->
109 <help>
110
111 .. class:: infomark
112
113 **Authors**
114
115 ---------------------------------------------------
116
117 .. class:: infomark
118
119 **Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep.
120
121 ---------------------------------------------------
122
123 ================
124 VCF tools filter
125 ================
126
127 -----------
128 Description
129 -----------
130
131 Filter VCF file
132
133 -----------------
134 Workflow position
135 -----------------
136
137 **Upstream tools**
138
139 =========== ========================== =======
140 Name output file(s) format
141 =========== ========================== =======
142 =========== ========================== =======
143
144
145 **Downstream tools**
146
147 =========== ========================== =======
148 Name output file(s) format
149 =========== ========================== =======
150 =========== ========================== =======
151
152
153 ----------
154 Input file
155 ----------
156
157 VCF file
158 VCF file with all SNPs
159
160 ----------
161 Parameters
162 ----------
163
164 Output file basename
165 Prefix used to name the output files (result, map and log)
166
167 Samples
168 Samples to be analyzed. Comma separated list
169
170 Chromosomes
171 Chromosomes to be analyzed. Comma separated list
172
173 Output format
174 VCF/freq/plink
175
176 Minimum MAF
177 Minimum frequency
178
179 Maximum MAF
180 Maximum frequency
181
182 Missing data proportion
183 Allowed missing data proportion per site. Must be between 0 and 1.
184
185 Number of alleles
186 Accepted number of alleles min and max.
187
188 Polymorphisms
189 Type of polymorphisms to keep (ALL/SNP/INDEL).
190 Bounds
191 Lower bound and upper bound for a range of sites to be processed.
192
193 ------------
194 Output files
195 ------------
196
197 VCF file
198 Filtered VCF file
199
200 Log file
201
202 ---------------------------------------------------
203
204 ---------------
205 Working example
206 ---------------
207
208 Input files
209 ===========
210
211 VCF file
212 ---------
213
214 ::
215
216 #fileformat=VCFv4.1
217 #FILTER=&lt;ID=LowQual,Description="Low quality">
218 #FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
219 [...]
220 CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
221 chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0
222
223
224 Parameters
225 ==========
226
227 Output file basename -> filtered_chr1
228
229 Chromosomes -> chr1
230
231 Output format -> VCF
232
233 Minimum MAF -> 0.001
234
235 Maximum MAF -> 0.5
236
237 Missing data proportion -> 1
238
239 Number of alleles min -> 2
240
241 Number of alleles max -> 4
242
243 Polymorphisms -> All
244
245 Lower bound -> 1
246
247 Upper bound -> 100000000
248
249
250 Output files
251 ============
252
253 filtered_chr1.vcf
254 -----------------
255
256 ::
257
258 #fileformat=VCFv4.1
259 #FILTER=&lt;ID=LowQual,Description="Low quality"&gt;
260 #FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
261 [...]
262 CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
263 chr1 5059 . C G 146.84 . AC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,8:8:18:175,18,0
264
265
266 </help>
267
268 </tool>