7
|
1 <tool id="sniplay_vcftoolsfilter" name="VCF tools filter" version="1.1.1">
|
|
2
|
|
3 <!-- [REQUIRED] Tool description displayed after the tool name -->
|
|
<!-- Short summary shown next to the tool name in the Galaxy tool panel -->
<description>Filter a VCF file with VCFtools</description>
|
|
5
|
|
6 <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->
|
|
<requirements>
    <!-- Interpreter named by the command element (interpreter="perl") -->
    <requirement type="binary">perl</requirement>
    <!-- VCFtools release this wrapper is pinned to -->
    <requirement version="0.1.12b" type="package">VCFtools</requirement>
</requirements>
|
|
11
|
|
12 <!-- [OPTIONAL] Command to be executed to get the tool's version string -->
|
|
<version_command>
    <!-- No version command is configured: only the template placeholder "tool_binary -v" is present, and it is commented out -->
</version_command>
|
|
18
|
|
19 <!-- [REQUIRED] The command to execute -->
|
|
<!-- Builds the positional argument list for vcfToolsFilter.sh.
     The output basename is free text typed by the user, so it is single-quoted to reach
     the script as exactly one argument even when it contains blanks or shell metacharacters.
     Trailing arguments: the sample list (or 'None'), the chromosome list (or 'None'),
     and the .map output path for plink export (or an empty string otherwise). -->
<command interpreter="perl">
    vcfToolsFilter.sh $filein '$fileout_label' $fileout $filelog $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end
    #if str( $samples ) == "":
        'None'
    #else
        $samples
    #end if
    #if str( $chromosomes ) == "":
        'None'
    #else
        $chromosomes
    #end if
    #if str( $export ) == "plink":
        $fileout_map
    #else
        ''
    #end if
</command>
|
|
38
|
|
39 <!-- [REQUIRED] Input files and tool parameters -->
|
|
<inputs>
    <!-- VCF dataset to filter -->
    <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />

    <!-- Basename reused in every output label; it must not be empty because it is also
         passed to the script as a positional argument -->
    <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename">
        <validator type="empty_field" message="Please enter an output file basename." />
    </param>

    <!-- Optional comma separated lists; when left empty the command passes 'None' instead -->
    <param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">
        <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
    </param>
    <param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. Comma separated list">
        <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
    </param>

    <!-- Export format; it also drives the datatype and label of the main output -->
    <param name="export" type="select" label="Output format">
        <option value="VCF" selected="true">VCF</option>
        <option value="freq">freq</option>
        <option value="plink">plink</option>
    </param>

    <!-- Frequencies and proportions are range-checked to [0, 1], as allow_missing already was -->
    <param name="frequency" type="float" value="0.001" min="0" max="1" label="Minimum MAF." help="Minimum frequency." />
    <param name="max_freq" type="float" value="0.5" min="0" max="1" label="Maximum MAF." help="Maximum frequency." />
    <param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />

    <!-- Accepted allele count range per site -->
    <param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />
    <param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />

    <!-- Kind of variants to keep -->
    <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep.">
        <option value="ALL" selected="true">All</option>
        <option value="SNP">SNP</option>
        <option value="INDEL">Indel</option>
    </param>

    <!-- Range of sites to process -->
    <param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />
    <param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." />
</inputs>
|
|
67
|
|
68 <!-- [REQUIRED] Output files -->
|
|
<outputs>
    <!-- Main result. The label extension follows the export choice (ped / frq / vcf) and
         change_format switches the datatype away from vcf for the freq and plink exports.
         NOTE(review): the blanks separating the three inline #if directives look like they end up
         inside the dataset name; confirm before changing, since removing them would put two hash
         characters side by side, which Cheetah treats as the start of a comment. -->
    <data format="vcf" name="fileout" label="${fileout_label}.#if str($export)=='plink' then 'ped' else '' # #if str($export)=='freq' then 'frq' else '' # #if str($export)=='VCF' then 'vcf' else '' #">
        <change_format>
            <when input="export" value="freq" format="tabular"/>
            <when input="export" value="plink" format="txt"/>
        </change_format>
    </data>

    <!-- Companion .map dataset, created only when the plink export is selected -->
    <data format="txt" name="fileout_map" label="${fileout_label}.map">
        <filter>(export == 'plink')</filter>
    </data>

    <!-- Log dataset, always created -->
    <data format="txt" name="filelog" label="${fileout_label}.log"/>
</outputs>
|
|
81
|
|
82 <!-- [STRONGLY RECOMMENDED] Exit code rules -->
|
|
<stdio>
    <!-- [HELP] Without any exit code rule, the tool stops as soon as something is written to STDERR -->
    <!-- Any exit code of 1 or above is treated as a fatal error -->
    <exit_code level="fatal" range="1:"/>
</stdio>
|
|
87
|
|
88 <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
|
|
<tests>
    <!-- [HELP] Test files must be located in the ~/test-data directory -->
    <!-- VCF export restricted to chr1, compared against the reference VCF and log files -->
    <test>
        <param value="sample.vcf" name="filein"/>
        <param value="chr1" name="chromosomes"/>
        <param value="VCF" name="export"/>
        <param value="0.001" name="frequency"/>
        <param value="0.5" name="max_freq"/>
        <param value="0" name="allow_missing"/>
        <param value="2" name="nb_alleles_min"/>
        <param value="4" name="nb_alleles_max"/>
        <param value="ALL" name="type_p"/>
        <param value="1" name="bound_start"/>
        <param value="100000000" name="bound_end"/>
        <output file="result.vcf" name="fileout"/>
        <output file="result.log" name="filelog"/>
    </test>
</tests>
|
|
107
|
|
108 <!-- [OPTIONAL] Help displayed in Galaxy -->
|
|
109 <help>
|
|
110
|
|
111 .. class:: infomark
|
|
112
|
|
113 **Authors**
|
|
114
|
|
115 ---------------------------------------------------
|
|
116
|
|
117 .. class:: infomark
|
|
118
|
|
119 **Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep.
|
|
120
|
|
121 ---------------------------------------------------
|
|
122
|
|
123 ================
|
|
124 VCF tools filter
|
|
125 ================
|
|
126
|
|
127 -----------
|
|
128 Description
|
|
129 -----------
|
|
130
|
|
131 Filter VCF file
|
|
132
|
|
133 -----------------
|
|
134 Workflow position
|
|
135 -----------------
|
|
136
|
|
137 **Upstream tools**
|
|
138
|
|
139 =========== ========================== =======
|
|
140 Name output file(s) format
|
|
141 =========== ========================== =======
|
|
142 =========== ========================== =======
|
|
143
|
|
144
|
|
145 **Downstream tools**
|
|
146
|
|
147 =========== ========================== =======
|
|
148 Name output file(s) format
|
|
149 =========== ========================== =======
|
|
150 =========== ========================== =======
|
|
151
|
|
152
|
|
153 ----------
|
|
154 Input file
|
|
155 ----------
|
|
156
|
|
157 VCF file
|
|
158 VCF file with all SNPs
|
|
159
|
|
160 ----------
|
|
161 Parameters
|
|
162 ----------
|
|
163
|
|
164 Output file basename
|
|
165 Prefix shared by the names of all output files
|
|
166
|
|
167 Samples
|
|
168 Samples to be analyzed. Comma separated list
|
|
169
|
|
170 Chromosomes
|
|
171 Chromosomes to be analyzed. Comma separated list
|
|
172
|
|
173 Output format
|
|
174 VCF/freq/plink
|
|
175
|
|
176 Minimum MAF
|
|
177 Minimum frequency
|
|
178
|
|
179 Maximum MAF
|
|
180 Maximum frequency
|
|
181
|
|
182 Missing data proportion
|
|
183 Allowed missing data proportion per site. Must be between 0 and 1.
|
|
184
|
|
185 Number of alleles
|
|
186 Accepted number of alleles min and max.
|
|
187
|
|
188 Polymorphisms
|
|
189 Type of polymorphisms to keep (ALL/SNP/INDEL).
|
|
190 Bounds
|
|
191 Lower bound and upper bound for a range of sites to be processed.
|
|
192
|
|
193 ------------
|
|
194 Output files
|
|
195 ------------
|
|
196
|
|
197 VCF file
|
|
198 Filtered VCF file
|
|
199
|
|
200 Log file
|
|
201
|
|
202 ---------------------------------------------------
|
|
203
|
|
204 ---------------
|
|
205 Working example
|
|
206 ---------------
|
|
207
|
|
208 Input files
|
|
209 ===========
|
|
210
|
|
211 VCF file
|
|
212 ---------
|
|
213
|
|
214 ::
|
|
215
|
|
216 #fileformat=VCFv4.1
|
|
217 #FILTER=<ID=LowQual,Description="Low quality">
|
|
218 #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
|
|
219 [...]
|
|
220 CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
|
|
221 chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0
|
|
222
|
|
223
|
|
224 Parameters
|
|
225 ==========
|
|
226
|
|
227 Output file basename -> filtered_chr1
|
|
228
|
|
229 Chromosomes -> chr1
|
|
230
|
|
231 Output format -> VCF
|
|
232
|
|
233 Minimum MAF -> 0.001
|
|
234
|
|
235 Maximum MAF -> 0.5
|
|
236
|
|
237 Missing data proportion -> 1
|
|
238
|
|
239 Number of alleles min -> 2
|
|
240
|
|
241 Number of alleles max -> 4
|
|
242
|
|
243 Polymorphisms -> All
|
|
244
|
|
245 Lower bound -> 1
|
|
246
|
|
247 Upper bound -> 100000000
|
|
248
|
|
249
|
|
250 Output files
|
|
251 ============
|
|
252
|
|
253 filtered_chr1.vcf
|
|
254 -----------------
|
|
255
|
|
256 ::
|
|
257
|
|
258 #fileformat=VCFv4.1
|
|
259 #FILTER=<ID=LowQual,Description="Low quality">
|
|
260 #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
|
|
261 [...]
|
|
262 CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
|
|
263 chr1 5059 . C G 146.84 . AC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,8:8:18:175,18,0
|
|
264
|
|
265
|
|
266 </help>
|
|
267
|
|
268 </tool>
|