comparison varscan_mpileup2snp_from_bam.xml @ 4:796653c6376b draft

Uploaded
author jason-ellul
date Wed, 01 Jun 2016 02:36:11 -0400
parents
children
comparison
equal deleted inserted replaced
3:8c9b1fa6c456 4:796653c6376b
1 <?xml version="1.0" encoding="UTF-8"?>
2 <tool id="varscan_mpileup2snp_from_bam" name="VarScan2 Call SNPs from BAM">
3 <description>VarScan2 SNP/SNV detection; directly reading *.bam file(s) to avoid unnecessary I/O overhead.</description>
4 <requirements>
5 <!--<requirement type="package" version="0.1.19">samtools-parallel-mpileup</requirement>-->
6 <requirement type="package" version="2.3.6">VarScan</requirement>
7 <requirement type="package" version="0.1.19">package_samtools_0_1_19</requirement>
8 </requirements>
9 <command><!-- Cheetah template: pipes "samtools mpileup" over all selected alignments straight into VarScan mpileup2snp, so no intermediate pileup file is stored. XML comments in here are not part of the command text. -->
10 #if $reference_genome_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1
11 echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/all_fasta.loc' is configured properly!" >&amp;2
12 #else
13 <!--
14 #if $mpileup_parallelization.mpileup_parallelization_select == "true"
15 samtools-parallel-mpileup mpileup
16 -t $mpileup_parallelization.samtools_threads
17 #else
18 samtools mpileup
19 #end if
20 -->
21 samtools mpileup
22 -f
23 #if $reference_genome_source.source_select == "indexed_filtered"
24 "$reference_genome_source.reference_genome"
25 #else if $reference_genome_source.source_select == "indexed_all"
26 "$reference_genome_source.reference_genome"
27 #else if $reference_genome_source.source_select == "history"
28 "$reference_genome_source.reference_genome"
29 #else
30 <!--
31 This is a workaround to obtain the "genome.fa" file that
32 corresponds to the dbkey of the alignments.
33 Because this file is "calculated" during run-time, it can
34 be used in a workflow.
35 -->
36 "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }"
37 #end if
38 <!-- Optional restriction of the pileup to one region (-r) or to a positions/BED file (-l). -->
39 #if $extended_parameters_regions.samtools_regions == "region"
40 -r $extended_parameters_regions.samtools_r
41 #elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed"
42 -l $extended_parameters_regions.samtools_l
43 #end if
44 <!-- Samtools tuning flags, only emitted when the "extended" settings are selected. -->
45 #if $extended_parameters.parameters == "extended"
46 $extended_parameters.samtools_6
47 $extended_parameters.samtools_A
48 $extended_parameters.samtools_B
49 -C $extended_parameters.samtools_C
50 -d $extended_parameters.samtools_d
51 $extended_parameters.samtools_E
52 -M $extended_parameters.samtools_M
53 $extended_parameters.samtools_R
54 -q $extended_parameters.samtools_q
55 -Q $extended_parameters.samtools_Q
56
57 -e $extended_parameters.samtools_e
58 -F $extended_parameters.samtools_F
59 -h $extended_parameters.samtools_h
60 $extended_parameters.samtools_I
61 -L $extended_parameters.samtools_L
62 -m $extended_parameters.samtools_m
63 -o $extended_parameters.samtools_o
64 $extended_parameters.samtools_p
65 -P $extended_parameters.samtools_P
66 #end if
67 <!-- Every selected alignment is appended as a positional argument. -->
68 #for $alignment in $alignments
69 ${alignment}
70 #end for
71 2>stderr_1.txt
72
73 <!--
74 #if $sort_mpileup
75 | sort -k 1,1 -k 2,2
76 #end if
77 -->
78
79 | java
80 -Xmx64G
81 -jar \$JAVA_JAR_PATH/VarScan.v2.3.6.jar
82 mpileup2snp
83 <!-- The VarScan options are declared inside the "extended_parameters" conditional, so they have to be referenced through it, exactly like the samtools options above. -->
84 #if $extended_parameters.parameters == "extended"
85 --min-coverage $extended_parameters.varscan_min_coverage
86 --min-reads2 $extended_parameters.varscan_min_reads2
87 --min-avg-qual $extended_parameters.varscan_min_avg_qual
88 --min-var-freq $extended_parameters.varscan_min_var_freq
89 --min-freq-for-hom $extended_parameters.varscan_min_freq_for_hom
90 --p-value $extended_parameters.varscan_p_value
91 $extended_parameters.varscan_strand_filter
92 $extended_parameters.varscan_variants
93 #end if
94
95 #if $varscan_output_vcf
96 --output-vcf 1
97 #end if
98
99 2>stderr_2.txt
100 > $snv_output ;
101 <!-- Replay both captured stderr logs on stdout, in pipeline order. -->
102 echo "-------------------------[ mpileup generation ]-------------------------" ;
103 echo "" ;
104 cat stderr_1.txt ;
105 echo "" ;
106 echo "" ;
107 echo "-------------------------[ VarScan SNP detect ]-------------------------" ;
108 echo "" ;
109 echo "" ;
110 cat stderr_2.txt ;
111 echo "" ;
112 echo "------------------------------------------------------------------------" ;
113 #end if
114 </command>
115
116 <inputs>
117 <param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file(s)" help="Mapped reads in BAM or SAM format."/>
118
119 <!-- Selects where the reference FASTA passed to "samtools mpileup -f" comes from: a built-in index (filtered on the dbkey of the alignments, or the entire list), a dataset from the history, or the dbkey metadata of the alignments resolved at run time. -->
120 <conditional name="reference_genome_source">
121 <param name="source_select" type="select" label="Fasta Source">
122 <option value="indexed_filtered">Use a built-in index (which fits your reference)</option>
123 <option value="history">Use reference from the history</option>
124 <option value="indexed_all">Use a built-in index (entire list) - avoid this option if possible; only useful if you design a workflow</option>
125 <option value="attribute">Use a built-in index based on the 'metadata.dbkey' attribute; ideal in workflows</option>
126 </param>
127 <when value="history">
128 <param name="reference_genome" format="fasta" type="data" label="Reference Genome used during alignment (fasta)" help="Reference genome (genome.fa) that corresponds to the *.bam file." />
129 </when>
130 <when value="indexed_filtered">
131 <param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" >
132 <options from_data_table="all_fasta">
133 <column name="name" index="2"/>
134 <column name="dbkey" index="1"/>
135 <column name="value" index="3"/><!-- Value is the path of the fasta file -->
136 <filter type="data_meta" ref="alignments" multiple="false" key="dbkey" column="1" /><!-- Filters the entries on the dbkey metadata of the "alignments" input -->
137 <validator type="no_options" message="No indexes are available for the selected input dataset" />
138 </options>
139 </param>
140 </when>
141 <when value="indexed_all">
142 <param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" >
143 <options from_data_table="all_fasta">
144 <column name="name" index="2"/>
145 <column name="dbkey" index="1"/>
146 <column name="value" index="3"/><!-- Value is the path of the fasta file -->
147 <validator type="no_options" message="No indexes are available for the selected input dataset" />
148 </options>
149 </param>
150 </when>
151 <when value="attribute" /><!-- No parameter needed: the command template looks the FASTA path up in the 'all_fasta' data table using the dbkey of the alignments -->
152 </conditional>
153
154 <conditional name="extended_parameters_regions"><!-- Optional restriction of the samtools pileup to specific genomic locations -->
155 <param name="samtools_regions" type="select" label="Region specific parameters" help="Let samtools target specific genomic locations.">
156 <option value="entire_genome">Entire genome</option>
157 <option value="region">Specific region</option>
158 <option value="regions_file_pos">Specific positions (file); list of positions</option>
159 <option value="regions_file_bed">Specific regions (file); list of regions in BED</option>
160 </param>
161 <when value="entire_genome" /><!-- The command template emits no extra samtools argument for this choice -->
162 <when value="region"><!-- The command template emits "-r" followed by this text value -->
163 <param type="text" name="samtools_r" label="Samtools: region in which pileup is generated" help="chr:pos or chr:start-end" />
164 </when>
165 <when value="regions_file_pos"><!-- The command template emits "-l" followed by this dataset -->
166 <param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" />
167 </when>
168 <when value="regions_file_bed"><!-- Same "-l" argument as above, but with a BED dataset -->
169 <param type="data" name="samtools_l" format="bed" label="Samtools: specific regions (BED)" />
170 </when>
171 </conditional>
172
173
174 <!--
175 <conditional name="mpileup_parallelization">
176 <param name="mpileup_parallelization_select" type="select" label="Use parallelization for the mpileup generation, WARNING: if parallelization is used output will be unsorted!" help="Especially if larger numbers of bam/sam files are processed, or the file infrastructure is optimized for IO-parallelization, this feature might improve performance.">
177 <option value="false" >False - uses classical samtools</option>
178 <option value="true">True - uses (experimental) samtools mpileup-parallel</option>
179 </param>
180 <when value="false" />
181 <when value="true">
182 <param type="integer" name="samtools_threads" value="2" min="1" label="Samtools: mpileup threads" />
183 </when>
184 </conditional>
185
186 <param name="sort_mpileup" type="boolean" truevalue="true" falsevalue="false" label="Sort mpileup file" help="Because parallelization may disrupt the output order, sorting can be convenient for e.g. testing. Notice that this function is only useful in a limited number of situations but consumes (many) resources. Only use it if it's really necessary." />
187 -->
188
189 <conditional name="extended_parameters"><!-- Advanced samtools mpileup and VarScan settings; the command template only emits them when "extended" is selected -->
190 <param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings.">
191 <option value="default">Default settings</option>
192 <option value="extended">Extended settings</option>
193 </param>
194 <when value="default" />
195 <when value="extended">
196 <param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" />
197 <param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" />
198 <param type="boolean" name="samtools_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" />
199 <param type="integer" name="samtools_C" value="0" label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" />
200 <param type="integer" name="samtools_d" value="250" label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" />
201 <param type="boolean" name="samtools_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" />
202 <param type="integer" name="samtools_M" value="60" label="Samtools: cap mapping quality at INT [60]" />
203 <param type="boolean" name="samtools_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" />
204 <param type="integer" name="samtools_q" value="0" label="Samtools: skip alignments with mapQ smaller than INT [0]" />
205 <param type="integer" name="samtools_Q" value="13" label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" />
206 <!-- Samtools options related to gaps and indel calling, going by their labels. -->
207 <param type="integer" name="samtools_e" value="20" label="Samtools: Phred-scaled gap extension seq error probability [20]" />
208 <param type="float" name="samtools_F" value="0.002" label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" />
209 <param type="integer" name="samtools_h" value="100" label="Samtools: coefficient for homopolymer errors [100]" />
210 <param type="boolean" name="samtools_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" />
211 <param type="integer" name="samtools_L" value="250" label="Samtools: max per-sample depth for INDEL calling [250]" />
212 <param type="integer" name="samtools_m" value="1" label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" />
213 <param type="integer" name="samtools_o" value="40" label="Samtools: Phred-scaled gap open sequencing error probability [40]" />
214 <param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" />
215 <param type="text" name="samtools_P" value="all" label="Samtools: comma separated list of platforms for indels [all]" />
216 <!-- VarScan mpileup2snp thresholds and filters. The two boolean parameters carry the complete command-line option in their true/false values; VarScan spells the strand option with a hyphen. -->
217 <param type="integer" name="varscan_min_coverage" value="8" label="VarScan: Minimum read depth at a position to make a call [8]" />
218 <param type="integer" name="varscan_min_reads2" value="2" label="VarScan: Minimum supporting reads at a position to call variants [2]" />
219 <param type="integer" name="varscan_min_avg_qual" value="15" label="VarScan: Minimum base quality at a position to count a read [15]" />
220 <param type="float" name="varscan_min_var_freq" value="0.01" label="VarScan: Minimum variant allele frequency threshold [0.01]" />
221 <param type="float" name="varscan_min_freq_for_hom" value="0.75" label="VarScan: Minimum frequency to call homozygote [0.75]" />
222 <param type="float" name="varscan_p_value" value="0.99" label="VarScan: Default p-value threshold for calling variants [99e-02]" />
223 <param type="boolean" name="varscan_strand_filter" falsevalue=" --strand-filter 0" truevalue=" --strand-filter 1" checked="true" label="VarScan: Ignore variants with >90% support on one strand [1]" />
224 <param type="boolean" name="varscan_variants" falsevalue=" --variants 0" truevalue=" --variants 1" checked="false" label="VarScan: Report only variant (SNP/indel) positions [0]" />
225 </when>
226 </conditional>
227
228 <param type="boolean" name="varscan_output_vcf" falsevalue="0" truevalue="1" label="VarScan: If set to 1, outputs in VCF format" />
229 </inputs>
230
231 <outputs>
232 <data format="tabular" name="snv_output" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])}"><!-- The command template redirects VarScan's stdout into this dataset; the label lists the hid and name of every input alignment -->
233 <change_format><!-- Switch the datatype from tabular to vcf when the VCF output option has the value 1 -->
234 <when input="varscan_output_vcf" value="1" format="vcf" />
235 </change_format>
236 </data>
237 </outputs>
238
239 <tests>
240 <test><!-- Use classical samtools -->
241 <param name="alignments" value="hg19_mutant.bam.txt" dbkey="hg19" ftype="bam" />
242 <param name="source_select" value="attribute" /><!-- The reference FASTA is resolved from the dbkey set on the alignments parameter above -->
243 <param name="samtools_regions" value="entire_genome" />
244
245 <!-- <param name="mpileup_parallelization_select" value="false" />
246 <param name="sort_mpileup" value="true" /> -->
247
248 <param name="parameters" value="default" />
249 <param name="varscan_output_vcf" value="1" />
250
251 <!-- The result is compared against a VCF file because the VCF output option is set to 1 above. -->
252 <output name="snv_output" file="hg19_mutant.vcf" />
253 </test>
254 <!-- <test> Use parallelized samtools
255 <param name="alignments" value="hg19_mutant.bam.txt" dbkey="hg19" ftype="bam" />
256 <param name="source_select" value="attribute" />
257 <param name="samtools_regions" value="entire_genome" />
258
259 param name="mpileup_parallelization_select" value="true" />
260 <param name="samtools_threads" value="2" />
261 <param name="sort_mpileup" value="true" />
262
263 <param name="parameters" value="default" />
264 <param name="varscan_output_vcf" value="1" />
265
266
267 <output name="snv_output" file="hg19_mutant.vcf" />
268 </test>-->
269 </tests>
270
271 <help>
272 **VarScan 2.3.6**
273
274 VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems.
275 http://dx.doi.org/10.1101/gr.129684.111
276 http://www.ncbi.nlm.nih.gov/pubmed/19542151
277
278 *VarScan* requires mpileup formatted input files, which are generally derived from BAM files. Since mpileup files can become humongous, the interim step of storing them is bypassed. Thus, in this wrapper one or multiple BAM/SAM files go in, get processed into an mpileup stream and get piped directly to VarScan.
279 The samtools package is not able to parallelize the mpileup generation, which makes it a very slow process.
280 Other people were aware of this and have written a version that can do parallelization:
281 https://github.com/mydatascience/parallel-mpileup
282
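283 Consequently, this wrapper was designed so that BAM files are processed by *parallel-mpileup* before the result is sent to VarScan. In this revision the parallel implementation is disabled and the classical ``samtools mpileup`` is used instead.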
284
285 .. _VarScan: http://varscan.sourceforge.net/
286
287 **Input formats**
288
289 VarScan2 accepts sequencing alignments in either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by Galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing.
290
291 **Installation**
292
293 Make sure your reference genomes are properly annotated in "tool-data/all_fasta.loc", and linked to the names of the reference used for alignment.
294
295 **License**
296
297 * VarScan2.3.6: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0)
298 * parallel-mpileup: MIT License (https://github.com/mydatascience/parallel-mpileup/blob/master/samtools-0.1.19/COPYING)
299
300
301 **Contact**
302
303 The tool wrapper has been written by Youri Hoogstrate from the Erasmus Medical Center (Rotterdam, Netherlands) on behalf of the Translational Research IT (TraIT) project:
304 http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch
305
306 More tools by the Translational Research IT (TraIT) project can be found in the following repository:
307 http://toolshed.dtls.nl/
308 </help>
309 </tool>