Mercurial > repos > yhoogstrate > varscan_mpileup2indel_from_bam
comparison varscan_mpileup2indel_from_bam.xml @ 1:2c56a59a112f draft default tip
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/galaxytools-emc/tree/master/tools/galaxy-tool-shed-tools commit bd543e68c1af82bcd6a04f0ae3d1180e8887e122
author | erasmus-medical-center |
---|---|
date | Wed, 15 Feb 2017 16:15:21 -0500 |
parents | 10e2ea79ec55 |
children |
comparison
equal
deleted
inserted
replaced
0:10e2ea79ec55 | 1:2c56a59a112f |
---|---|
1 <?xml version="1.0" encoding="UTF-8"?> | 1 <?xml version="1.0" encoding="UTF-8"?> |
2 <tool id="varscan_mpileup2indel_from_bam" name="VarScan2 Call INDELs from BAM" version="2.3.6.a"> | 2 <tool id="varscan_mpileup2indel_from_bam" name="VarScan2 Call INDELs from BAM" version="2.4.2.a"> |
3 <description>VarScan2 INDEL detection; directly reading *.bam file(s) & using parallel mpileup generation, to avoid unnecessary I/O overhead and increase performance.</description> | 3 <description>VarScan2 INDEL detection; directly reading *.bam file(s) & using parallel mpileup generation, to avoid unnecessary I/O overhead and increase performance.</description> |
4 | 4 |
5 <requirements> | 5 <requirements> |
6 <requirement type="package" version="0.1.19a">samtools_parallel_mpileup_0_1_19a</requirement> | 6 <requirement type="package" version="2.4.2">varscan</requirement> |
7 <requirement type="package" version="0.1.19">samtools</requirement> | 7 <requirement type="package" version="0.6.5">sambamba</requirement> |
8 <requirement type="package" version="2.3.6">varscan</requirement> | |
9 </requirements> | 8 </requirements> |
10 | 9 |
11 <version_command>java -jar $JAVA_JAR_PATH/VarScan.v2.3.6.jar 2>&1 | head -n 1</version_command> | 10 <version_command>varscan 2>&1 | head -n 1</version_command> |
12 | 11 |
13 <command> | 12 <command detect_errors="exit_code"><![CDATA[ |
14 #if $reference_genome_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1 | 13 #for $alignment in $alignments |
15 echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/all_fasta.loc' is configured properly!" >&2 | 14 ln -f -s '${alignment.metadata.bam_index}' '${alignment}.bai' && |
16 #else | 15 #end for |
17 #import os.path | 16 |
17 sambamba mpileup | |
18 -t \${GALAXY_SLOTS:-4} | |
19 | |
18 #for $alignment in $alignments | 20 #for $alignment in $alignments |
19 <!-- @todo use the existence of $alignment.metadata.bam_index or $alignment.metadata['bam_index'] --> | 21 '${alignment}' |
20 #if not os.path.isfile(str($alignment)+".bai") | 22 #end for |
21 echo "- Indexing alignment file: $alignment.name " ; | 23 |
22 samtools index $alignment 2>&1 ; | 24 --samtools |
25 -f | |
26 #if $reference_genome_source.source_select == "indexed_filtered" | |
27 '$reference_genome_source.reference_genome' | |
28 #else if $reference_genome_source.source_select == "indexed_all" | |
29 '$reference_genome_source.reference_genome' | |
30 #else if $reference_genome_source.source_select == "history" | |
31 '$reference_genome_source.reference_genome' | |
23 #else | 32 #else |
24 echo "- Skiping indexing: $alignment.name " ; | 33 <!-- |
34 This is a workaround to obtain the "genome.fa" file that | |
35 corresponds to the dbkey of the alignments. | |
36 Because this file is "calculated" during run-time, it can | |
37 be used in a workflow. | |
38 --> | |
39 "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }" | |
25 #end if | 40 #end if |
26 #end for | 41 |
27 | 42 #if $extended_parameters_regions.samtools_regions == "region" |
28 #if $mpileup_parallelization.mpileup_parallelization_select == "true" | 43 -r '${extended_parameters_regions.samtools_r}' |
29 samtools-parallel-mpileup mpileup | 44 #elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed" |
30 -t $mpileup_parallelization.samtools_threads | 45 -l '${extended_parameters_regions.sambamba_l}' |
31 #else | 46 #end if |
32 samtools mpileup | 47 |
33 #end if | 48 #if $extended_parameters.parameters == "extended" |
34 -f | 49 $extended_parameters.samtools_6 |
35 #if $reference_genome_source.source_select == "indexed_filtered" | 50 $extended_parameters.samtools_A |
36 "$reference_genome_source.reference_genome" | 51 $extended_parameters.samtools_B |
37 #else if $reference_genome_source.source_select == "indexed_all" | 52 -C $extended_parameters.samtools_C |
38 "$reference_genome_source.reference_genome" | 53 -d $extended_parameters.samtools_d |
39 #else if $reference_genome_source.source_select == "history" | 54 $extended_parameters.samtools_E |
40 "$reference_genome_source.reference_genome" | 55 -M $extended_parameters.samtools_M |
41 #else | 56 $extended_parameters.samtools_R |
42 <!-- | 57 -q $extended_parameters.samtools_q |
43 This is a workaround to obtain the "genome.fa" file that | 58 -Q $extended_parameters.samtools_Q |
44 corresponds to the dbkey of the alignments. | 59 |
45 Because this file is "calculated" during run-time, it can | 60 -e $extended_parameters.samtools_e |
46 be used in a workflow. | 61 -F $extended_parameters.samtools_F |
47 --> | 62 -h $extended_parameters.samtools_h |
48 "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }" | 63 $extended_parameters.samtools_I |
49 #end if | 64 -L $extended_parameters.samtools_L |
50 | 65 -m $extended_parameters.samtools_m |
51 #if $extended_parameters_regions.samtools_regions == "region" | 66 -o $extended_parameters.samtools_o |
52 -r $extended_parameters_regions.samtools_r | 67 $extended_parameters.samtools_p |
53 #elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed" | 68 -P $extended_parameters.samtools_P |
54 -l $extended_parameters_regions.samtools_l | 69 #end if |
55 #end if | 70 |
56 | 71 #for $alignment in $alignments |
57 #if $extended_parameters.parameters == "extended" | 72 '${alignment}' |
58 $extended_parameters.samtools_6 | 73 #end for |
59 $extended_parameters.samtools_A | 74 |
60 $extended_parameters.samtools_B | 75 | varscan mpileup2indel |
61 -C $extended_parameters.samtools_C | 76 |
62 -d $extended_parameters.samtools_d | 77 #if $extended_parameters.parameters == "extended" |
63 $extended_parameters.samtools_E | 78 --min-coverage $extended_parameters.varscan_min_coverage |
64 -M $extended_parameters.samtools_M | 79 --min-reads2 $extended_parameters.varscan_min_reads2 |
65 $extended_parameters.samtools_R | 80 --min-avg-qual $extended_parameters.varscan_min_avg_qual |
66 -q $extended_parameters.samtools_q | 81 --min-var-freq $extended_parameters.varscan_min_var_freq |
67 -Q $extended_parameters.samtools_Q | 82 --min-freq-for-hom $extended_parameters.varscan_min_freq_for_hom |
68 | 83 --p-value $extended_parameters.varscan_p_value |
69 -e $extended_parameters.samtools_e | 84 $extended_parameters.varscan_strand_filter |
70 -F $extended_parameters.samtools_F | 85 $extended_parameters.varscan_variants |
71 -h $extended_parameters.samtools_h | 86 #end if |
72 $extended_parameters.samtools_I | 87 |
73 -L $extended_parameters.samtools_L | 88 #if $varscan_output == "vcf" or $varscan_output.value == "vcf" |
74 -m $extended_parameters.samtools_m | 89 --output-vcf 1 |
75 -o $extended_parameters.samtools_o | 90 #end if |
76 $extended_parameters.samtools_p | 91 |
77 -P $extended_parameters.samtools_P | 92 > '${snv_output}' |
78 #end if | 93 |
79 | 94 ]]></command> |
80 #for $alignment in $alignments | |
81 ${alignment} | |
82 #end for | |
83 2>stderr_1.txt | |
84 | |
85 #if $mpileup_parallelization.mpileup_parallelization_select == "true" | |
86 #if $mpileup_parallelization.sort_mpileup | |
87 | sort -k1,1V -k2,2g | |
88 #end if | |
89 #end if | |
90 | |
91 | java | |
92 -Xmx64G | |
93 -jar \$JAVA_JAR_PATH/VarScan.v2.3.6.jar | |
94 mpileup2indel | |
95 | |
96 #if $extended_parameters.parameters == "extended" | |
97 --min-coverage $extended_parameters.varscan_min_coverage | |
98 --min-reads2 $extended_parameters.varscan_min_reads2 | |
99 --min-avg-qual $extended_parameters.varscan_min_avg_qual | |
100 --min-var-freq $extended_parameters.varscan_min_var_freq | |
101 --min-freq-for-hom $extended_parameters.varscan_min_freq_for_hom | |
102 --p-value $extended_parameters.varscan_p_value | |
103 $extended_parameters.varscan_strand_filter | |
104 $extended_parameters.varscan_variants | |
105 #end if | |
106 | |
107 #if $varscan_output == "vcf" or $varscan_output.value == "vcf" | |
108 --output-vcf 1 | |
109 #end if | |
110 | |
111 2>stderr_2.txt | |
112 > $snv_output ; | |
113 | |
114 | |
115 echo "---------------[ mpileup generation ]---------------" ; | |
116 cat stderr_1.txt ; | |
117 echo "" ; | |
118 echo "---------------[ VarScan INDEL detect ]-------------" ; | |
119 cat stderr_2.txt ; | |
120 echo "" ; | |
121 echo "----------------------------------------------------" ; | |
122 #end if | |
123 </command> | |
124 | 95 |
125 <inputs> | 96 <inputs> |
126 <param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file(s)" help="Mapped reads in BAM or SAM format."/> | 97 <param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file(s)" help="Mapped reads in BAM or SAM format."/> |
127 | 98 |
128 <!-- Find out how to access the reference genome from the BAM file(s) --> | 99 <!-- Find out how to access the reference genome from the BAM file(s) --> |
174 <when value="regions_file_pos"> | 145 <when value="regions_file_pos"> |
175 <param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" /> | 146 <param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" /> |
176 </when> | 147 </when> |
177 <when value="regions_file_bed"> | 148 <when value="regions_file_bed"> |
178 <param type="data" name="samtools_l" format="bed" label="Samtools: specific regions (BED)" /> | 149 <param type="data" name="samtools_l" format="bed" label="Samtools: specific regions (BED)" /> |
179 </when> | |
180 </conditional> | |
181 | |
182 <conditional name="mpileup_parallelization"> | |
183 <param name="mpileup_parallelization_select" type="select" label="Use parallelization for the mpileup generation (experimental)" help="Especially if larger numbers of bam/sam files are processed, or the file infrastructure is optimized for IO-paralellization, this feature might improve performance."> | |
184 <option value="false" >False - uses classical samtools</option> | |
185 <option value="true">True - uses (experimental) samtools mpileup-parallel</option> | |
186 </param> | |
187 <when value="false" /> | |
188 <when value="true"> | |
189 <param type="integer" name="samtools_threads" value="2" min="1" label="Samtools: mpileup threads" /> | |
190 <param type="boolean" name="sort_mpileup" truevalue="true" falsevalue="false" label="Sort mpileup file (SLOW)" help="Because parallelization may disrupt the outputs order, sorting can be conveniet for e.g. testing. Notice that this function has only use in a limited number of situations but consumes (much) resources. Only use it if it's really neccesairy." /> | |
191 </when> | 150 </when> |
192 </conditional> | 151 </conditional> |
193 | 152 |
194 <conditional name="extended_parameters"> | 153 <conditional name="extended_parameters"> |
195 <param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings."> | 154 <param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings."> |
258 | 217 |
259 <param name="parameters" value="default" /> | 218 <param name="parameters" value="default" /> |
260 <param name="varscan_output_vcf" value="1" /> | 219 <param name="varscan_output_vcf" value="1" /> |
261 | 220 |
262 | 221 |
263 <output name="snv_output" file="example.vcf" /> | 222 <output name="snv_output" file="example.2.vcf" /> |
264 </test> | |
265 <test><!-- Use parallelized samtools --> | |
266 <param name="alignments" value="example.bam" ftype="bam" /> | |
267 | |
268 <param name="source_select" value="history" /> | |
269 <param name="reference_genome" value="example.fa" ftype="fasta" /> | |
270 | |
271 <param name="samtools_regions" value="entire_genome" /> | |
272 | |
273 <param name="mpileup_parallelization_select" value="true" /> | |
274 <param name="samtools_threads" value="2" /> | |
275 <param name="sort_mpileup" value="true" /> | |
276 | |
277 <param name="parameters" value="default" /> | |
278 <param name="varscan_output_vcf" value="1" /> | |
279 | |
280 | |
281 <output name="snv_output" file="example.vcf" /> | |
282 </test> | 223 </test> |
283 </tests> | 224 </tests> |
284 | 225 |
285 <help> | 226 <help> |
286 **VarScan 2.3.6** | 227 **VarScan 2.4.2** |
287 | 228 |
288 VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems. | 229 VarScan is a platform-independent mutation caller for targeted, exome, and whole-genome resequencing data generated on Illumina, SOLiD, Life/PGM, Roche/454, and similar instruments. The newest version, VarScan 2, is written in Java, so it runs on most operating systems. |
289 http://dx.doi.org/10.1101/gr.129684.111 | 230 http://dx.doi.org/10.1101/gr.129684.111 |
290 http://www.ncbi.nlm.nih.gov/pubmed/19542151 | 231 http://www.ncbi.nlm.nih.gov/pubmed/19542151 |
291 | 232 |
300 | 241 |
301 **Input formats** | 242 **Input formats** |
302 | 243 |
303 VarScan2 accepts sequencing alignments that all share the same format, either SAM or BAM (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by Galaxy. This is indicated under every history item with e.g. *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing. | 244 VarScan2 accepts sequencing alignments that all share the same format, either SAM or BAM (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by Galaxy. This is indicated under every history item with e.g. *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing. |
304 | 245 |
305 **Installation** | |
306 | |
307 Make sure your reference genomes are properly annotated in "tool-data/all_fasta.loc", and linked to the names of the reference used for alignment. | |
308 | |
309 **License** | 246 **License** |
310 | 247 |
311 * VarScan2.3.6: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0) | 248 * VarScan 2.4.2: Non-Profit Open Software License 3.0 (Non-Profit OSL 3.0) |
312 * parallel-mpileup: MIT License (https://github.com/mydatascience/parallel-mpileup/blob/master/samtools-0.1.19/COPYING) | |
313 | 249 |
314 | 250 |
315 Contact | 251 Contact |
316 ------- | 252 ------- |
317 | 253 |
318 The tool wrapper has been written by Youri Hoogstrate from the Erasmus | 254 The tool wrapper has been written by Youri Hoogstrate from the Erasmus |
319 Medical Center (Rotterdam, Netherlands) on behalf of the Translational | 255 Medical Center (Rotterdam, Netherlands). |
320 Research IT (TraIT) project: | |
321 | |
322 http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch | |
323 | |
324 More tools by the Translational Research IT (TraIT) project can be found | |
325 in the following toolsheds: | |
326 | |
327 http://toolshed.g2.bx.psu.edu/ | |
328 | |
329 http://testtoolshed.g2.bx.psu.edu/ | |
330 </help> | 256 </help> |
331 <citations> | 257 <citations> |
332 <citation type="doi">10.1101/gr.129684.111</citation> | 258 <citation type="doi">10.1101/gr.129684.111</citation> |
333 </citations> | 259 </citations> |
334 </tool> | 260 </tool> |