Mercurial > repos > yhoogstrate > samtools_parallel_mpileup
comparison samtools-parallel-mpileup.xml @ 0:460f0749aac5 draft default tip
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/samtools_parallel_mpileup_galaxy_wrapper commit ede01f67a8def5be7c88d5c31c2435b3946f1523-dirty
author | yhoogstrate |
---|---|
date | Thu, 05 Nov 2015 07:49:02 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:460f0749aac5 |
---|---|
1 <?xml version="1.0" encoding="UTF-8"?> | |
2 <tool id="samtools_parallel_mpileup" name="Samtools parallel mpileup" version="0.1.19-a.a"> | |
3 <description>Samtools mpileup (supporting parallelization)</description> | |
4 | |
5 <requirements> | |
6 <requirement type="package" version="0.1.19-a">samtools_parallel_mpileup</requirement> | |
7 <requirement type="package" version="0.1.19">samtools</requirement> | |
8 </requirements> | |
9 | |
10 <version_command>samtools-parallel-mpileup 2>&1 | grep Version</version_command> | |
11 | |
12 <command> | |
13 #if $reference_genome_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1 | |
14 echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/all_fasta.loc' is configured properly!" >&2 | |
15 #else | |
16 #if $mpileup_parallelization.mpileup_parallelization_select == "true" | |
17 samtools-parallel-mpileup mpileup | |
18 -t $mpileup_parallelization.samtools_threads | |
19 #else | |
20 samtools mpileup | |
21 #end if | |
22 -f | |
23 #if $reference_genome_source.source_select == "indexed_filtered" | |
24 "$reference_genome_source.reference_genome" | |
25 #else if $reference_genome_source.source_select == "indexed_all" | |
26 "$reference_genome_source.reference_genome" | |
27 #else if $reference_genome_source.source_select == "history" | |
28 "$reference_genome_source.reference_genome" | |
29 #else | |
30 <!-- | |
31 This is a workaround to obtain the "genome.fa" file that | |
32 corresponds to the dbkey of the alignments. | |
33 Because this file is "calculated" during run-time, it can | |
34 be used in a workflow. | |
35 --> | |
36 "${ filter( lambda x: str( x[0] ) == str( { alignment.metadata.dbkey:True for alignment in $alignments }.keys()[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }" | |
37 #end if | |
38 | |
39 #if $extended_parameters_regions.samtools_regions == "region" | |
40 -r $extended_parameters_regions.samtools_r | |
41 #elif $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed" | |
42 -l $extended_parameters_regions.samtools_l | |
43 #end if | |
44 | |
45 #if $extended_parameters.parameters == "extended" | |
46 $extended_parameters.samtools_6 | |
47 $extended_parameters.samtools_A | |
48 $extended_parameters.samtools_B | |
49 -C $extended_parameters.samtools_C | |
50 -d $extended_parameters.samtools_d | |
51 $extended_parameters.samtools_E | |
52 -M $extended_parameters.samtools_M | |
53 $extended_parameters.samtools_R | |
54 -q $extended_parameters.samtools_q | |
55 -Q $extended_parameters.samtools_Q | |
56 | |
57 -e $extended_parameters.samtools_e | |
58 -F $extended_parameters.samtools_F | |
59 -h $extended_parameters.samtools_h | |
60 $extended_parameters.samtools_I | |
61 -L $extended_parameters.samtools_L | |
62 -m $extended_parameters.samtools_m | |
63 -o $extended_parameters.samtools_o | |
64 $extended_parameters.samtools_p | |
65 -P $extended_parameters.samtools_P | |
66 #end if | |
67 | |
68 #for $alignment in $alignments | |
69 ${alignment} | |
70 #end for | |
71 | |
72 2> stderr_1.txt | |
73 | |
74 #if $sort_mpileup | |
75 | sort -k1,1V -k2,2g | |
76 #end if | |
77 | |
78 > $output ; | |
79 cat stderr_1.txt | |
80 #end if | |
81 </command> | |
82 | |
83 <inputs> | |
84 <param format="bam,sam" multiple="true" name="alignments" type="data" label="Alignment file" help="Mapped reads in BAM or SAM format."/> | |
85 | |
86 <!-- Find out how to access the reference genome from the BAM file(s) --> | |
87 <conditional name="reference_genome_source"> | |
88 <param name="source_select" type="select" label="Fasta Source"> | |
89 <option value="indexed_filtered">Use a built-in index (which fits your reference)</option> | |
90 <option value="history">Use reference from the history</option> | |
91 <option value="indexed_all">Use a built-in index (entire list) - avoid this option if possible; only useful if you design a workflow</option> | |
92 <option value="attribute">Use a built-in index based on the 'metadata.dbkey' attribute; ideal in workflows</option> | |
93 </param> | |
94 <when value="indexed_filtered"> | |
95 <param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" > | |
96 <options from_data_table="all_fasta"> | |
97 <column name="name" index="2"/> | |
98 <column name="dbkey" index="1"/> | |
99 <column name="value" index="3"/><!-- Value is the path of the fasta file --> | |
100 <filter type="data_meta" ref="alignments" multiple="false" key="dbkey" column="1" /> | |
101 <validator type="no_options" message="No indexes are available for the selected input dataset" /> | |
102 </options> | |
103 </param> | |
104 </when> | |
105 <when value="history"> | |
106 <param name="reference_genome" format="fasta" type="data" label="Reference Genome used during alignment (fasta)" help="Reference genome (genome.fa) that corresponds to the *.bam file." /> | |
107 </when> | |
108 <when value="indexed_all"> | |
109 <param name="reference_genome" type="select" label="Reference Genome used during alignment (fasta)" > | |
110 <options from_data_table="all_fasta"> | |
111 <column name="name" index="2"/> | |
112 <column name="dbkey" index="1"/> | |
113 <column name="value" index="3"/><!-- Value is the path of the fasta file --> | |
114 <validator type="no_options" message="No indexes are available for the selected input dataset" /> | |
115 </options> | |
116 </param> | |
117 </when> | |
118 <when value="attribute" /> | |
119 </conditional> | |
120 | |
121 <conditional name="extended_parameters_regions"> | |
122 <param name="samtools_regions" type="select" label="Region specific parameters" help="Let samtools target specific genomic locations."> | |
123 <option value="entire_genome">Entire genome</option> | |
124 <option value="region">Specific region</option> | |
125 <option value="regions_file_pos">Specific positions (file); list of positions</option> | |
126 <option value="regions_file_bed">Specific regions (file); list of regions in BED</option> | |
127 </param> | |
128 <when value="entire_genome"> | |
129 </when> | |
130 <when value="region"> | |
131 <param type="text" name="samtools_r" label="Samtools: region in which pileup is generated" help="chr:pos or chr:start-end" /> | |
132 </when> | |
133 <when value="regions_file_pos"> | |
134 <param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" /> | |
135 </when> | |
136 <when value="regions_file_bed"> | |
137 <param type="data" name="samtools_l" format="bed" label="Samtools: specific regions (BED)" /> | |
138 </when> | |
139 </conditional> | |
140 | |
141 <conditional name="mpileup_parallelization"> | |
142 <param name="mpileup_parallelization_select" type="select" label="Use parallelization for the mpileup generation" help="Especially if larger numbers of bam/sam files are processed, or the file infrastructure is optimized for IO-paralellization, this feature might improve performance."> | |
143 <option value="false">No (uses original build of samtools)</option> | |
144 <option value="true" selected="true">Yes (uses samtools-parallel-mpileup)</option> | |
145 </param> | |
146 <when value="false" /> | |
147 <when value="true"> | |
148 <param type="integer" name="samtools_threads" value="2" min="1" label="Samtools: mpileup threads" /> | |
149 </when> | |
150 </conditional> | |
151 | |
152 <param name="sort_mpileup" type="boolean" truevalue="true" falsevalue="false" label="Sort mpileup file" help="Because parallelization may disrupt the outputs order, sorting can be conveniet for e.g. testing. Notice that this function has only use in a limited number of situations but consumes (much) resources. Only use it if it's really neccesairy." /> | |
153 | |
154 <conditional name="extended_parameters"> | |
155 <param name="parameters" type="select" label="Advanced parameters" help="For more advanced VarScan and samtools settings."> | |
156 <option value="default">Default settings</option> | |
157 <option value="extended">Extended settings</option> | |
158 </param> | |
159 <when value="default" /> | |
160 <when value="extended"> | |
161 <param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" /> | |
162 <param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" /> | |
163 <param type="boolean" name="samtools_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" /> | |
164 <param type="integer" name="samtools_C" value="0" label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" /> | |
165 <param type="integer" name="samtools_d" value="250" label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" /> | |
166 <param type="boolean" name="samtools_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" /> | |
167 <param type="integer" name="samtools_M" value="60" label="cap mapping quality at INT [60]" /> | |
168 <param type="boolean" name="samtools_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" /> | |
169 <param type="integer" name="samtools_q" value="0" label="Samtools: skip alignments with mapQ smaller than INT [0]" /> | |
170 <param type="integer" name="samtools_Q" value="13" label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" /> | |
171 | |
172 <param type="integer" name="samtools_e" value="20" label="Samtools: Phred-scaled gap extension seq error probability [20]" /> | |
173 <param type="float" name="samtools_F" value="0.002" label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" /> | |
174 <param type="integer" name="samtools_h" value="100" label="Samtools: coefficient for homopolymer errors [100]" /> | |
175 <param type="boolean" name="samtools_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" /> | |
176 <param type="integer" name="samtools_L" value="250" label="Samtools: max per-sample depth for INDEL calling [250]" /> | |
177 <param type="integer" name="samtools_m" value="1" label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" /> | |
178 <param type="integer" name="samtools_o" value="40" label="Samtools: Phred-scaled gap open sequencing error probability [40]" /> | |
179 <param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" /> | |
180 <param type="text" name="samtools_P" value="all" label="Samtools: comma separated list of platforms for indels [all]" /> | |
181 </when> | |
182 </conditional> | |
183 </inputs> | |
184 | |
185 <outputs> | |
186 <data format="mpileup" name="output" label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])}" /> | |
187 </outputs> | |
188 | |
189 <tests> | |
190 <test><!-- Use classical samtools --> | |
191 <param name="alignments" value="example.bam" ftype="bam" /> | |
192 | |
193 <param name="source_select" value="history" /> | |
194 <param name="reference_genome" value="example.fa" ftype="fasta" /> | |
195 | |
196 <param name="samtools_regions" value="entire_genome" /> | |
197 | |
198 <param name="mpileup_parallelization_select" value="false" /> | |
199 <param name="sort_mpileup" value="true" /> | |
200 | |
201 <param name="parameters" value="default" /> | |
202 | |
203 | |
204 <output name="output" file="example.mpileup" /> | |
205 </test> | |
206 <test><!-- Use parallelized samtools - @todo replace with sambamba! --> | |
207 <param name="alignments" value="example.bam" ftype="bam" /> | |
208 | |
209 <param name="source_select" value="history" /> | |
210 <param name="reference_genome" value="example.fa" ftype="fasta" /> | |
211 | |
212 <param name="samtools_regions" value="entire_genome" /> | |
213 | |
214 <param name="mpileup_parallelization_select" value="true" /> | |
215 <param name="samtools_threads" value="2" /> | |
216 <param name="sort_mpileup" value="true" /> | |
217 | |
218 <param name="parameters" value="default" /> | |
219 | |
220 | |
221 <output name="output" file="example.mpileup.parallel" /> | |
222 </test> | |
223 </tests> | |
224 | |
225 <help> | |
226 **Samtools mpileup (supporting parallelization)** | |
227 | |
228 SAM (Sequence Alignment/Map) format is a generic format for storing large nucleotide sequence alignments. SAM aims to be a format that: | |
229 | |
230 Is flexible enough to store all the alignment information generated by various alignment programs; | |
231 Is simple enough to be easily generated by alignment programs or converted from existing alignment formats; | |
232 Is compact in file size; | |
233 Allows most of operations on the alignment to work on a stream without loading the whole alignment into memory; | |
234 Allows the file to be indexed by genomic position to efficiently retrieve all reads aligning to a locus. | |
235 SAM Tools provide various utilities for manipulating alignments in the SAM format, including sorting, merging, indexing and generating alignments in a per-position format. | |
236 | |
237 SAMtools is hosted by SourceForge.net. The project page is http://samtools.sourceforge.net/. The source code releases are available from the download page. You can check out the most recent source code from the github project page with: | |
238 git clone git://github.com/samtools/samtools.git | |
239 https://github.com/mydatascience/parallel-mpileup/ | |
240 | |
241 Because samtools does not support parallelization of the mpileup command, the project was forked to include parallelization support: | |
242 | |
243 | |
244 However, since the project seems to lack support and contains fatal bugs this project was forked at: | |
245 https://github.com/yhoogstrate/parallel-mpileup/ | |
246 | |
247 | |
248 **Input formats** | |
249 | |
250 Samtools accepts sequencing alignments in either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by Galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing. | |
251 | |
252 **Installation** | |
253 | |
254 The installation is fully automatic. | |
255 | |
256 **License** | |
257 | |
258 * parallel-mpileup: MIT License (https://github.com/yhoogstrate/parallel-mpileup/blob/master/samtools-0.1.19/COPYING) | |
259 * samtools: MIT License | |
260 | |
261 | |
262 Contact | |
263 ------- | |
264 | |
265 The tool wrapper has been written by Youri Hoogstrate from the Erasmus | |
266 Medical Center (Rotterdam, Netherlands) on behalf of the Translational | |
267 Research IT (TraIT) project: | |
268 | |
269 http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch | |
270 | |
271 More tools by the Translational Research IT (TraIT) project can be found | |
272 in the following toolsheds: | |
273 | |
274 http://toolshed.g2.bx.psu.edu/ | |
275 | |
276 http://testtoolshed.g2.bx.psu.edu/ | |
277 </help> | |
278 <citations> | |
279 <citation type="bibtex"> | |
280 @unpublished{samtools_parallel_mpileup, | |
281 author = {Youri Hoogstrate}, | |
282 title = { Samtools parallel-mpileup, fork of classical samtools }, | |
283 year = 2014, | |
284 url = { https://github.com/yhoogstrate/parallel-mpileup } | |
285 } | |
286 </citation> | |
287 <citation type="bibtex"> | |
288 @misc{SAM_def, | |
289 title={Definition of SAM/BAM format}, | |
290 url = {https://samtools.github.io/hts-specs/SAMv1.pdf},} | |
291 </citation> | |
292 <citation type="bibtex"> | |
293 @misc{SamTools_github, | |
294 title={SAMTools GitHub page}, | |
295 url = {https://github.com/samtools/samtools},} | |
296 </citation> | |
297 </citations> | |
298 </tool> |