comparison bwa-mem2.xml @ 0:82217dccdbcf draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bwa_mem2 commit 7998bbefd9bfd03bc0e92a922297b503832c0419"
author iuc
date Fri, 08 Oct 2021 10:19:48 +0000
parents
children b4a22d90cce9
comparison
equal deleted inserted replaced
-1:000000000000 0:82217dccdbcf
1 <tool id="bwa_mem2" name="BWA-MEM2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <description>- map medium and long reads (&gt; 100 bp) against reference genome</description>
3 <macros>
4 <import>read_group_macros.xml</import>
5 <import>macros.xml</import>
6 </macros>
7 <expand macro="requirements"/>
8 <expand macro="stdio"/>
9 <expand macro="xrefs"/>
10 <command><![CDATA[
11 @pipefail@
12 @set_reference_fasta_filename@
13
14 ## Begin BWA-MEM command line
15
16 bwa-mem2 mem
17 #if str( $output_sort ) == "unsorted":
18 -t 1
19 #else
20 -t "\${GALAXY_SLOTS:-1}"
21 #end if
22 ## Verbosity is set to 1 (errors only)
23 -v 1
24
25 #if str( $fastq_input.fastq_input_selector ) == "paired_iv":
26 ## For interleaved fastq files set -p option
27 -p
28 ## check that insert statistics is used
29 #if str( $fastq_input.iset_stats ):
30 -I '${fastq_input.iset_stats}'
31 #end if
32 #end if
33
34 #if str( $analysis_type.analysis_type_selector ) not in ["illumina", "full"]:
35 -x '$analysis_type.analysis_type_selector'
36 #elif str( $analysis_type.analysis_type_selector ) == "full":
37 ## Algorithmic options
38 #if str( $analysis_type.algorithmic_options.algorithmic_options_selector ) == "set":
39 -k '${analysis_type.algorithmic_options.k}'
40 -w '${analysis_type.algorithmic_options.w}'
41 -d '${analysis_type.algorithmic_options.d}'
42 -r '${analysis_type.algorithmic_options.r}'
43 -y '${analysis_type.algorithmic_options.y}'
44 -c '${analysis_type.algorithmic_options.c}'
45 -D '${analysis_type.algorithmic_options.D}'
46 -W '${analysis_type.algorithmic_options.W}'
47 -m '${analysis_type.algorithmic_options.m}'
48 ${analysis_type.algorithmic_options.S}
49 ${analysis_type.algorithmic_options.P}
50 ${analysis_type.algorithmic_options.e}
51 #end if
52
53 ## Scoring options
54 #if str( $analysis_type.scoring_options.scoring_options_selector ) == "set":
55 -A '${analysis_type.scoring_options.A}'
56 -B '${analysis_type.scoring_options.B}'
57 -O '${analysis_type.scoring_options.O}'
58 -E '${analysis_type.scoring_options.E}'
59 -L '${analysis_type.scoring_options.L}'
60 -U '${analysis_type.scoring_options.U}'
61 #end if
62
63 ## IO options
64 #if str( $analysis_type.io_options.io_options_selector ) == "set":
65 -T '${analysis_type.io_options.T}'
66 -h '${analysis_type.io_options.h}'
67 ${analysis_type.io_options.a}
68 ${analysis_type.io_options.C}
69 ${analysis_type.io_options.V}
70 ${analysis_type.io_options.Y}
71 ${analysis_type.io_options.M}
72 ${analysis_type.io_options.five}
73 ${analysis_type.io_options.q}
74 #end if
75
76 #end if
77
78 ## Handle read group options...
79 @define_read_group_helpers@
80 #if str( $fastq_input.fastq_input_selector ) == "paired":
81 #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1, $fastq_input.fastq_input2)
82 #else:
83 #set $rg_auto_name = $read_group_name_default($fastq_input.fastq_input1)
84 #end if
85 @set_use_rg_var@
86 @set_read_group_vars@
87 #if $use_rg
88 @set_rg_string@
89 -R '$rg_string'
90 #end if
91
92 #if str( $fastq_input.fastq_input_selector ) == "paired":
93 ## check that insert statistics is used
94 #if str( $fastq_input.iset_stats ):
95 -I '${fastq_input.iset_stats}'
96 #end if
97
98 '${reference_fasta_filename}'
99 '${fastq_input.fastq_input1}' '${fastq_input.fastq_input2}'
100 #elif str( $fastq_input.fastq_input_selector ) == "paired_collection":
101 ## check that insert statistics is used
102 #if str( $fastq_input.iset_stats ):
103 -I '${fastq_input.iset_stats}'
104 #end if
105
106 '${reference_fasta_filename}'
107 '${fastq_input.fastq_input1.forward}' '${fastq_input.fastq_input1.reverse}'
108 #else:
109 '${reference_fasta_filename}'
110 '${fastq_input.fastq_input1}'
111 #end if
112
113 #if str( $output_sort ) == "coordinate":
114 | samtools sort -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '$bam_output'
115 #elif str( $output_sort ) == "name":
116 | samtools sort -n -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '$bam_output'
117 #else
118 | samtools view -@ \${GALAXY_SLOTS:-2} -bS - -o '$bam_output'
119 #end if
120
121
122 ]]></command>
123
124 <inputs>
125 <expand macro="reference_source_conditional" />
126 <conditional name="fastq_input"> <!-- selects the read layout; the chosen value drives the input-file part of <command> -->
127 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
128 <option value="paired">Paired</option>
129 <option value="single">Single</option>
130 <option value="paired_collection">Paired Collection</option>
131 <option value="paired_iv">Paired Interleaved</option>
132 </param>
133 <when value="paired">
134 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select first set of reads" help="Specify dataset with forward reads"/>
135 <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select second set of reads" help="Specify dataset with reverse reads"/>
136 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
137 <sanitizer invalid_char=""> <!-- whitelist: digits, "," and "." (mean and sd may be fractional); other characters are removed -->
138 <valid initial="string.digits"><add value=","/><add value="."/> </valid>
139 </sanitizer>
140 </param>
141 </when>
142 <when value="single">
143 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with single reads"/>
144 </when>
145 <when value="paired_collection">
146 <param name="fastq_input1" format="fastqsanger,fastqsanger.gz,fasta" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
147 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
148 <sanitizer invalid_char=""> <!-- whitelist: digits, "," and "." (mean and sd may be fractional); other characters are removed -->
149 <valid initial="string.digits"><add value=","/><add value="."/> </valid>
150 </sanitizer>
151 </param>
152 </when>
153 <when value="paired_iv">
154 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with interleaved reads"/>
155 <param name="iset_stats" type="text" optional="True" label="Enter mean, standard deviation, max, and min for insert lengths." help="-I; This parameter is only used for paired reads. Only mean is required while sd, max, and min will be inferred. Examples: both &quot;250&quot; and &quot;250,25&quot; will work while &quot;250,,10&quot; will not. See below for details.">
156 <sanitizer invalid_char=""> <!-- whitelist: digits, "," and "." (mean and sd may be fractional); other characters are removed -->
157 <valid initial="string.digits"><add value=","/><add value="."/> </valid>
158 </sanitizer>
159 </param>
160 </when>
161 </conditional>
162
163 <expand macro="read_group_conditional" />
164
165 <conditional name="analysis_type">
166 <param name="analysis_type_selector" type="select" label="Select analysis mode">
167 <option value="illumina">1.Simple Illumina mode</option>
168 <option value="pacbio">2.PacBio mode (-x pacbio)</option>
169 <option value="ont2d">3.Nanopore 2D-reads mode (-x ont2d)</option>
170 <option value="intractg">4.Intra-species contigs mode (-x intractg)</option>
171 <option value="full">5.Full list of options</option>
172 </param>
173 <when value="illumina">
174 <!-- do nothing -->
175 </when>
176 <when value="pacbio">
177 <!-- do nothing. all magic happens within <command> tag -->
178 </when>
179 <when value="ont2d">
180 <!-- do nothing. all magic happens within <command> tag -->
181 </when>
182 <when value="intractg">
183 <!-- do nothing. all magic happens within <command> tag -->
184 </when>
185 <when value="full">
186 <conditional name="algorithmic_options">
187 <param name="algorithmic_options_selector" type="select" label="Set algorithmic options?" help="Sets -k, -w, -d, -r, -y, -c, -D, -W, -m, -S, -P, and -e options.">
188 <option value="set">Set</option>
189 <option value="do_not_set" selected="True">Do not set</option>
190 </param>
191 <when value="set">
192 <param name="k" type="integer" value="19" label="Minimum seed length" help="-k; default=19"/>
193 <param name="w" type="integer" value="100" label="Band width for banded alignment" help="-w; default=100"/>
194 <param name="d" type="integer" value="100" label="Off-diagonal X-dropoff" help="-d; default=100"/>
195 <param name="r" type="float" value="1.5" label="Look for internal seeds inside a seed longer than -k * THIS VALUE" help="-r; default=1.5; This is a key heuristic parameter for tuning the performance. Larger value yields fewer seeds, which leads to faster alignment speed but lower accuracy" />
196 <param name="y" type="integer" value="20" label="Seed occurrence for the 3rd round seeding" help="-y; default=20" />
197 <param name="c" type="integer" value="500" label="Skip seeds with more than that many occurrences" help="-c; default=500"/>
198 <param name="D" type="float" value="0.5" label="Drop chains shorter than this fraction of the longest overlapping chain" help="-D; default=0.5"/>
199 <param name="W" type="integer" value="0" label="Discard a chain if seeded bases shorter than THIS VALUE" help="-W; default=0"/>
200 <param name="m" type="integer" value="50" label="Perform at most this many rounds of mate rescues for each read" help="-m; default=50"/>
201 <param name="S" type="boolean" truevalue="-S" falsevalue="" label="Skip mate rescue" help="-S"/>
202 <param name="P" type="boolean" truevalue="-P" falsevalue="" label="Skip pairing; mate rescue performed unless -S also in use" help="-P"/>
203 <param name="e" type="boolean" truevalue="-e" falsevalue="" label="Discard full-length exact matches" help="-e"/>
204 </when>
205 <when value="do_not_set">
206 <!-- do nothing -->
207 </when>
208 </conditional>
209
210 <conditional name="scoring_options">
211 <param name="scoring_options_selector" type="select" label="Set scoring options?" help="Sets -A, -B, -O, -E, -L, and -U options.">
212 <option value="set">Set</option>
213 <option value="do_not_set" selected="True">Do not set</option>
214 </param>
215 <when value="set">
216 <param name="A" type="integer" value="1" label="Score for a sequence match" help="-A; scales options -T, -d, -B, -O, -E, -L, and -U unless overridden; default=1"/>
217 <param name="B" type="integer" value="4" label="Penalty for a mismatch" help="-B; default=4"/>
218 <param name="O" type="text" value="6,6" label="Gap open penalties for deletions and insertions" help="-O; default=6,6">
219 <sanitizer invalid_char="">
220 <valid initial="string.digits"><add value=","/> </valid>
221 </sanitizer>
222 </param>
223 <param name="E" type="text" value="1,1" label="Gap extension penalties; a gap of size k cost &#39;-O + -E*k&#39;. If two numbers are specified, the first is the penalty of extending a deletion and the second for extending an insertion" help="-E; default=1,1">
224 <sanitizer invalid_char="">
225 <valid initial="string.digits"><add value=","/> </valid>
226 </sanitizer>
227 </param>
228 <param name="L" type="text" value="5,5" label="Penalties for 5&#39;-end and 3&#39;-end clipping" help="-L; default=5,5; When performing Smith-Waterman extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best Smith-Waterman score minus the clipping penalty, clipping will not be applied. Note that in this case, the SAM AS tag reports the best Smith-Waterman score; clipping penalty is not deduced">
229 <sanitizer invalid_char="">
230 <valid initial="string.digits"><add value=","/> </valid>
231 </sanitizer>
232 </param>
233 <param name="U" type="integer" value="17" label="Penalty for an unpaired read pair" help="-U; default=17"/>
234 </when>
235 <when value="do_not_set">
236 <!-- do nothing -->
237 </when>
238 </conditional>
239
240 <conditional name="io_options"> <!-- optional input/output flags; only emitted by <command> when the selector is "set" -->
241 <param name="io_options_selector" type="select" label="Set input/output options" help="Sets -5, -q, -T, -h, -a, -C, -V, -Y, and -M options.">
242 <option value="set">Set</option>
243 <option value="do_not_set" selected="True">Do not set</option>
244 </param>
245 <when value="set">
246 <param name="five" argument="-5" type="boolean" truevalue="-5" falsevalue="" label="For split alignment, take alignment with smallest coordinate as primary" help="Useful for HiC data"/>
247 <param argument="-q" type="boolean" truevalue="-q" falsevalue="" label="Don't lower MAPQ for split alignment" help="By default the MAPQ score of a supplementary alignment will be lowered to the primary alignment score."/>
248 <param name="T" type="integer" value="30" label="Minimum score to output" help="-T; default=30"/>
249 <param name="h" type="integer" value="5" label="If there are less than THIS VALUE hits with score &gt;80% of the max score, output them all in the XA tag" help="-h; default=5" />
250 <param name="a" type="boolean" truevalue="-a" falsevalue="" label="Output all alignments for single-ends or unpaired paired-ends" help="-a; These alignments will be flagged as secondary alignments"/>
251 <param name="C" type="boolean" truevalue="-C" falsevalue="" label="Append FASTA/FASTQ comment to BAM output" help="-C"/>
252 <param name="V" type="boolean" truevalue="-V" falsevalue="" label="Output the reference FASTA header in the XR tag" help="-V"/>
253 <param name="Y" type="boolean" truevalue="-Y" falsevalue="" label="Use soft clipping for supplementary alignments" help="-Y; By default, BWA-MEM uses soft clipping for the primary alignment and hard clipping for supplementary alignments" />
254 <param name="M" type="boolean" truevalue="-M" falsevalue="" label="Mark shorter split hits of a chimeric alignment in the FLAG field as 'secondary alignment' instead of 'supplementary alignment'" help="-M; For Picard&lt;1.96 compatibility" />
255 </when>
256 <when value="do_not_set">
257 <!-- no parameters here: none of the I/O flags above are added to the command line -->
258 </when>
259 </conditional>
260 </when>
261 </conditional>
262 <param name="output_sort" type="select" label="BAM sorting mode" help="The 'Not sorted' option can extend the run time of the tool significantly (cause it requires running on only a single thread).">
263 <option value="coordinate" selected="True">Sort by chromosomal coordinates</option>
264 <option value="name">Sort by read names (i.e., the QNAME field) </option>
265 <option value="unsorted">Not sorted (sorted as input)</option>
266 </param>
267 </inputs>
268
269 <outputs>
270 <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)">
271 <expand macro="dbKeyActionsBwaMem" />
272 <change_format>
273 <when input="output_sort" value="name" format="qname_sorted.bam" />
274 <when input="output_sort" value="unsorted" format="qname_input_sorted.bam" />
275 </change_format>
276 </data>
277 </outputs>
278
279 <tests>
280 <test>
281 <param name="reference_source_selector" value="history" />
282 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
283 <param name="fastq_input_selector" value="paired"/>
284 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
285 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
286 <param name="analysis_type_selector" value="illumina"/>
287 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="4" />
288 </test>
289 <test> <!-- single-end run whose reads are supplied as a FASTA file -->
290 <param name="reference_source_selector" value="history" />
291 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
292 <param name="fastq_input_selector" value="single"/>
293 <param name="fastq_input1" ftype="fasta" value="bwa-mem-fasta1.fa"/>
294 <param name="analysis_type_selector" value="illumina"/>
295 <output name="bam_output" ftype="bam" file="bwa-mem-test1-fasta.bam" lines_diff="4" />
296 </test>
297 <test>
298 <param name="reference_source_selector" value="history" />
299 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
300 <param name="fastq_input_selector" value="paired"/>
301 <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/>
302 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
303 <param name="analysis_type_selector" value="illumina"/>
304 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="4" />
305 </test>
306 <test>
307 <param name="reference_source_selector" value="history" />
308 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
309 <param name="index_a" value="is"/>
310 <param name="fastq_input_selector" value="paired"/>
311 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
312 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
313 <param name="rg_selector" value="set"/>
314 <param name="ID" value="rg1"/>
315 <param name="PL" value="CAPILLARY"/>
316 <param name="LB" value="AARDVARK-1" />
317 <param name="analysis_type_selector" value="illumina"/>
318 <output name="bam_output" ftype="bam" file="bwa-mem-test2.bam" lines_diff="4" />
319 </test>
320 <test>
321 <param name="reference_source_selector" value="history" />
322 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
323 <param name="fastq_input_selector" value="paired"/>
324 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
325 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
326 <param name="analysis_type_selector" value="illumina"/>
327 <param name="output_sort" value="unsorted"/>
328 <output name="bam_output" ftype="qname_input_sorted.bam" file="bwa-mem-test3.bam" lines_diff="4" />
329 </test>
330 <test>
331 <param name="reference_source_selector" value="history" />
332 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
333 <param name="fastq_input_selector" value="paired"/>
334 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
335 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
336 <param name="analysis_type_selector" value="illumina"/>
337 <param name="output_sort" value="name"/>
338 <output name="bam_output" ftype="qname_sorted.bam" file="bwa-mem-test4.bam" lines_diff="4" />
339 </test>
340 <test>
341 <param name="reference_source_selector" value="cached" />
342 <param name="ref_file" value="mtgenome"/>
343 <param name="fastq_input_selector" value="paired"/>
344 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
345 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
346 <param name="analysis_type_selector" value="illumina"/>
347 <output name="bam_output" ftype="bam" file="bwa-mem-test1.bam" lines_diff="4" />
348 </test>
349 </tests>
350 <help><![CDATA[
351 **What it does**
352 BWA-MEM2 is the new version of the bwa-mem algorithm in bwa. It produces alignment identical to bwa and is ~1.3-3.1x faster depending on the use-case, dataset and the running machine.
353 The algorithm is robust to sequencing errors and applicable to a wide range of sequence lengths from 70bp to a few megabases.
354
355 The Galaxy implementation takes fastq files as input and produces output in BAM format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard).
356
357 -----
358
359 **Indices: Selecting reference genomes for BWA**
360
361 Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes using **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options:
362
363 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against.
364 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run mapping with `bwa mem`.
365
366 If your genome of interest is not listed here you have two choices:
367
368 1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index needs to be added
369 2. Upload your genome of interest as a FASTA file to Galaxy history and select **Use a genome from the history and build index** option.
370
371 -----
372
373 **Galaxy-specific option**
374
375 Galaxy offers five analysis modes through the **Select analysis mode** menu option. They provide three levels of control over bwa-mem options:
376
377 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it aligns single or paired-end data to reference using default parameters. It is equivalent to the following command: bwa mem <reference index> <fastq dataset1> [fastq dataset2]
378 2. *PacBio mode*: The mode adjusted specifically for mapping of long PacBio subreads. Equivalent to the following command: bwa mem -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 <reference index> <PacBio dataset in fastq format>. The *Nanopore 2D-reads* and *Intra-species contigs* modes work the same way, selecting the ``-x ont2d`` and ``-x intractg`` presets respectively.
379 3. *Full list of options*: Allows access to all options through Galaxy interface.
380
381 -----
382
383 **Bam sorting mode**
384
385 The generated bam files can be sorted according to three criteria: coordinates, names and input order.
386
387 In coordinate sorted mode the reads are sorted by coordinates. It means that the reads from the beginning of the first chromosome are first in the file.
388
389 When sorted by read name, the file is sorted by the read (query) name, i.e., the QNAME field.
390
391 Finally, the *Not sorted (sorted as input)* option yields a BAM file in which the records are sorted in an order corresponding to the order of the reads in the original input file. This option runs the aligner on a single thread (``-t 1``), so the runtime is extended.
392
393
394 @RG@
395
396 @info@
397 ]]></help>
398 <expand macro="citations" />
399 </tool>