comparison bwa.xml @ 18:48f306c57611 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bwa commit c355891532cecaab6b3288a148a6b3bcb5973396
author iuc
date Fri, 24 Nov 2017 09:55:45 -0500
parents 051eba708f43
children 4f774c1e6049
comparison
equal deleted inserted replaced
17:d1228ec6233f 18:48f306c57611
1 <?xml version="1.0"?> 1 <?xml version="1.0"?>
2 <tool id="bwa" name="Map with BWA" version="@VERSION@.2"> 2 <tool id="bwa" name="Map with BWA" version="@VERSION@.3">
3 <description>- map short reads (&lt; 100 bp) against reference genome</description> 3 <description>- map short reads (&lt; 100 bp) against reference genome</description>
4 <macros> 4 <macros>
5 <import>read_group_macros.xml</import> 5 <import>read_group_macros.xml</import>
6 <import>bwa_macros.xml</import> 6 <import>bwa_macros.xml</import>
7 <token name="@command_options@"> 7 <token name="@command_options@">
8 #if str( $analysis_type.analysis_type_selector ) == "full": 8 #if str( $analysis_type.analysis_type_selector ) == "full":
9 -n ${analysis_type.n} 9 -n ${analysis_type.n}
10 -o ${analysis_type.o} 10 -o ${analysis_type.o}
11 -e ${analysis_type.e} 11 -e ${analysis_type.e}
12 -i ${analysis_type.i} 12 -i ${analysis_type.i}
13 -d ${analysis_type.d} 13 -d ${analysis_type.d}
14 -l ${analysis_type.l} 14 -l ${analysis_type.l}
15 -k ${analysis_type.k} 15 -k ${analysis_type.k}
16 -m ${analysis_type.m} 16 -m ${analysis_type.m}
17 -M ${analysis_type.M} 17 -M ${analysis_type.M}
18 -O ${analysis_type.O} 18 -O ${analysis_type.O}
19 -E ${analysis_type.E} 19 -E ${analysis_type.E}
20 -R ${analysis_type.R} 20 -R ${analysis_type.R}
21 -q ${analysis_type.q} 21 -q ${analysis_type.q}
22 22 #if str( $analysis_type.B ):
23 #if str( $analysis_type.B ):
24 -B ${analysis_type.B} 23 -B ${analysis_type.B}
25 #end if 24 #end if
26 25 #if str( $analysis_type.L ):
27 #if str( $analysis_type.L ):
28 -L ${analysis_type.L} 26 -L ${analysis_type.L}
29 #end if 27 #end if
30 #end if 28 #end if
31 </token> 29 </token>
32 <token name="@read_group_options@"> 30 <token name="@read_group_options@">
33 #if $use_rg: 31 #if $use_rg:
34 @set_rg_string@ 32 @set_rg_string@
35 -r '$rg_string' 33 -r '$rg_string'
36 #end if 34 #end if
37 </token> 35 </token>
38 36 <xml name="advanced_pe_options">
39 <xml name="advanced_pe_options"> 37 <param name="adv_pe_options_selector" type="select" label="Set advanced paired end options?"
40 <param name="adv_pe_options_selector" type="select" label="Set advanced paired end options?" help="Provides additional controls"> 38 help="Provides additional controls">
41 <option value="set">Set</option> 39 <option value="set">Set</option>
42 <option value="do_not_set" selected="True">Do not set</option> 40 <option value="do_not_set" selected="True">Do not set</option>
43 </param> 41 </param>
44 <when value="set"> 42 <when value="set">
45 <param name="a" type="integer" value="500" label="Maximum insert size for a read pair to be considered being mapped properly." help="sampe -a; This option is only used when there are not enough good alignment to infer the distribution of insert sizes; default=500"/> 43 <param name="a" type="integer" value="500"
46 <param name="o" type="integer" value="100000" label="Maximum occurrences of a read for pairing. A read with more occurrences will be treated as a single-end read." help="sampe -o; Reducing this parameter helps faster pairing; default=100000"/> 44 label="Maximum insert size for a read pair to be considered being mapped properly."
47 <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly." help="sampe -n; If a read has more than this many hits, the XA tag will not be written; default=3"/> 45 help="sampe -a; This option is only used when there are not enough good alignment to infer the distribution of insert sizes; default=500"/>
48 <param name="N" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons)." help="sampe -N; If a read has more than this many hits, the XA tag will not be written; default=10"/> 46 <param name="o" type="integer" value="100000"
49 <param name="c" type="float" value="0.00005" label="Prior of chimeric rate (lower bound)" help="sampe -c"/> 47 label="Maximum occurrences of a read for pairing. A read with more occurrences will be treated as a single-end read."
50 </when> 48 help="sampe -o; Reducing this parameter helps faster pairing; default=100000"/>
51 <when value="do_not_set"> 49 <param name="n" type="integer" value="3"
52 <!-- do nothing --> 50 label="Maximum number of alignments to output in the XA tag for reads paired properly."
53 </when> 51 help="sampe -n; If a read has more than this many hits, the XA tag will not be written; default=3"/>
54 </xml> 52 <param name="N" type="integer" value="10"
55 <xml name="advanced_se_options"> 53 label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons)."
56 <param name="adv_se_options_selector" type="select" label="Set advanced single end options?" help="Provides additional controls"> 54 help="sampe -N; If a read has more than this many hits, the XA tag will not be written; default=10"/>
57 <option value="set">Set</option> 55 <param name="c" type="float" value="0.00005" label="Prior of chimeric rate (lower bound)"
58 <option value="do_not_set" selected="True">Do not set</option> 56 help="sampe -c"/>
59 </param> 57 </when>
60 <when value="set"> 58 <when value="do_not_set">
61 <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag." help="-n; If a read has more than this many hits, the XA tag will not be written; default=3"/> 59 <!-- do nothing -->
62 </when> 60 </when>
63 <when value="do_not_set"> 61 </xml>
64 <!-- do nothing --> 62 <xml name="advanced_se_options">
65 </when> 63 <param name="adv_se_options_selector" type="select" label="Set advanced single end options?"
66 </xml> 64 help="Provides additional controls">
67 </macros> 65 <option value="set">Set</option>
68 <expand macro="requirements" /> 66 <option value="do_not_set" selected="True">Do not set</option>
69 <expand macro="stdio" /> 67 </param>
70 <command> 68 <when value="set">
71 <![CDATA[ 69 <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag."
72 @set_reference_fasta_filename@ 70 help="-n; If a read has more than this many hits, the XA tag will not be written; default=3"/>
73 71 </when>
74 ## setup vars for rg handling... 72 <when value="do_not_set">
75 @define_read_group_helpers@ 73 <!-- do nothing -->
76 #if str( $input_type.input_type_selector ) == "paired": 74 </when>
77 #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1, $input_type.fastq_input2) 75 </xml>
78 #elif str( $input_type.input_type_selector ) in ["single_bam", "paired_bam"]: 76 </macros>
79 #set $rg_auto_name = $read_group_name_default($input_type.bam_input) 77 <expand macro="requirements"/>
78 <expand macro="stdio"/>
79 <command>
80 <![CDATA[
81 @set_reference_fasta_filename@
82
83 ## setup vars for rg handling...
84 @define_read_group_helpers@
85 #if str( $input_type.input_type_selector ) == "paired":
86 #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1, $input_type.fastq_input2)
87 #elif str( $input_type.input_type_selector ) in ["single_bam", "paired_bam"]:
88 #set $rg_auto_name = $read_group_name_default($input_type.bam_input)
89 #else
90 #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1)
91 #end if
92 @set_use_rg_var@
93 @set_read_group_vars@
94
95 ## Begin bwa command line
96
97 ####### Fastq paired
98
99 #if str( $input_type.input_type_selector ) == "paired" or str( $input_type.input_type_selector ) == "paired_collection":
100 bwa aln
101 -t "\${GALAXY_SLOTS:-1}"
102 @command_options@
103 '$reference_fasta_filename'
104 #if str( $input_type.input_type_selector ) == "paired_collection":
105 '${input_type.fastq_input1.forward}'
80 #else 106 #else
81 #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1) 107 '${input_type.fastq_input1}'
82 #end if 108 #end if
83 @set_use_rg_var@ 109 > first.sai &&
84 @set_read_group_vars@ 110
85 111 bwa aln
86 ## Begin bwa command line 112 -t "\${GALAXY_SLOTS:-1}"
87 113 @command_options@
88 ####### Fastq paired 114 '${reference_fasta_filename}'
89 115 #if str( $input_type.input_type_selector ) == "paired_collection":
90 #if str( $input_type.input_type_selector ) == "paired" or str( $input_type.input_type_selector ) == "paired_collection": 116 '${input_type.fastq_input1.reverse}'
91 bwa aln 117 #else
92 -t "\${GALAXY_SLOTS:-1}" 118 '${input_type.fastq_input2}'
93 119 #end if
94 @command_options@ 120 > second.sai &&
95 121
96 "${reference_fasta_filename}" 122 bwa sampe
97 123 #if str( $input_type.adv_pe_options.adv_pe_options_selector) == "True":
98 #if str( $input_type.input_type_selector ) == "paired_collection":
99 "${input_type.fastq_input1.forward}"
100 #else
101 "${input_type.fastq_input1}"
102 #end if
103
104 > first.sai &&
105
106 bwa aln
107 -t "\${GALAXY_SLOTS:-1}"
108
109 @command_options@
110
111 "${reference_fasta_filename}"
112
113 #if str( $input_type.input_type_selector ) == "paired_collection":
114 "${input_type.fastq_input1.reverse}"
115 #else
116 "${input_type.fastq_input2}"
117 #end if
118
119 > second.sai &&
120
121 bwa sampe
122
123 #if str( $input_type.adv_pe_options.adv_pe_options_selector) == "True":
124 -a ${$input_type.adv_pe_options.a} 124 -a ${$input_type.adv_pe_options.a}
125 -o ${$input_type.adv_pe_options.o} 125 -o ${$input_type.adv_pe_options.o}
126 -n ${$input_type.adv_pe_options.n} 126 -n ${$input_type.adv_pe_options.n}
127 -N ${$input_type.adv_pe_options.N} 127 -N ${$input_type.adv_pe_options.N}
128 #end if 128 #end if
129 129 @read_group_options@
130 @read_group_options@ 130 #if str( $input_type.input_type_selector ) == "paired_collection":
131 131 '${reference_fasta_filename}' first.sai second.sai '${input_type.fastq_input1.forward}' '${input_type.fastq_input1.reverse}'
132 #if str( $input_type.input_type_selector ) == "paired_collection": 132 #else:
133 "${reference_fasta_filename}" first.sai second.sai "${input_type.fastq_input1.forward}" "${input_type.fastq_input1.reverse}" 133 '${reference_fasta_filename}' first.sai second.sai '${input_type.fastq_input1}' '${input_type.fastq_input2}'
134 #else: 134 #end if
135 "${reference_fasta_filename}" first.sai second.sai "${input_type.fastq_input1}" "${input_type.fastq_input2}" 135
136 #end if 136 ## Fastq single
137 137
138 ####### Fastq single 138 #elif str( $input_type.input_type_selector ) == "single":
139 139 bwa aln
140 #elif str( $input_type.input_type_selector ) == "single": 140 -t "\${GALAXY_SLOTS:-1}"
141 bwa aln 141
142 -t "\${GALAXY_SLOTS:-1}" 142 @command_options@
143 143
144 @command_options@ 144 '${reference_fasta_filename}'
145 145 '${input_type.fastq_input1}'
146 "${reference_fasta_filename}" 146 > first.sai &&
147 "${input_type.fastq_input1}" 147
148 > first.sai && 148 bwa samse
149 149
150 bwa samse 150 #if str( $input_type.adv_se_options.adv_se_options_selector) == "True":
151 151 -n ${$input_type.adv_se_options.n}
152 #if str( $input_type.adv_se_options.adv_se_options_selector) == "True": 152 #end if
153 -n ${$input_type.adv_se_options.n} 153 @read_group_options@
154 #end if 154 '${reference_fasta_filename}' first.sai '${input_type.fastq_input1}'
155
156 @read_group_options@
157
158 "${reference_fasta_filename}" first.sai "${input_type.fastq_input1}"
159 155
160 ####### BAM paired 156 ####### BAM paired
161 157
162 #elif str( $input_type.input_type_selector ) == "paired_bam": 158 #elif str( $input_type.input_type_selector ) == "paired_bam":
163 bwa aln 159 bwa aln
164 -t "\${GALAXY_SLOTS:-1}" 160 -t "\${GALAXY_SLOTS:-1}"
165 -b 161 -b
166 -1 162 -1
167 163 @command_options@
168 @command_options@ 164 '${reference_fasta_filename}'
169 165 '${input_type.bam_input}'
170 "${reference_fasta_filename}" 166 > first.sai &&
171 "${input_type.bam_input}" 167
172 > first.sai && 168 bwa aln
173 169 -t "\${GALAXY_SLOTS:-1}"
174 bwa aln 170 -b
175 -t "\${GALAXY_SLOTS:-1}" 171 -2
176 -b 172 @command_options@
177 -2 173 '${reference_fasta_filename}'
178 @command_options@ 174 '${input_type.bam_input}'
179 "${reference_fasta_filename}" 175 > second.sai &&
180 "${input_type.bam_input}" 176
181 > second.sai && 177 bwa sampe
182 178
183 bwa sampe 179 #if str( $input_type.adv_bam_pe_options.adv_pe_options_selector) == "True":
184
185 #if str( $input_type.adv_bam_pe_options.adv_pe_options_selector) == "True":
186 -a ${$input_type.adv_bam_pe_options.a} 180 -a ${$input_type.adv_bam_pe_options.a}
187 -o ${$input_type.adv_bam_pe_options.o} 181 -o ${$input_type.adv_bam_pe_options.o}
188 -n ${$input_type.adv_bam_pe_options.n} 182 -n ${$input_type.adv_bam_pe_options.n}
189 -N ${$input_type.adv_bam_pe_options.N} 183 -N ${$input_type.adv_bam_pe_options.N}
190 #end if 184 #end if
191 185 @read_group_options@
192 @read_group_options@ 186 '${reference_fasta_filename}' first.sai second.sai '${input_type.bam_input}' '${input_type.bam_input}'
193
194 "${reference_fasta_filename}" first.sai second.sai "${input_type.bam_input}" "${input_type.bam_input}"
195 187
196 ####### BAM single ------------ to do next 188 ####### BAM single ------------ to do next
197 189
198 #elif str( $input_type.input_type_selector ) == "single_bam": 190 #elif str( $input_type.input_type_selector ) == "single_bam":
199 bwa aln 191 bwa aln
200 -t "\${GALAXY_SLOTS:-1}" 192 -t "\${GALAXY_SLOTS:-1}"
201 -b 193 -b
202 -0 194 -0
203 195
204 @command_options@ 196 @command_options@
205 197
206 "${reference_fasta_filename}" 198 '${reference_fasta_filename}'
207 "${input_type.bam_input}" 199 '${input_type.bam_input}'
208 > first.sai && 200 > first.sai &&
209 201
210 bwa samse 202 bwa samse
211 203
212 #if str( $input_type.adv_bam_se_options.adv_se_options_selector) == "True": 204 #if str( $input_type.adv_bam_se_options.adv_se_options_selector) == "True":
213 -n ${$input_type.adv_bam_se_options.n} 205 -n ${$input_type.adv_bam_se_options.n}
214 #end if 206 #end if
215 207 @read_group_options@
216 @read_group_options@ 208 '${reference_fasta_filename}' first.sai '${input_type.bam_input}'
217 209 #end if
218 "${reference_fasta_filename}" first.sai "${input_type.bam_input}" 210
219 #end if 211 | samtools sort -@\${GALAXY_SLOTS:-2} -O bam -o '$bam_output'
220
221 | samtools sort -O bam -o '$bam_output'
222 ]]> 212 ]]>
223 </command> 213 </command>
224 214
225 <inputs> 215 <inputs>
226 <expand macro="reference_source_conditional" /> 216 <expand macro="reference_source_conditional"/>
227 <conditional name="input_type"> 217 <conditional name="input_type">
228 <param name="input_type_selector" type="select" label="Select input type" help="Select between fastq and bam datasets and between paired and single end data"> 218 <param name="input_type_selector" type="select" label="Select input type"
229 <option value="paired">Paired fastq</option> 219 help="Select between fastq and bam datasets and between paired and single end data">
230 <option value="paired_collection">Paired fastq collection</option> 220 <option value="paired">Paired fastq</option>
231 <option value="single">Single fastq</option> 221 <option value="paired_collection">Paired fastq collection</option>
232 <option value="paired_bam">Paired BAM</option> 222 <option value="single">Single fastq</option>
233 <option value="single_bam">Single BAM</option> 223 <option value="paired_bam">Paired BAM</option>
234 </param> 224 <option value="single_bam">Single BAM</option>
235 <when value="paired"> 225 </param>
236 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select first set of reads" help="Specify dataset with forward reads"/> 226 <when value="paired">
237 <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select second set of reads" help="Specify dataset with reverse reads"/> 227 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta"
238 <conditional name="adv_pe_options"> 228 label="Select first set of reads" help="Specify dataset with forward reads"/>
239 229 <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fasta"
240 <expand macro="advanced_pe_options" /> 230 label="Select second set of reads" help="Specify dataset with reverse reads"/>
241 231 <conditional name="adv_pe_options">
232 <expand macro="advanced_pe_options"/>
233 </conditional>
234 </when>
235 <when value="paired_collection">
236 <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection"
237 help="See help section for an explanation of dataset collections"/>
238 <conditional name="adv_pe_options">
239 <expand macro="advanced_pe_options"/>
240 </conditional>
241 </when>
242 <when value="single">
243 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta"
244 label="Select fastq dataset" help="Specify dataset with single reads"/>
245 <conditional name="adv_se_options">
246 <expand macro="advanced_se_options"/>
247 </conditional>
248 </when>
249 <!-- the difference between single and paired bams is in the <command> tag portion and related to -0, -1, and -2 options -->
250 <when value="paired_bam">
251 <param name="bam_input" type="data" format="bam" label="Select BAM dataset"
252 help="Specify BAM dataset with paired reads"/>
253 <conditional name="adv_bam_pe_options">
254 <expand macro="advanced_pe_options"/>
255 </conditional>
256 </when>
257 <when value="single_bam">
258 <param name="bam_input" type="data" format="bam" label="Select BAM dataset"
259 help="Specify BAM dataset with single reads"/>
260 <conditional name="adv_bam_se_options">
261 <expand macro="advanced_se_options"/>
262 </conditional>
263 </when>
242 </conditional> 264 </conditional>
243 </when> 265 <expand macro="read_group_conditional"/>
244 266 <conditional name="analysis_type">
245 <when value="paired_collection"> 267 <param name="analysis_type_selector" type="select" label="Select analysis mode">
246 <param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/> 268 <option value="illumina">1.Simple Illumina mode</option>
247 <conditional name="adv_pe_options"> 269 <option value="full">2.Full list of options</option>
248 270 </param>
249 <expand macro="advanced_pe_options" /> 271 <when value="illumina">
250 272 <!-- do nothing -->
273 </when>
274 <when value="full">
275 <param name="n" type="text" value="0.04"
276 label="maximum edit distance if the value is integer, or the fraction of missing alignments given 2% uniform base error rate if float. In the latter case, the maximum edit distance is automatically chosen for different read lengths."
277 help="aln -n; default=0.04"/>
278 <param name="o" type="integer" value="1" label="maximum number of gap openings"
279 help="aln -o; default=1"/>
280 <param name="e" type="integer" value="-1" label="maximum number of gap extensions"
281 help="aln -e; -1 disables long gaps and invokes k-difference mode; default=-1"/>
282 <param name="i" type="integer" value="5"
283 label="do not put an indel within this many bp towards the ends" help="aln -i; default=5"/>
284 <param name="d" type="integer" value="10" label="maximum occurrences for extending a long deletion"
285 help="aln -d; default=10"/>
286 <param name="l" type="integer" value="32" label="seed length" help="aln -l; default=32"/>
287 <param name="k" type="integer" value="2" label="maximum differences in the seed"
288 help="aln -k; default=2"/>
289 <param name="m" type="integer" value="2000000" label="maximum entries in the queue"
290 help="aln -m; default=2000000"/>
291 <param name="M" type="integer" value="3" label="mismatch penalty" help="aln -M; default=3"/>
292 <param name="O" type="integer" value="11" label="gap open penalty" help="aln -O; default=11"/>
293 <param name="E" type="integer" value="4" label="gap extension penalty" help="aln -E; default=4"/>
294 <param name="R" type="integer" value="30"
295 label="stop searching when there are more than this value of equally best hits"
296 help="aln -R; default=30"/>
297 <param name="q" type="integer" value="0" label="quality threshold for read trimming down to 35bp"
298 help="aln -q; default=0"/>
299 <param name="B" type="integer" optional="True" label="length of barcode"
300 help="aln -B; optional parameter"/>
301 <param name="L" type="float" optional="True" label="log-scaled gap penalty for long deletions"
302 help="aln -L; optional parameter"/>
303 </when>
251 </conditional> 304 </conditional>
252 </when> 305 </inputs>
253 306 <outputs>
254 <when value="single"> 307 <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)">
255 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select fastq dataset" help="Specify dataset with single reads"/> 308 <expand macro="dbKeyActionsBwa"/>
256 <conditional name="adv_se_options"> 309 </data>
257 310 </outputs>
258 <expand macro="advanced_se_options" /> 311 <tests>
259 312 <test>
260 </conditional> 313 <param name="reference_source_selector" value="history"/>
261 </when> 314 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
262 315 <param name="input_type_selector" value="single"/>
263 <!-- the difference between single and paired bams is in the <command> tag portion and related to -0, -1, and -2 options --> 316 <param name="fastq_input1" ftype="fasta" value="bwa-mem-fasta1.fa"/>
264 317 <param name="analysis_type_selector" value="illumina"/>
265 <when value="paired_bam"> 318 <output name="bam_output" ftype="bam" file="bwa-aln-test1-fasta.bam" lines_diff="2"/>
266 <param name="bam_input" type="data" format="bam" label="Select BAM dataset" help="Specify BAM dataset with paired reads"/> 319 </test>
267 <conditional name="adv_bam_pe_options"> 320 <test>
268 321 <param name="reference_source_selector" value="history"/>
269 <expand macro="advanced_pe_options" /> 322 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
270 323 <param name="input_type_selector" value="paired"/>
271 </conditional> 324 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
272 </when> 325 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
273 326 <param name="analysis_type_selector" value="illumina"/>
274 <when value="single_bam"> 327 <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2"/>
275 <param name="bam_input" type="data" format="bam" label="Select BAM dataset" help="Specify BAM dataset with single reads"/> 328 </test>
276 <conditional name="adv_bam_se_options"> 329 <test>
277 330 <param name="reference_source_selector" value="history"/>
278 <expand macro="advanced_se_options" /> 331 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
279 332 <param name="input_type_selector" value="paired"/>
280 </conditional> 333 <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/>
281 </when> 334 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
282 335 <param name="analysis_type_selector" value="illumina"/>
283 </conditional> 336 <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2"/>
284 337 </test>
285 <expand macro="read_group_conditional" /> 338 <test>
286 339 <param name="reference_source_selector" value="history"/>
287 <conditional name="analysis_type"> 340 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
288 <param name="analysis_type_selector" type="select" label="Select analysis mode"> 341 <param name="input_type_selector" value="paired_bam"/>
289 <option value="illumina">1.Simple Illumina mode</option> 342 <param name="bam_input" ftype="bam" value="bwa-aln-bam-input.bam"/>
290 <option value="full">2.Full list of options</option> 343 <param name="analysis_type_selector" value="illumina"/>
291 </param> 344 <output name="bam_output" ftype="bam" file="bwa-aln-test2.bam" lines_diff="2"/>
292 <when value="illumina"> 345 </test>
293 <!-- do nothing --> 346 <test>
294 </when> 347 <param name="reference_source_selector" value="history"/>
295 <when value="full"> 348 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
296 <param name="n" type="text" value="0.04" label="maximum edit distance if the value is integer, or the fraction of missing alignments given 2% uniform base error rate if float. In the latter case, the maximum edit distance is automatically chosen for different read lengths." help="aln -n; default=0.04"/> 349 <param name="input_type_selector" value="paired"/>
297 <param name="o" type="integer" value="1" label="maximum number of gap openings" help="aln -o; default=1"/> 350 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
298 <param name="e" type="integer" value="-1" label="maximum number of gap extensions" help="aln -e; -1 disables long gaps and invokes k-difference mode; default=-1"/> 351 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
299 <param name="i" type="integer" value="5" label="do not put an indel within this many bp towards the ends" help="aln -i; default=5"/> 352 <param name="rg_selector" value="set"/>
300 <param name="d" type="integer" value="10" label="maximum occurrences for extending a long deletion" help="aln -d; default=10"/> 353 <param name="ID" value="rg1"/>
301 <param name="l" type="integer" value="32" label="seed length" help="aln -l; default=32"/> 354 <param name="PL" value="CAPILLARY"/>
302 <param name="k" type="integer" value="2" label="maximum differences in the seed" help="aln -k; default=2"/> 355 <param name="analysis_type_selector" value="illumina"/>
303 <param name="m" type="integer" value="2000000" label="maximum entries in the queue" help="aln -m; default=2000000"/> 356 <output name="bam_output" ftype="bam" file="bwa-aln-test3.bam" lines_diff="2"/>
304 <param name="M" type="integer" value="3" label="mismatch penalty" help="aln -M; default=3"/> 357 </test>
305 <param name="O" type="integer" value="11" label="gap open penalty" help="aln -O; default=11"/> 358 </tests>
306 <param name="E" type="integer" value="4" label="gap extension penalty" help="aln -E; default=4"/> 359 <help><![CDATA[
307 <param name="R" type="integer" value="30" label="stop searching when there are more than this value of equally best hits" help="aln -R; default=30"/>
308 <param name="q" type="integer" value="0" label="quality threshold for read trimming down to 35bp" help="aln -q; default=0"/>
309 <param name="B" type="integer" optional="True" label="length of barcode" help="aln -B; optional parameter"/>
310 <param name="L" type="float" optional="True" label="log-scaled gap penalty for long deletions" help="aln -L; optional parameter"/>
311 </when>
312 </conditional>
313 </inputs>
314
315 <outputs>
316 <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)">
317 <expand macro="dbKeyActionsBwa" />
318 </data>
319 </outputs>
320
321 <tests>
322 <test>
323 <param name="reference_source_selector" value="history" />
324 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
325 <param name="input_type_selector" value="single"/>
326 <param name="fastq_input1" ftype="fasta" value="bwa-mem-fasta1.fa"/>
327 <param name="analysis_type_selector" value="illumina"/>
328 <output name="bam_output" ftype="bam" file="bwa-aln-test1-fasta.bam" lines_diff="2" />
329 </test>
330 <test>
331 <param name="reference_source_selector" value="history" />
332 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
333 <param name="input_type_selector" value="paired"/>
334 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
335 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
336 <param name="analysis_type_selector" value="illumina"/>
337 <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2" />
338 </test>
339 <test>
340 <param name="reference_source_selector" value="history" />
341 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
342 <param name="input_type_selector" value="paired"/>
343 <param name="fastq_input1" ftype="fastqsanger.gz" value="bwa-mem-fastq1.fq.gz"/>
344 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
345 <param name="analysis_type_selector" value="illumina"/>
346 <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2" />
347 </test>
348 <test>
349 <param name="reference_source_selector" value="history" />
350 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
351 <param name="input_type_selector" value="paired_bam"/>
352 <param name="bam_input" ftype="bam" value="bwa-aln-bam-input.bam"/>
353 <param name="analysis_type_selector" value="illumina"/>
354 <output name="bam_output" ftype="bam" file="bwa-aln-test2.bam" lines_diff="2" />
355 </test>
356 <test>
357 <param name="reference_source_selector" value="history" />
358 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/>
359 <param name="input_type_selector" value="paired"/>
360 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/>
361 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/>
362 <param name="rg_selector" value="set"/>
363 <param name="ID" value="rg1"/>
364 <param name="PL" value="CAPILLARY"/>
365 <param name="analysis_type_selector" value="illumina"/>
366 <output name="bam_output" ftype="bam" file="bwa-aln-test3.bam" lines_diff="2" />
367 </test>
368 </tests>
369 <help>
370 **What it does** 360 **What it does**
371 361
372 BWA is a software package for mapping low-divergent sequences against a large reference genome, such as the human genome. The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use BWA-MEM algorithm distributed as a separate Galaxy tool. 362 BWA is a software package for mapping low-divergent sequences against a large reference genome, such as the
363 human genome. The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use
364 the separate BWA-MEM Galaxy tool.
373 365
374 This Galaxy tool wraps bwa-aln, bwa-samse and -sampe modules of bwa read mapping tool: 366 This Galaxy tool wraps bwa-aln, bwa-samse and -sampe modules of bwa read mapping tool:
375 367
376 - **bwa aln** - actual mapper placing reads onto the reference sequence 368 - **bwa aln** - actual mapper placing reads onto the reference sequence
377 - **bwa samse** - post-processor converting suffix array coordinates into genome coordinates in SAM format for single reads 369 - **bwa samse** - post-processor converting suffix array coordinates into genome coordinates in SAM format for
378 - **bwa sampe** - post-processor for paired reads 370 single reads
379 371 - **bwa sampe** - post-processor for paired reads
380 Galaxy implementation takes fastq or BAM (unaligned BAM) datasets as input and produces output in BAM (not SAM; in reality SAM produced by the bwa is converted to BAM on the fly by samtools view command) format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard). 372
373
374 The Galaxy implementation takes fastq or BAM (unaligned BAM) datasets as input and produces output in BAM format,
375 which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard).
381 376
382 ----- 377 -----
383 378
384 **Indices: Selecting reference genomes for BWA** 379 **Indices: Selecting reference genomes for BWA**
385 380
386 Galaxy wrapper for BWA allows you select between precomputed and user-defined indices for reference genomes using **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options: 381 The Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes
387 382 using the **Will you select a reference genome from your history or use a built-in index?** select box.
388 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against. 383
389 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run mapping with `bwa aln`. 384 This select box has two options:
390 385
386 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select
387 reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility
388 and are ready to be mapped against.
389 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select
390 reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your
391 current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome
392 from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run
393 mapping with `bwa aln`.
394
395
391 If your genome of interest is not listed here you have two choices: 396 If your genome of interest is not listed here you have two choices:
392 397
393 1. Contact the Galaxy team using the **Help->Support** link at the top of the interface and let us know that an index needs to be added 398 1. Contact the Galaxy team using the **Help->Support** link at the top of the interface and let us know that an index
394 2. Upload your genome of interest as a FASTA file to Galaxy history and select the **Use a genome from the history and build index** option. 399 needs to be added
395 400 2. Upload your genome of interest as a FASTA file to Galaxy history and select the **Use a genome from the history
396 ----- 401 and build index** option.
397 402
398 **Galaxy-specific option**
399
400 Galaxy allows two levels of control over bwa-aln options, provided by the **Select analysis mode** menu option. These are:
401
402 1. *Simple Illumina mode*: The simplest possible bwa mem application in which it aligns single or paired-end data to the reference using default parameters. It is equivalent to the following command: bwa mem &lt;reference index&gt; &lt;fastq dataset1&gt; [fastq dataset2]
403 2. *Full list of options*: Allows access to all options through Galaxy interface.
404
405 ------
406
407 **bwa-aln options**
408
409 Each Galaxy parameter widget corresponds to command line flags listed below::
410
411 -n NUM max #diff (int) or missing prob under 0.02 err rate (float) [0.04]
412 -o INT maximum number or fraction of gap opens [1]
413 -e INT maximum number of gap extensions, -1 for disabling long gaps [-1]
414 -i INT do not put an indel within INT bp towards the ends [5]
415 -d INT maximum occurrences for extending a long deletion [10]
416 -l INT seed length [32]
417 -k INT maximum differences in the seed [2]
418 -m INT maximum entries in the queue [2000000]
419 -M INT mismatch penalty [3]
420 -O INT gap open penalty [11]
421 -E INT gap extension penalty [4]
422 -R INT stop searching when there are >INT equally best hits [30]
423 -q INT quality threshold for read trimming down to 35bp [0]
424 -B INT length of barcode
425 -L log-scaled gap penalty for long deletions
426 -N non-iterative mode: search for all n-difference hits (slooow)
427 -I the input is in the Illumina 1.3+ FASTQ-like format
428 -b the input read file is in the BAM format
429 -0 use single-end reads only (effective with -b)
430 -1 use the 1st read in a pair (effective with -b)
431 -2 use the 2nd read in a pair (effective with -b)
432
433 **bwa-sampe options**::
434
435 -a INT maximum insert size [500]
436 -o INT maximum occurrences for one end [100000]
437 -n INT maximum hits to output for paired reads [3]
438 -N INT maximum hits to output for discordant pairs [10]
439 -c FLOAT prior of chimeric rate (lower bound) [1.0e-05]
440 -r STR read group header line [null]
441
442 **bwa-samse options**::
443
444 -n INT maximum hits to output for paired reads [3]
445 -r STR read group header line [null]
446
447 @dataset_collections@
448 403
449 @RG@ 404 @RG@
450 405
451 @info@ 406 @info@
452 </help> 407 ]]></help>
453 <citations> 408 <citations>
454 <citation type="doi">10.1093/bioinformatics/btp324</citation> 409 <citation type="doi">10.1093/bioinformatics/btp324</citation>
455 <citation type="doi">10.1093/bioinformatics/btp698</citation> 410 <citation type="doi">10.1093/bioinformatics/btp698</citation>
456 </citations> 411 </citations>
457 </tool> 412 </tool>