Mercurial > repos > devteam > bwa
annotate bwa.xml @ 6:09a7281d24c5 draft
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
author | devteam |
---|---|
date | Tue, 21 Jul 2015 14:12:49 -0400 |
parents | fbf460831036 |
children | d8c9597bfb09 |
rev | line source |
---|---|
0 | 1 <?xml version="1.0"?> |
6
09a7281d24c5
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
2 <tool id="bwa" name="Map with BWA" version="0.3.1"> |
0 | 3 <description>- map short reads (< 100 bp) against reference genome</description> |
4 <macros> | |
5
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
5 <import>read_group_macros.xml</import> |
0 | 6 <import>bwa_macros.xml</import> |
2 | 7 <token name="@command_options@"> |
8 #if str( $analysis_type.analysis_type_selector ) == "full": | |
0 | 9 -n ${analysis_type.n} |
10 -o ${analysis_type.o} | |
11 -e ${analysis_type.e} | |
12 -i ${analysis_type.i} | |
13 -d ${analysis_type.d} | |
14 -l ${analysis_type.l} | |
15 -k ${analysis_type.k} | |
16 -m ${analysis_type.m} | |
17 -M ${analysis_type.M} | |
18 -O ${analysis_type.O} | |
19 -E ${analysis_type.E} | |
20 -R ${analysis_type.R} | |
21 -q ${analysis_type.q} | |
2 | 22 |
0 | 23 #if str( $analysis_type.B ): |
24 -B ${analysis_type.B} | |
25 #end if | |
2 | 26 |
0 | 27 #if str( $analysis_type.L ): |
4
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
28 -L ${analysis_type.L} |
0 | 29 #end if |
2 | 30 #end if |
0 | 31 </token> |
32 <token name="@read_group_options@"> | |
5
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
33 #if $use_rg: |
2 | 34 @set_rg_string@ |
35 -r '$rg_string' | |
0 | 36 #end if |
37 </token> | |
2 | 38 |
0 | 39 <xml name="advanced_pe_options"> |
40 <param name="adv_pe_options_selector" type="select" label="Set advanced paired end options?" help="Provides additional controls"> | |
41 <option value="set">Set</option> | |
42 <option value="do_not_set" selected="True">Do not set</option> | |
43 </param> | |
44 <when value="set"> | |
45 <param name="a" type="integer" value="500" label="Maximum insert size for a read pair to be considered being mapped properly." help="sampe -a; This option is only used when there are not enough good alignment to infer the distribution of insert sizes; default=500"/> | |
46 <param name="o" type="integer" value="100000" label="Maximum occurrences of a read for pairing. A read with more occurrences will be treated as a single-end read." help="sampe -o; Reducing this parameter helps faster pairing; default=100000"/> | |
47 <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly." help="sampe -n; If a read has more than this many hits, the XA tag will not be written; default=3"/> | |
48 <param name="N" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons)." help="sampe -N; If a read has more than this many hits, the XA tag will not be written; default=10"/> | |
49 <param name="c" type="float" value="0.00005" label="Prior of chimeric rate (lower bound)" help="sampe -c"/> | |
50 </when> | |
51 <when value="do_not_set"> | |
52 <!-- do nothing --> | |
53 </when> | |
54 </xml> | |
55 <xml name="advanced_se_options"> | |
56 <param name="adv_se_options_selector" type="select" label="Set advanced single end options?" help="Provides additional controls"> | |
57 <option value="set">Set</option> | |
58 <option value="do_not_set" selected="True">Do not set</option> | |
59 </param> | |
60 <when value="set"> | |
61 <param name="n" type="integer" value="3" label="Maximum number of alignments to output in the XA tag." help="-n; If a read has more than this many hits, the XA tag will not be written; default=3"/> | |
62 </when> | |
63 <when value="do_not_set"> | |
64 <!-- do nothing --> | |
65 </when> | |
66 </xml> | |
67 </macros> | |
68 | |
2 | 69 <requirements> |
70 <requirement type="package" version="0.7.10.039ea20639">bwa</requirement> | |
71 <requirement type="package" version="1.1">samtools</requirement> | |
72 </requirements> | |
6
09a7281d24c5
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
73 <expand macro="stdio" /> |
2 | 74 <command> |
75 #set $reference_fasta_filename = "localref.fa" | |
76 | |
77 #if str( $reference_source.reference_source_selector ) == "history": | |
78 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" && | |
79 | |
80 ## The following shell commands decide which of the BWA indexing algorithms (IS or BWTSW) will be run | |
81 ## depending on the size of the input FASTA dataset | |
82 ( | |
83 size=`stat -c %s "${reference_fasta_filename}" 2>/dev/null`; ## Linux | |
84 if [ $? -eq 0 ]; | |
85 then | |
86 if [ "\$size" -lt 2000000000 ]; | |
87 then | |
88 bwa index -a is "${reference_fasta_filename}"; | |
89 else | |
90 bwa index -a bwtsw "${reference_fasta_filename}"; | |
91 fi; | |
92 fi; | |
93 | |
94 eval \$(stat -s "${reference_fasta_filename}" 2>/dev/null); ## OSX | |
95 if [ -n "\$st_size" ]; | |
96 then | |
97 if [ "\$st_size" -lt 2000000000 ]; | |
98 then | |
99 bwa index -a is "${reference_fasta_filename}"; | |
100 echo "Generating BWA index with is algorithm"; | |
101 else | |
102 bwa index -a bwtsw "${reference_fasta_filename}"; | |
103 echo "Generating BWA index with bwtsw algorithm"; | |
104 fi; | |
105 fi; | |
106 ) && | |
107 #else: | |
108 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) | |
109 #end if | |
110 | |
5
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
111 ## setup vars for rg handling... |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
112 @define_read_group_helpers@ |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
113 #if str( $input_type.input_type_selector ) == "paired": |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
114 #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1, $input_type.fastq_input2) |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
115 #elif str( $input_type.input_type_selector ) in ["single_bam", "paired_bam"]: |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
116 #set $rg_auto_name = $read_group_name_default($input_type.bam_input) |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
117 #else |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
118 #set $rg_auto_name = $read_group_name_default($input_type.fastq_input1) |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
119 #end if |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
120 @set_use_rg_var@ |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
121 @set_read_group_vars@ |
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
122 |
2 | 123 ## Begin bwa command line |
124 | |
125 ####### Fastq paired | |
126 | |
127 #if str( $input_type.input_type_selector ) == "paired" or str( $input_type.input_type_selector ) == "paired_collection": | |
128 bwa aln | |
129 -t "\${GALAXY_SLOTS:-1}" | |
130 | |
131 @command_options@ | |
132 | |
133 "${reference_fasta_filename}" | |
134 | |
135 #if str( $input_type.input_type_selector ) == "paired_collection": | |
136 "${input_type.fastq_input1.forward}" | |
137 #else | |
138 "${input_type.fastq_input1}" | |
139 #end if | |
140 | |
141 > first.sai && | |
142 | |
143 bwa aln | |
144 -t "\${GALAXY_SLOTS:-1}" | |
145 | |
146 @command_options@ | |
147 | |
148 "${reference_fasta_filename}" | |
149 | |
150 #if str( $input_type.input_type_selector ) == "paired_collection": | |
151 "${input_type.fastq_input1.reverse}" | |
152 #else | |
153 "${input_type.fastq_input2}" | |
154 #end if | |
155 | |
156 > second.sai && | |
157 | |
158 bwa sampe | |
159 | |
## Advanced sampe options: adv_pe_options_selector takes the values "set" / "do_not_set" (never "True"), so test for "set" | |
160 #if str( $input_type.adv_pe_options.adv_pe_options_selector) == "set": | |
161 -a ${input_type.adv_pe_options.a} | |
162 -o ${input_type.adv_pe_options.o} | |
163 -n ${input_type.adv_pe_options.n} | |
164 -N ${input_type.adv_pe_options.N} | |
165 #end if | |
166 | |
167 @read_group_options@ | |
168 | |
169 #if str( $input_type.input_type_selector ) == "paired_collection": | |
170 "${reference_fasta_filename}" first.sai second.sai "${input_type.fastq_input1.forward}" "${input_type.fastq_input1.reverse}" | |
171 #else: | |
172 "${reference_fasta_filename}" first.sai second.sai "${input_type.fastq_input1}" "${input_type.fastq_input2}" | |
173 #end if | |
174 | |
175 ####### Fastq single | |
176 | |
177 #elif str( $input_type.input_type_selector ) == "single": | |
178 bwa aln | |
179 -t "\${GALAXY_SLOTS:-1}" | |
180 | |
181 @command_options@ | |
182 | |
183 "${reference_fasta_filename}" | |
184 "${input_type.fastq_input1}" | |
185 > first.sai && | |
186 | |
187 bwa samse | |
188 | |
## Advanced samse options: adv_se_options_selector takes the values "set" / "do_not_set" (never "True"), so test for "set" | |
189 #if str( $input_type.adv_se_options.adv_se_options_selector) == "set": | |
190 -n ${input_type.adv_se_options.n} | |
191 #end if | |
192 | |
193 @read_group_options@ | |
194 | |
195 "${reference_fasta_filename}" first.sai "${input_type.fastq_input1}" | |
196 | |
197 ####### BAM paired | |
198 | |
199 #elif str( $input_type.input_type_selector ) == "paired_bam": | |
200 bwa aln | |
201 -t "\${GALAXY_SLOTS:-1}" | |
202 -b | |
203 -1 | |
204 | |
205 @command_options@ | |
206 | |
207 "${reference_fasta_filename}" | |
208 "${input_type.bam_input}" | |
209 > first.sai && | |
210 | |
211 bwa aln | |
212 -t "\${GALAXY_SLOTS:-1}" | |
213 -b | |
214 -2 | |
215 @command_options@ | |
216 "${reference_fasta_filename}" | |
217 "${input_type.bam_input}" | |
218 > second.sai && | |
219 | |
220 bwa sampe | |
221 | |
## Advanced sampe options (BAM input): adv_pe_options_selector takes the values "set" / "do_not_set" (never "True"), so test for "set" | |
222 #if str( $input_type.adv_bam_pe_options.adv_pe_options_selector) == "set": | |
223 -a ${input_type.adv_bam_pe_options.a} | |
224 -o ${input_type.adv_bam_pe_options.o} | |
225 -n ${input_type.adv_bam_pe_options.n} | |
226 -N ${input_type.adv_bam_pe_options.N} | |
227 #end if | |
228 | |
229 @read_group_options@ | |
230 | |
231 "${reference_fasta_filename}" first.sai second.sai "${input_type.bam_input}" "${input_type.bam_input}" | |
232 | |
233 ####### BAM single | |
234 | |
235 #elif str( $input_type.input_type_selector ) == "single_bam": | |
236 bwa aln | |
237 -t "\${GALAXY_SLOTS:-1}" | |
238 -b | |
239 -0 | |
240 | |
241 @command_options@ | |
242 | |
243 "${reference_fasta_filename}" | |
244 "${input_type.bam_input}" | |
245 > first.sai && | |
246 | |
247 bwa samse | |
248 | |
## Advanced samse options (BAM input): adv_se_options_selector takes the values "set" / "do_not_set" (never "True"), so test for "set" | |
249 #if str( $input_type.adv_bam_se_options.adv_se_options_selector) == "set": | |
250 -n ${input_type.adv_bam_se_options.n} | |
251 #end if | |
252 | |
253 @read_group_options@ | |
254 | |
255 "${reference_fasta_filename}" first.sai "${input_type.bam_input}" | |
256 #end if | |
257 | |
258 | samtools view -Sb - > temporary_bam_file.bam && | |
259 | |
260 samtools sort -f temporary_bam_file.bam ${bam_output} | |
261 </command> | |
262 | |
0 | 263 <inputs> |
4
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
264 |
0 | 265 <conditional name="reference_source"> |
4
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
266 <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below"> |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
267 <option value="cached">Use a built-in genome index</option> |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
268 <option value="history">Use a genome from history and build index</option> |
0 | 269 </param> |
270 <when value="cached"> | |
271 <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list"> | |
272 <options from_data_table="bwa_mem_indexes"> | |
273 <filter type="sort_by" column="2" /> | |
274 <validator type="no_options" message="No indexes are available" /> | |
275 </options> | |
276 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | |
277 </param> | |
278 </when> | |
2 | 279 <when value="history"> |
0 | 280 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" /> |
281 </when> | |
282 </conditional> | |
283 <conditional name="input_type"> | |
284 <param name="input_type_selector" type="select" label="Select input type" help="Select between fastq and bam datasets and between paired and single end data"> | |
285 <option value="paired">Paired fastq</option> | |
286 <option value="paired_collection">Paired fastq collection</option> | |
287 <option value="single">Single fastq</option> | |
288 <option value="paired_bam">Paired BAM</option> | |
289 <option value="single_bam">Single BAM</option> | |
290 </param> | |
291 <when value="paired"> | |
292 <param name="fastq_input1" type="data" format="fastqsanger" label="Select first set of reads" help="Specify dataset with forward reads"/> | |
293 <param name="fastq_input2" type="data" format="fastqsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/> | |
294 <conditional name="adv_pe_options"> | |
2 | 295 |
0 | 296 <expand macro="advanced_pe_options" /> |
2 | 297 |
0 | 298 </conditional> |
299 </when> | |
2 | 300 |
0 | 301 <when value="paired_collection"> |
302 <param name="fastq_input1" format="fastqsanger" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/> | |
303 <conditional name="adv_pe_options"> | |
2 | 304 |
0 | 305 <expand macro="advanced_pe_options" /> |
2 | 306 |
0 | 307 </conditional> |
308 </when> | |
2 | 309 |
0 | 310 <when value="single"> |
311 <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with single reads"/> | |
312 <conditional name="adv_se_options"> | |
2 | 313 |
0 | 314 <expand macro="advanced_se_options" /> |
2 | 315 |
0 | 316 </conditional> |
317 </when> | |
2 | 318 |
0 | 319 <!-- the difference between single and paired bams is in the <command> tag portion and related to -0, -1, and -2 options --> |
2 | 320 |
0 | 321 <when value="paired_bam"> |
322 <param name="bam_input" type="data" format="bam" label="Select BAM dataset" help="Specify BAM dataset with paired reads"/> | |
323 <conditional name="adv_bam_pe_options"> | |
2 | 324 |
0 | 325 <expand macro="advanced_pe_options" /> |
2 | 326 |
0 | 327 </conditional> |
328 </when> | |
2 | 329 |
0 | 330 <when value="single_bam"> |
331 <param name="bam_input" type="data" format="bam" label="Select BAM dataset" help="Specify BAM dataset with single reads"/> | |
332 <conditional name="adv_bam_se_options"> | |
2 | 333 |
0 | 334 <expand macro="advanced_se_options" /> |
2 | 335 |
0 | 336 </conditional> |
337 </when> | |
2 | 338 |
0 | 339 </conditional> |
2 | 340 |
5
fbf460831036
planemo upload commit d0e3412c58bd3bdc1a483a1e2f7f9c2aa5c87a1f-dirty
devteam
parents:
4
diff
changeset
|
341 <expand macro="read_group_conditional" /> |
2 | 342 |
0 | 343 <conditional name="analysis_type"> |
344 <param name="analysis_type_selector" type="select" label="Select analysis mode"> | |
345 <option value="illumina">1.Simple Illumina mode</option> | |
346 <option value="full">2.Full list of options</option> | |
347 </param> | |
348 <when value="illumina"> | |
349 <!-- do nothing --> | |
350 </when> | |
2 | 351 <when value="full"> |
0 | 352 <param name="n" type="text" value="0.04" label="maximum edit distance if the value is integer, or the fraction of missing alignments given 2% uniform base error rate if float. In the latter case, the maximum edit distance is automatically chosen for different read lengths." help="aln -n; default=0.04"/> |
353 <param name="o" type="integer" value="1" label="maximum number or gap openings" help="aln -o; default=1"/> | |
354 <param name="e" type="integer" value="-1" label="maximum number of gap extensions" help="aln -e; -1 disables long gaps and invokes k-difference mode; default=-1"/> | |
355 <param name="i" type="integer" value="5" label="do not put an indel within this many bp towards the ends" help="aln -i; default=5"/> | |
356 <param name="d" type="integer" value="10" label="maximum occurrences for extending a long deletion" help="aln -d; default=10"/> | |
357 <param name="l" type="integer" value="32" label="seed length" help="aln -l; default=32"/> | |
358 <param name="k" type="integer" value="2" label="maximum differences in the seed" help="aln -k; default=2"/> | |
359 <param name="m" type="integer" value="2000000" label="maximum entries in the queue" help="aln -m; default=2000000"/> | |
360 <param name="M" type="integer" value="3" label="mismatch penalty" help="aln -M; default=3"/> | |
361 <param name="O" type="integer" value="11" label="gap open penalty" help="aln -O; default=11"/> | |
362 <param name="E" type="integer" value="4" label="gap extension penalty" help="aln -E; default=4"/> | |
363 <param name="R" type="integer" value="30" label="stop searching when there are more than this value of equally best hits" help="aln -R; default=30"/> | |
364 <param name="q" type="integer" value="0" label="quality threshold for read trimming down to 35bp" help="aln -q; default=0"/> | |
365 <param name="B" type="integer" optional="True" label="length of barcode" help="aln -B; optional parameter"/> | |
2 | 366 <param name="L" type="float" optional="True" label="log-scaled gap penalty for long deletions" help="aln -L; optional parameter"/> |
0 | 367 </when> |
368 </conditional> | |
369 </inputs> | |
2 | 370 |
0 | 371 <outputs> |
6
09a7281d24c5
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
372 <data format="bam" name="bam_output" label="${tool.name} on ${on_string} (mapped reads in BAM format)"> |
09a7281d24c5
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
373 <expand macro="dbKeyActionsBwa" /> |
09a7281d24c5
planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents:
5
diff
changeset
|
374 </data> |
0 | 375 </outputs> |
2 | 376 |
0 | 377 <tests> |
378 <test> | |
379 <param name="reference_source_selector" value="history" /> | |
380 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> | |
381 <param name="input_type_selector" value="paired"/> | |
382 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> | |
383 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> | |
384 <param name="analysis_type_selector" value="illumina"/> | |
385 <output name="bam_output" ftype="bam" file="bwa-aln-test1.bam" lines_diff="2" /> | |
386 </test> | |
387 <test> | |
388 <param name="reference_source_selector" value="history" /> | |
389 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> | |
390 <param name="input_type_selector" value="paired_bam"/> | |
391 <param name="bam_input" ftype="bam" value="bwa-aln-bam-input.bam"/> | |
392 <param name="analysis_type_selector" value="illumina"/> | |
393 <output name="bam_output" ftype="bam" file="bwa-aln-test2.bam" lines_diff="2" /> | |
394 </test> | |
2 | 395 <test> |
396 <param name="reference_source_selector" value="history" /> | |
397 <param name="ref_file" ftype="fasta" value="bwa-mem-mt-genome.fa"/> | |
398 <param name="input_type_selector" value="paired"/> | |
399 <param name="fastq_input1" ftype="fastqsanger" value="bwa-mem-fastq1.fq"/> | |
400 <param name="fastq_input2" ftype="fastqsanger" value="bwa-mem-fastq2.fq"/> | |
401 <param name="rg_selector" value="set"/> | |
402 <param name="ID" value="rg1"/> | |
4
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
403 <param name="PL" value="CAPILLARY"/> |
2 | 404 <param name="analysis_type_selector" value="illumina"/> |
405 <output name="bam_output" ftype="bam" file="bwa-aln-test3.bam" lines_diff="2" /> | |
406 </test> | |
0 | 407 </tests> |
408 <help> | |
409 **What it does** | |
410 | |
4
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
411 BWA is a software package for mapping low-divergent sequences against a large reference genome, such as the human genome. The bwa-aln algorithm is designed for Illumina sequence reads up to 100bp. For longer reads use BWA-MEM algorithm distributed as a separate Galaxy tool. |
0 | 412 |
413 This Galaxy tool wraps the bwa-aln, bwa-samse and bwa-sampe modules of the bwa read mapping tool: | |
414 | |
4
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
415 - **bwa aln** - actual mapper placing reads onto the reference sequence |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
416 - **bwa samse** - post-processor converting suffix array coordinates into genome coordinates in SAM format for single reads |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
417 - **bwa sampe** - post-processor for paired reads |
2 | 418 |
0 | 419 Galaxy implementation takes fastq or BAM (unaligned BAM) datasets as input and produces output in BAM (not SAM; in reality SAM produced by the bwa is converted to BAM on the fly by samtools view command) format, which can be further processed using various BAM utilities existing in Galaxy (BAMTools, SAMTools, Picard). |
420 | |
421 ----- | |
422 | |
4
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
423 **Indices: Selecting reference genomes for BWA** |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
424 |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
425 Galaxy wrapper for BWA allows you to select between precomputed and user-defined indices for reference genomes using **Will you select a reference genome from your history or use a built-in index?** flag. This flag has two options: |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
426 |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
427 1. **Use a built-in genome index** - when selected (this is default), Galaxy provides the user with **Select reference genome index** dropdown. Genomes listed in this dropdown have been pre-indexed with bwa index utility and are ready to be mapped against. |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
428 2. **Use a genome from the history and build index** - when selected, Galaxy provides the user with **Select reference genome sequence** dropdown. This dropdown is populated by all FASTA formatted files listed in your current history. If your genome of interest is uploaded into history it will be shown there. Selecting a genome from this dropdown will cause Galaxy to first transparently index it using `bwa index` command, and then run mapping with `bwa aln`. |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
429 |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
430 If your genome of interest is not listed here you have two choices: |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
431 |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
432 1. Contact galaxy team using **Help->Support** link at the top of the interface and let us know that an index needs to be added |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
433 2. Upload your genome of interest as a FASTA file to Galaxy history and select **Use a genome from the history and build index** option. |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
434 |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
435 ----- |
ac30bfd3e2a8
planemo upload commit a50a3947aebc8a1d11bac39599f4efd8ed9a3bd5
devteam
parents:
3
diff
changeset
|
436 |
0 | 437 **Galaxy-specific option** |
438 | |
439 Galaxy allows two levels of control over bwa-aln options provided by **Select analysis mode** menu option. These are: | |
440 | |
441 1. *Simple Illumina mode*: The simplest possible bwa aln application, in which it aligns single or paired-end data to the reference using default parameters. It is equivalent to running bwa aln <reference index> <fastq dataset> on each input dataset, followed by bwa samse (single-end) or bwa sampe (paired-end). | |
442 2. *Full list of options*: Allows access to all options through Galaxy interface. | |
2 | 443 |
0 | 444 ------ |
445 | |
446 **bwa-aln options** | |
447 | |
448 Each Galaxy parameter widget corresponds to command line flags listed below:: | |
449 | |
450 -n NUM max #diff (int) or missing prob under 0.02 err rate (float) [0.04] | |
451 -o INT maximum number or fraction of gap opens [1] | |
452 -e INT maximum number of gap extensions, -1 for disabling long gaps [-1] | |
453 -i INT do not put an indel within INT bp towards the ends [5] | |
454 -d INT maximum occurrences for extending a long deletion [10] | |
455 -l INT seed length [32] | |
456 -k INT maximum differences in the seed [2] | |
457 -m INT maximum entries in the queue [2000000] | |
458 -M INT mismatch penalty [3] | |
459 -O INT gap open penalty [11] | |
460 -E INT gap extension penalty [4] | |
461 -R INT stop searching when there are >INT equally best hits [30] | |
462 -q INT quality threshold for read trimming down to 35bp [0] | |
463 -B INT length of barcode | |
464 -L log-scaled gap penalty for long deletions | |
465 -N non-iterative mode: search for all n-difference hits (slooow) | |
466 -I the input is in the Illumina 1.3+ FASTQ-like format | |
467 -b the input read file is in the BAM format | |
468 -0 use single-end reads only (effective with -b) | |
469 -1 use the 1st read in a pair (effective with -b) | |
470 -2 use the 2nd read in a pair (effective with -b) | |
471 | |
472 **bwa-sampe options**:: | |
473 | |
474 -a INT maximum insert size [500] | |
475 -o INT maximum occurrences for one end [100000] | |
476 -n INT maximum hits to output for paired reads [3] | |
477 -N INT maximum hits to output for discordant pairs [10] | |
478 -c FLOAT prior of chimeric rate (lower bound) [1.0e-05] | |
479 -r STR read group header line [null] | |
480 | |
481 **bwa-samse options**:: | |
482 | |
483 -n INT maximum hits to output for paired reads [3] | |
484 -r STR read group header line [null] | |
485 | |
486 @dataset_collections@ | |
487 | |
488 @RG@ | |
489 | |
490 @info@ | |
491 </help> | |
492 <citations> | |
493 <citation type="doi">10.1093/bioinformatics/btp324</citation> | |
494 <citation type="doi">10.1093/bioinformatics/btp698</citation> | |
495 </citations> | |
496 </tool> |