comparison bwa_color_wrapper.xml @ 0:ffa8aaa14f7c draft

Uploaded initial tarball.
author devteam
date Fri, 28 Sep 2012 13:53:48 -0400
parents
children 150b3fe44caa
comparison
equal deleted inserted replaced
-1:000000000000 0:ffa8aaa14f7c
1 <tool id="bwa_color_wrapper" name="Map with BWA for SOLiD" version="1.0.2">
<!-- Tool dependency: the bwa package, pinned to version 0.5.9. -->
2 <requirements>
3 <requirement version="0.5.9" type="package">bwa</requirement>
4 </requirements>
<!-- The description is empty; the parallelism method is "basic". -->
5 <description/>
6 <parallelism method="basic"/>
<!-- Command template (Cheetah), run with the python interpreter: calls
     bwa_wrapper.py with the color-space flag and a fixed thread count of 4.
     The reference is either a FASTA dataset from the history (the dbkey is
     passed along) or the path of a precomputed index (the do_not_build_index
     flag is added). A second input is passed only when the library is paired.
     The detailed alignment options, and the read-group fields when a read
     group is requested, are passed only when the settings selector is not
     "pre_set". -->
7 <command interpreter="python">
8 bwa_wrapper.py
9 --threads="4"
10 --color-space
11
12 ## reference source
13 --fileSource=$genomeSource.refGenomeSource
14 #if $genomeSource.refGenomeSource == "history":
15 ##build index on the fly
16 --ref="${genomeSource.ownFile}"
17 --dbkey=$dbkey
18 #else:
19 ##use precomputed indexes
20 --ref="${genomeSource.indices.fields.path}"
21 --do_not_build_index
22 #end if
23
24 ## input file(s)
25 --input1=$paired.input1
26 #if $paired.sPaired == "paired":
27 --input2=$paired.input2
28 #end if
29
30 ## output file
31 --output=$output
32
33 ## run parameters
34 --genAlignType=$paired.sPaired
35 --params=$params.source_select
36 #if $params.source_select != "pre_set":
37 --maxEditDist=$params.maxEditDist
38 --fracMissingAligns=$params.fracMissingAligns
39 --maxGapOpens=$params.maxGapOpens
40 --maxGapExtens=$params.maxGapExtens
41 --disallowLongDel=$params.disallowLongDel
42 --disallowIndel=$params.disallowIndel
43 --seed=$params.seed
44 --maxEditDistSeed=$params.maxEditDistSeed
45 --mismatchPenalty=$params.mismatchPenalty
46 --gapOpenPenalty=$params.gapOpenPenalty
47 --gapExtensPenalty=$params.gapExtensPenalty
48 --suboptAlign="${params.suboptAlign}"
49 --noIterSearch=$params.noIterSearch
50 --outputTopN=$params.outputTopN
51 --outputTopNDisc=$params.outputTopNDisc
52 --maxInsertSize=$params.maxInsertSize
53 --maxOccurPairing=$params.maxOccurPairing
54 #if $params.readGroup.specReadGroup == "yes"
55 --rgid="$params.readGroup.rgid"
56 --rgcn="$params.readGroup.rgcn"
57 --rgds="$params.readGroup.rgds"
58 --rgdt="$params.readGroup.rgdt"
59 --rgfo="$params.readGroup.rgfo"
60 --rgks="$params.readGroup.rgks"
61 --rglb="$params.readGroup.rglb"
62 --rgpg="$params.readGroup.rgpg"
63 --rgpi="$params.readGroup.rgpi"
64 --rgpl="$params.readGroup.rgpl"
65 --rgpu="$params.readGroup.rgpu"
66 --rgsm="$params.readGroup.rgsm"
67 #end if
68 #end if
69
70 ## suppress output SAM header
71 --suppressHeader=$suppressHeader
72 </command>
<!-- A second, unversioned requirements element used to sit here. It repeated
     the bwa package requirement that is already declared, pinned to version
     0.5.9, at the top of this tool, so it has been removed. -->
76 <inputs>
<!-- Reference selection: either an entry of the bwa_indexes_color data table
     (options sorted by column 2, with a validator that reports when no
     options are available) or a FASTA dataset from the history. -->
77 <conditional name="genomeSource">
78 <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
79 <option value="indexed">Use a built-in index</option>
80 <option value="history">Use one from the history</option>
81 </param>
82 <when value="indexed">
83 <param name="indices" type="select" label="Select a reference genome">
84 <options from_data_table="bwa_indexes_color">
85 <filter type="sort_by" column="2" />
86 <validator type="no_options" message="No indexes are available for the selected input dataset" />
87 </options>
88 </param>
89 </when>
90 <when value="history">
91 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
92 </when>
93 </conditional>
<!-- Library layout: one fastqcssanger dataset for single-end data, a forward
     and a reverse dataset for paired data. Each dataset parameter carries its
     help text once, as a help child element, so the single and paired
     branches present the same guidance. -->
94 <conditional name="paired">
95 <param name="sPaired" type="select" label="Is this library mate-paired?">
96 <option value="single">Single-end</option>
97 <option value="paired">Paired-end</option>
98 </param>
99 <when value="single">
100 <param name="input1" type="data" format="fastqcssanger" label="FASTQ file (Nucleotide-space recoded from color-space)">
101 <help>Convert color-space data to nucleotide-space (see help section below for steps). Must have Sanger-scaled quality values with ASCII offset 33</help>
102 </param>
103 </when>
104 <when value="paired">
105 <param name="input1" type="data" format="fastqcssanger" label="Forward FASTQ file (Nucleotide-space recoded from color-space)">
106 <help>Convert color-space data to nucleotide-space (see help section below for steps). Must have Sanger-scaled quality values with ASCII offset 33</help>
107 </param>
108 <param name="input2" type="data" format="fastqcssanger" label="Reverse FASTQ file (Nucleotide-space recoded from color-space)">
109 <help>Convert color-space data to nucleotide-space (see help section below for steps). Must have Sanger-scaled quality values with ASCII offset 33</help>
110 </param>
111 </when>
112 </conditional>
<!-- BWA settings: the "pre_set" choice exposes no further parameters; the
     "full" choice exposes the aln, samse and sampe options listed below plus
     a nested read-group conditional whose selected default is "no". -->
113 <conditional name="params">
114 <param name="source_select" type="select" label="BWA settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
115 <option value="pre_set">Commonly Used</option>
116 <option value="full">Full Parameter List</option>
117 </param>
118 <when value="pre_set" />
119 <when value="full">
120 <param name="maxEditDist" type="integer" value="0" label="Maximum edit distance (aln -n)" help="Enter this value OR a fraction of missing alignments, not both" />
121 <param name="fracMissingAligns" type="float" value="0.04" label="Fraction of missing alignments given 2% uniform base error rate (aln -n)" help="Enter this value OR maximum edit distance, not both" />
122 <param name="maxGapOpens" type="integer" value="1" label="Maximum number of gap opens (aln -o)" />
123 <param name="maxGapExtens" type="integer" value="-1" label="Maximum number of gap extensions (aln -e)" help="-1 for k-difference mode (disallowing long gaps)" />
124 <param name="disallowLongDel" type="integer" value="16" label="Disallow long deletion within [value] bp towards the 3'-end (aln -d)" />
125 <param name="disallowIndel" type="integer" value="5" label="Disallow insertion/deletion within [value] bp towards the end (aln -i)" />
126 <param name="seed" type="integer" value="-1" label="Number of first subsequences to take as seed (aln -l)" help="Enter -1 for infinity" />
127 <param name="maxEditDistSeed" type="integer" value="2" label="Maximum edit distance in the seed (aln -k)" />
128 <param name="mismatchPenalty" type="integer" value="3" label="Mismatch penalty (aln -M)" help="BWA will not search for suboptimal hits with a score lower than [value]" />
129 <param name="gapOpenPenalty" type="integer" value="11" label="Gap open penalty (aln -O)" />
130 <param name="gapExtensPenalty" type="integer" value="4" label="Gap extension penalty (aln -E)" />
131 <param name="suboptAlign" type="integer" optional="True" label="Proceed with suboptimal alignments if there are no more than INT equally best hits. (aln -R)" help="For paired-end reads only. By default, BWA only searches for suboptimal alignments if the top hit is unique. Using this option has no effect on accuracy for single-end reads. It is mainly designed for improving the alignment accuracy of paired-end reads. However, the pairing procedure will be slowed down, especially for very short reads (~32bp)" />
132 <param name="noIterSearch" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Disable iterative search (aln -N)" help="All hits with no more than maxDiff differences will be found. This mode is much slower than the default" />
133 <param name="outputTopN" type="integer" value="3" label="Maximum number of alignments to output in the XA tag for reads paired properly (samse/sampe -n)" help="If a read has more than INT hits, the XA tag will not be written" />
134 <param name="outputTopNDisc" type="integer" value="10" label="Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) (sampe -N)" help="For paired-end reads only. If a read has more than INT hits, the XA tag will not be written" />
135 <param name="maxInsertSize" type="integer" value="500" label="Maximum insert size for a read pair to be considered as being mapped properly (sampe -a)" help="For paired-end reads only. Only used when there are not enough good alignments to infer the distribution of insert sizes" />
136 <param name="maxOccurPairing" type="integer" value="100000" label="Maximum occurrences of a read for pairing (sampe -o)" help="For paired-end reads only. A read with more occurrences will be treated as a single-end read. Reducing this parameter helps faster pairing" />
137 <conditional name="readGroup">
138 <param name="specReadGroup" type="select" label="Specify the read group for this file? (samse/sampe -r)">
139 <option value="yes">Yes</option>
140 <option value="no" selected="True">No</option>
141 </param>
142 <when value="yes">
143 <param name="rgid" type="text" size="25" label="Read group identifier (ID). Each @RG line must have a unique ID. The value of ID is used in the RG
144 tags of alignment records. Must be unique among all read groups in header section." help="Required if RG specified. Read group
145 IDs may be modified when merging SAM files in order to handle collisions." />
146 <param name="rgcn" type="text" size="25" label="Sequencing center that produced the read (CN)" help="Optional" />
147 <param name="rgds" type="text" size="25" label="Description (DS)" help="Optional" />
148 <param name="rgdt" type="text" size="25" label="Date that run was produced (DT)" help="Optional. ISO8601 format date or date/time, like YYYY-MM-DD" />
149 <param name="rgfo" type="text" size="25" label="Flow order (FO). The array of nucleotide bases that correspond to the nucleotides used for each
150 flow of each read." help="Optional. Multi-base flows are encoded in IUPAC format, and non-nucleotide flows by
151 various other characters. Format : /\*|[ACMGRSVTWYHKDBN]+/" />
152 <param name="rgks" type="text" size="25" label="The array of nucleotide bases that correspond to the key sequence of each read (KS)" help="Optional" />
153 <param name="rglb" type="text" size="25" label="Library name (LB)" help="Required if RG specified" />
154 <param name="rgpg" type="text" size="25" label="Programs used for processing the read group (PG)" help="Optional" />
155 <param name="rgpi" type="text" size="25" label="Predicted median insert size (PI)" help="Optional" />
156 <param name="rgpl" type="text" size="25" label="Platform/technology used to produce the reads (PL)" help="Required if RG specified. Valid values : CAPILLARY, LS454, ILLUMINA,
157 SOLID, HELICOS, IONTORRENT and PACBIO" />
158 <param name="rgpu" type="text" size="25" label="Platform unit (PU)" help="Optional. Unique identifier (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD)" />
159 <param name="rgsm" type="text" size="25" label="Sample (SM)" help="Required if RG specified. Use pool name where a pool is being sequenced" />
160 </when>
161 <when value="no" />
162 </conditional>
163 </when>
164 </conditional>
<!-- Boolean switch, unchecked by default; the command template forwards its
     value to the wrapper as the suppressHeader option. -->
165 <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="BWA produces SAM with several lines of header information" />
166 </inputs>
<!-- Single SAM output. Its dbkey metadata depends on the reference source:
     for a built-in index it is read from column 1 of the bwa_indexes_color
     row whose column 0 matches the chosen index (rows whose column 0 starts
     with "#" are filtered out); for a history reference it is taken from the
     dbkey attribute of the ownFile parameter. -->
167 <outputs>
168 <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
169 <actions>
170 <conditional name="genomeSource.refGenomeSource">
171 <when value="indexed">
172 <action type="metadata" name="dbkey">
173 <option type="from_data_table" name="bwa_indexes_color" column="1">
174 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
175 <filter type="param_value" ref="genomeSource.indices" column="0" />
176 </option>
177 </action>
178 </when>
179 <when value="history">
180 <action type="metadata" name="dbkey">
181 <option type="from_param" name="genomeSource.ownFile" param_attribute="dbkey" />
182 </action>
183 </when>
184 </conditional>
185 </actions>
186 </data>
187 </outputs>
188 <tests>
189 <test>
<!-- Case: reference taken from the history (hg19chrX_midpart.fasta),
     single-end input, commonly used settings, SAM header kept. The output is
     compared after sorting, allowing 2 differing lines. -->
190 <!--
191 BWA commands:
192 cp test-data/hg19chrX_midpart.fasta hg19chrX_midpart.fasta
193 bwa index -c -a is hg19chrX_midpart.fasta
194 bwa aln -t 4 -c hg19chrX_midpart.fasta test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out4.sai
195 bwa samse hg19chrX_midpart.fasta bwa_wrapper_out4.sai test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out4.u.sam
196 hg19chrX_midpart.fasta is the prefix for the reference files (hg19chrX_midpart.fasta.amb, hg19chrX_midpart.fasta.ann, hg19chrX_midpart.fasta.bwt, ...)
197 It's just part of hg19 chrX, from the middle of the chromosome
198 plain old sort doesn't handle underscores like python:
199 python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out4.u.sam bwa_wrapper_out4.sam
200 -->
201 <param name="refGenomeSource" value="history" />
202 <param name="ownFile" value="hg19chrX_midpart.fasta" />
203 <param name="sPaired" value="single" />
204 <param name="input1" value="bwa_wrapper_in4.fastqcssanger" ftype="fastqcssanger" />
205 <param name="source_select" value="pre_set" />
206 <param name="suppressHeader" value="false" />
207 <output name="output" file="bwa_wrapper_out4.sam" ftype="sam" sort="True" lines_diff="2" />
208 </test>
209 <test>
<!-- Case: built-in index (equCab2chrM), paired inputs, commonly used
     settings, SAM header suppressed. The output is compared after sorting. -->
210 <!--
211 BWA commands:
212 bwa aln -t 4 -c equCab2chrM_cs.fa test-data/bwa_wrapper_in5.fastqcssanger > bwa_wrapper_out5a.sai
213 bwa aln -t 4 -c equCab2chrM_cs.fa test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out5b.sai
214 bwa sampe equCab2chrM_cs.fa bwa_wrapper_out5a.sai bwa_wrapper_out5b.sai test-data/bwa_wrapper_in5.fastqcssanger test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out5.u.sam
215 equCab2chrM_cs.fa is the prefix of the index files (equCab2chrM_cs.fa.amb, equCab2chrM_cs.fa.ann, ...)
216 remove the comment lines (beginning with '@') from the resulting sam file
217 plain old sort doesn't handle underscores like python:
218 python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out5.u.sam bwa_wrapper_out5.sam
219 -->
220 <param name="refGenomeSource" value="indexed" />
221 <param name="indices" value="equCab2chrM" />
222 <param name="sPaired" value="paired" />
223 <param name="input1" value="bwa_wrapper_in5.fastqcssanger" ftype="fastqcssanger" />
224 <param name="input2" value="bwa_wrapper_in6.fastqcssanger" ftype="fastqcssanger" />
225 <param name="source_select" value="pre_set" />
226 <param name="suppressHeader" value="true" />
227 <output name="output" file="bwa_wrapper_out5.sam" ftype="sam" sort="True" />
228 </test>
229 <test>
<!-- Case: built-in index (hg19chrX_midpart), single-end input, full parameter
     list with a read group specified, SAM header kept. The output is compared
     after sorting, allowing 2 differing lines.
     NOTE(review): the commands recorded below use hg19chrX_midpart.fasta,
     while the prefix remark names hg19chrX_midpart_cs.fa and lists
     hg19chrX_midpart.fa.* files; confirm which name matches the index the
     test actually uses. -->
230 <!--
231 BWA commands:
232 bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N -c hg19chrX_midpart.fasta test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out6.sai
233 bwa samse -n 3 -r "@RG\tID:474747\tDS:description\tDT:2011-03-14\tLB:lib-child-1-A\tPI:200\tPL:SOLID\tSM:child-1" hg19chrX_midpart.fasta bwa_wrapper_out6.sai test-data/bwa_wrapper_in4.fastqcssanger > bwa_wrapper_out6.u.sam
234 hg19chrX_midpart_cs.fa is the prefix of the index files (hg19chrX_midpart.fa.amb, hg19chrX_midpart.fa.ann, ...)
235 (It's just part of hg19 chrX, from the middle of the chromosome)
236 plain old sort doesn't handle underscores like python:
237 python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out6.u.sam bwa_wrapper_out6.sam
238 -->
239 <param name="refGenomeSource" value="indexed" />
240 <param name="indices" value="hg19chrX_midpart" />
241 <param name="sPaired" value="single" />
242 <param name="input1" value="bwa_wrapper_in4.fastqcssanger" ftype="fastqcssanger" />
243 <param name="source_select" value="full" />
244 <param name="maxEditDist" value="0" />
245 <param name="fracMissingAligns" value="0.04" />
246 <param name="maxGapOpens" value="1" />
247 <param name="maxGapExtens" value="-1" />
248 <param name="disallowLongDel" value="16" />
249 <param name="disallowIndel" value="5" />
250 <param name="seed" value="-1" />
251 <param name="maxEditDistSeed" value="2" />
252 <param name="mismatchPenalty" value="3" />
253 <param name="gapOpenPenalty" value="11" />
254 <param name="gapExtensPenalty" value="4" />
255 <param name="suboptAlign" value="" />
256 <param name="noIterSearch" value="true" />
257 <param name="outputTopN" value="3" />
258 <param name="outputTopNDisc" value="10" />
259 <param name="maxInsertSize" value="500" />
260 <param name="maxOccurPairing" value="100000" />
261 <param name="specReadGroup" value="yes" />
262 <param name="rgid" value="474747" />
263 <param name="rgcn" value="" />
264 <param name="rgds" value="description" />
265 <param name="rgdt" value="2011-03-14" />
266 <param name="rgfo" value="" />
267 <param name="rgks" value="" />
268 <param name="rglb" value="lib-child-1-A" />
269 <param name="rgpg" value="" />
270 <param name="rgpi" value="200" />
271 <param name="rgpl" value="SOLID" />
272 <param name="rgpu" value="" />
273 <param name="rgsm" value="child-1" />
274 <param name="suppressHeader" value="false" />
275 <output name="output" file="bwa_wrapper_out6.sam" ftype="sam" sort="True" lines_diff="2" />
276 </test>
277 <test>
<!-- Case: reference taken from the history (chr_m.fasta), paired inputs, full
     parameter list with maxInsertSize 100 and maxOccurPairing 2, no read
     group, SAM header kept. The output is compared after sorting, allowing
     2 differing lines. -->
278 <!--
279 BWA commands:
280 cp test-data/chr_m.fasta chr_m.fasta
281 bwa index -c -a is chr_m.fasta
282 bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N -c chr_m.fasta test-data/bwa_wrapper_in5.fastqcssanger > bwa_wrapper_out7a.sai
283 bwa aln -n 0.04 -o 1 -e -1 -d 16 -i 5 -k 2 -t 4 -M 3 -O 11 -E 4 -R -N -c chr_m.fasta test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out7b.sai
284 bwa sampe -a 100 -o 2 -n 3 -N 10 chr_m.fasta bwa_wrapper_out7a.sai bwa_wrapper_out7b.sai test-data/bwa_wrapper_in5.fastqcssanger test-data/bwa_wrapper_in6.fastqcssanger > bwa_wrapper_out7.u.sam
285 chr_m.fasta is the prefix of the index files (chr_m.fasta.amb, chr_m.fasta.ann, ...)
286 plain old sort doesn't handle underscores like python:
287 python -c "import sys; lines=file(sys.argv[1],'rb').readlines(); lines.sort(); file(sys.argv[2],'wb').write(''.join(lines))" bwa_wrapper_out7.u.sam bwa_wrapper_out7.sam
288 -->
289 <param name="refGenomeSource" value="history" />
290 <param name="ownFile" value="chr_m.fasta" />
291 <param name="sPaired" value="paired" />
292 <param name="input1" value="bwa_wrapper_in5.fastqcssanger" ftype="fastqcssanger" />
293 <param name="input2" value="bwa_wrapper_in6.fastqcssanger" ftype="fastqcssanger" />
294 <param name="source_select" value="full" />
295 <param name="maxEditDist" value="0" />
296 <param name="fracMissingAligns" value="0.04" />
297 <param name="maxGapOpens" value="1" />
298 <param name="maxGapExtens" value="-1" />
299 <param name="disallowLongDel" value="16" />
300 <param name="disallowIndel" value="5" />
301 <param name="seed" value="-1" />
302 <param name="maxEditDistSeed" value="2" />
303 <param name="mismatchPenalty" value="3" />
304 <param name="gapOpenPenalty" value="11" />
305 <param name="gapExtensPenalty" value="4" />
306 <param name="suboptAlign" value="" />
307 <param name="noIterSearch" value="true" />
308 <param name="outputTopN" value="3" />
309 <param name="outputTopNDisc" value="10" />
310 <param name="maxInsertSize" value="100" />
311 <param name="maxOccurPairing" value="2" />
312 <param name="specReadGroup" value="no" />
313 <param name="suppressHeader" value="false" />
314 <output name="output" file="bwa_wrapper_out7.sam" ftype="sam" sort="True" lines_diff="2" />
315 </test>
316 </tests>
317 <help>
318
319 **What it does**
320
321 BWA is a fast, lightweight tool that aligns relatively short sequences (queries) to a large sequence database, such as the human reference genome. It was developed by Heng Li at the Sanger Institute. Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 25, 1754-60.
322
323 ------
324
325 **Know what you are doing**
326
327 .. class:: warningmark
328
329 There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
330
331 .. __: http://bio-bwa.sourceforge.net/
332
333 ------
334
335 **Input formats**
336
337 BWA accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files, set to either FASTQ Sanger or FASTQ Color Space Sanger as appropriate.
338
339 If you have Color Space Sanger, it must be converted to nucleotide-space first. To do this, use the Manipulate FASTQ tool under NGS: QC and manipulation, with the following settings:
340 Manipulate reads on Sequence Content, choosing Change Adapter Base, and having the text box empty.
341 Manipulate reads on Sequence Content, doing a String Translate from "01234." to "ACGTN".
342
343
344 ------
345
346 **A Note on Built-in Reference Genomes**
347
348 The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY.
349
350 ------
351
352 **Outputs**
353
354 The output is in SAM format, and has the following columns::
355
356 Column Description
357 -------- --------------------------------------------------------
358 1 QNAME Query (pair) NAME
359 2 FLAG bitwise FLAG
360 3 RNAME Reference sequence NAME
361 4 POS 1-based leftmost POSition/coordinate of clipped sequence
362 5 MAPQ MAPping Quality (Phred-scaled)
363 6 CIGAR extended CIGAR string
364 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME)
365 8 MPOS 1-based Mate POSition
366 9 ISIZE Inferred insert SIZE
367 10 SEQ query SEQuence on the same strand as the reference
368 11 QUAL query QUALity (ASCII-33 gives the Phred base quality)
369 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE
370
371 The flags are as follows::
372
373 Flag Description
374 ------ -------------------------------------
375 0x0001 the read is paired in sequencing
376 0x0002 the read is mapped in a proper pair
377 0x0004 the query sequence itself is unmapped
378 0x0008 the mate is unmapped
379 0x0010 strand of the query (1 for reverse)
380 0x0020 strand of the mate
381 0x0040 the read is the first read in a pair
382 0x0080 the read is the second read in a pair
383 0x0100 the alignment is not primary
384
385 It looks like this (scroll sideways to see the entire example)::
386
387 QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT
388 HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
389 HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
390
391 -------
392
393 **BWA settings**
394
395 All of the options have a default value. You can change any of them. All of the options in BWA have been implemented here.
396
397 ------
398
399 **BWA parameter list**
400
401 This is an exhaustive list of BWA options:
402
403 For **aln**::
404
405 -n NUM Maximum edit distance if the value is INT, or the fraction of missing
406 alignments given 2% uniform base error rate if FLOAT. In the latter
407 case, the maximum edit distance is automatically chosen for different
408 read lengths. [0.04]
409 -o INT Maximum number of gap opens [1]
410 -e INT Maximum number of gap extensions, -1 for k-difference mode
411 (disallowing long gaps) [-1]
412 -d INT Disallow a long deletion within INT bp towards the 3'-end [16]
413 -i INT Disallow an indel within INT bp towards the ends [5]
414 -l INT Take the first INT subsequence as seed. If INT is larger than the
415 query sequence, seeding will be disabled. For long reads, this option
416 is typically ranged from 25 to 35 for '-k 2'. [inf]
417 -k INT Maximum edit distance in the seed [2]
418 -t INT Number of threads (multi-threading mode) [1]
419 -M INT Mismatch penalty. BWA will not search for suboptimal hits with a score
420 lower than (bestScore-misMsc). [3]
421 -O INT Gap open penalty [11]
422 -E INT Gap extension penalty [4]
423 -c Reverse query but not complement it, which is required for alignment
424 in the color space.
425 -R Proceed with suboptimal alignments even if the top hit is a repeat. By
426 default, BWA only searches for suboptimal alignments if the top hit is
427 unique. Using this option has no effect on accuracy for single-end
428 reads. It is mainly designed for improving the alignment accuracy of
429 paired-end reads. However, the pairing procedure will be slowed down,
430 especially for very short reads (~32bp).
431 -N Disable iterative search. All hits with no more than maxDiff
432 differences will be found. This mode is much slower than the default.
433
434 For **samse**::
435
436 -n INT Maximum number of alignments to output in the XA tag for reads paired
437 properly. If a read has more than INT hits, the XA tag will not be
438 written. [3]
439 -r STR Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
440
441 For **sampe**::
442
443 -a INT Maximum insert size for a read pair to be considered as being mapped
444 properly. Since version 0.4.5, this option is only used when there
445 are not enough good alignment to infer the distribution of insert
446 sizes. [500]
447 -n INT Maximum number of alignments to output in the XA tag for reads paired
448 properly. If a read has more than INT hits, the XA tag will not be
449 written. [3]
450 -N INT Maximum number of alignments to output in the XA tag for disconcordant
451 read pairs (excluding singletons). If a read has more than INT hits,
452 the XA tag will not be written. [10]
453 -o INT Maximum occurrences of a read for pairing. A read with more
454 occurrences will be treated as a single-end read. Reducing this
455 parameter helps faster pairing. [100000]
456 -r STR Specify the read group in a format like '@RG\tID:foo\tSM:bar' [null]
457
458 For specifying the read group in **samse** or **sampe**, use the following::
459
460 @RG Read group. Unordered multiple @RG lines are allowed.
461 ID Read group identifier. Each @RG line must have a unique ID. The value of
462 ID is used in the RG tags of alignment records. Must be unique among all
463 read groups in header section. Read group IDs may be modified when
464 merging SAM files in order to handle collisions.
465 CN Name of sequencing center producing the read.
466 DS Description.
467 DT Date the run was produced (ISO8601 date or date/time).
468 FO Flow order. The array of nucleotide bases that correspond to the
469 nucleotides used for each flow of each read. Multi-base flows are encoded
470 in IUPAC format, and non-nucleotide flows by various other characters.
471 Format : /\*|[ACMGRSVTWYHKDBN]+/
472 KS The array of nucleotide bases that correspond to the key sequence of each read.
473 LB Library.
474 PG Programs used for processing the read group.
475 PI Predicted median insert size.
476 PL Platform/technology used to produce the reads. Valid values : CAPILLARY,
477 LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT and PACBIO.
478 PU Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for
479 SOLiD). Unique identifier.
480 SM Sample. Use pool name where a pool is being sequenced.
481
482 </help>
483 </tool>
484
485