annotate tools/sr_mapping/bfast_wrapper.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="bfast_wrapper" name="Map with BFAST" version="0.1.3">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description></description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 <command interpreter="python">bfast_wrapper.py
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 --numThreads="4" ##HACK: hardcode numThreads for now, should come from a location file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 --fastq="$input1"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 #if $input1.extension.startswith( "fastqcs" ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 ##if extension starts with fastqcs, then we have a color space file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 --space="1" ##color space
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 #else
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 --space="0"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 --output="$output"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 $suppressHeader
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 #if $refGenomeSource.refGenomeSource_type == "history":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 ##build indexes on the fly
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 --buildIndex
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 --ref="${refGenomeSource.ownFile}"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 --indexMask="${",".join( [ "%s:%s" % ( str( custom_index.get( 'mask' ) ).strip(), str( custom_index.get( 'hash_width' ) ).strip() ) for custom_index in $refGenomeSource.custom_index ] )}"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 ${refGenomeSource.indexing_repeatmasker}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 #if $refGenomeSource.indexing_option.indexing_option_selector == "contig_offset":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 --indexContigOptions="${refGenomeSource.indexing_option.start_contig},${refGenomeSource.indexing_option.start_pos},${refGenomeSource.indexing_option.end_contig},${refGenomeSource.indexing_option.end_pos}"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 #elif $refGenomeSource.indexing_option.indexing_option_selector == "exons_file":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 --indexExonsFileName="${refGenomeSource.indexing_option.exons_file}"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 #else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 ##use precomputed indexes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 --ref="${ refGenomeSource.indices.fields.path }"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 #if $params.source_select == "full":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 --offsets="$params.offsets"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 --keySize="$params.keySize"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 --maxKeyMatches="$params.maxKeyMatches"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 --maxNumMatches="$params.maxNumMatches"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 --whichStrand="$params.whichStrand"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 #if str( $params.scoringMatrixFileName ) != 'None':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 --scoringMatrixFileName="$params.scoringMatrixFileName"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 ${params.ungapped}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 ${params.unconstrained}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 --offset="${params.offset}"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 --avgMismatchQuality="${params.avgMismatchQuality}"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 --algorithm="${params.localalign_params.algorithm}"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 ${params.unpaired}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 ${params.reverseStrand}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 #if $params.localalign_params.algorithm == "3":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 ${params.localalign_params.pairedEndInfer}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 ${params.localalign_params.randomBest}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 <param name="input1" type="data" format="fastqsanger,fastqcssanger" label="FASTQ file" help="Must have Sanger-scaled quality values with ASCII offset 33"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 <conditional name="refGenomeSource">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 <param name="refGenomeSource_type" type="select" label="Will you select a reference genome from your history or use a built-in index?">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 <option value="indexed">Use a built-in index</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 <option value="history">Use one from the history</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 <when value="indexed">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 <param name="indices" type="select" label="Select a reference genome index set">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 <options from_data_table="bfast_indexes">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 <filter type="multiple_splitter" column="2" separator=","/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 <filter type="param_value" column="2" ref="input1" ref_attribute="extension"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 <filter type="sort_by" column="3"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 </options>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 <when value="history">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 <repeat name="custom_index" title="Custom indice" min="1" >
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 <param name="mask" type="text" value="" label="Specify the mask" size="20">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 <!-- <validator type="no_options" message="No indexes are available for the selected input dataset"/> need is int validator here or regex all 01s-->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 <param name="hash_width" type="integer" value="" label="Hash Width" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 </repeat>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 <param name="indexing_repeatmasker" type="boolean" truevalue="--indexRepeatMasker" falsevalue="" checked="False" label="Do not index lower case sequences" help="Such as those created by RepeatMasker"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 <conditional name="indexing_option">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 <param name="indexing_option_selector" type="select" label="BFAST indexing settings to use" help="For most indexing needs use default settings. If you want full control use the other options.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 <option value="default">Default</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 <option value="contig_offset">Contig Offset</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 <option value="exons_file">Exons file</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 <when value="default">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 <!-- nothing here -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 <when value="contig_offset">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 <param name="start_contig" type="integer" value="-1" label="Start Contig" help="Specifies the first contig to include when building indexes. (advanced users only)" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 <param name="start_pos" type="integer" value="-1" label="Start Position" help="Specifies the first position in the first contig to include when building indexes. (advanced users only)" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 <param name="end_contig" type="integer" value="-1" label="End Contig" help="Specifies the last contig to include when building indexes. (advanced users only)" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 <param name="end_pos" type="integer" value="-1" label="End Position" help="Specifies the last position in the last contig to include when building indexes. (advanced users only)" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 <when value="exons_file">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 <param name="exons_file" type="data" format="tabular" label="Select an exons file from history" help="See BFAST manual for file format requirements. (advanced users only)"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 <conditional name="params">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 <param name="source_select" type="select" label="BFAST matching settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 <option value="pre_set">Commonly Used</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 <option value="full">Full Parameter List</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 <when value="pre_set">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 <!-- nothing here -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 <when value="full">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 <param name="offsets" type="text" value="" label="The offsets for 'bfast match'" help="Set if not all offsets from the 5' end of the read are to be examined (advanced users only)" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 <param name="keySize" type="integer" value="-1" label="Truncate key size in 'match'" help="Set this to reduce the effective key size of all indexes in 'bfast match' (advanced users only)" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 <param name="maxKeyMatches" type="integer" value="8" label="The maximum number of matches to allow before a key is ignored" help="Lower values will result in more unique regions being examined, while larger values will allow include repetitive regions" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 <param name="maxNumMatches" type="integer" value="384" label="The maximum number of matches to allow before a read is discarded" help="Larger values will allow more hits to be examined" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 <param name="whichStrand" type="select" label="The strands to consider" help="Both strands, forward strand only, or reverse strand only">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 <option value="0">Both strands</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 <option value="1">Forward strand only</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 <option value="2">Reverse strand only</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 <param name="scoringMatrixFileName" type="data" format="text" optional="True" label="Scoring Matrix file used to score the alignments" help="See BFAST manual for file format requirements. (advanced users only)"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 <param name="ungapped" type="boolean" truevalue="--ungapped" falsevalue="" checked="no" label="Perform ungapped local alignment" help="Performing ungapped local alignment will not consider indels while providing a significant speed increase" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 <param name="unconstrained" type="boolean" truevalue="--unconstrained" falsevalue="" checked="no" label="Perform unconstrained local alignment" help="Performing unconstrained local alignment will not use mask constraints at the cost of speed" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 <param name="offset" type="integer" value="20" label="The number of bases before and after each hit to consider in local alignment" help="Larger values will allow for larger insertions and deletions to be detected at the cost of speed" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 <param name="avgMismatchQuality" type="integer" value="10" label="The average mismatch quality" help="This can be used as a scaling factor for mapping quality (advanced users only)" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 <conditional name="localalign_params">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 <param name="algorithm" type="select" label="The post processing algorithm" help="This determines how reads with multiple candidate alignments are returned. Unique alignments will return an alignment if the read has only one candidate alignment. Uniquely best scoring alignments will return one alignment for a read if that alignment has a better alignment score than the rest of the candidate alignments. All best scoring alignments will return all alignments that have the best alignment score for a read.">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 <option value="0" selected="True">No filtering</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 <option value="1">All alignments that pass filtering</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 <option value="2">Unique alignments</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 <option value="3">Uniquely best scoring alignments</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 <option value="4">All best scoring alignments</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 <when value="0">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 <!-- nothing here -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 <when value="1">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 <!-- nothing here -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 <when value="2">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 <!-- nothing here -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 <when value="4">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 <!-- nothing here -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 <when value="3">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 <param name="pairedEndInfer" type="boolean" truevalue="--pairedEndInfer" falsevalue="" checked="no" label="pairedEndInfer" help="break ties when one end of a paired end read by estimating the insert size distribution" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 <param name="randomBest" type="boolean" truevalue="--randomBest" falsevalue="" checked="no" label="Random alignments" help="output a random best scoring alignment (advanced users only)" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 <param name="unpaired" type="boolean" truevalue="--unpaired" falsevalue="" checked="no" label="Disallow pairing" help="do not choose alignments based on pairing" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153 <param name="reverseStrand" type="boolean" truevalue="--reverseStrand" falsevalue="" checked="no" label="Reverse paired ends" help="paired end reads are given on reverse strands" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 <param name="suppressHeader" type="boolean" truevalue="--suppressHeader" falsevalue="" checked="False" label="Suppress the header in the output SAM file" help="BFAST produces SAM with several lines of header information" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160 <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 <actions>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 <conditional name="refGenomeSource.refGenomeSource_type">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 <when value="indexed">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 <action type="metadata" name="dbkey">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 <option type="from_data_table" column="1" name="bfast_indexes">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 <filter type="param_value" ref="refGenomeSource.indices" column="0" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 </option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168 </action>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 <when value="history">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 <action type="metadata" name="dbkey">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172 <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 </action>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 </actions>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177 </data>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 **What it does**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
181
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
182 BFAST facilitates the fast and accurate mapping of short reads to reference sequences. Some advantages of BFAST include:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
183 * Speed: enables billions of short reads to be mapped quickly.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
184 * Accuracy: A priori probabilities for mapping reads with a defined set of variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
185 * An easy way to measurably tune accuracy at the expense of speed.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
186 Specifically, BFAST was designed to facilitate whole-genome resequencing, where mapping billions of short reads with variants is of utmost importance.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
187
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
188 BFAST supports both Illumina and ABI SOLiD data, as well as any other Next-Generation Sequencing Technology (454, Helicos), with particular emphasis on sensitivity towards errors, SNPs and especially indels. Other algorithms take short-cuts by ignoring errors, certain types of variants (indels), and even require further alignment, all to be the "fastest" (but still not complete). BFAST is able to be tuned to find variants regardless of the error-rate, polymorphism rate, or other factors.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
189
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
190 ------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
191
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
192 Please cite the website "http://bfast.sourceforge.net" as well as the accompanying
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
193 papers:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
194
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
195 Homer N, Merriman B, Nelson SF.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
196 BFAST: An alignment tool for large scale genome resequencing.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
197 PMID: 19907642
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
198 PLoS ONE. 2009 4(11): e7767.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
199 http://dx.doi.org/10.1371/journal.pone.0007767
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
200
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
201 Homer N, Merriman B, Nelson SF.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
202 Local alignment of two-base encoded DNA sequence.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
203 BMC Bioinformatics. 2009 Jun 9;10(1):175.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
204 PMID: 19508732
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
205 http://dx.doi.org/10.1186/1471-2105-10-175
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
206
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
207 ------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
208
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
209 **Know what you are doing**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
210
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
211 .. class:: warningmark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
212
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
213 There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
214
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
215 .. __: http://bfast.sourceforge.net/
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
216
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
217 ------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
218
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
219 **Input formats**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
220
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
221 BFAST accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
222
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
223 ------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
224
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
225 **Outputs**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
226
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
227 The output is in SAM format, and has the following columns::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
228
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
229 Column Description
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
230 -------- --------------------------------------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
231 1 QNAME Query (pair) NAME
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
232 2 FLAG bitwise FLAG
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
233 3 RNAME Reference sequence NAME
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
234 4 POS 1-based leftmost POSition/coordinate of clipped sequence
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
235 5 MAPQ MAPping Quality (Phred-scaled)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
236 6 CIGAR extended CIGAR string
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
237 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
238 8 MPOS 1-based Mate POSition
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
239 9 ISIZE Inferred insert SIZE
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
240 10 SEQ query SEQuence on the same strand as the reference
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
241 11 QUAL query QUALity (ASCII-33 gives the Phred base quality)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
242 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
243
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
244 The flags are as follows::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
245
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
246 Flag Description
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
247 ------ -------------------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
248 0x0001 the read is paired in sequencing
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
249 0x0002 the read is mapped in a proper pair
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
250 0x0004 the query sequence itself is unmapped
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
251 0x0008 the mate is unmapped
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
252 0x0010 strand of the query (1 for reverse)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
253 0x0020 strand of the mate
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
254 0x0040 the read is the first read in a pair
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
255 0x0080 the read is the second read in a pair
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
256 0x0100 the alignment is not primary
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
257
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
258 It looks like this (scroll sideways to see the entire example)::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
259
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
260 QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
261 HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
262 HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
263
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
264 -------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
265
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
266 **BFAST settings**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
267
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
268 All of the options have a default value. You can change any of them. Most of the options in BFAST have been implemented here.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
269
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
270 ------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
271
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
272 **BFAST parameter list**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
273
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
274 This is an exhaustive list of BFAST options:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
275
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
276 For **match**::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
277
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
278 -o STRING Specifies the offset [Use all]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
279 -l Specifies to load all main or secondary indexes into memory
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
280 -A INT 0: NT space 1: Color space [0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
281 -k INT Specifies to truncate all indexes to have the given key size
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
282 (must be greater than the hash width) [Not Using]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
283 -K INT Specifies the maximum number of matches to allow before a key
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
284 is ignored [8]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
285 -M INT Specifies the maximum total number of matches to consider
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
286 before the read is discarded [384]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
287 -w INT 0: consider both strands 1: forward strand only 2: reverse
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
288 strand only [0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
289 -n INT Specifies the number of threads to use [1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
290 -t Specifies to output timing information
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
291
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
292 For **localalign**::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
293
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
294 -x FILE Specifies the file name storing the scoring matrix
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
295 -u Do ungapped local alignment (the default is gapped).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
296 -U Do not use mask constraints from the match step
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
297 -A INT 0: NT space 1: Color space [0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
298 -o INT Specifies the number of bases before and after the match to
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
299 include in the reference genome
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
300 -M INT Specifies the maximum total number of matches to consider
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
301 before the read is discarded [384]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
302 -q INT Specifies the average mismatch quality
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
303 -n INT Specifies the number of threads to use [1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
304 -t Specifies to output timing information
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
305
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
306 For **postprocess**::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
307
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
308 -a INT Specifies the algorithm to choose the alignment for each end of the read:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
309
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
310 0: No filtering will occur.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
311 1: All alignments that pass the filters will be output
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
312 2: Only consider reads that have been aligned uniquely
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
313 3: Choose uniquely the alignment with the best score
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
314 4: Choose all alignments with the best score
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
315
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
316 -A INT 0: NT space 1: Color space [0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
317 -U Specifies that pairing should not be performed
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
318 -R Specifies that paired reads are on opposite strands
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
319 -q INT Specifies the average mismatch quality
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
320 -x FILE Specifies the file name storing the scoring matrix
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
321 -z Specifies to output a random best scoring alignment (with -a 3)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
322 -r FILE Specifies to add the RG in the specified file to the SAM
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
323 header and update the RG tag (and LB/PU tags if present) in
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
324 the reads (SAM only)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
325 -n INT Specifies the number of threads to use [1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
326 -t Specifies to output timing information
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
327
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
328 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
329 <requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
330 <requirement type="package">bfast</requirement>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
331 </requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
332 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
333 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
334 <param name="input1" ftype="fastqsanger" value="random_phiX_1.fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
335 <param name="refGenomeSource_type" value="history" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
336 <param name="ownFile" ftype="fasta" value="phiX.fasta" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
337 <param name="mask" value="111111111111111111" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
338 <param name="hash_width" value="14" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
339 <param name="source_select" value="pre_set" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
340 <param name="indexing_repeatmasker" value="False" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
341 <param name="indexing_option_selector" value="default" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
342 <param name="suppressHeader" value="" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
343 <output name="output" ftype="sam" file="bfast_out1.sam" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
344 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
345 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
346 <param name="input1" ftype="fastqsanger" value="random_phiX_1.fastqsanger"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
347 <param name="refGenomeSource_type" value="history" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
348 <param name="ownFile" ftype="fasta" value="phiX.fasta" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
349 <param name="mask" value="111111111111111111" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
350 <param name="hash_width" value="14" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
351 <param name="source_select" value="pre_set" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
352 <param name="indexing_repeatmasker" value="False" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
353 <param name="indexing_option_selector" value="default" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
354 <param name="suppressHeader" value="--suppressHeader" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
355 <output name="output" ftype="sam" file="bfast_out1.sam" lines_diff="3" /><!-- the compare file contains 3 header lines, but header output is suppressed in this test -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
356 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
357 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
358 <param name="input1" ftype="fastqcssanger" value="random_phiX_1.fastqcssanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
359 <param name="refGenomeSource_type" value="history" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
360 <param name="ownFile" ftype="fasta" value="phiX.fasta" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
361 <param name="mask" value="111111111111111111" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
362 <param name="hash_width" value="14" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
363 <param name="source_select" value="pre_set" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
364 <param name="indexing_repeatmasker" value="False" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
365 <param name="indexing_option_selector" value="default" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
366 <param name="suppressHeader" value="" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
367 <output name="output" ftype="sam" file="bfast_out2.sam" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
368 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
369 <!-- the following test uses a pre-indexed reference instead of building indexes from a history file -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
370 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
371 <param name="input1" ftype="fastqsanger" value="random_phiX_1.fastqsanger" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
372 <param name="refGenomeSource_type" value="indexed" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
373 <param name="indices" value="phiX_nt_50" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
374 <param name="source_select" value="pre_set" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
375 <param name="suppressHeader" value="" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
376 <output name="output" ftype="sam" file="bfast_out3.sam" lines_diff="2" /><!-- one line differs: MD:Z:11T38 instead of MD:Z:50 -->
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
377 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
378 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
379 </tool>