0
|
1 <tool id="mafft" name="Mafft" version="1.0.0">
|
|
2 <description>Multiple sequence Alignment using Fast Fourier Transform</description>
|
|
<!-- Cheetah template that assembles the MAFFT command line.
     With advanced options enabled, the selected strategy value supplies the
     executable name and its flags; otherwise FFT-NS-2 is run with fixed
     defaults that depend on the declared sequence type. -->
<command><![CDATA[
#if str($advanced.select) in ("proteins", "nucleic"):
    ## Advanced mode: the strategy option value already starts with the
    ## executable (mafft, mafft-qinsi or mafft-xinsi), so it is emitted first.
    $advanced.select_strategy
    #if str($oformat) == "clustal":
        --clustalout
    #end if
    ## Each advanced branch defines its own scoring matrix parameter.
    #if str($advanced.select) == "proteins":
        $advanced.matrixAA
    #else:
        $advanced.matrixNucl
    #end if
    --op $advanced.gap_open
    --ep $advanced.offset
#else:
    ## No advanced options: mafft-fftns2, the default strategy.
    mafft --retree 2 --maxiterate 0 --quiet
    #if str($oformat) == "clustal":
        --clustalout
    #end if
    ## The sequence type selector is only consulted in this default branch.
    #if str($inputtype.selectInput) == "proteins":
        --bl 62
    #else:
        --kimura 200
    #end if
    --op 1.530000 --ep 0.000000
#end if
## Paths are quoted so that whitespace or shell metacharacters cannot split them.
## NOTE(review): stderr is discarded here, so MAFFT error messages never reach
## the job log; confirm this is still wanted.
'$inputfile' > '$output' 2> /dev/null
]]></command>
|
|
<!-- User-facing parameters. The names inputtype, inputfile, oformat and
     advanced are referenced by the command template; oformat is also
     referenced by the change_format rule in the outputs section. -->
<inputs>
    <!-- Sequence type. The command template only reads this when no advanced
         options are selected; it then picks the default scoring matrix. -->
    <conditional name="inputtype">
        <param name="selectInput" type="select" label="Select the type of sequences in your input file.">
            <option value="proteins" selected="true">protein input</option>
            <option value="nucleic">nucleic acid input</option>
        </param>
        <when value="proteins"/>
        <when value="nucleic"/>
    </conditional>
    <!-- end conditional inputtype -->

    <param name="inputfile" type="data" format="fasta" label="Unaligned fasta input sequences (proteins or nucleotides)"/>

    <param name="oformat" type="select" label="Output format" help="Please select an output format. Error: tree output generates alignment file instead of Newick file, also in the commandline.">
        <option value="fasta" selected="true">fasta</option>
        <option value="clustal">clustal</option>
    </param>

    <!-- Advanced options. Each strategy option value is the literal start of
         the command line (executable plus flags) and is emitted verbatim by
         the command template, so the values must stay shell-ready. -->
    <conditional name="advanced">
        <param name="select" type="select" label="Show advanced options" help="If no advanced options are selected, the default FFT-NS-2 strategy is executed.">
            <option value="no" selected="true">No</option>
            <option value="proteins">for protein input files</option>
            <option value="nucleic">for nucleic acid input files</option>
        </param>
        <when value="no"/>
        <when value="proteins">
            <param name="select_strategy" type="select" label="Strategy">
                <option value="mafft --quiet --auto" selected="true">Auto (FFT-NS-1, FFT-NS-2, FFT-NS-i or L-INS-i; depends on data size)</option>
                <option value="mafft --quiet --retree 1 --maxiterate 0">FFT-NS-1 (Very fast, recommended for &gt; 2,000 sequences; progressive method)</option>
                <option value="mafft --retree 2 --maxiterate 0 --quiet">FFT-NS-2 (Fast, progressive method)</option>
                <option value="mafft --retree 2 --maxiterate 2 --nofft --quiet">medium (Iterative refinement method, two cycles only)</option>
                <option value="mafft --retree 2 --maxiterate 2 --quiet">FFT-NS-i (Slow, iterative refinement method)</option>
                <option value="mafft --ep 0 --genafpair --maxiterate 1000 --quiet">E-INS-I (Very slow, recommended for &lt; 200 sequences with multiple conserved domains and long gaps)</option>
                <option value="mafft --localpair --maxiterate 1000 --quiet">L-INS-I (Very slow, recommended for &lt; 200 sequences with one conserved domain and long gaps)</option>
                <option value="mafft --globalpair --maxiterate 1000 --quiet">G-INS-I (Very slow, recommended for &lt; 200 sequences with global homology)</option>
                <option value="mafft --quiet --retree 1 --maxiterate 0 --nofft --parttree">NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm)</option>
            </param>
            <param name="matrixAA" type="select" label="Scoring matrix for amino acid sequences">
                <option value="--bl 30">BLOSUM30</option>
                <option value="--bl 45">BLOSUM45</option>
                <option value="--bl 62">BLOSUM62</option>
                <option value="--bl 80">BLOSUM80</option>
                <option value="--jtt 100">JTT100</option>
                <option value="--jtt 200">JTT200</option>
            </param>
            <!-- NOTE(review): the no-advanced branch of the command passes 1.53
                 as gap opening penalty, while this integer parameter defaults
                 to 50 and cannot hold 1.53. Confirm the intended scale against
                 the MAFFT manual before changing type or default. -->
            <param name="gap_open" label="Gap opening penalty (5-200)" type="integer" value="50"/>
            <param name="offset" label="Offset value (0-1)" type="float" value="0.0"/>
        </when>
        <when value="nucleic">
            <param name="select_strategy" type="select" label="Strategy">
                <option value="mafft --quiet --auto" selected="true">Auto (FFT-NS-1, FFT-NS-2, FFT-NS-i or L-INS-i; depends on data size)</option>
                <option value="mafft --quiet --retree 1 --maxiterate 0">FFT-NS-1 (Very fast, recommended for &gt; 2,000 sequences; progressive method)</option>
                <option value="mafft --retree 2 --maxiterate 0 --quiet">FFT-NS-2 (Fast, progressive method)</option>
                <option value="mafft --retree 2 --maxiterate 2 --nofft --quiet">medium (Iterative refinement method, two cycles only)</option>
                <option value="mafft --retree 2 --maxiterate 2 --quiet">FFT-NS-i (Slow, iterative refinement method)</option>
                <option value="mafft --ep 0 --genafpair --maxiterate 1000 --quiet">E-INS-I (Very slow, recommended for &lt; 200 sequences with multiple conserved domains and long gaps)</option>
                <option value="mafft --localpair --maxiterate 1000 --quiet">L-INS-I (Very slow, recommended for &lt; 200 sequences with one conserved domain and long gaps)</option>
                <option value="mafft --globalpair --maxiterate 1000 --quiet">G-INS-I (Very slow, recommended for &lt; 200 sequences with global homology)</option>
                <option value="mafft --quiet --retree 1 --maxiterate 0 --nofft --parttree">NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm)</option>
                <!-- The two strategies below are offered for nucleic acid input only. -->
                <option value="mafft-qinsi --quiet">Q-INS-I (Extremely slow; secondary structure of RNA is considered; recommended for a global alignment of highly divergent ncRNAs with &lt; 200 sequences, &lt; 1,000 nucleotides)</option>
                <option value="mafft-xinsi --quiet">X-INS-I (Applicable to up to ~50 sequences to ~1,000 nucleotides. Multiple structural alignment by combining pairwise structural alignments given by an external program.)</option>
            </param>
            <param name="matrixNucl" type="select" label="Scoring matrix for nucleic acid sequences">
                <option value="--kimura 1">1PAM / kappa=2</option>
                <option value="--kimura 20">20PAM / kappa=2</option>
                <option value="--kimura 200">200PAM / kappa=2</option>
            </param>
            <!-- NOTE(review): same scale question as the protein gap_open
                 parameter; the no-advanced branch uses 1.53. -->
            <param name="gap_open" label="Gap opening penalty (5-200)" type="integer" value="50"/>
            <param name="offset" label="Offset value (0-1)" type="float" value="0.0"/>
        </when>
    </conditional>
    <!-- end conditional advanced -->
</inputs>
|
|
<!-- One alignment dataset named output. Its datatype is fasta unless the
     oformat parameter is set to clustal, in which case it becomes clustal. -->
<outputs>
    <data name="output" format="fasta">
        <change_format>
            <when input="oformat" value="clustal" format="clustal"/>
        </change_format>
    </data>
</outputs>
|
|
122 <help>
|
|
123 **What it does**
|
|
124 MAFFT is a multiple sequence alignment program for proteins and nucleotides using fast fourier transform.
|
|
125
|
|
126 If no advanced options are selected, the following default parameters will be used:
|
|
127 - for proteins: mafft-FFT-NS-2 method (Fast, progressive method), BLOSUM62 substitution matrix, gap opening penalty 1.53 and offset value 0.00
|
|
128
|
|
129 - for nucleic acids: mafft-FFT-NS-2 method (Fast, progressive method), 200PAM/kappa=2 substitution matrix, gap opening penalty 1.53 and offset value 0.00
|
|
130
|
|
131
|
|
132 MAFFT offers a range of multiple alignment methods, classified into three types, (a) the progressive method, (b) the iterative refinement method with the WSP score, and (c) the iterative refinement method using both the WSP and consistency scores. In general, there is a tradeoff between speed and accuracy. The order of speed is a > b > c, whereas the order of accuracy is a &lt; b &lt; c.
|
|
133
|
|
134 - Auto (FFT-NS-1, FFT-NS-2, FFT-NS-i or L-INS-i; depends on data size) (a,b or c)
|
|
135
|
|
136 - FFT-NS-1 (Very fast, recommended for > 2,000 sequences; progressive method) (a)
|
|
137
|
|
138 - FFT-NS-2 (Fast, progressive method) (DEFAULT if no advanced options) (a)
|
|
139
|
|
140 - medium (Iterative refinement method, two cycles only) (b)
|
|
141
|
|
142 - FFT-NS-i (Slow, iterative refinement method) (b)
|
|
143
|
|
144 - E-INS-I (Very slow, recommended for &lt; 200 sequences with multiple conserved domains and long gaps) (c)
|
|
145
|
|
146 - L-INS-I (Very slow, recommended for &lt; 200 sequences with one conserved domain and long gaps) (c)
|
|
147
|
|
148 - G-INS-I (Very slow, recommended for &lt; 200 sequences with global homology) (c)
|
|
149
|
|
150 - NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm) (a)
|
|
151
|
|
152 For nucleotides only, there are still additional alignment methods:
|
|
153
|
|
154 - Q-INS-I (Extremely slow; secondary structure of RNA is considered; recommended for a global alignment of highly divergent ncRNAs with &lt; 200 sequences, &lt; 1,000 nucleotides)
|
|
155
|
|
156 - X-INS-I (Applicable to up to ~50 sequences to ~1,000 nucleotides. Multiple structural alignment by combining pairwise structural alignments given by an external program.)
|
|
157
|
|
158
|
|
159 Depending on the nature of the sequences in the input file, the advanced options change. When "for protein input files" is selected from the advanced options, BLOSUM or JTT substitution matrices can be chosen. The selection "for nucleic acid input files" only offers substitution matrices of the type PAM / kappa = x. For nucleic acids, two extra strategies are available compared to proteins, namely X-INS-i and Q-INS-i.
|
|
160
|
|
161 **Documentation**
|
|
162
|
|
163 Mafft website http://mafft.cbrc.jp/alignment/software/
|
|
164
|
|
165 Manpages of Mafft at http://mafft.cbrc.jp/alignment/software/manual/manual.html
|
|
166
|
|
167 More information about the algorithms can be found at http://mafft.cbrc.jp/alignment/software/algorithms/algorithms.html#GLE.
|
|
168
|
|
169
|
|
170 **Author and affiliation**
|
|
171
|
|
172
|
|
173 Katrien Bernaerts and Domantas Motiejunas, 21/06/2012
|
|
174
|
|
175 Corresponding author: domantas dot motiejunas at cropdesign dot com
|
|
176
|
|
177
|
|
178
|
|
179 Affiliation: CropDesign N.V., a BASF Plant Science Company - Technologiepark 3, 9052 Zwijnaarde - Belgium
|
|
180
|
|
181 **Terms of use**
|
|
182
|
|
183 Galaxy wrapper for Mafft – multiple alignment tool - Copyright (C) 2012 CropDesign N.V. - this software may be used, copied and redistributed, with or without modification freely, without advance permission, provided that the above Copyright statement is reproduced with each copy.
|
|
184 THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE (INCLUDING NEGLIGENCE OR OTHERWISE).
|
|
185
|
|
186
|
|
187 **Citation**
|
|
188
|
|
189 - Katoh, Toh 2010 (Bioinformatics 26:1899-1900). Parallelization of the MAFFT multiple sequence alignment program.(describes the multithread version; Linux only)
|
|
190
|
|
191 - Katoh, Asimenos, Toh 2009 (Methods in Molecular Biology 537:39-64). Multiple Alignment of DNA Sequences with MAFFT. In Bioinformatics for DNA Sequence Analysis edited by D. Posada (outlines DNA alignment methods and several tips including group-to-group alignment and rough clustering of a large number of sequences)
|
|
192
|
|
193 - Katoh, Toh 2008 (BMC Bioinformatics 9:212). Improved accuracy of multiple ncRNA alignment by incorporating structural information into a MAFFT-based framework. (describes RNA structural alignment methods)
|
|
194
|
|
195 - Katoh, Toh 2008 (Briefings in Bioinformatics 9:286-298). Recent developments in the MAFFT multiple sequence alignment program. (outlines version 6; Fast Breaking Paper in Thomson Reuters' ScienceWatch)
|
|
196
|
|
197 - Katoh, Toh 2007 (Bioinformatics 23:372-374) Errata. PartTree: an algorithm to build an approximate tree from a large number of unaligned sequences. (describes the PartTree algorithm)
|
|
198
|
|
199 - Katoh, Kuma, Toh, Miyata 2005 (Nucleic Acids Res. 33:511-518). MAFFT version 5: improvement in accuracy of multiple sequence alignment. (describes [ancestral versions of] the G-INS-i, L-INS-i and E-INS-i strategies)
|
|
200
|
|
201 - Katoh, Misawa, Kuma, Miyata 2002 (Nucleic Acids Res. 30:3059-3066). MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform. (describes the FFT-NS-1, FFT-NS-2 and FFT-NS-i strategies)
|
|
202
|
|
203 </help>
|
|
204 </tool>
|