comparison Mafft/mafft.xml @ 0:e4d26cd8be10 draft default tip

Uploaded
author basfplant
date Tue, 05 Mar 2013 04:01:17 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e4d26cd8be10
1 <tool id="mafft" name="Mafft" version="1.0.0">
2 <description>Multiple sequence Alignment using Fast Fourier Transform</description>
3 <command>
4 ## Assemble one mafft call from four parts, always emitted in this order:
5 ## 1. the strategy (executable name plus its flags),
6 ## 2. --clustalout when Clustal output was requested,
7 ## 3. the scoring matrix,
8 ## 4. the gap opening penalty (--op) and the offset (--ep).
9 ## With advanced options switched on, parts 1, 3 and 4 come from the user's selections;
10 ## otherwise fixed defaults are used and the matrix follows the declared input type.
11 #set $mode = str($advanced.select)
12 ## Part 1: the advanced strategy option values already begin with the executable name.
13 #if $mode in ("proteins", "nucleic"):
14 $advanced.select_strategy
15 #else:
16 mafft --retree 2 --maxiterate 0 --quiet ##mafft-fftns2, the default strategy
17 #end if
18 ## Part 2: the output format switch does not depend on the strategy.
19 #if $oformat.value == "clustal":
20 --clustalout
21 #end if
22 ## Parts 3 and 4 for the advanced protein settings.
23 #if $mode == "proteins":
24 $advanced.matrixAA
25 --op $advanced.gap_open
26 --ep $advanced.offset
27 ## Parts 3 and 4 for the advanced nucleic acid settings.
28 #else if $mode == "nucleic":
29 $advanced.matrixNucl
30 --op $advanced.gap_open
31 --ep $advanced.offset
32 ## Parts 3 and 4 without advanced options: default matrix per input type, fixed penalties.
33 #else:
34 #if str($inputtype.selectInput) == "proteins":
35 --bl 62
36 #else:
37 --kimura 200
38 #end if
39 --op 1.530000 --ep 0.000000
40 #end if
41 ## The alignment is written to the output dataset through stdout; stderr is discarded.
42 $inputfile > $output 2> /dev/null
43 </command>
44 <inputs>
45 <conditional name="inputtype"> <!-- declared sequence type; the command reads it only when advanced options are off -->
46 <param name="selectInput" type="select" label="Select the type of sequences in your input file.">
47 <option value="proteins" selected="true">protein input</option>
48 <option value="nucleic">nucleic acid input</option>
49 </param>
50 <when value="proteins">
51 </when>
52 <when value="nucleic">
53 </when>
54 </conditional> <!-- end conditional inputtype -->
55 <param name="inputfile" type="data" format="fasta" label="Unaligned fasta input sequences (proteins or nucleotides)" />
56 <param name="oformat" type="select" label="Output format" help="Please select an output format. Error: tree output generates alignment file instead of Newick file, also on the command line.">
57 <option value="fasta" selected="true">fasta</option>
58 <option value="clustal">clustal</option>
59 </param>
60 <conditional name="advanced"> <!-- each option value of select_strategy is a complete command prefix inserted verbatim into the command -->
61 <param name="select" type="select" label="Show advanced options" help="If no advanced options are selected, the default FFT-NS-2 strategy is executed.">
62 <option value="no" selected="true">No</option>
63 <option value="proteins">for protein input files</option>
64 <option value="nucleic">for nucleic acid input files</option>
65 </param>
66 <when value="no">
67 </when>
68 <when value="proteins">
69 <param name="select_strategy" type="select" label="Strategy">
70 <option value="mafft --quiet --auto" selected="true">Auto (FFT-NS-1, FFT-NS-2, FFT-NS-i or L-INS-i; depends on data size)</option>
71 <option value="mafft --quiet --retree 1 --maxiterate 0">FFT-NS-1 (Very fast, recommended for &gt; 2,000 sequences; progressive method)</option>
72 <option value="mafft --retree 2 --maxiterate 0 --quiet">FFT-NS-2 (Fast, progressive method)</option>
73 <option value="mafft --retree 2 --maxiterate 2 --nofft --quiet">medium (Iterative refinement method, two cycles only)</option>
74 <option value="mafft --retree 2 --maxiterate 2 --quiet">FFT-NS-i (Slow, iterative refinement method)</option>
75 <option value="mafft --ep 0 --genafpair --maxiterate 1000 --quiet">E-INS-I (Very slow, recommended for &lt; 200 sequences with multiple conserved domains and long gaps)</option>
76 <option value="mafft --localpair --maxiterate 1000 --quiet">L-INS-I (Very slow, recommended for &lt; 200 sequences with one conserved domain and long gaps)</option>
77 <option value="mafft --globalpair --maxiterate 1000 --quiet">G-INS-I (Very slow, recommended for &lt; 200 sequences with global homology)</option>
78 <option value="mafft --quiet --retree 1 --maxiterate 0 --nofft --parttree">NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm)</option>
79 </param>
80 <param name="matrixAA" type="select" label="Scoring matrix for amino acid sequences">
81 <option value="--bl 30">BLOSUM30</option>
82 <option value="--bl 45">BLOSUM45</option>
83 <option value="--bl 62">BLOSUM62</option>
84 <option value="--bl 80">BLOSUM80</option>
85 <option value="--jtt 100">JTT100</option>
86 <option value="--jtt 200">JTT200</option>
87 </param>
88 <param name="gap_open" label="Gap opening penalty (5-200)" type="integer" value="50"/> <!-- NOTE(review): the non-advanced path passes a gap opening penalty of 1.53; confirm that the default of 50 and the 5-200 range are intended for mafft (applies to both gap_open parameters) -->
89 <param name="offset" label="Offset value (0-1)" type="float" value="0.0"/>
90 </when>
91 <when value="nucleic">
92 <param name="select_strategy" type="select" label="Strategy">
93 <option value="mafft --quiet --auto" selected="true">Auto (FFT-NS-1, FFT-NS-2, FFT-NS-i or L-INS-i; depends on data size)</option>
94 <option value="mafft --quiet --retree 1 --maxiterate 0">FFT-NS-1 (Very fast, recommended for &gt; 2,000 sequences; progressive method)</option>
95 <option value="mafft --retree 2 --maxiterate 0 --quiet">FFT-NS-2 (Fast, progressive method)</option>
96 <option value="mafft --retree 2 --maxiterate 2 --nofft --quiet">medium (Iterative refinement method, two cycles only)</option>
97 <option value="mafft --retree 2 --maxiterate 2 --quiet">FFT-NS-i (Slow, iterative refinement method)</option>
98 <option value="mafft --ep 0 --genafpair --maxiterate 1000 --quiet">E-INS-I (Very slow, recommended for &lt; 200 sequences with multiple conserved domains and long gaps)</option>
99 <option value="mafft --localpair --maxiterate 1000 --quiet">L-INS-I (Very slow, recommended for &lt; 200 sequences with one conserved domain and long gaps)</option>
100 <option value="mafft --globalpair --maxiterate 1000 --quiet">G-INS-I (Very slow, recommended for &lt; 200 sequences with global homology)</option>
101 <option value="mafft --quiet --retree 1 --maxiterate 0 --nofft --parttree">NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm)</option>
102 <option value="mafft-qinsi --quiet">Q-INS-I (Extremely slow; secondary structure of RNA is considered; recommended for a global alignment of highly divergent ncRNAs with &lt; 200 sequences, &lt; 1,000 nucleotides)</option>
103 <option value="mafft-xinsi --quiet">X-INS-I (Applicable to up to ~50 sequences to ~1,000 nucleotides. Multiple structural alignment by combining pairwise structural alignments given by an external program.)</option>
104 </param>
105 <param name="matrixNucl" type="select" label="Scoring matrix for nucleic acid sequences">
106 <option value="--kimura 1">1PAM / kappa=2</option>
107 <option value="--kimura 20">20PAM / kappa=2</option>
108 <option value="--kimura 200">200PAM / kappa=2</option>
109 </param>
110 <param name="gap_open" label="Gap opening penalty (5-200)" type="integer" value="50"/>
111 <param name="offset" label="Offset value (0-1)" type="float" value="0.0"/>
112 </when>
113 </conditional> <!-- end conditional advanced -->
114 </inputs>
115 <outputs>
116 <data name="output" format="fasta"> <!-- single alignment dataset; FASTA unless the rule below applies -->
117 <change_format>
118 <when format="clustal" input="oformat" value="clustal"/> <!-- label the dataset as clustal when that output format was chosen -->
119 </change_format>
120 </data>
121 </outputs>
122 <help>
123 **What it does**
124 MAFFT is a multiple sequence alignment program for protein and nucleotide sequences based on the fast Fourier transform.
125
126 If no advanced options are selected, the following default parameters will be used:
127 - for proteins: mafft-FFT-NS-2 method (Fast, progressive method), BLOSUM62 substitution matrix, gap opening penalty 1.53 and offset value 0.00
128
129 - for nucleic acids: mafft-FFT-NS-2 method (Fast, progressive method), 200PAM/kappa=2 substitution matrix, gap opening penalty 1.53 and offset value 0.00
130
131
132 MAFFT offers a range of multiple alignment methods, classified into three types: (a) the progressive method, (b) the iterative refinement method with the WSP score, and (c) the iterative refinement method using both the WSP and consistency scores. In general, there is a tradeoff between speed and accuracy. The order of speed is a > b > c, whereas the order of accuracy is a &lt; b &lt; c.
133
134 - Auto (FFT-NS-1, FFT-NS-2, FFT-NS-i or L-INS-i; depends on data size) (a,b or c)
135
136 - FFT-NS-1 (Very fast, recommended for &gt; 2,000 sequences; progressive method) (a)
137
138 - FFT-NS-2 (Fast, progressive method) (DEFAULT if no advanced options) (a)
139
140 - medium (Iterative refinement method, two cycles only) (b)
141
142 - FFT-NS-i (Slow, iterative refinement method) (b)
143
144 - E-INS-I (Very slow, recommended for &lt; 200 sequences with multiple conserved domains and long gaps) (c)
145
146 - L-INS-I (Very slow, recommended for &lt; 200 sequences with one conserved domain and long gaps) (c)
147
148 - G-INS-I (Very slow, recommended for &lt; 200 sequences with global homology) (c)
149
150 - NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm) (a)
151
152 For nucleotides only, there are still additional alignment methods:
153
154 - Q-INS-I (Extremely slow; secondary structure of RNA is considered; recommended for a global alignment of highly divergent ncRNAs with &lt; 200 sequences, &lt; 1,000 nucleotides)
155
156 - X-INS-I (Applicable to up to ~50 sequences to ~1,000 nucleotides. Multiple structural alignment by combining pairwise structural alignments given by an external program.)
157
158
159 Depending on the nature of the sequences in the input file, the advanced options change. When "for protein input files" is selected from the advanced options, BLOSUM or JTT substitution matrices can be chosen. The selection "for nucleic acid input files" only offers substitution matrices of the type PAM / kappa = x. For nucleic acids, two extra strategies are available compared to proteins, namely X-INS-i and Q-INS-i.
160
161 **Documentation**
162
163 Mafft website http://mafft.cbrc.jp/alignment/software/
164
165 Manpages of Mafft at http://mafft.cbrc.jp/alignment/software/manual/manual.html
166
167 More information about the algorithms can be found at http://mafft.cbrc.jp/alignment/software/algorithms/algorithms.html#GLE.
168
169
170 **Author and affiliation**
171
172
173 Katrien Bernaerts and Domantas Motiejunas, 21/06/2012
174
175 Corresponding author: domantas dot motiejunas at cropdesign dot com
176
177
178
179 Affiliation: CropDesign N.V., a BASF Plant Science Company - Technologiepark 3, 9052 Zwijnaarde - Belgium
180
181 **Terms of use**
182
183 Galaxy wrapper for Mafft – multiple alignment tool - Copyright (C) 2012 CropDesign N.V. - this software may be used, copied and redistributed, with or without modification freely, without advance permission, provided that the above Copyright statement is reproduced with each copy.
184 THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE (INCLUDING NEGLIGENCE OR OTHERWISE).
185
186
187 **Citation**
188
189 - Katoh, Toh 2010 (Bioinformatics 26:1899-1900). Parallelization of the MAFFT multiple sequence alignment program.(describes the multithread version; Linux only)
190
191 - Katoh, Asimenos, Toh 2009 (Methods in Molecular Biology 537:39-64). Multiple Alignment of DNA Sequences with MAFFT. In Bioinformatics for DNA Sequence Analysis edited by D. Posada (outlines DNA alignment methods and several tips including group-to-group alignment and rough clustering of a large number of sequences)
192
193 - Katoh, Toh 2008 (BMC Bioinformatics 9:212). Improved accuracy of multiple ncRNA alignment by incorporating structural information into a MAFFT-based framework. (describes RNA structural alignment methods)
194
195 - Katoh, Toh 2008 (Briefings in Bioinformatics 9:286-298). Recent developments in the MAFFT multiple sequence alignment program. (outlines version 6; Fast Breaking Paper in Thomson Reuters' ScienceWatch)
196
197 - Katoh, Toh 2007 (Bioinformatics 23:372-374) Errata. PartTree: an algorithm to build an approximate tree from a large number of unaligned sequences. (describes the PartTree algorithm)
198
199 - Katoh, Kuma, Toh, Miyata 2005 (Nucleic Acids Res. 33:511-518). MAFFT version 5: improvement in accuracy of multiple sequence alignment. (describes [ancestral versions of] the G-INS-i, L-INS-i and E-INS-i strategies)
200
201 - Katoh, Misawa, Kuma, Miyata 2002 (Nucleic Acids Res. 30:3059-3066). MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform. (describes the FFT-NS-1, FFT-NS-2 and FFT-NS-i strategies)
202
203 </help>
204 </tool>