diff Mafft/mafft.xml @ 0:e4d26cd8be10 draft default tip

Uploaded
author basfplant
date Tue, 05 Mar 2013 04:01:17 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Mafft/mafft.xml	Tue Mar 05 04:01:17 2013 -0500
@@ -0,0 +1,204 @@
+<tool id="mafft" name="Mafft" version="1.0.0">
+<description>Multiple sequence Alignment using Fast Fourier Transform</description>
+<command>
+## Assembles a single MAFFT call and redirects the alignment into the output
+## dataset. Two modes exist, selected by the "advanced" conditional:
+##   - advanced (proteins / nucleic): strategy, scoring matrix, gap opening
+##     penalty and offset all come from the user's choices;
+##   - simple (any other value): FFT-NS-2 with hard-coded gap parameters and a
+##     matrix picked from the declared sequence type.
+#if str($advanced.select) in ("proteins", "nucleic"):
+	## Each strategy option value already begins with the executable name
+	## (mafft, mafft-qinsi or mafft-xinsi), so it has to be emitted first.
+	$advanced.select_strategy
+	#if $oformat.value == "clustal":
+		--clustalout
+	#end if
+	## matrixAA is declared only under the proteins case, matrixNucl only under nucleic.
+	#if str($advanced.select) == "proteins":
+		$advanced.matrixAA
+	#else:
+		$advanced.matrixNucl
+	#end if
+	## Note: the E-INS-I strategy value also contains --ep 0, so --ep appears twice then.
+	--op $advanced.gap_open
+	--ep $advanced.offset
+#else:
+	## Simple mode: the sequence type selected in "inputtype" is consulted only
+	## here; the advanced branch relies on its own proteins / nucleic choice.
+	mafft --retree 2 --maxiterate 0 --quiet ##mafft-fftns2, the default strategy
+	#if $oformat.value == "clustal":
+		--clustalout
+	#end if
+	#if str($inputtype.selectInput) == "proteins":
+		--bl 62
+	#else:
+		--kimura 200
+	#end if
+	--op 1.530000 --ep 0.000000
+#end if
+## Dataset paths are single-quoted so the shell treats each as one word even if
+## it contains whitespace or glob characters. Standard error is discarded.
+'$inputfile' > '$output' 2> /dev/null
+</command>
+<inputs>
+	<conditional name="inputtype"> <!-- sequence type; the command template reads it only when advanced options are off -->
+		<param name="selectInput" type="select" label="Select the type of sequences in your input file.">
+			<option value="proteins" selected="True">protein input</option>
+			<option value="nucleic">nucleic acid input</option>
+		</param>
+		<when value="proteins">
+		</when>
+		<when value="nucleic">
+		</when>
+	</conditional> <!-- end conditional input -->
+	<param name="inputfile" type="data" format="fasta" label="Unaligned fasta input sequences (proteins or nucleotides)" />
+	<param name="oformat" type="select" label="Output format" help="Please select an output format. Error: tree output generates alignment file instead of Newick file, also in the command line."> <!-- value is tested in the command template and in the output's change_format -->
+		<option value="fasta" selected="true">fasta</option>
+		<option value="clustal">clustal</option>
+	</param>
+	<!-- Advanced options: each strategy value is the start of the command line (executable name plus its flags). -->
+	<conditional name="advanced">
+		<param name="select" type="select" label="Show advanced options" help="If no advanced options are selected, the default FFT-NS-2 strategy is executed.">
+			<option value="no" selected="True">No</option>
+			<option value="proteins">for protein input files</option>
+			<option value="nucleic">for nucleic acid input files</option>
+		</param>
+		<when value="no"/>
+		<when value="proteins">
+			<param name="select_strategy" type="select" label="Strategy">
+				<option value="mafft --quiet --auto" selected="true">Auto (FFT-NS-1, FFT-NS-2, FFT-NS-i or L-INS-i; depends on data size)</option>
+				<option value="mafft --quiet --retree 1 --maxiterate 0">FFT-NS-1 (Very fast, recommended for &gt; 2,000 sequences; progressive method)</option>
+				<option value="mafft --retree 2 --maxiterate 0 --quiet">FFT-NS-2 (Fast, progressive method)</option>
+				<option value="mafft --retree 2 --maxiterate 2 --nofft --quiet">medium (Iterative refinement method, two cycles only)</option>
+				<option value="mafft --retree 2 --maxiterate 2 --quiet">FFT-NS-i (Slow, iterative refinement method)</option>
+				<option value="mafft --ep 0 --genafpair --maxiterate 1000 --quiet">E-INS-I (Very slow, recommended for &lt; 200 sequences with multiple conserved domains and long gaps)</option>
+				<option value="mafft --localpair --maxiterate 1000 --quiet">L-INS-I (Very slow, recommended for &lt; 200 sequences with one conserved domain and long gaps)</option>
+				<option value="mafft --globalpair --maxiterate 1000 --quiet">G-INS-I (Very slow, recommended for &lt; 200 sequences with global homology)</option>
+				<option value="mafft --quiet --retree 1 --maxiterate 0 --nofft --parttree">NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm)</option>
+			</param>
+			<param name="matrixAA" type="select" label="Scoring matrix for amino acid sequences">
+				<option value="--bl 30">BLOSUM30</option>
+				<option value="--bl 45">BLOSUM45</option>
+				<option value="--bl 62">BLOSUM62</option>
+				<option value="--bl 80">BLOSUM80</option>
+				<option value="--jtt 100">JTT100</option>
+				<option value="--jtt 200">JTT200</option>
+			</param>
+			<param name="gap_open" label="Gap opening penalty (5-200)" type="float" value="50"/> <!-- float so fractional penalties such as 1.53 can be entered -->
+			<param name="offset" label="Offset value (0-1)" type="float" value="0.0"/>
+		</when>
+		<when value="nucleic">
+			<param name="select_strategy" type="select" label="Strategy">
+				<option value="mafft --quiet --auto" selected="true">Auto (FFT-NS-1, FFT-NS-2, FFT-NS-i or L-INS-i; depends on data size)</option>
+				<option value="mafft --quiet --retree 1 --maxiterate 0">FFT-NS-1 (Very fast, recommended for &gt; 2,000 sequences; progressive method)</option>
+				<option value="mafft --retree 2 --maxiterate 0 --quiet">FFT-NS-2 (Fast, progressive method)</option>
+				<option value="mafft --retree 2 --maxiterate 2 --nofft --quiet">medium (Iterative refinement method, two cycles only)</option>
+				<option value="mafft --retree 2 --maxiterate 2 --quiet">FFT-NS-i (Slow, iterative refinement method)</option>
+				<option value="mafft --ep 0 --genafpair --maxiterate 1000 --quiet">E-INS-I (Very slow, recommended for &lt; 200 sequences with multiple conserved domains and long gaps)</option>
+				<option value="mafft --localpair --maxiterate 1000 --quiet">L-INS-I (Very slow, recommended for &lt; 200 sequences with one conserved domain and long gaps)</option>
+				<option value="mafft --globalpair --maxiterate 1000 --quiet">G-INS-I (Very slow, recommended for &lt; 200 sequences with global homology)</option>
+				<option value="mafft --quiet --retree 1 --maxiterate 0 --nofft --parttree">NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm)</option>
+				<option value="mafft-qinsi --quiet">Q-INS-I (Extremely slow; secondary structure of RNA is considered; recommended for a global alignment of highly divergent ncRNAs with &lt; 200 sequences, &lt; 1,000 nucleotides)</option>
+				<option value="mafft-xinsi --quiet">X-INS-I (Applicable to up to ~50 sequences to ~1,000 nucleotides. Multiple structural alignment by combining pairwise structural alignments given by an external program.)</option>
+			</param>
+			<param name="matrixNucl" type="select" label="Scoring matrix for nucleic acid sequences">
+				<option value="--kimura 1">1PAM / kappa=2</option>
+				<option value="--kimura 20">20PAM / kappa=2</option>
+				<option value="--kimura 200">200PAM / kappa=2</option>
+			</param>
+			<param name="gap_open" label="Gap opening penalty (5-200)" type="float" value="50"/> <!-- float so fractional penalties such as 1.53 can be entered -->
+			<param name="offset" label="Offset value (0-1)" type="float" value="0.0"/>
+		</when>
+	</conditional> <!-- end conditional advanced -->
+</inputs>
+<outputs>
+	<data name="output" format="fasta"> <!-- fasta unless the oformat parameter is set to clustal -->
+		<change_format>
+			<when format="clustal" input="oformat" value="clustal" />
+		</change_format>
+	</data>
+</outputs>
+<help>
+**What it does**
+MAFFT is a multiple sequence alignment program for proteins and nucleotides using the fast Fourier transform.
+
+If no advanced options are selected, the following default parameters will be used:
+
+- for proteins: mafft-FFT-NS-2 method (Fast, progressive method), BLOSUM62 substitution matrix, gap opening penalty 1.53 and offset value 0.00
+
+- for nucleic acids: mafft-FFT-NS-2 method (Fast, progressive method), 200PAM/kappa=2 substitution matrix, gap opening penalty 1.53 and offset value 0.00
+
+MAFFT offers a range of multiple alignment methods, classified into three types, (a) the progressive method, (b) the iterative refinement method with the WSP score, and (c) the iterative refinement method using both the WSP and consistency scores. In general, there is a tradeoff between speed and accuracy. The order of speed is a > b > c, whereas the order of accuracy is a &lt; b &lt; c. 
+
+- Auto (FFT-NS-1, FFT-NS-2, FFT-NS-i or L-INS-i; depends on data size) (a,b or c) 
+
+- FFT-NS-1 (Very fast, recommended for &gt; 2,000 sequences; progressive method) (a)
+
+- FFT-NS-2 (Fast, progressive method) (DEFAULT if no advanced options) (a)
+
+- medium (Iterative refinement method, two cycles only) (b)
+
+- FFT-NS-i (Slow, iterative refinement method) (b)
+
+- E-INS-I (Very slow, recommended for &lt; 200 sequences with multiple conserved domains and long gaps) (c)
+
+- L-INS-I (Very slow, recommended for &lt; 200 sequences with one conserved domain and long gaps) (c)
+
+- G-INS-I (Very slow, recommended for &lt; 200 sequences with global homology) (c)
+
+- NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm) (a)
+
+For nucleotides only, there are still additional alignment methods:
+
+- Q-INS-I (Extremely slow; secondary structure of RNA is considered; recommended for a global alignment of highly divergent ncRNAs with &lt; 200 sequences, &lt; 1,000 nucleotides)
+
+- X-INS-I (Applicable to up to ~50 sequences to ~1,000 nucleotides. Multiple structural alignment by combining pairwise structural alignments given by an external program.)
+
+
+Depending on the nature of the sequences in the input file, the advanced options change. When "for protein input files" is selected from the advanced options, BLOSUM or JTT substitution matrices can be chosen. The selection "for nucleic acid input files" only offers substitution matrices of the type PAM / kappa = x. For nucleic acids, two extra strategies are available compared to proteins, namely X-INS-i and Q-INS-i.
+
+**Documentation** 
+
+Mafft website http://mafft.cbrc.jp/alignment/software/
+
+Manpages of Mafft at http://mafft.cbrc.jp/alignment/software/manual/manual.html
+
+More information about the algorithms can be found at http://mafft.cbrc.jp/alignment/software/algorithms/algorithms.html#GLE.
+
+
+**Author and affiliation**
+
+
+Katrien Bernaerts and Domantas Motiejunas, 21/06/2012
+
+Corresponding author: domantas dot motiejunas at cropdesign dot com
+
+
+
+Affiliation: CropDesign N.V., a BASF Plant Science Company - Technologiepark 3, 9052 Zwijnaarde - Belgium
+
+**Terms of use**
+
+Galaxy wrapper for Mafft – multiple alignment tool - Copyright (C) 2012 CropDesign N.V. - this software may be used, copied and redistributed, with or without modification freely, without advance permission, provided that the above Copyright statement is reproduced with each copy. 
+THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE (INCLUDING NEGLIGENCE OR OTHERWISE).
+
+
+**Citation**
+
+- Katoh, Toh 2010 (Bioinformatics 26:1899-1900). Parallelization of the MAFFT multiple sequence alignment program. (describes the multithread version; Linux only) 
+
+- Katoh, Asimenos, Toh 2009 (Methods in Molecular Biology 537:39-64). Multiple Alignment of DNA Sequences with MAFFT. In Bioinformatics for DNA Sequence Analysis edited by D. Posada (outlines DNA alignment methods and several tips including group-to-group alignment and rough clustering of a large number of sequences) 
+
+- Katoh, Toh 2008 (BMC Bioinformatics 9:212). Improved accuracy of multiple ncRNA alignment by incorporating structural information into a MAFFT-based framework. (describes RNA structural alignment methods)
+
+- Katoh, Toh 2008 (Briefings in Bioinformatics 9:286-298). Recent developments in the MAFFT multiple sequence alignment program. (outlines version 6; Fast Breaking Paper in Thomson Reuters' ScienceWatch) 
+
+- Katoh, Toh 2007 (Bioinformatics 23:372-374) Errata. PartTree: an algorithm to build an approximate tree from a large number of unaligned sequences. (describes the PartTree algorithm) 
+
+- Katoh, Kuma, Toh, Miyata 2005 (Nucleic Acids Res. 33:511-518). MAFFT version 5: improvement in accuracy of multiple sequence alignment. (describes [ancestral versions of] the G-INS-i, L-INS-i and E-INS-i strategies) 
+
+- Katoh, Misawa, Kuma, Miyata 2002 (Nucleic Acids Res. 30:3059-3066). MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform. (describes the FFT-NS-1, FFT-NS-2 and FFT-NS-i strategies) 
+
+</help>
+</tool>