comparison mafft.xml @ 15:bf28a8cff401 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mafft commit 2f6456c314c010fd73f5eeaf809a9afce47353af
author bgruening
date Wed, 20 Mar 2024 07:34:52 +0000
parents 6f28e90db932
children
comparison
equal deleted inserted replaced
14:6f28e90db932 15:bf28a8cff401
1 <?xml version="1.0" encoding="UTF-8"?> 1 <?xml version="1.0" encoding="UTF-8"?>
2 <tool id="rbc_mafft" name="MAFFT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> 2 <tool id="rbc_mafft" name="MAFFT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
3 <description>Multiple alignment program for amino acid or nucleotide sequences</description> 3 <description>Multiple alignment program for amino acid or nucleotide sequences</description>
4 <macros> 4 <macros>
5 <import>macros.xml</import> 5 <import>macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="biotools"/> 7 <expand macro="biotools"/>
8 <expand macro="requirements" /> 8 <expand macro="requirements" />
9 <stdio> 9 <stdio>
10 <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> 10 <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" />
11 <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" /> 11 <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" />
12 </stdio> 12 </stdio>
13 <version_command> <![CDATA[ 13 <version_command><![CDATA[mafft --version]]></version_command>
14 mafft --version 14 <command><![CDATA[
15 ]]> 15 ## Concatenate all input datasets no matter how they were provided
16 </version_command> 16 bash inputs.sh &&
17 <command> 17
18 <![CDATA[ 18 ## Count total number of sequences across input datasets
19 19 ## Can't do this on the concatenated input data prepared above because it's
20 #if $cond_flavour.flavourType == 'custom' 20 ## just a regular file and we don't have Galaxy-generated metadata for it.
21 #if $cond_flavour.dist_flavour.distance_method == '--fastapair' 21 #set sequence_count = 0
22 export FASTA_4_MAFFT=`which fasta36`; 22 #if $input.mapping == "implicit"
23 #for $batch in $input.batches:
24 #set sequence_count += int($batch.inputs.metadata.sequences)
25 #end for
26 #elif $input.mapping == "merge"
27 #for $batch in $input.batches:
28 #for $dataset in $batch.inputs:
29 #set sequence_count += int($dataset.metadata.sequences)
30 #end for
31 #end for
32 #end if
33
34 ## For those cases in which MAFFT needs fasta3, set an env variable to make it
35 ## find the executable. Necessary because the current version of MAFFT still
36 ## expects a fasta34 executable in path, but we bundle a newer version.
37 #if $flavour.type == "custom"
38 #if $flavour.guidetree.guidetree_generation == "original"
39 #if $flavour.guidetree.dist_flavour.distance_method == "--fastapair"
40 export FASTA_4_MAFFT=`which @FASTA3_EXEC@` &&
41 #end if
42 #elif $flavour.guidetree.guidetree_generation == "parttree"
43 #if $flavour.guidetree.parttree_selection.parttree_option == "--fastaparttree"
44 export FASTA_4_MAFFT=`which @FASTA3_EXEC@` &&
45 #end if
46 #end if
47 #end if
48
49 ## groupsize warning
50 #if $flavour.type == "custom"
51 #if $flavour.guidetree.guidetree_generation == "parttree"
52 #if $flavour.guidetree.parttree_selection.groupsize > $sequence_count
53 echo "WARNING = Chosen groupsize number larger than number of input sequences. Not recommended for MAFFT." &&
54 #end if
55 #end if
56 #end if
57
58 ## run MAFFT with predefined MSA flavours or custom settings
59 #if $flavour.type == "custom"
60 mafft
61 #if $flavour.guidetree.guidetree_generation == "original"
62 #if $flavour.guidetree.dist_flavour.distance_method == "--6merpair"
63 --6merpair
64 --retree $flavour.guidetree.dist_flavour.retree
65 #elif $flavour.guidetree.dist_flavour.distance_method == "--globalpair"
66 --globalpair
67 --weighti $flavour.guidetree.dist_flavour.weighti
68 #if $flavour.guidetree.dist_flavour.treat_unrelated_segments.unalignlevel > 0
69 --allowshift --unalignlevel $flavour.guidetree.dist_flavour.treat_unrelated_segments.unalignlevel
23 #end if 70 #end if
71 $flavour.guidetree.dist_flavour.treat_unrelated_segments.leavegappyregion
72 #elif $flavour.guidetree.dist_flavour.distance_method == "--localpair"
73 --localpair
74 --weighti $flavour.guidetree.dist_flavour.weighti
75 --lop $flavour.guidetree.dist_flavour.lop
76 --lep $flavour.guidetree.dist_flavour.lep
77 --lexp $flavour.guidetree.dist_flavour.lexp
78 #elif $flavour.guidetree.dist_flavour.distance_method == "--genafpair"
79 --genafpair
80 --weighti $flavour.guidetree.dist_flavour.weighti
81 --lop $flavour.guidetree.dist_flavour.lop
82 --lep $flavour.guidetree.dist_flavour.lep
83 --lexp $flavour.guidetree.dist_flavour.lexp
84 --LOP $flavour.guidetree.dist_flavour.LOP
85 --LEXP $flavour.guidetree.dist_flavour.LEXP
86 #elif $flavour.guidetree.dist_flavour.distance_method == "--fastapair"
87 --fastapair
88 --weighti $flavour.guidetree.dist_flavour.weighti
24 #end if 89 #end if
25 90 #elif $flavour.guidetree.guidetree_generation == "parttree"
26 #if $cond_flavour.flavourType != 'custom' 91 $flavour.guidetree.parttree_selection.parttree_option
27 $cond_flavour.flavourType 92 --retree $flavour.guidetree.parttree_selection.retree
28 #elif $cond_flavour.flavourType == 'custom' 93 --partsize $flavour.guidetree.parttree_selection.partsize
29 ### full parameter options 94 #if $flavour.guidetree.parttree_selection.groupsize != -1
30 mafft 95 --groupsize $flavour.guidetree.parttree_selection.groupsize
31 $cond_flavour.dist_flavour.distance_method
32 #if $cond_flavour.dist_flavour.distance_method == '--6merpair'
33 --retree $cond_flavour.dist_flavour.retree
34 $cond_flavour.dist_flavour.distance_method.usetree.parttree
35
36 #if $cond_flavour.dist_flavour.distance_method.usetree.parttree==--parttree
37 $cond_flavour.dist_flavour.distance_method.usetree.treedistance
38 $cond_flavour.dist_flavour.distance_method.usetree.partsize
39 $cond_flavour.dist_flavour.distance_method.usetree.groupsize
40 #end if
41
42 #elif $cond_flavour.dist_flavour.distance_method == '--globalpair'
43 --weighti $cond_flavour.dist_flavour.weighti
44 #elif $cond_flavour.dist_flavour.distance_method == '--localpair'
45 --weighti $cond_flavour.dist_flavour.weighti
46 --lop $cond_flavour.dist_flavour.lop
47 --lep $cond_flavour.dist_flavour.lep
48 --lexp $cond_flavour.dist_flavour.lexp
49 #elif $cond_flavour.dist_flavour.distance_method == '--genafpair'
50 --weighti $cond_flavour.dist_flavour.weighti
51 --lop $cond_flavour.dist_flavour.lop
52 --lep $cond_flavour.dist_flavour.lep
53 --lexp $cond_flavour.dist_flavour.lexp
54 --LOP $cond_flavour.dist_flavour.skipLOP
55 --EXP $cond_flavour.dist_flavour.skipEXP 1
56 #elif $cond_flavour.dist_flavour.distance_method == '--fastapair'
57 --weighti $cond_flavour.dist_flavour.weighti
58 #end if
59 --maxiterate $cond_flavour.iterations
60 $cond_flavour.fft
61 $cond_flavour.score
62 #end if 96 #end if
63 97 #end if
64 ## specify threads to use 98 ## progressive alignment calculation
65 --thread \${GALAXY_SLOTS:-1} 99 --maxiterate $flavour.progressive_alignment_calculation.maxiterate
66 $datatype 100 $flavour.progressive_alignment_calculation.fft
67 --ep $ep 101 $flavour.progressive_alignment_calculation.noscore
68 --op $op 102 #else
69 103 $flavour.type
70 #if $matrix_condition.matrix == "BLOSUM" 104 #if $flavour.type == "mafft-ginsi" or "--globalpair" in str($flavour.type)
71 --bl $matrix_condition.BLOSUM 105 #if $flavour.treat_unrelated_segments.unalignlevel > 0
72 #elif $matrix_condition.matrix == "PAM" 106 --allowshift --unalignlevel $flavour.treat_unrelated_segments.unalignlevel
73 --jtt $matrix_condition.PAM
74 --tm $matrix_condition.tm
75 #elif $matrix_condition.matrix == "custom"
76 --aamatrix '$matrix_condition.matrixfile'
77 --fmodel $matrix_condition.fmodel
78 #end if 107 #end if
79 108 $flavour.treat_unrelated_segments.leavegappyregion
80 $reorder 109 #end if
81 $getTree 110 #end if
82 $outputFormat 111
83 '$inputSequences' > '$outputAlignment'; 112 ## handle scoring matrix
84 113 $datatype_selection.datatype
85 #if $getTree == "--treeout" 114 #if $datatype_selection.datatype != ""
86 mv '${inputSequences}.tree' '$outputTree'; 115 #if $datatype_selection.scoring_matrix.type == "custom"
87 #end if 116 --aamatrix '$datatype_selection.scoring_matrix.aamatrix'
88 ]]> 117 #else
89 </command> 118 $datatype_selection.scoring_matrix.type $datatype_selection.scoring_matrix.coefficient
90 <inputs> 119 #end if
91 <param name="inputSequences" type="data" format="fasta" label="Sequences to align" help="Amino acid or nucleotide sequences in FASTA format."/> 120 $datatype_selection.fmodel
92 <param name="datatype" type="select" label="Data type"> 121 ## gap penalties
93 <option value="">Auto detection</option> 122 #if $datatype_selection.gap_costs.use_defaults == "no"
94 <option value="--nuc">Nucleic acids</option> 123 --ep $datatype_selection.gap_costs.ep --op $datatype_selection.gap_costs.op
95 <option value="--amino">Amino acids</option> 124 #end if
96 </param> 125 #end if
97 <conditional name="cond_flavour"> 126
98 <param name="flavourType" type="select" label="MAFFT flavour" help="Run mafft with pre-defined input parameters. Specification of these parameters can be found in the help section."> 127
99 <option value="mafft --auto">auto</option> 128 ## output options
100 <option value="mafft-fftns" selected="true">fftns</option> 129 $reorder
101 <option value="mafft-fftnsi">fftnsi</option> 130 $outputFormat
102 <option value="mafft-nwns">nwns</option> 131 $treeout
103 <option value="mafft-nwnsi">nwnsi</option> 132
104 <option value="mafft-einsi">einsi</option> 133 ## specify threads to use
105 <option value="mafft-ginsi">ginsi</option> 134 ## disable multithreading during iterative refinement step for reproducibility
106 <option value="mafft-linsi">linsi</option> 135 ## cmp. https://mafft.cbrc.jp/alignment/software/multithreading.html
107 <option value="mafft-qinsi">qinsi</option> 136 --thread \${GALAXY_SLOTS:-1} --threadit 0
108 <option value="mafft-xinsi">xinsi</option> 137
109 <option value="custom">Custom Parameters</option> 138 input.fa > '$outputAlignment'
110 </param> 139
111 <when value="mafft-fftns"/> 140 ## Output alignment tree
112 <when value="mafft --auto"/> 141 #if $treeout
113 <when value="mafft-fftnsi"/> 142 && mv input.fa.tree '$outputTree'
114 <when value="mafft-nwns"/> 143 #end if
115 <when value="mafft-nwnsi"/> 144 ]]></command>
116 <when value="mafft-einsi"/> 145 <configfiles>
117 <when value="mafft-ginsi"/> 146 <configfile filename="inputs.sh"><![CDATA[
118 <when value="mafft-linsi"/> 147 #if $input.mapping == "implicit"
119 <when value="mafft-qinsi"/> 148 #for $batch in $input.batches:
120 <when value="mafft-xinsi"/> 149 cat $batch.inputs >> input.fa
121 <when value="custom"> 150 #end for
122 <conditional name="dist_flavour"> 151 #elif $input.mapping == "merge"
123 <param name="distance_method" type="select" display="radio" label="Distance method" help="Distance method must be chosen regarding your data"> 152 #for $batch in $input.batches:
124 <option value="--6merpair" selected="true">Shared 6mers distance (fastest)</option> 153 #for $dataset in $batch.inputs:
125 <option value="--globalpair">Global alignment (Needleman-Wunsch)</option> 154 cat $dataset >> input.fa
126 <option value="--localpair">Local alignment (Smith-Waterman)</option> 155 #end for
127 <option value="--genafpair">Local, affine gap cost</option> 156 #end for
128 <option value="--fastapair">All pairwise alignments are computed with FASTA</option> 157 #end if
129 </param> 158 ]]></configfile>
130 <when value="--6merpair"> 159 </configfiles>
131 <param name="retree" type="integer" value="2" min="1" max="100" label="Guide tree is built this number of times in the progressive stage." help="Valid with 6mer distance" /> 160 <inputs>
132 <conditional name="usetree"> 161 <conditional name="input">
133 <param name="parttree" type="select" label="Use a fast tree-building method?" help="Recommended for a large number (> ~10,000) of sequences are input" > 162 <param name="mapping" type="select" label="For multiple inputs generate" help="All you have is a single dataset with the sequences to align? You can skip this help text and continue with the default setting. For multiple input datasets, the first mode will launch separate MAFFT jobs for all sequences from the first, second, ..., n-th dataset/element from each input batch, respectively, resulting in n separate MSAs. The second mode will concatenate all input sequences from all inputs for a single run of MAFFT and will generate a single MSA.">
134 <option value="--parttree" selected="true">Yes</option> 163 <option value="implicit">one or several MSAs depending on input structure</option>
135 <option value="">No</option> 164 <option value="merge">a single MSA of all sequences from all inputs</option>
136 </param> 165 </param>
137 <when value="--parttree"> 166 <when value="implicit">
138 <param name="treedistance" type="select" display="radio" label="Distance method" help="Distance method must be chosen regarding your data"> 167 <repeat name="batches" title="Input batch" default="1" min="1">
139 <option value="--fastaparttree" selected="true">Distances based on FASTA</option> 168 <param name="inputs" type="data" format="fasta" label="Sequences to align" help="Amino acid or nucleotide sequences in FASTA format. Add Dataset for concatenation of every additional dataset with each file of the first upload panel"/>
140 <option value="--dpparttree">Distances based on DP. (Needleman-Wunsch)</option> 169 </repeat>
141 </param> 170 </when>
142 <param name="partsize" type="integer" value="50" min="0" max="1000" label="Number of partitions in the PartTree algorithm" /> 171 <when value="merge">
143 <param name="groupsize" type="integer" value="" min="0" max="1000" label="Do not make alignment larger than ... sequences" /> 172 <repeat name="batches" title="Input batch" default="1" min="1">
144 </when> 173 <param name="inputs" multiple="true" type="data" format="fasta" label="Sequences to align" help="Amino acid or nucleotide sequences in FASTA format."/>
145 <when value=""/> 174 </repeat>
146 </conditional> 175 </when>
147 </when>
148 <when value="--globalpair">
149 <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." />
150 </when>
151 <when value="--localpair">
152 <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." />
153 <param name="lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value" />
154 <param name="lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value" />
155 <param name="lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" />
156 </when>
157 <when value="--genafpair">
158 <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." />
159 <param name="lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value" />
160 <param name="lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value" />
161 <param name="lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" />
162 <param name="skipLOP" type="float" value="-6.00" label="Gap opening penalty to skip the alignment" help="-6.00 default value" />
163 <param name="skipEXP" type="float" value="0.00" label="Gap extension penalty to skip the alignment" help="0 default value" />
164 </when>
165 <when value="--fastapair">
166 <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." />
167 </when>
168 </conditional> 176 </conditional>
169 <param name="iterations" type="integer" value="0" min="0" max="1000" label="Maximum number of iterations" help="1000 for maximum quality" /> 177 <conditional name="datatype_selection">
170 <param name="fft" type="boolean" truevalue="--fft" falsevalue="--nofft" checked="True" label="Use FFT approximation in group-to-group alignment?" /> 178 <param name="datatype" type="select" label="Type of sequences" help="The tool can try to detect the type of the input sequences, but you likely want to declare it explicitly. Doing so will also give you control over the scoring matrix used for the alignment, while autodetection will result in the Kimura PAM200 and the BLOSUM62 matrix being used for nucleic acids and protein alignments, respectively.">
171 <param name="score" type="boolean" truevalue="" falsevalue="--noscore" checked="True" label="Check alignment score in the iterative refinement stage?" /> 179 <option value="">auto-detect</option>
172 </when> 180 <option value="--nuc">Nucleic acids</option>
173 </conditional> 181 <option value="--amino">Amino acids</option>
174 <param name="ep" type="float" value="0.0" label="Gap extend penalty for group-to-group alignment" help="Offset value, which works like gap extension penalty, for group-to-group alignment. For E-INS-i, 0 is recommended to allow large gaps" /> 182 </param>
175 <param name="op" type="float" value="1.53" label="Gap opening penalty at group-to-group alignment." help="1.53 default value" /> 183 <when value="" />
176 <conditional name="matrix_condition"> 184 <when value="--nuc">
177 <param name="matrix" type="select" label="Matrix selection" display="radio" help="Useful only for amino acids" > 185 <conditional name="scoring_matrix">
178 <option value="">No matrix</option> 186 <param name="type" type="select" label="Type of scoring matrix" help="See the tool help below for details about the available options.">
179 <option value="BLOSUM" selected="true">BLOSUM</option> 187 <option value="--kimura">Kimura</option>
180 <option value="PAM">PAM</option> 188 </param>
181 <option value="custom">Custom</option> 189 <when value="--kimura">
182 </param> 190 <param argument="--kimura" name="coefficient" type="integer" value="200" min="1" label="PAM value of the matrix" />
183 <when value=""/> 191 </when>
184 <when value="BLOSUM"> 192 </conditional>
185 <param name="BLOSUM" type="select" display="radio" label="Coefficient of the BLOSUM matrix"> 193 <expand macro="misc_scoring_scheme" />
186 <option value="30">30</option> 194 </when>
187 <option value="45">45</option> 195 <when value="--amino">
188 <option value="62" selected="true">62</option> 196 <conditional name="scoring_matrix">
189 <option value="80">80</option> 197 <param name="type" type="select" label="Type of scoring matrix" help="See the tool help below for details about the available options.">
198 <option value="--bl" selected="true">BLOSUM</option>
199 <option value="--jtt">JTT</option>
200 <option value="--tm">transmembrane protein-optimized JTT</option>
201 <option value="custom">custom matrix</option>
202 </param>
203 <when value="--bl">
204 <param argument="--bl" name="coefficient" type="select" display="radio" label="Coefficient of the BLOSUM matrix">
205 <option value="30">30</option>
206 <option value="45">45</option>
207 <option value="62" selected="true">62</option>
208 <option value="80">80</option>
209 </param>
210 </when>
211 <when value="--jtt">
212 <param argument="--jtt" name="coefficient" type="integer" value="200" min="1" label="PAM value of the matrix" />
213 </when>
214 <when value="--tm">
215 <param argument="--tm" name="coefficient" type="integer" value="200" min="1" label="PAM value of the matrix"/>
216 </when>
217 <when value="custom">
218 <param argument="--aamatrix" type="data" format="txt" label="User-defined AA scoring matrix" help="The expected format of the matrix is the same as that used by BLAST."/>
219 </when>
220 </conditional>
221 <expand macro="misc_scoring_scheme" />
222 </when>
223 </conditional>
224 <conditional name="flavour">
225 <param name="type" type="select" label="MAFFT flavour" help="Run mafft with pre-defined input parameters. Specification of these parameters can be found in the help section. With 'Auto', the tool automatically selects an appropriate strategy from L-INS-i, FFT-NS-i and FFT-NS-2, according to data size from few to many respectively. Default setting: FFT-NS-2.">
226 <option value="mafft --auto">Auto</option>
227 <option value="mafft-fftns --retree 1">FFT-NS-1 (very fast, progressive method; use for &gt;2,000 sequences)</option>
228 <option value="mafft-fftns" selected="true">FFT-NS-2 (fast, progressive method)</option>
229 <option value="mafft-nwns">NW-NS-2 (fast, progressive method without FFT approximation)</option>
230 <option value="mafft --retree 1 --maxiterate 0 --nofft --parttree">NW-NS-PartTree-1 (very fast, progressive method using the PartTree algorithm; for ~10,000 to ~50,000 sequences)</option>
231 <option value="mafft --maxiterate 0 --globalpair">G-INS-1 (slow, progressive method with an accurate guide tree)</option>
232 <option value="mafft-fftnsi">FFT-NS-i (slow, iterative refinement method)</option>
233 <option value="mafft-nwnsi">NW-NS-i (slow, iterative refinement method without FFT approximation)</option>
234 <option value="mafft-einsi">E-INS-i (very slow; use for &lt;200 sequences with multiple conserved domains and long gaps)</option>
235 <option value="mafft-linsi">L-INS-i (very slow; use for &lt;200 sequences with one conserved domain and long gaps)</option>
236 <option value="mafft-ginsi">G-INS-i (very slow; recommended for &lt;200 sequences with global homology)</option>
237 <option value="custom">Custom Parameters</option>
238 </param>
239 <when value="mafft --auto"/>
240 <when value="mafft-fftns --retree 1"/>
241 <when value="mafft-fftns"/>
242 <when value="mafft-nwns"/>
243 <when value="mafft --retree 1 --maxiterate 0 --nofft --parttree"/>
244 <when value="mafft --maxiterate 0 --globalpair">
245 <expand macro="global_align_options"/>
246 </when>
247 <when value="mafft-fftnsi"/>
248 <when value="mafft-nwnsi"/>
249 <when value="mafft-einsi"/>
250 <when value="mafft-linsi"/>
251 <when value="mafft-ginsi">
252 <expand macro="global_align_options"/>
253 </when>
254 <when value="custom">
255 <conditional name="guidetree">
256 <param name="guidetree_generation" type="select" label="GuideTree-Generation" help="Parttree is recommended for a large number (> ~10,000) of sequences as input">
257 <option value="original">Original guidetree building method of MAFFT</option>
258 <option value="parttree">Fast guidetree building method with PartTree-algorithm</option>
259 </param>
260 <when value="original">
261 <conditional name="dist_flavour">
262 <param name="distance_method" type="select" label="Distance method" help="Distance method must be chosen regarding your data">
263 <option value="--6merpair" selected="true">Shared 6mers distance (fastest) (--6merpair)</option>
264 <option value="--globalpair">Global alignment (Needleman-Wunsch) (--globalpair)</option>
265 <option value="--localpair">Local alignment (Smith-Waterman) (--localpair)</option>
266 <option value="--genafpair">Local, affine gap cost (--genafpair)</option>
267 <option value="--fastapair">All pairwise alignments are computed with FASTA (--fastapair)</option>
268 </param>
269 <when value="--6merpair">
270 <param argument="--retree" type="integer" value="2" min="1" max="3" label="Guide tree is built this number of times in the progressive stage."/>
271 </when>
272 <when value="--globalpair">
273 <expand macro="global_align_options"/>
274 <expand macro="weighti_param" />
275 </when>
276 <when value="--localpair">
277 <expand macro="weighti_param" />
278 <param argument="--lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value"/>
279 <param argument="--lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value"/>
280 <param argument="--lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" />
281 </when>
282 <when value="--genafpair">
283 <expand macro="weighti_param" />
284 <param argument="--lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value" />
285 <param argument="--lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value" />
286 <param argument="--lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" />
287 <param argument="--LOP" type="float" value="-6.00" label="Gap opening penalty to skip the alignment" help="-6.00 default value" />
288 <param argument="--LEXP" type="float" value="0.00" label="Gap extension penalty to skip the alignment" help="0 default value" />
289 </when>
290 <when value="--fastapair">
291 <expand macro="weighti_param" />
292 </when>
293 </conditional>
294 </when>
295 <when value="parttree">
296 <conditional name="parttree_selection">
297 <param name="parttree_option" type="select" label="Which distance for the fast tree-building method?">
298 <option value="--parttree" selected="true">Fast tree-building method with the 6mer distance (--parttree)</option>
299 <option value="--fastaparttree">Distances based on FASTA (--fastaparttree)</option>
300 <option value="--dpparttree">Distances based on DP. (Needleman-Wunsch) (--dpparttree)</option>
301 </param>
302 <when value="--parttree">
303 <expand macro="parttree_parameters" />
304 </when>
305 <when value="--fastaparttree">
306 <expand macro="parttree_parameters" />
307 </when>
308 <when value="--dpparttree">
309 <expand macro="parttree_parameters" />
310 </when>
311 </conditional>
312 </when>
313 </conditional>
314 <section name="progressive_alignment_calculation" title="Progressive alignment calculation" expanded="true">
315 <param argument="--maxiterate" type="integer" value="0" min="0" max="1000" label="Maximum number of iterations" help="1000 for maximum quality" />
316 <param argument="--fft" type="boolean" truevalue="--fft" falsevalue="--nofft" checked="True" label="Use FFT approximation in group-to-group alignment?" />
317 <param argument="--noscore" type="boolean" truevalue="" falsevalue="--noscore" checked="True" label="Check alignment score in the iterative refinement stage?" />
318 </section>
319 </when>
320 </conditional>
321 <param argument="--reorder" type="boolean" truevalue="--reorder" falsevalue="" checked="False" label="Reorder output?" help="Default order is input order." />
322 <param argument="--treeout" type="boolean" truevalue="--treeout" falsevalue="" checked="False" label="Keep alignment tree as output?" />
323 <param name="outputFormat" type="select" label="Output format">
324 <option value="" selected="true">FASTA</option>
325 <option value="--clustalout">ClustalW</option>
326 <option value="--phylipout">Phylip</option>
190 </param> 327 </param>
191 </when> 328 </inputs>
192 <when value="PAM"> 329 <outputs>
193 <param name="PAM" type="integer" value="80" min="1" max="350" label="Coefficient of the JTT PAM matrix" /> 330 <data format="fasta" name="outputAlignment" label="${tool.name} on ${on_string}">
194 <param name="tm" type="integer" value="80" min="1" max="350" label="Coefficient of the transmembrane PAM matrix" /> 331 <change_format>
195 </when> 332 <when input="outputFormat" value="--clustalout" format="clustal"/>
196 <when value="custom"> 333 <when input="outputFormat" value="--phylipout" format="phylip"/>
197 <param name="matrixfile" type="data" format="txt" label="User-defined AA scoring matrix" help="The format of matrixfile is the same to that of BLAST. Ignored when nucleotide sequences are input."/> 334 </change_format>
198 <param name="fmodel" type="boolean" truevalue="--fmodel" falsevalue="" checked="False" label="Incorporate the AA/nuc composition information into the scoring matrix?" /> 335 </data>
199 </when> 336 <data name="outputTree" format="txt" label="${tool.name} Guide Tree">
200 </conditional> 337 <filter>treeout</filter>
201 <param name="reorder" type="boolean" truevalue="--reorder" falsevalue="" checked="False" label="Reorder output?" /> 338 </data>
202 <param name="getTree" type="boolean" truevalue="--treeout" falsevalue="" checked="False" label="Display alignment tree ?" /> 339 </outputs>
203 <param name="outputFormat" type="select" label="Output format" help="Either FASTA or ClustalW"> 340 <tests>
204 <option value="" selected="true">FASTA</option> 341 <test expect_num_outputs="1">
205 <option value="--clustalout">ClustalW</option> 342 <conditional name="input">
206 <option value="--phylipout">Phylip</option> 343 <param name="mapping" value="implicit"/>
207 </param> 344 <repeat name="batches">
208 </inputs> 345 <param name="inputs" value="sample_amino.fa"/>
209 <outputs> 346 </repeat>
210 <data format="fasta" name="outputAlignment" label="${tool.name} on ${on_string}"> 347 </conditional>
211 <change_format> 348 <output name="outputAlignment" ftype="fasta" file="mafft_default.aln"/>
212 <when input="outputFormat" value="--clustalout" format="clustal"/> 349 </test>
213 <when input="outputFormat" value="--phylipout" format="phylip"/> 350 <!-- test autodetection of suitable algorithm from input; expected to choose L-INS-i -->
214 </change_format> 351 <test expect_num_outputs="1">
215 </data> 352 <conditional name="input">
216 <data name="outputTree" format="txt" label="${tool.name} Guide Tree"> 353 <param name="mapping" value="implicit"/>
217 <filter>getTree == True</filter> 354 <repeat name="batches">
218 </data> 355 <param name="inputs" value="sample_amino.fa"/>
219 </outputs> 356 </repeat>
220 <tests> 357 </conditional>
221 <test expect_num_outputs="1" > 358 <conditional name="flavour">
222 <param name="inputSequences" value="sample.fa"/> 359 <param name="type" value="mafft --auto"/>
223 <param name="flavourType" value="mafft-fftns"/> 360 </conditional>
224 <param name="outputFormat" value=""/> 361 <output name="outputAlignment" ftype="fasta" file="mafft_auto_linsi.aln"/>
225 <output name="outputAlignment" ftype="fasta" file="mafft_fftns_result.aln"/> 362 </test>
226 </test> 363 <!-- test explicit specification of L-INS-i mode -->
227 <test expect_num_outputs="1" > 364 <test expect_num_outputs="1">
228 <param name="inputSequences" value="sample.fa"/> 365 <conditional name="input">
229 <param name="flavourType" value="mafft-nwns"/> 366 <param name="mapping" value="implicit"/>
230 <param name="outputFormat" value="--clustalout"/> 367 <repeat name="batches">
231 <output name="outputAlignment" ftype="clustal" file="mafft_nwns_result.aln" lines_diff="2" /> 368 <param name="inputs" value="sample_amino.fa"/>
232 </test> 369 </repeat>
233 <!-- WARNING: the results of the following test depends on #threads. 370 </conditional>
234 The result seems deterministic for single threaded execution, i.e. GALAXY_SLOTS=1 planemo test 371 <conditional name="flavour">
235 However, GH CI/CD uses 2 threads and results vary --> 372 <param name="type" value="mafft-linsi"/>
236 <test expect_num_outputs="1" > 373 </conditional>
237 <param name="inputSequences" value="sample.fa"/> 374 <output name="outputAlignment" ftype="fasta" file="mafft_auto_linsi.aln"/>
238 <param name="flavourType" value="custom"/> 375 </test>
239 <conditional name="matrix_condition"> 376 <test expect_num_outputs="1">
240 <param name="matrix" value="BLOSUM"/> 377 <conditional name="input">
241 </conditional> 378 <param name="mapping" value="implicit"/>
242 <param name="BLOSUM" value="62"/> 379 <repeat name="batches">
243 <param name="distance_method" value="--fastapair"/> 380 <param name="inputs" value="sample_amino.fa"/>
244 <param name="weighti" value="2.7"/> 381 </repeat>
245 <param name="iterations" value="1000"/> 382 </conditional>
246 <param name="outputFormat" value="--clustalout"/> 383 <conditional name="datatype_selection">
247 <output name="outputAlignment" ftype="clustal" file="mafft_custom_result.aln" compare="sim_size"> 384 <param name="datatype" value="--amino"/>
248 <assert_contents> 385 <conditional name="scoring_matrix">
249 <has_n_lines n="458" delta="0"/> 386 <param name="type" value="--bl"/>
250 <has_text text="CLUSTAL format alignment by MAFFT F-INS-i"/> 387 <param name="coefficient" value="80"/>
251 <has_text text="NPIVYGISHPKY"/> 388 </conditional>
252 <has_text text="1=="/> 389 </conditional>
253 <has_text text="36=="/> 390 <conditional name="flavour">
254 <has_line line="8=opsin, ------------------------------------------------------------"/> 391 <param name="type" value="mafft-fftns"/>
255 </assert_contents> 392 </conditional>
256 </output> 393 <param name="outputFormat" value="--clustalout"/>
257 </test> 394 <output name="outputAlignment" ftype="clustal" file="mafft_explicit_amino_blosum80.clustal.aln" />
258 </tests> 395 </test>
259 <help> <![CDATA[ 396 <test expect_num_outputs="1" >
260 **What it does** 397 <conditional name="input">
261 398 <param name="mapping" value="implicit"/>
262 MAFFT is a multiple sequence alignment program for unix-like operating systems. 399 <repeat name="batches">
263 It offers a range of multiple alignment methods, L-INS-i (accurate; for alignment of <∼200 sequences), 400 <param name="inputs" value="sample_nuc.fa"/>
264 FFT-NS-2 (fast; for alignment of <∼30,000 sequences), etc. 401 </repeat>
265 From the MAFFT man page, an overview of the different predefined flavours of the tool is as follows: 402 </conditional>
266 403 <conditional name="datatype_selection">
267 **Accuracy-oriented methods:** 404 <param name="datatype" value="--nuc"/>
268 405 <conditional name="scoring_matrix">
269 - L-INS-i (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information): 406 <param name="type" value="--kimura"/>
407 <param name="coefficient" value="40"/>
408 </conditional>
409 </conditional>
410 <conditional name="flavour">
411 <param name="type" value="mafft-fftns"/>
412 </conditional>
413 <param name="outputFormat" value="--phylipout"/>
414 <output name="outputAlignment" ftype="phylip" file="mafft_kimura40.phylip.aln" />
415 </test>
416 <test expect_num_outputs="1">
417 <conditional name="input">
418 <param name="mapping" value="implicit"/>
419 <repeat name="batches">
420 <param name="inputs" value="sample_amino.fa"/>
421 </repeat>
422 </conditional>
423 <conditional name="datatype_selection">
424 <param name="datatype" value="--amino"/>
425 </conditional>
426 <conditional name="flavour">
427 <param name="type" value="custom"/>
428 <conditional name="guidetree">
429 <param name="guidetree_generation" value="original"/>
430 <conditional name="dist_flavour">
431 <param name="distance_method" value="--globalpair"/>
432 <param name="weighti" value="3"/>
433 </conditional>
434 </conditional>
435 <section name="progressive_alignment_calculation">
436 <param name="maxiterate" value="1000"/>
437 </section>
438 </conditional>
439 <param name="outputFormat" value="--clustalout"/>
440 <output name="outputAlignment" ftype="clustal" file="mafft_custom_original.clustal.aln">
441 </output>
442 </test>
443 <test expect_num_outputs="1">
444 <conditional name="input">
445 <param name="mapping" value="implicit"/>
446 <repeat name="batches">
447 <param name="inputs" value="sample_amino.fa"/>
448 </repeat>
449 </conditional>
450 <conditional name="datatype_selection">
451 <param name="datatype" value="--amino"/>
452 </conditional>
453 <conditional name="flavour">
454 <param name="type" value="custom"/>
455 <conditional name="guidetree">
456 <param name="guidetree_generation" value="parttree"/>
457 <conditional name="parttree_selection">
458 <param name="parttree_option" value="--parttree"/>
459 <param name="retree" value="2"/>
460 </conditional>
461 </conditional>
462 </conditional>
463 <output name="outputAlignment" ftype="fasta" file="mafft_custom_parttree.aln" />
464 </test>
465 <!-- test concatenation of multiple inputs -->
466 <test expect_num_outputs="2">
467 <conditional name="input">
468 <param name="mapping" value="merge"/>
469 <repeat name="batches">
470 <param name="inputs" value="sample_amino.fa"/>
471 </repeat>
472 <repeat name="batches">
473 <param name="inputs" value="sample_nuc.fa"/>
474 </repeat>
475 </conditional>
476 <param name="treeout" value="true"/>
477 <output name="outputAlignment" ftype="fasta">
478 <metadata name="sequences" value="39"/>
479 </output>
480 </test>
481 </tests>
482 <help><![CDATA[
483 **What it does**
484
485 MAFFT is a multiple sequence alignment (MSA) program, which offers a range of multiple alignment methods.
486
487 Input types and alignment scoring matrices
488 ------------------------------------------
489
490 For the alignment of *protein* sequences, you can choose between:
491
492 - different flavors of BLOSUM matrices (`Henikoff S and Henikoff JG, 1992 <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC50453/>`__)
493 - JTT matrices with any point accepted mutation (PAM) rate (`Jones, Taylor and Thornton, 1992 <https://pubmed.ncbi.nlm.nih.gov/1633570/>`__)
494 - PAM-based matrices optimized for transmembrane proteins (`Jones, Taylor and Thornton, 1994 <https://pubmed.ncbi.nlm.nih.gov/8112466/>`__)
495
496 For nucleic acid sequence alignment, MAFFT uses Kimura's two-parameter model (`Kimura 1980 <https://pubmed.ncbi.nlm.nih.gov/7463489/>`__)
497 with a transition-to-transversion ratio of 2 (kappa 2), but lets you configure the PAM value.
498
499 The tool can also try to autodetect the sequence type from the input(s).
500 In this mode, it will use the BLOSUM 62 matrix if it detects amino acid input, and the Kimura kappa 2 PAM200 matrix for nucleic acids.
501
502
503 Pre-configured MSA methods
504 --------------------------
505
506 The following overview of the different predefined flavours of the tool is taken from the `MAFFT man page <https://mafft.cbrc.jp/alignment/software/manual/manual.html>`__.
507
508 **Accuracy-oriented methods:**
509
510 - *L-INS-i* (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information):
511
270 - mafft --localpair --maxiterate 1000 input [> output] 512 - mafft --localpair --maxiterate 1000 input [> output]
271 - G-INS-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information): 513 - *G-INS-i* (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information):
514
272 - mafft --globalpair --maxiterate 1000 input [> output] 515 - mafft --globalpair --maxiterate 1000 input [> output]
273 - E-INS-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences): 516 - *E-INS-i* (suitable for sequences containing large unalignable regions; recommended for <200 sequences):
517
274 - mafft --ep 0 --genafpair --maxiterate 1000 input [> output]. For E-INS-i, the --ep 0 option is recommended to allow large gaps. 518 - mafft --ep 0 --genafpair --maxiterate 1000 input [> output]. For E-INS-i, the --ep 0 option is recommended to allow large gaps.
275 519
276 **Speed-oriented methods:** 520 **Speed-oriented methods:**
277 521
278 - FFT-NS-i (iterative refinement method; two cycles only): 522 - *FFT-NS-i* (iterative refinement method; two cycles only):
279 - mafft --retree 2 --maxiterate 2 input [> output] 523
280 - FFT-NS-i (iterative refinement method; max. 1000 iterations): 524 - mafft --retree 2 --maxiterate 2 input [> output]
281 - mafft --retree 2 --maxiterate 1000 input [> output] 525 - *FFT-NS-2* (fast; progressive method):
282 - FFT-NS-2 (fast; progressive method): 526
283 - mafft --retree 2 --maxiterate 0 input [> output] 527 - mafft --retree 2 --maxiterate 0 input [> output]
284 - FFT-NS-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree): 528 - *NW-NS-i* (iterative refinement method without FFT approximation; two cycles only):
285 - mafft --retree 1 --maxiterate 0 input [> output] 529
286 - NW-NS-i (iterative refinement method without FFT approximation; two cycles only): 530 - mafft --retree 2 --maxiterate 2 --nofft input [> output]
287 - mafft --retree 2 --maxiterate 2 --nofft input [> output] 531 - *NW-NS-2* (fast; progressive method without the FFT approximation):
288 - NW-NS-2 (fast; progressive method without the FFT approximation): 532
289 - mafft --retree 2 --maxiterate 0 --nofft input [> output] 533 - mafft --retree 2 --maxiterate 0 --nofft input [> output]
290 - NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm): 534 - *NW-NS-PartTree-1* (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm):
291 - mafft --retree 1 --maxiterate 0 --nofft --parttree input [> output] 535
292 536 - mafft --retree 1 --maxiterate 0 --nofft --parttree input [> output]
293 **Options:** 537 - *FFT-NS-1* (very fast; recommended for >2000 sequences; progressive method with a rough guide tree):
294 538
295 - --auto Automatically selects an appropriate strategy from L-INS-i, FFT-NS-i and FFT-NS-2, according to data size. Default: off (always FFT-NS-2) 539 - mafft --retree 1 --maxiterate 0 input [> output]
296 - --adjustdirection Generate reverse complement sequences, as necessary, and align them together with the remaining sequences. In the case of protein alignment, these options are just ignored. 540 ]]></help>
297 - --op Gap opening penalty, default: 1.53 541 <expand macro="citations" />
298 - --ep Offset (works like gap extension penalty), default: 0.0
299 - --maxiterate Maximum number of iterative refinement, default: 0
300 - --clustalout Output: clustal format, default: fasta
301 - --retree number Guide tree is built number times in the progressive stage. Valid with 6mer distance. Default: 2
302 ]]>
303 </help>
304 <expand macro="citations" />
305 </tool> 542 </tool>