view mafft.xml @ 5:15974dd17515 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mafft commit fc3727b1b39cd5654a523d03e0df2b9ac87ddcda
author rnateam
date Tue, 20 Sep 2016 09:30:47 -0400
parents f0606dfd5195
children aaefa93c2dd2
line wrap: on
line source

<tool id="rbc_mafft" name="MAFFT" version="7.221.3">
    <description>Multiple alignment program for amino acid or nucleotide sequences</description>
    <requirements>
        <!-- Package dependency: mafft, pinned to version 7.221. -->
        <requirement version="7.221" type="package">mafft</requirement>
    </requirements>
    <stdio>
        <!-- Every non-zero exit code, positive or negative, is treated as a fatal error. -->
        <exit_code level="fatal" range="1:" description="Error occurred. Please check Tool Standard Error" />
        <exit_code level="fatal" range=":-1" description="Error occurred. Please check Tool Standard Error" />
    </stdio>
    <!-- Command whose output is recorded as the MAFFT version. -->
    <version_command><![CDATA[mafft --version]]></version_command>
    <command>
    <![CDATA[
        ## Builds one MAFFT invocation from the form values.
        ## A predefined flavour supplies its own executable string (for example
        ## mafft-fftns or "mafft --auto"); the custom flavour calls plain mafft
        ## with an explicit distance method, guide-tree rebuild count and
        ## iteration limit.
        #if $cond_flavour.flavourType != 'custom'
            $cond_flavour.flavourType
        #else
            mafft
            $cond_flavour.distance_method
            --retree $cond_flavour.retree
            --maxiterate $cond_flavour.iterations
        #end if

        ## specify threads to use (falls back to 1 when GALAXY_SLOTS is unset)
        --thread \${GALAXY_SLOTS:-1}

        $datatype
        --ep $ep
        --op $op
        $adjustdirection

        ## optional scoring matrix; nothing is added when "No matrix" is selected
        #if $matrix_condition.matrix == "BLOSUM"
            --bl ${matrix_condition.BLOSUM}
        #elif $matrix_condition.matrix == "PAM"
            --jtt ${matrix_condition.PAM}
        #end if

        $reorder
        $getTree
        $outputFormat
        ## Paths are single-quoted so the shell never word-splits or globs them.
        '$inputSequences' > '$outputAlignment'

        ## When a tree was requested, the mv expects it at <input path>.tree.
        ## NOTE(review): this requires the input dataset directory to be writable; confirm.
        ## Chained with a logical AND so a failing mafft run is reported by its own
        ## exit status instead of being replaced by the exit status of mv.
        #if str($getTree) == "--treeout"
            && mv '${inputSequences}.tree' '$outputTree'
        #end if
    ]]>
    </command>
    <inputs>
        <!-- Sequences to align and their alphabet (an empty value lets MAFFT auto-detect). -->
        <param name="inputSequences" type="data" format="fasta" label="Sequences to align" help="Amino acid or nucleotide sequences in FASTA format."/>
        <param name="datatype" type="select" label="Data type">
            <option value="">Auto detection</option>
            <option value="--nuc">Nucleic acids</option>
            <option value="--amino">Amino acids</option>
        </param>
        <!-- Alignment strategy: each predefined option value is the executable string
             inserted verbatim into the command; "custom" exposes the individual settings. -->
        <conditional name="cond_flavour">
            <param name="flavourType" type="select" label="MAFFT flavour" help="Run mafft with pre-defined input parameters. Specification of these parameters can be found in the help section.">
                <option value="mafft --auto">auto</option>
                <option value="mafft-fftns" selected="true">fftns</option>
                <option value="mafft-fftnsi">fftnsi</option>
                <option value="mafft-nwns">nwns</option>
                <option value="mafft-nwnsi">nwnsi</option>
                <option value="mafft-einsi">einsi</option>
                <option value="mafft-ginsi">ginsi</option>
                <option value="mafft-linsi">linsi</option>
                <option value="mafft-qinsi">qinsi</option>
                <option value="mafft-xinsi">xinsi</option>
                <option value="custom">Custom Parameters</option>
            </param>
            <when value="mafft-fftns"/>
            <when value="mafft --auto"/>
            <when value="mafft-fftnsi"/>
            <when value="mafft-nwns"/>
            <when value="mafft-nwnsi"/>
            <when value="mafft-einsi"/>
            <when value="mafft-ginsi"/>
            <when value="mafft-linsi"/>
            <when value="mafft-qinsi"/>
            <when value="mafft-xinsi"/>
            <when value="custom">
                <param name="distance_method" type="select" display="radio" label="Distance method" help="Distance method must be chosen regarding your data">
                    <option value="--6merpair" selected="true">Shared 6mers distance (fastest)</option>
                    <option value="--globalpair">Global alignment (Needleman-Wunsch)</option>
                    <option value="--localpair">Local alignment (Smith-Waterman)</option>
                    <option value="--genafpair">Local, affine gap cost</option>
                </param>
                <param name="retree" type="integer" value="2" min="1" max="100" label="Guide tree is built this number of times in the progressive stage." help="Valid with 6mer distance" />
                <param name="iterations" type="integer" value="0" min="0" max="1000" label="Maximum number of iterations" help="1000 for maximum quality" />
            </when>
        </conditional>
        <!-- Gap penalties; both are always passed on the command line. -->
        <param name="ep" type="float" value="0.123" label="Gap extend penalty" help="Offset value, which works like gap extension penalty, for group-to-group alignment. For E-INS-i, 0 is recommended to allow large gaps" />
        <param name="op" type="float" value="1.53" label="Gap opening penalty" help="1.53 default value" />
        <!-- The "do not adjust" option carries a single-space value, which adds no flag to the command. -->
        <param name="adjustdirection" type="select" display="radio" label="Direction of nucleotide sequences" help="Generate reverse complement sequences, as necessary, and align them together with the remaining sequences">
            <option value="--adjustdirection">adjust direction</option>
            <option value=" " selected="true">do not adjust direction</option>
        </param>
        <!-- Optional scoring matrix: BLOSUM maps to the bl option, PAM to the jtt option of the command. -->
        <conditional name="matrix_condition">
            <param name="matrix" type="select" label="Matrix selection" display="radio" help="Useful only for amino acids" >
                <option value="">No matrix</option>
                <option value="BLOSUM">BLOSUM</option>
                <option value="PAM">PAM</option>
            </param>
            <when value=""/>
            <when value="BLOSUM">
                <param name="BLOSUM" type="select" display="radio" label="Coefficient of the BLOSUM matrix">
                    <option value="30">30</option>
                    <option value="45">45</option>
                    <option value="62" selected="true">62</option>
                    <option value="80">80</option>
                </param>
            </when>
            <when value="PAM">
                <param name="PAM" type="integer" value="80" min="1" max="350" label="Coefficient of the PAM matrix" />
            </when>
        </conditional>
        <!-- Output controls: sequence order, optional guide tree, and alignment format. -->
        <param name="reorder" type="boolean" truevalue="--reorder" falsevalue="" checked="False" label="Reorder output?" />
        <param name="getTree" type="boolean" truevalue="--treeout" falsevalue="" checked="False" label="Display alignment tree ?" />
        <param name="outputFormat" type="select" label="Output format" help="FASTA, ClustalW or Phylip">
            <option value="" selected="true">FASTA</option>
            <option value="--clustalout">ClustalW</option>
            <option value="--phylipout">Phylip</option>
        </param>
    </inputs>
    <outputs>
        <!-- Alignment dataset: FASTA unless the selected output format switches it to clustal or phylip. -->
        <data name="outputAlignment" format="fasta" label="${tool.name} on ${on_string}">
            <change_format>
                <when format="clustal" input="outputFormat" value="--clustalout"/>
                <when format="phylip" input="outputFormat" value="--phylipout"/>
            </change_format>
        </data>
        <!-- Guide tree dataset: only produced when the getTree option is checked. -->
        <data format="txt" name="outputTree" label="${tool.name} Guide Tree">
            <filter>getTree == True</filter>
        </data>
    </outputs>
    <tests>
        <!-- FFT-NS flavour with the default FASTA output. -->
        <test>
            <param name="inputSequences" value="sample.fa"/>
            <param name="flavourType" value="mafft-fftns"/>
            <param name="outputFormat" value=""/>
            <output name="outputAlignment" file="mafft_fftns_result.aln" ftype="fasta"/>
        </test>
        <!-- NW-NS flavour with ClustalW output. -->
        <test>
            <param name="inputSequences" value="sample.fa"/>
            <param name="flavourType" value="mafft-nwns"/>
            <param name="outputFormat" value="--clustalout"/>
            <output name="outputAlignment" file="mafft_nwns_result.aln" ftype="clustal"/>
        </test>
    </tests>
    <help>
    <![CDATA[
**What it does**

MAFFT is a multiple sequence alignment program for unix-like operating systems.  
It offers a range of multiple alignment methods, including L-INS-i (accurate; for alignment of <~200 sequences)
and FFT-NS-2 (fast; for alignment of <~30,000 sequences).

The following overview of the tool's predefined flavours is taken from the MAFFT man page.

**Accuracy-oriented methods:**

- L-INS-i (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information):
    
    - mafft --localpair --maxiterate 1000 input [> output]

- G-INS-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information):
    
    - mafft --globalpair --maxiterate 1000 input [> output]

- E-INS-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences):
    
    - mafft --ep 0 --genafpair --maxiterate 1000 input [> output]. For E-INS-i, the --ep 0 option is recommended to allow large gaps. 


**Speed-oriented methods:**

- FFT-NS-i (iterative refinement method; two cycles only):

    - mafft --retree 2 --maxiterate 2 input [> output]

- FFT-NS-i (iterative refinement method; max. 1000 iterations):

    - mafft --retree 2 --maxiterate 1000 input [> output]

- FFT-NS-2 (fast; progressive method):

    - mafft --retree 2 --maxiterate 0 input [> output]

- FFT-NS-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree):

    - mafft --retree 1 --maxiterate 0 input [> output]

- NW-NS-i (iterative refinement method without FFT approximation; two cycles only):

    - mafft --retree 2 --maxiterate 2 --nofft input [> output]

- NW-NS-2 (fast; progressive method without the FFT approximation):

    - mafft --retree 2 --maxiterate 0 --nofft input [> output]

- NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm):

    - mafft --retree 1 --maxiterate 0 --nofft --parttree input [> output]

**Options:**

--auto
    Automatically selects an appropriate strategy from L-INS-i, FFT-NS-i and FFT-NS-2, according to data size. Default: off (always FFT-NS-2)
--adjustdirection
    Generate reverse complement sequences, as necessary, and align them together with the remaining sequences. In the case of protein alignment, these options are just ignored.
--op
    Gap opening penalty, default: 1.53
--ep
    Offset (works like gap extension penalty), default: 0.0. Note that this tool always passes --ep explicitly, and its form field is pre-filled with 0.123.
--maxiterate
    Maximum number of iterative refinement, default: 0
--clustalout
    Output: clustal format, default: fasta
--thread
    Number of threads (if unsure, --thread -1). In this tool the value is set automatically from GALAXY_SLOTS, falling back to 1.
--retree number
    Guide tree is built number times in the progressive stage.  Valid with 6mer distance.  Default: 2
    ]]>
    </help>
    <!-- Publication to cite when using this tool, identified by DOI. -->
    <citations><citation type="doi">10.1093/molbev/mst010</citation></citations>
</tool>