view mafft.xml @ 14:6f28e90db932 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/mafft commit 1570f3a28232b4b88385cdfbb68f79d80ff1dabb
author bgruening
date Tue, 31 Oct 2023 15:48:53 +0000
parents 4de39704d423
children bf28a8cff401
line wrap: on
line source

<?xml version="1.0" encoding="UTF-8"?>
<tool id="rbc_mafft" name="MAFFT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<!-- One-line summary of what the wrapped program does. -->
<description>Multiple alignment program for amino acid or nucleotide sequences</description>
  <!-- Shared definitions are imported from macros.xml (not shown here); the @TOOL_VERSION@,
       @VERSION_SUFFIX@ and @PROFILE@ tokens used on the tool element and the macros expanded
       in this file are presumably defined there (TODO confirm). -->
  <macros>
    <import>macros.xml</import>
  </macros>
  <!-- NOTE(review): the bodies of the "biotools" and "requirements" macros are not visible in
       this file; verify in macros.xml that the requirements provide both mafft and fasta36,
       since the command section calls `which fasta36`. -->
  <expand macro="biotools"/>
  <expand macro="requirements" />
  <!-- Exit-status handling: both the positive range (1 and above) and the negative range
       (-1 and below) are reported as fatal, with the same message pointing to stderr. -->
  <stdio>
    <exit_code
        range="1:"
        level="fatal"
        description="Error occurred. Please check Tool Standard Error"/>
    <exit_code
        range=":-1"
        level="fatal"
        description="Error occurred. Please check Tool Standard Error"/>
  </stdio>
  <!-- Command whose output identifies the installed MAFFT version.
       Keep this comment outside the element: the element text is the command itself. -->
  <version_command>    <![CDATA[
    mafft --version
    ]]>
  </version_command>
  <command>
    <![CDATA[
      ## Builds the MAFFT command line from the tool form.
      ## Either one of the predefined flavour executables is invoked, or plain mafft with
      ## hand-picked options (custom mode). The options shared by both modes (threads, data
      ## type, gap penalties, scoring matrix, output switches) are appended afterwards.

      ## FASTA-based distance modes rely on the external FASTA program, so MAFFT is pointed
      ## at fasta36 through FASTA_4_MAFFT for --fastapair as well as for --fastaparttree.
      #if $cond_flavour.flavourType == 'custom'
        #if $cond_flavour.dist_flavour.distance_method == '--fastapair'
          export FASTA_4_MAFFT=`which fasta36`;
        #elif $cond_flavour.dist_flavour.distance_method == '--6merpair'
          #if $cond_flavour.dist_flavour.usetree.parttree == '--parttree'
            #if $cond_flavour.dist_flavour.usetree.treedistance == '--fastaparttree'
              export FASTA_4_MAFFT=`which fasta36`;
            #end if
          #end if
        #end if
      #end if

      #if $cond_flavour.flavourType != 'custom'
        ## Predefined flavour: the select value itself is the executable (plus arguments) to run.
        $cond_flavour.flavourType
      #else
        ## Custom mode: full parameter options.
        mafft
        $cond_flavour.dist_flavour.distance_method
        #if $cond_flavour.dist_flavour.distance_method == '--6merpair'
          --retree $cond_flavour.dist_flavour.retree
          ## usetree is a nested conditional of dist_flavour; it is not an attribute of the
          ## distance_method select, so it has to be addressed as dist_flavour.usetree.
          $cond_flavour.dist_flavour.usetree.parttree

          #if $cond_flavour.dist_flavour.usetree.parttree == '--parttree'
            $cond_flavour.dist_flavour.usetree.treedistance
            --partsize $cond_flavour.dist_flavour.usetree.partsize
            ## groupsize may be left empty on the form; the flag is then omitted entirely
            ## instead of emitting a dangling option without a value.
            #if str($cond_flavour.dist_flavour.usetree.groupsize).strip() not in ('', 'None')
              --groupsize $cond_flavour.dist_flavour.usetree.groupsize
            #end if
          #end if

        #elif $cond_flavour.dist_flavour.distance_method == '--globalpair'
          --weighti $cond_flavour.dist_flavour.weighti
        #elif $cond_flavour.dist_flavour.distance_method == '--localpair'
          --weighti $cond_flavour.dist_flavour.weighti
          --lop $cond_flavour.dist_flavour.lop
          --lep $cond_flavour.dist_flavour.lep
          --lexp $cond_flavour.dist_flavour.lexp
        #elif $cond_flavour.dist_flavour.distance_method == '--genafpair'
          --weighti $cond_flavour.dist_flavour.weighti
          --lop $cond_flavour.dist_flavour.lop
          --lep $cond_flavour.dist_flavour.lep
          --lexp $cond_flavour.dist_flavour.lexp
          --LOP $cond_flavour.dist_flavour.skipLOP
          ## The skip-alignment gap extension penalty is --LEXP; no extra positional value follows.
          --LEXP $cond_flavour.dist_flavour.skipEXP
        #elif $cond_flavour.dist_flavour.distance_method == '--fastapair'
          --weighti $cond_flavour.dist_flavour.weighti
        #end if
        --maxiterate $cond_flavour.iterations
        $cond_flavour.fft
        $cond_flavour.score
      #end if

      ## specify threads to use
      --thread \${GALAXY_SLOTS:-1}
      $datatype
      --ep $ep
      --op $op

      #if $matrix_condition.matrix == "BLOSUM"
        --bl $matrix_condition.BLOSUM
      #elif $matrix_condition.matrix == "PAM"
        --jtt $matrix_condition.PAM
        --tm $matrix_condition.tm
      #elif $matrix_condition.matrix == "custom"
        --aamatrix  '$matrix_condition.matrixfile'
        ## The boolean already expands to --fmodel (checked) or to nothing (unchecked),
        ## so the flag must not be written out a second time here.
        $matrix_condition.fmodel
      #end if

      $reorder
      $getTree
      $outputFormat
      '$inputSequences' > '$outputAlignment';

      #if $getTree == "--treeout"
        ## The guide tree is expected next to the input as <input>.tree; move it to the output dataset.
        mv '${inputSequences}.tree' '$outputTree';
      #end if
    ]]>
  </command>
  <inputs>
    <!-- FASTA dataset to align; the command section passes it as the positional input. -->
    <param name="inputSequences" type="data" format="fasta" label="Sequences to align" help="Amino acid or nucleotide sequences in FASTA format."/>
    <!-- The selected option value is inserted verbatim into the command line;
         the empty value ("Auto detection") adds no flag. -->
    <param name="datatype" type="select" label="Data type">
      <option value="">Auto detection</option>
      <option value="--nuc">Nucleic acids</option>
      <option value="--amino">Amino acids</option>
    </param>
    <!-- Flavour selection. For every predefined flavour the option value is the executable
         (plus arguments) that the command section runs as-is; "custom" exposes the individual
         MAFFT options below and runs plain mafft. -->
    <conditional name="cond_flavour">
      <param name="flavourType" type="select" label="MAFFT flavour" help="Run mafft with pre-defined input parameters. Specification of these parameters can be found in the help section.">
        <option value="mafft --auto">auto</option>
        <option value="mafft-fftns" selected="true">fftns</option>
        <option value="mafft-fftnsi">fftnsi</option>
        <option value="mafft-nwns">nwns</option>
        <option value="mafft-nwnsi">nwnsi</option>
        <option value="mafft-einsi">einsi</option>
        <option value="mafft-ginsi">ginsi</option>
        <option value="mafft-linsi">linsi</option>
        <option value="mafft-qinsi">qinsi</option>
        <option value="mafft-xinsi">xinsi</option>
        <option value="custom">Custom Parameters</option>
      </param>
      <!-- Predefined flavours take no additional parameters. -->
      <when value="mafft-fftns"/>
      <when value="mafft --auto"/>
      <when value="mafft-fftnsi"/>
      <when value="mafft-nwns"/>
      <when value="mafft-nwnsi"/>
      <when value="mafft-einsi"/>
      <when value="mafft-ginsi"/>
      <when value="mafft-linsi"/>
      <when value="mafft-qinsi"/>
      <when value="mafft-xinsi"/>
      <when value="custom">
        <!-- Pairwise distance method; each branch carries the options valid for that method.
             Option values are the literal MAFFT flags. -->
        <conditional name="dist_flavour">
          <param name="distance_method" type="select" display="radio" label="Distance method" help="Distance method must be chosen regarding your data">
            <option value="--6merpair" selected="true">Shared 6mers distance (fastest)</option>
            <option value="--globalpair">Global alignment (Needleman-Wunsch)</option>
            <option value="--localpair">Local alignment (Smith-Waterman)</option>
            <option value="--genafpair">Local, affine gap cost</option>
            <option value="--fastapair">All pairwise alignments are computed with FASTA</option>
          </param>
          <when value="--6merpair">
            <param name="retree" type="integer" value="2" min="1" max="100" label="Guide tree is built this number of times in the progressive stage." help="Valid with 6mer distance" />
            <!-- PartTree options; addressed from the command template as
                 cond_flavour.dist_flavour.usetree.<name>. -->
            <conditional name="usetree">
              <param name="parttree" type="select" label="Use a fast tree-building method?" help="Recommended when a large number (> ~10,000) of sequences is input" >
                <option value="--parttree" selected="true">Yes</option>
                <option value="">No</option>
              </param>
              <when value="--parttree">
                <param name="treedistance" type="select" display="radio" label="Distance method" help="Distance method must be chosen regarding your data">
                  <option value="--fastaparttree" selected="true">Distances based on FASTA</option>
                  <option value="--dpparttree">Distances based on DP. (Needleman-Wunsch)</option>
                </param>
                <param name="partsize" type="integer" value="50" min="0" max="1000" label="Number of partitions in the PartTree algorithm" />
                <!-- An integer parameter without a default value has to be optional;
                     when left empty no group size limit is meant to be passed to MAFFT. -->
                <param name="groupsize" type="integer" optional="true" value="" min="0" max="1000" label="Do not make alignment larger than ... sequences" help="Leave empty to keep the MAFFT default." />
              </when>
              <when value=""/>
            </conditional>
          </when>
          <when value="--globalpair">
            <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." />
          </when>
          <when value="--localpair">
            <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." />
            <param name="lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value" />
            <param name="lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value" />
            <param name="lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" />
          </when>
          <when value="--genafpair">
            <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." />
            <param name="lop" type="float" value="-2.0" label="Gap opening penalty at local pairwise alignment" help="-2.00 default value" />
            <param name="lep" type="float" value="0.1" label="Offset value at local pairwise alignment" help="0.1 default value" />
            <param name="lexp" type="float" value="-0.1" label="Gap extension penalty at local pairwise alignment." help="-0.1 default value" />
            <param name="skipLOP" type="float" value="-6.00" label="Gap opening penalty to skip the alignment" help="-6.00 default value" />
            <param name="skipEXP" type="float" value="0.00" label="Gap extension penalty to skip the alignment" help="0 default value" />
          </when>
          <when value="--fastapair">
            <param name="weighti" type="float" value="2.7" min="1" max="100" label="Weighting factor for the consistency term calculated from pairwise alignments." help="Valid when either of --globalpair, --localpair, --genafpair, --fastapair or --blastpair is selected." />
          </when>
        </conditional>
        <!-- Options common to every custom distance method. -->
        <param name="iterations" type="integer" value="0" min="0" max="1000" label="Maximum number of iterations" help="1000 for maximum quality" />
        <param name="fft" type="boolean" truevalue="--fft" falsevalue="--nofft" checked="True" label="Use FFT approximation in group-to-group alignment?" />
        <!-- Inverted switch: ticking the box emits nothing, unticking it emits the noscore flag. -->
        <param name="score" type="boolean" truevalue="" falsevalue="--noscore" checked="True" label="Check alignment score in the iterative refinement stage?" />
      </when>
    </conditional>
    <!-- Gap penalties for group-to-group alignment; always passed to the command as ep and op. -->
    <param name="ep" type="float" value="0.0" label="Gap extend penalty for group-to-group alignment" help="Offset value, which works like gap extension penalty, for group-to-group alignment. For E-INS-i, 0 is recommended to allow large gaps" />
    <param name="op" type="float" value="1.53" label="Gap opening penalty at group-to-group alignment." help="1.53 default value" />
    <!-- Scoring matrix selection; each branch supplies the values the command section turns
         into the corresponding matrix options. -->
    <conditional name="matrix_condition">
      <param name="matrix" type="select" label="Matrix selection" display="radio" help="Useful only for amino acids" >
        <option value="">No matrix</option>
        <option value="BLOSUM" selected="true">BLOSUM</option>
        <option value="PAM">PAM</option>
        <option value="custom">Custom</option>
      </param>
      <when value=""/>
      <when value="BLOSUM">
        <param name="BLOSUM" type="select" display="radio" label="Coefficient of the BLOSUM matrix">
          <option value="30">30</option>
          <option value="45">45</option>
          <option value="62" selected="true">62</option>
          <option value="80">80</option>
        </param>
      </when>
      <!-- NOTE(review): the command section passes both values (as jtt and tm) in the same run;
           confirm against the MAFFT manual whether these two matrices can be combined or
           whether only the last one takes effect. -->
      <when value="PAM">
        <param name="PAM" type="integer" value="80" min="1" max="350" label="Coefficient of the JTT PAM matrix" />
        <param name="tm" type="integer" value="80" min="1" max="350" label="Coefficient of the  transmembrane PAM matrix" />
      </when>
      <when value="custom">
        <param name="matrixfile" type="data" format="txt" label="User-defined AA scoring matrix" help="The format of matrixfile is the same to that of BLAST. Ignored when nucleotide sequences are input."/>
        <!-- The true value is the complete flag, so the command template must insert this
             parameter on its own, without writing the flag name in front of it. -->
        <param name="fmodel" type="boolean" truevalue="--fmodel" falsevalue="" checked="False" label="Incorporate the AA/nuc composition information into the scoring matrix?" />
      </when>
    </conditional>
    <!-- Output switches; each value is the literal flag inserted into the command line. -->
    <param name="reorder" type="boolean" truevalue="--reorder" falsevalue="" checked="False" label="Reorder output?" />
    <!-- When ticked, the guide tree is additionally collected as the outputTree dataset. -->
    <param name="getTree" type="boolean" truevalue="--treeout" falsevalue="" checked="False" label="Display alignment tree ?" />
    <!-- The outputs section switches the alignment datatype based on this value. -->
    <param name="outputFormat" type="select" label="Output format" help="FASTA, ClustalW or Phylip">
      <option value="" selected="true">FASTA</option>
      <option value="--clustalout">ClustalW</option>
      <option value="--phylipout">Phylip</option>
    </param>
  </inputs>
  <outputs>
    <!-- Alignment captured from the command's standard output. It is FASTA unless the
         outputFormat selection switches the datatype to clustal or phylip. -->
    <data
        name="outputAlignment"
        format="fasta"
        label="${tool.name} on ${on_string}">
      <change_format>
        <when input="outputFormat" value="--clustalout" format="clustal"/>
        <when input="outputFormat" value="--phylipout" format="phylip"/>
      </change_format>
    </data>
    <!-- Guide tree; only produced when the getTree checkbox is ticked. -->
    <data
        name="outputTree"
        format="txt"
        label="${tool.name} Guide Tree">
      <filter>getTree == True</filter>
    </data>
  </outputs>
  <tests>
    <!-- Predefined mafft-fftns flavour with FASTA output, compared against a stored alignment. -->
    <test expect_num_outputs="1" >
      <param name="inputSequences" value="sample.fa"/>
      <param name="flavourType" value="mafft-fftns"/>
      <param name="outputFormat" value=""/>
      <output name="outputAlignment" ftype="fasta" file="mafft_fftns_result.aln"/>
    </test>
    <!-- Predefined mafft-nwns flavour with Clustal output; lines_diff="2" tolerates two differing lines. -->
    <test expect_num_outputs="1" >
      <param name="inputSequences" value="sample.fa"/>
      <param name="flavourType" value="mafft-nwns"/>
      <param name="outputFormat" value="--clustalout"/>
      <output name="outputAlignment" ftype="clustal" file="mafft_nwns_result.aln" lines_diff="2" />
    </test>
    <!-- WARNING: the results of the following test depends on #threads.
    The result seems deterministic for single threaded execution, i.e. GALAXY_SLOTS=1 planemo test
    However, GH CI/CD uses 2 threads and results vary -->
    <!-- Custom mode with the fastapair distance method, BLOSUM 62 and 1000 iterations. Because
         of the thread dependence noted above, the output is compared by size (sim_size) and
         checked with content assertions rather than an exact file match. -->
    <test expect_num_outputs="1" >
      <param name="inputSequences" value="sample.fa"/>
      <param name="flavourType" value="custom"/>
      <conditional name="matrix_condition">
        <param name="matrix" value="BLOSUM"/>
      </conditional>
      <param name="BLOSUM" value="62"/>
      <param name="distance_method" value="--fastapair"/>
      <param name="weighti" value="2.7"/>
      <param name="iterations" value="1000"/>
      <param name="outputFormat" value="--clustalout"/>
      <output name="outputAlignment" ftype="clustal" file="mafft_custom_result.aln" compare="sim_size">
        <assert_contents>
          <has_n_lines n="458" delta="0"/>
          <has_text text="CLUSTAL format alignment by MAFFT F-INS-i"/>
          <has_text text="NPIVYGISHPKY"/>
          <has_text text="1=="/>
          <has_text text="36=="/>
          <has_line line="8=opsin,        ------------------------------------------------------------"/>
        </assert_contents>
      </output>
    </test>
  </tests>
  <help>    <![CDATA[
      **What it does**

      MAFFT is a multiple sequence alignment program for unix-like operating systems.
      It offers a range of multiple alignment methods, L-INS-i (accurate; for alignment of <∼200 sequences),
      FFT-NS-2 (fast; for alignment of <∼30,000 sequences), etc.
      From the MAFFT man page, an overview of the different predefined flavours of the tool is as follows:

      **Accuracy-oriented methods:**

      - L-INS-i (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information):
      - mafft --localpair --maxiterate 1000 input [> output]
      - G-INS-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information):
      - mafft --globalpair --maxiterate 1000 input [> output]
      - E-INS-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences):
      - mafft --ep 0 --genafpair --maxiterate 1000 input [> output]. For E-INS-i, the --ep 0 option is recommended to allow large gaps.

      **Speed-oriented methods:**

      - FFT-NS-i (iterative refinement method; two cycles only):
      - mafft --retree 2 --maxiterate 2 input [> output]
      - FFT-NS-i (iterative refinement method; max. 1000 iterations):
      - mafft --retree 2 --maxiterate 1000 input [> output]
      - FFT-NS-2 (fast; progressive method):
      - mafft --retree 2 --maxiterate 0 input [> output]
      - FFT-NS-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree):
      - mafft --retree 1 --maxiterate 0 input [> output]
      - NW-NS-i (iterative refinement method without FFT approximation; two cycles only):
      - mafft --retree 2 --maxiterate 2 --nofft input [> output]
      - NW-NS-2 (fast; progressive method without the FFT approximation):
      - mafft --retree 2 --maxiterate 0 --nofft input [> output]
      - NW-NS-PartTree-1 (recommended for ~10,000 to ~50,000 sequences; progressive method with the PartTree algorithm):
      - mafft --retree 1 --maxiterate 0 --nofft --parttree input [> output]

      **Options:**

      - --auto Automatically selects an appropriate strategy from L-INS-i, FFT-NS-i and FFT-NS-2, according to data size. Default: off (always FFT-NS-2)
      - --adjustdirection Generate reverse complement sequences, as necessary, and align them together with the remaining sequences. In the case of protein alignment, this option is simply ignored.
      - --op Gap opening penalty, default: 1.53
      - --ep Offset (works like gap extension penalty), default: 0.0
      - --maxiterate Maximum number of iterative refinement cycles, default: 0
      - --clustalout Output: clustal format, default: fasta
      - --retree number Guide tree is built number times in the progressive stage. Valid with 6mer distance. Default: 2
      ]]>
    </help>
  <expand macro="citations" />
</tool>