view hyphy_gard.xml @ 33:659517798cc4 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 821e0f3e4546bc55e093ab63bd9f9b57f2bf772d
author iuc
date Thu, 18 Aug 2022 13:55:30 +0000
parents 26834f8534f9
children 5b777002c2de
line wrap: on
line source

<?xml version="1.0"?>
<tool id="hyphy_gard" name="HyPhy-GARD" version="@TOOL_VERSION@+galaxy1" profile="19.09">
    <description>Genetic Algorithm for Recombination Detection</description>
    <!-- Shared definitions are pulled in from macros.xml. Nothing in this file defines the
         @TOOL_VERSION@, @HYPHYMPI@ or @ERRORS@ tokens, nor the bio_tools, requirements,
         substitution, gencode or citations macros, so they are presumably all declared
         there (confirm against macros.xml). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Symlink the dataset under a name that carries its datatype extension,
        ## and hand that link (not the raw dataset path) to HyPhy.
        #set $alignment_link = 'input.%s' % $input_file.extension
        #set $alignment_kind = str($datatype.value)
        #set $rate_model = str($rate_cond.rate)
        ln -s '$input_file' $alignment_link &&
        @HYPHYMPI@ gard
            --alignment ./$alignment_link
            --type '$alignment_kind'
            ## Only codon and amino-acid alignments take an extra model option.
            #if $alignment_kind == 'codon'
                --code '$datatype.gencodeid'
            #elif $alignment_kind == 'amino-acid'
                --model '$datatype.model'
            #end if
            ## An empty selection means "None": no rate-variation options are passed at all.
            #if $rate_model != ''
                --rv '$rate_model'
                --rate-classes '$rate_cond.rate_classes'
            #end if
            --output '$translated'
            --output-lf '$gard_output'
        @ERRORS@
    ]]></command>
    <inputs>
        <!-- Alignment to screen. The command section reuses this dataset's extension when naming its symlink. -->
        <param name="input_file" type="data" format="fasta,fasta.gz,nex" label="Input FASTA or NEXUS file"
               help="Sequence alignment to screen for evidence of recombination." />
        <!-- Alignment kind selects which extra model option (if any) is exposed and passed on. -->
        <conditional name="datatype">
            <param argument="--type" name="value" type="select" label="Alignment kind"
                   help="Nucleotide is the fastest option. Codon assumes an in-frame coding sequence alignment and dramatically increases run times.">
                <option value="nucleotide" selected="true">Nucleotide</option>
                <option value="amino-acid">Amino acid</option>
                <option value="codon">Codon</option>
            </param>
            <when value="nucleotide"/>
            <when value="amino-acid">
                <expand macro="substitution" />
            </when>
            <when value="codon">
                <expand macro="gencode" />
            </when>
        </conditional>
        <!-- Site-to-site rate variation. The empty value stands for "None" and adds nothing to the command line. -->
        <conditional name="rate_cond">
            <param argument="--rv" name="rate" type="select" label="Rate variation"
                   help="None is the fastest option, but can result in false positives if there is significant site-to-site rate variation.">
                <option value="" selected="true">None</option>
                <option value="GDD">General Discrete</option>
                <option value="Gamma">Beta-Gamma</option>
            </param>
            <when value=""/>
            <!-- Both distributions take the same rate-classes parameter; it is repeated because each "when" needs its own copy. -->
            <when value="GDD">
                <param argument="--rate-classes" type="integer" value="2" min="2" max="6" label="Rate classes"
                       help="How many site rate classes to use for the general discrete distribution." />
            </when>
            <when value="Gamma">
                <param argument="--rate-classes" type="integer" value="2" min="2" max="6" label="Rate classes"
                       help="How many site rate classes to use for the Beta-Gamma distribution." />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Explicit labels keep the two datasets distinguishable in the history.
             Output names and formats are unchanged, so the command template and tests still resolve them. -->
        <data name="gard_output" format="nex" label="${tool.name} on ${on_string}: NEXUS output" />
        <data name="translated" format="hyphy_results.json" label="${tool.name} on ${on_string}: JSON results" />
    </outputs>
    <tests>
        <!-- Single test: only the input dataset is set, so every other parameter keeps its form default. -->
        <!-- Both outputs are checked with compare="sim_size" against the reference files, not by exact content. -->
        <test>
            <param name="input_file" ftype="fasta" value="gard-in1.fa"/>
            <output name="gard_output" file="gard-out1.nex" compare="sim_size"/>
            <output name="translated" file="gard-out1.json" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[

GARD : Genetic Algorithms for Recombination Detection.
======================================================

What does this do?
------------------

This tool screens an alignment of sequences for evidence of recombination in one or more sequences.
The main idea is that if sufficient recombination has occurred, then no single phylogenetic tree will
properly fit the entire length of the alignment and instead a separate tree will be preferred for each *nonrecombinant* segment.

Brief description
-----------------

This analysis implements a heuristic approach to screening alignments of sequences for
recombination, by using the CHC genetic algorithm (GA) to search for
phylogenetic incongruence among different partitions of the data. The
number of partitions is determined using a step-up procedure, while the
placement of breakpoints is searched for with the GA. The best fitting
model (based on c-AIC) is returned, and additional post-hoc tests are run to
distinguish topological incongruence from rate variation.

For each identified breakpoint, the support for its placement is calculated, and for each
non-recombinant fragment, a phylogenetic tree is inferred (using neighbor joining) and returned.

Input
-----

A sequence alignment in *FASTA* (optionally gzip-compressed) or *NEXUS* format.

Output
------

A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf).

A NEXUS file written through HyPhy's ``--output-lf`` option.

A custom visualization module for viewing these results is available (see http://vision.hyphy.org/GARD for an example)


Tool options
------------
::


    --type              type of alignment to screen
                        Nucleotide [default].
                            Assumes aligned nucleotide data and screens the alignment using
                            the general time reversible model of sequence evolution.
                            This is the fastest option
                        Amino acid
                            Assumes aligned aminoacid sequences. One of several protein
                            substitution models may be used to screen the alignment.
                        Codon
                            Assumes an in-frame coding sequence alignment.
                            The Muse-Gaut 94 (GTR) model will be used to screen the alignment.
                            Selecting this option will dramatically increase run times.


    --code              Genetic code/translation table to use (for codon alignments).
                        Default value: Universal

    --model             The substitution model to use (for protein alignments).
                        default value: JTT

    --rv                The discrete distribution to use for modeling site to site rate variation.

                        None [default]
                            No rate variation. This is the fastest option in terms of run time, but
                            using it can result in false positives if there is significant site-to-site
                            rate variation
                        GDD
                            Use the general discrete distribution on N bins
                        Beta-Gamma
                            Use a discretized gamma with weights partitioned by a discretized beta
                            (see doi.org/10.1093/molbev/msi009)

    --rate-classes      How many site rate classes to use (if GDD or Beta-Gamma are selected)
                        default value: 2 (allowed range: 2 to 6)


    ]]></help>
    <!-- Adds a tool-specific DOI (presumably the GARD method paper; verify) as a child of
         the shared citations macro, whose own content is defined outside this file. -->
    <expand macro="citations">
        <citation type="doi">10.1093/molbev/msl051</citation>
    </expand>
</tool>