view metaeuk_easy_predict.xml @ 0:5f6ebc3103ee draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaeuk commit a1b87b5a34d0f3971f0110a89f5e87a5937dc8d2"
author iuc
date Tue, 04 Aug 2020 15:43:31 -0400
parents
children f91548912113
line wrap: on
line source

<tool id="metaeuk_easy_predict" name="MetaEuk Easy Predict" version="@TOOL_VERSION@+galaxy0">
    <!-- One-line summary displayed next to the tool name. -->
    <description>High-throughput gene discovery and annotation for large-scale eukaryotic metagenomics</description>
    <macros>
        <!-- Version token shared by the tool's version attribute and the package requirement below. -->
        <token name="@TOOL_VERSION@">2.ddf2742</token>
    </macros>
    <requirements>
        <!-- The metaeuk package is pinned to the same version token. -->
        <requirement version="@TOOL_VERSION@" type="package">metaeuk</requirement>
    </requirements>
    <!--
        Command template (Cheetah + shell).
        Creates ./tmp, then runs "metaeuk easy-predict" with four positional
        arguments: the contigs dataset, the query protein dataset, the file name
        output.fasta, and ./tmp/metaeuk_working (presumably the tool's temporary
        working directory; confirm against the MetaEuk documentation).
        The thread count is taken from GALAXY_SLOTS and falls back to 1.
        The two intron-size options are appended only when the "adv"
        conditional is set to "yes".
        If the run succeeds, output.fasta is moved onto the Galaxy output dataset.
    -->
    <command detect_errors="aggressive"><![CDATA[
        mkdir -p ./tmp &&
        metaeuk easy-predict 
            '$contigs'
            '$query'
            output.fasta
            "./tmp/metaeuk_working"
            --threads \${GALAXY_SLOTS:-1}
            --min-length '${min_length}'
            -e '${segment_eval}'
            --metaeuk-eval '${metaeuk_eval}'
            --metaeuk-tcov '${metaeuk_tcov}'
        #if $adv.adv_options == "yes"
            --max-intron '${adv.max_intron}'
            --min-intron '${adv.min_intron}'
        #end if
        && mv output.fasta '$output'
    ]]></command>
    <inputs>
        <!-- Positional inputs: the contigs are searched for hits of the query proteins. -->
        <param name="contigs" type="data" format="fasta" label="Contigs to search for protein hits" />
        <param name="query" type="data" format="fasta" label="Proteins to search against contigs" />
        <!--
            Numeric thresholds. The min/max bounds make the form reject values that
            cannot be valid (negative e-values or lengths, coverage ratio outside 0..1)
            instead of letting the job fail at run time. Names are set explicitly
            because the command template refers to them.
        -->
        <param argument="-e" name="segment_eval" type="float" min="0" value="100" label="Maximum e-value of individual match segment" />
        <param argument="--min-length" name="min_length" type="integer" min="0" value="15" label="Minimum number of codons in predicted open reading frame" />
        <param argument="--metaeuk-eval" name="metaeuk_eval" type="float" min="0" value="0.001" label="Maximum e-value of combined exon set" />
        <param argument="--metaeuk-tcov" name="metaeuk_tcov" type="float" min="0" max="1" value="0.5" label="Minimum length ratio of combined exon set to target" />
        <!-- Intron-size limits are only passed to the command when this is set to "yes". -->
        <conditional name="adv">
            <param name="adv_options" type="select" label="Show advanced options">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <param argument="--max-intron" name="max_intron" type="integer" min="0" value="10000" label="Maximum intron size" />
                <param argument="--min-intron" name="min_intron" type="integer" min="0" value="15" label="Minimum intron size" />
            </when>
            <when value="no" />
        </conditional>
    </inputs>
    <outputs>
        <!-- Single FASTA dataset; the command moves output.fasta onto it after a successful run. -->
        <data format="fasta" name="output" />
    </outputs>

    <tests>
        <!-- Default run: the advanced section stays closed, so neither intron flag may appear on the command line. -->
        <test expect_num_outputs="1">
            <param name="contigs" ftype="fasta" value="contigs.fna" />
            <param name="query" ftype="fasta" value="proteins.faa" />
            <assert_command>
                <not_has_text text="--max-intron" />
                <not_has_text text="--min-intron" />
            </assert_command>
            <output name="output" ftype="fasta" value="output.fasta" />
        </test>
        <!-- Advanced run: both intron flags must be rendered with the submitted values (min_intron is left at its default of 15). -->
        <test expect_num_outputs="1">
            <param name="contigs" ftype="fasta" value="contigs.fna" />
            <param name="query" ftype="fasta" value="proteins.faa" />
            <conditional name="adv">
                <param name="adv_options" value="yes" />
                <param name="max_intron" value="1000" />
                <param name="min_intron" value="15" />
            </conditional>
            <assert_command>
                <has_text text="--max-intron '1000'" />
                <has_text text="--min-intron '15'" />
            </assert_command>
            <output name="output" ftype="fasta" value="output.fasta" />
        </test>
    </tests>
    <help><![CDATA[

    MetaEuk_ is a modular toolkit designed for large-scale gene discovery and
    annotation in eukaryotic metagenomic contigs. MetaEuk combines the fast and
    sensitive homology search capabilities of MMseqs2_ with a dynamic programming
    procedure to recover optimal exon sets. It reduces redundancies in multiple
    discoveries of the same gene and resolves conflicting gene predictions on
    the same strand.

    This tool implements the easy-predict command from MetaEuk, which combines
    MetaEuk modules into a pipeline for protein alignment prediction. The inputs
    are the contigs you want to search for protein hits and the proteins you want
    to search against those contigs. The output is the predicted ORFs in FASTA
    format, with exons annotated in the header according to the MetaEuk header format_.

    .. _MetaEuk: https://github.com/soedinglab/metaeuk
    .. _MMseqs2: https://github.com/soedinglab/MMseqs2
    .. _format: https://github.com/soedinglab/metaeuk#the-metaeuk-header
    ]]></help>
    <citations>
        <!-- NOTE(review): presumably the DOI of the MetaEuk publication; confirm it resolves to the intended paper. -->
        <citation type="doi">10.1186/s40168-020-00808-x</citation>
    </citations>
</tool>