view metaeuk_easy_linclust.xml @ 0:b11620b9577a draft default tip

Uploaded
author dnbenso
date Tue, 23 Nov 2021 02:47:16 +0000
parents
children
line wrap: on
line source

<tool id="metaeuk_easy_linclust" name="MetaEuk Easy Linclust" version="@TOOL_VERSION@+galaxy0">
    <!-- Short summary of what the wrapped program does. -->
    <description>High-throughput gene discovery and annotation for large-scale eukaryotic metagenomics</description>
    <!-- Cross-reference to the bio.tools registry entry for the wrapped software. -->
    <xrefs>
        <xref type="bio.tools">MetaEuk</xref>
    </xrefs>
    <!-- @TOOL_VERSION@ is expanded both in the tool's version attribute and in
         the package requirement below, so the two always stay in sync. -->
    <macros>
        <token name="@TOOL_VERSION@">5.34c21f2</token>
    </macros>
    <!-- Software dependency needed to run the command section. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">metaeuk</requirement>
    </requirements>
    <command detect_errors="aggressive"><![CDATA[
        ## Cluster the input FASTA with "metaeuk easy-linclust".
        ## Positional arguments: input file, output prefix ("result"),
        ## temporary directory. The outputs section collects the
        ## result_* files written with that prefix.
        metaeuk easy-linclust
            '$contigs'
            result
            "./tmp"
            --cov-mode '${cov_mode}'
            -c '${abovematch}'
            --threads \${GALAXY_SLOTS:-2}
            #if $adv.adv_options == "yes":
                ## Optional numeric parameters are compared against the empty
                ## string instead of being tested for truthiness, so that an
                ## explicitly entered 0 (e.g. "-v 0" or "-e 0") is still
                ## passed to metaeuk rather than silently dropped.
                #if str($adv.alignment_mode) != '':
                    --alignment-mode '${adv.alignment_mode}'
                #end if
                #if str($adv.belowmatch) != '':
                    -e '${adv.belowmatch}'
                #end if
                #if str($adv.min_seq_id) != '':
                    --min-seq-id '${adv.min_seq_id}'
                #end if
                #if str($adv.min_aln_len) != '':
                    --min-aln-len '${adv.min_aln_len}'
                #end if
                #if str($adv.seq_id_mode) != '':
                    --seq-id-mode '${adv.seq_id_mode}'
                #end if
                #if str($adv.cluster_mode) != '':
                    --cluster-mode '${adv.cluster_mode}'
                #end if
                #if str($adv.kmer_per_seq) != '':
                    --kmer-per-seq '${adv.kmer_per_seq}'
                #end if
                ## Free-text parameter: an empty string simply means "not set".
                #if $adv.kmer_per_seq_scale:
                    --kmer-per-seq-scale '${adv.kmer_per_seq_scale}'
                #end if
                #if str($adv.verbosity) != '':
                    -v '${adv.verbosity}'
                #end if
            #end if
    ]]></command>
    <inputs>
        <!-- Required inputs: the FASTA file to cluster plus the two coverage settings. -->
        <param name="contigs" type="data" format="fasta" label="Contigs to cluster" />
        <param argument="--cov-mode" name="cov_mode" type="integer" min="0" max="5" value="0" label="Integer between 0 and 5 - see below for details" />
        <param argument="-c" name="abovematch" type="float" min="0" max="1" value="0.800" label="list matches above this fraction of aligned (covered) residues (see --cov-mode) [0.800]" />
        <!-- Advanced options are hidden unless the user selects "Yes". Every
             parameter inside is optional; a parameter left empty is not put on
             the command line, so metaeuk's own default applies. -->
        <conditional name="adv">
            <param type="select" name="adv_options" label="Show advanced options">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <!-- max is 4 so that every alignment mode listed in the help (0-4) can be selected. -->
                <param argument="--alignment-mode" name="alignment_mode" type="integer" optional="true" min="0" max="4" label="Integer between 0 and 4 - see below for details" />
                <param argument="-e" name="belowmatch" type="float" optional="true" min="0" label="List matches below this E-value (range 0.0-inf) [0.001]" />
                <param argument="--min-seq-id" name="min_seq_id" type="float" optional="true" min="0" max="1" label="List matches above this sequence identity (for clustering) (range 0.0-1.0) [0.000]" />
                <param argument="--min-aln-len" name="min_aln_len" type="integer" optional="true" min="0" label="Minimum alignment length (range 0-INT_MAX) [0]" />
                <param argument="--seq-id-mode" name="seq_id_mode" type="integer" optional="true" min="0" max="2" label="0: alignment length 1: shorter, 2: longer sequence [0]" />
                <param argument="--cluster-mode" name="cluster_mode" type="integer" optional="true" min="0" max="3" label="Integer between 0 and 3 - see below for details" />
                <param argument="--kmer-per-seq" name="kmer_per_seq" type="integer" optional="true" min="1" label="k-mers per sequence [21]" />
                <param argument="--kmer-per-seq-scale" name="kmer_per_seq_scale" type="text" optional="true" label="Scale k-mer per sequence based on sequence length as kmer-per-seq val + scale x seqlen" help="e.g. [nucl:0.200,aa:0.000]" />
                <param argument="-v" name="verbosity" type="integer" optional="true" min="0" max="3" label="Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]" />
            </when>
            <when value="no">
            </when>
        </conditional>
    </inputs>
    <!-- The command writes its results with the prefix "result" into the job
         working directory; each dataset below is collected from there. -->
    <outputs>
        <data format="fasta" label="Representatives" name="rep_seq" from_work_dir="result_rep_seq.fasta" />
        <data format="fasta" label="FASTA-like per cluster" name="all_seq" from_work_dir="result_all_seqs.fasta" />
        <data format="tabular" label="Adjacency list" name="cluster" from_work_dir="result_cluster.tsv" />
    </outputs>
    <tests>
        <!-- Single test: clusters DB.fasta with cov_mode=1 and abovematch=0.85,
             leaves the advanced options at their default ("no"), and compares
             all three outputs against the expected files. -->
        <test>
            <param name="contigs" ftype="fasta" value="DB.fasta" />
            <param name="cov_mode" value="1" />
            <param name="abovematch" value="0.85" />
            <output name="rep_seq" ftype="fasta" value="result_rep_seq.fasta" />
            <output name="all_seq" ftype="fasta" value="result_all_seqs.fasta" />
            <output name="cluster" ftype="tabular" value="result_cluster.tsv" />
        </test>
    </tests>
    <help><![CDATA[
**MetaEuk**

`MetaEuk`_ is a modular toolkit designed for large-scale gene discovery and annotation in eukaryotic metagenomic contigs. MetaEuk combines the fast and sensitive homology search capabilities of MMseqs2_ with a dynamic programming procedure to recover optimal exon sets. It reduces redundancies in multiple discoveries of the same gene and resolves conflicting gene predictions on the same strand.

MetaEuk's main workflow is the easy-predict command, which combines metaeuk modules into a pipeline for protein alignment prediction. Its input is the contigs you want to search for protein hits and the proteins you want to search against those contigs. Its output is FASTA format predicted ORFs, with exons annotated in the header according to the metaeuk header format_. Note that this Galaxy tool does not run easy-predict; it runs the easy-linclust command described in the next section.

----

**easy-linclust**

This tool implements the `easy-linclust`_ command from `MMseqs2`_, which clusters sequences in linear time. It is orders of magnitude faster, but a bit less sensitive, than the regular MMseqs2 clustering workflow.

----

**Running metaeuk easy-linclust**

metaeuk easy-linclust <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]> <o:clusterPrefix> <tmpDir> [options]

* **--alignment-mode INT** How to compute the alignment:
        | 0: automatic
        | 1: only score and end_pos
        | 2: also start_pos and cov
        | 3: also seq.id
        | 4: only ungapped alignment [0]

* **-e FLOAT** List matches below this E-value (range 0.0-inf) [0.001]

* **--min-seq-id FLOAT** List matches above this sequence identity (for clustering) (range 0.0-1.0) [0.000]

* **--min-aln-len INT** Minimum alignment length (range 0-INT_MAX) [0]

* **--seq-id-mode INT** 0: alignment length 1: shorter, 2: longer sequence [0]

* **-c FLOAT** List matches above this fraction of aligned (covered) residues (see --cov-mode) [0.800]

* **--cov-mode INT**
        | 0: coverage of query and target
        | 1: coverage of target
        | 2: coverage of query
        | 3: target seq. length has to be at least x% of query length
        | 4: query seq. length has to be at least x% of target length
        | 5: short seq. needs to be at least x% of the other seq. length [0]

* **--cluster-mode INT**
        | 0: Set-Cover (greedy)
        | 1: Connected component (BLASTclust)
        | 2,3: Greedy clustering by sequence length (CDHIT) [0]
        
* **--kmer-per-seq INT** k-mers per sequence [21]

* **--kmer-per-seq-scale TWIN** Scale k-mer per sequence based on sequence length as kmer-per-seq val + scale x seqlen [nucl:0.200,aa:0.000]

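* **--threads INT** Number of CPU-cores used (all by default) [20]. In Galaxy this value is set automatically from the slots allocated to the job (``GALAXY_SLOTS``, falling back to 2) and cannot be changed in the tool form.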

* **--compressed INT** Write compressed output [0]

* **-v INT** Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]


.. _MetaEuk: https://github.com/soedinglab/metaeuk

.. _MMseqs2: https://github.com/soedinglab/MMseqs2

.. _easy-linclust: https://github.com/soedinglab/mmseqs2/wiki#linclust

.. _format: https://github.com/soedinglab/metaeuk#the-metaeuk-header

    ]]></help>
    <!-- Publication to cite when using this tool (presumably the MetaEuk paper; verify the DOI). -->
    <citations>
        <citation type="doi">10.1186/s40168-020-00808-x</citation>
    </citations>
</tool>