Mercurial > repos > iuc > dram_annotate

<tool id="dram_annotate" name="DRAM annotate" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
    <description>metagenome-assembled-genomes (MAGs)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command detect_errors="exit_code"><![CDATA[
#import os

## DRAM names the genbank output based on the input_fasta file name
#set input_fasta_file_name = $os.path.splitext($os.path.basename(str($input_fasta)))[0]
#set output_gbk_file_name = $input_fasta_file_name + '.gbk'

## Expose every custom HMM dataset in the working directory under a predictable
## <index>.hmm name; the same names are passed to --custom_hmm_loc further down.
#if str($custom_hmm_dbs_cond.custom_dbs_hmm) == 'yes':
    #for count, additional_db in enumerate($custom_hmm_dbs_cond.custom_hmm_dbs):
        ln -s '$additional_db.custom_hmm_loc' ${count}.hmm &&
    #end for
#end if


## Run the annotation; everything DRAM writes lands in ./output_dir.
DRAM.py annotate
--input_fasta '$input_fasta'
--output_dir 'output_dir'
--min_contig_size $min_contig_size
--prodigal_mode '$prodigal_mode'
--trans_table $trans_table
--bit_score_threshold $bit_score_threshold
--rbh_bit_score_threshold $rbh_bit_score_threshold
$kofam_use_dbcan2_thresholds
#if str($custom_fasta_dbs_cond.custom_dbs_fasta) == 'yes':
    #for additional_db in $custom_fasta_dbs_cond.custom_fasta_dbs:
        --custom_db_name '$additional_db.custom_db_name'
        --custom_fasta_loc '$additional_db.custom_fasta_loc'
    #end for
#end if
#if str($custom_hmm_dbs_cond.custom_dbs_hmm) == 'yes':
    #for count, additional_db in enumerate($custom_hmm_dbs_cond.custom_hmm_dbs):
        --custom_hmm_name '$additional_db.custom_hmm_name'
        --custom_hmm_loc ${count}.hmm
        #if $additional_db.custom_hmm_cutoffs_loc:
            --custom_hmm_cutoffs_loc '$additional_db.custom_hmm_cutoffs_loc'
        #end if
    #end for
#end if
$use_uniref
$use_vogdb
$low_mem_mode
$skip_trnascan
--threads \${GALAXY_SLOTS:-10}
## Move each expected DRAM result onto its Galaxy output dataset.
## A missing file is only reported on stdout and does not fail the job.
## An explicit if/else is used (rather than "test && mv || echo") so that a
## failing mv returns a non-zero exit status instead of being reported as a
## missing output.
&& (if test -f 'output_dir/genes.faa'; then mv 'output_dir/genes.faa' '$output_genes_faa'; else echo 'No genes.faa output produced'; fi)
&& (if test -f 'output_dir/genes.fna'; then mv 'output_dir/genes.fna' '$output_genes_fna'; else echo 'No genes.fna output produced'; fi)
&& (if test -f 'output_dir/genes.gff'; then mv 'output_dir/genes.gff' '$output_genes_gff'; else echo 'No genes.gff output produced'; fi)
&& (if test -f 'output_dir/scaffolds.fna'; then mv 'output_dir/scaffolds.fna' '$output_scaffolds_fna'; else echo 'No scaffolds.fna output produced'; fi)
&& (if test -f 'output_dir/rrnas.tsv'; then mv 'output_dir/rrnas.tsv' '$output_rrnas'; else echo 'No rrnas.tsv output produced'; fi)
&& (if test -f 'output_dir/trnas.tsv'; then mv 'output_dir/trnas.tsv' '$output_trnas'; else echo 'No trnas.tsv output produced'; fi)
&& (if test -f 'output_dir/genbank/$output_gbk_file_name'; then mv 'output_dir/genbank/$output_gbk_file_name' '$output_genbank'; else echo 'No genbank output produced'; fi)
&& (if test -f 'output_dir/annotations.tsv'; then mv 'output_dir/annotations.tsv' '$output_annotations'; else echo 'No annotations.tsv output produced'; fi)
    ]]></command>
    <inputs>
        <!-- Sequences to annotate and the gene-calling / hit-filtering settings handed to DRAM.py annotate. -->
        <param argument="--input_fasta" type="data" format="fasta,fasta.gz" label="FASTA file"/>
        <param argument="--min_contig_size" type="integer" min="0" value="2500" label="Minimum contig size to be used for gene prediction"/>
        <param argument="--prodigal_mode" type="select" label="Select mode of prodigal to use for gene calling" help="Single mode requires genomes which are high quality with low contamination and long contigs (average length &gt;3 Kbp)">
            <option value="single" selected="true">single</option>
            <option value="meta">meta</option>
            <option value="train">train</option>
        </param>
        <param argument="--trans_table" type="integer" min="1" max="25" value="11" label="Translation table for prodigal to use for gene calling"/>
        <param argument="--bit_score_threshold" type="integer" min="1" value="60" label="Minimum bitScore of search to retain hits"/>
        <param argument="--rbh_bit_score_threshold" type="integer" min="1" value="350" label="Minimum bitScore of reverse best hits to retain hits"/>
        <param argument="--kofam_use_dbcan2_thresholds" type="boolean" truevalue="--kofam_use_dbcan2_thresholds" falsevalue="" checked="false" label="Use dbcan2 suggested HMM cutoffs for KOfam annotation instead of KOfam recommended cutoffs?"/>
        <!-- Optional user-supplied FASTA databases; each repeat entry contributes one name/file pair to the command line. -->
        <conditional name="custom_fasta_dbs_cond">
            <param name="custom_dbs_fasta" type="select" label="Specify additional fasta files against which to annotate?">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <repeat name="custom_fasta_dbs" title="FASTA file" min="1">
                    <param argument="--custom_db_name" type="text" value="CF1" label="Custom FASTA database name">
                        <validator type="empty_field"/>
                        <expand macro="sanitizer"/>
                    </param>
                    <param argument="--custom_fasta_loc" type="data" format="fasta,fasta.gz" label="Custom database FASTA file"/>
                </repeat>
            </when>
        </conditional>
        <!-- Optional user-supplied HMM databases; the cutoffs file is optional for each entry. -->
        <conditional name="custom_hmm_dbs_cond">
            <param name="custom_dbs_hmm" type="select" label="Specify additional hmm files against which to annotate?">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <repeat name="custom_hmm_dbs" title="HMM file" min="1">
                    <param argument="--custom_hmm_name" type="text" value="CH1" label="Custom HMM database name">
                        <validator type="empty_field"/>
                        <expand macro="sanitizer"/>
                    </param>
                    <param argument="--custom_hmm_loc" type="data" format="hmm2,hmm3" label="Custom database HMM file"/>
                    <param argument="--custom_hmm_cutoffs_loc" type="data" format="tabular,tsv" optional="true" label="Custom HMM cutoffs and descriptions file" help="Optional, leave blank to ignore"/>
                </repeat>
            </when>
        </conditional>
        <!-- Boolean switches: each one renders its flag verbatim when checked and an empty string otherwise. -->
        <param argument="--use_uniref" type="boolean" truevalue="--use_uniref" falsevalue="" checked="false" label="Annotate the input FASTA files against UniRef?" help="Increases run time and memory requirements"/>
        <!-- NOTE(review): the use_vogdb help text says "Decreases run time" although the option enables an additional database search; confirm against the DRAM documentation. -->
        <param argument="--use_vogdb" type="boolean" truevalue="--use_vogdb" falsevalue="" checked="false" label="Annotate the input FASTA files against VOGDB?" help="Decreases run time"/>
        <param argument="--low_mem_mode" type="boolean" truevalue="--low_mem_mode" falsevalue="" checked="false" label="Skip annotating with uniref and use kofam?" help="Decreases memory requirements"/>
        <param argument="--skip_trnascan" type="boolean" truevalue="--skip_trnascan" falsevalue="" checked="false" label="Skip tRNAscan-SE?"/>
    </inputs>
    <outputs>
        <!-- One dataset per file that the command section moves out of output_dir. -->
        <data name="output_genes_faa"
              format="fasta"
              label="${tool.name} on ${on_string}: genes.faa"/>
        <data name="output_genes_fna"
              format="fasta"
              label="${tool.name} on ${on_string}: genes.fna"/>
        <data name="output_genes_gff"
              format="gff3"
              label="${tool.name} on ${on_string}: genes.gff"/>
        <data name="output_scaffolds_fna"
              format="fasta"
              label="${tool.name} on ${on_string}: scaffolds"/>
        <data name="output_rrnas"
              format="tabular"
              label="${tool.name} on ${on_string}: rrnas"/>
        <data name="output_trnas"
              format="tabular"
              label="${tool.name} on ${on_string}: trnas"/>
        <data name="output_genbank"
              format="genbank"
              label="${tool.name} on ${on_string}: genbank"/>
        <data name="output_annotations"
              format="tabular"
              label="${tool.name} on ${on_string}: annotations"/>
    </outputs>
    <tests>
        <!-- Default databases only, gzipped input, prodigal in meta mode: every output is checked for a marker string. -->
        <test expect_num_outputs="8">
            <param name="input_fasta" value="input_annotate1.fasta.gz" ftype="fasta.gz"/>
            <param name="prodigal_mode" value="meta"/>
            <output name="output_genes_faa">
                <assert_contents>
                    <has_text text="rank"/>
                </assert_contents>
            </output>
            <output name="output_genes_fna">
                <assert_contents>
                    <has_text text="rank"/>
                </assert_contents>
            </output>
            <output name="output_genes_gff">
                <assert_contents>
                    <has_text text="gff-version 3"/>
                </assert_contents>
            </output>
            <output name="output_scaffolds_fna">
                <assert_contents>
                    <has_text text="scaffold"/>
                </assert_contents>
            </output>
            <output name="output_rrnas">
                <assert_contents>
                    <has_text text="scaffold"/>
                </assert_contents>
            </output>
            <output name="output_trnas">
                <assert_contents>
                    <has_text text="fasta"/>
                </assert_contents>
            </output>
            <output name="output_genbank">
                <assert_contents>
                    <has_text text="LOCUS"/>
                </assert_contents>
            </output>
            <output name="output_annotations">
                <assert_contents>
                    <has_text text="fasta"/>
                </assert_contents>
            </output>
        </test>
        <!-- One custom FASTA database plus one custom HMM database. All eight outputs are declared
             unconditionally, so the expected output count matches the first test. -->
        <test expect_num_outputs="8">
            <param name="input_fasta" value="input_annotate1.fasta.gz" ftype="fasta.gz"/>
            <conditional name="custom_fasta_dbs_cond">
                <param name="custom_dbs_fasta" value="yes"/>
                <repeat name="custom_fasta_dbs">
                    <param name="custom_db_name" value="CF1"/>
                    <param name="custom_fasta_loc" value="annotate_custom.fasta" ftype="fasta"/>
                </repeat>
            </conditional>
            <conditional name="custom_hmm_dbs_cond">
                <param name="custom_dbs_hmm" value="yes"/>
                <repeat name="custom_hmm_dbs">
                    <param name="custom_hmm_name" value="CH1"/>
                    <param name="custom_hmm_loc" value="annotate_custom.hmm" ftype="hmm3"/>
                </repeat>
            </conditional>
            <param name="prodigal_mode" value="meta"/>
            <output name="output_genbank">
                <assert_contents>
                    <has_text text="LOCUS"/>
                    <has_text text="gc_cont"/>
                </assert_contents>
            </output>
            <output name="output_annotations">
                <assert_contents>
                    <has_text text="fasta"/>
                    <has_text text="gene_position"/>
                    <has_n_columns n="9"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
**What it does**

@WHATITDOESHEADER@

This tool annotates genes by assigning database identifiers to them. Short contigs (by default, those shorter than 2,500 bp) are
removed first, and Prodigal is then used to detect open reading frames (ORFs) and to predict their amino acid sequences. Next, DRAM
searches all amino acid sequences against multiple databases, providing a single raw output. When gene annotation is complete,
all results are merged into a single tabular annotation table, including the best hit for each database, for user comparison.

This tool accepts assembly-derived FASTA files, which may come from unbinned data (metagenome contig or scaffold files) or
genome-resolved data from one or many organisms (isolate genomes, single-amplified genomes (SAGs), MAGs).

This tool produces the following outputs.

 * A tabular file with all annotations from Pfam, UniProt, dbCAN and MEROPS databases for all genes in the input genomes
 * Genbank files for each genome
 * A gff file of all annotations across genomes
 * A fasta file of each open reading frame amino acid sequence and best ranked annotation
 * Tabular files with tRNAs and rRNAs

@WHATITDOESFOOTER@
    </help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Fri, 23 Aug 2024 08:17:55 +0000
parents	0515750e7b10
children