view funannotate_annotate.xml @ 11:9c15ca7e764e draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/funannotate commit 9e3708d04faea0f1be4ea8918e859d6f2c7eb31d
author iuc
date Wed, 26 Jun 2024 09:41:52 +0000
parents d0336d67702b
children
line wrap: on
line source

<tool id="funannotate_annotate" name="Funannotate functional" version="@TOOL_VERSION@+galaxy5" profile="20.01">
    <!-- Short text displayed alongside the tool name. -->
    <description>annotation</description>
    <!-- macros.xml is the only macro import; the <expand> elements in this file refer to it. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <requirements>
        <expand macro="requirements"/>
    </requirements>
    <!-- Command used to report the version of the underlying software. -->
    <version_command>funannotate check --show-versions</version_command>
    <command><![CDATA[
## Shell pipeline: export the database path, run 'funannotate annotate',
## then delete leftover partial .tbl files.

## Needed by some subprocess invoked internally
## https://github.com/nextgenusfs/funannotate/issues/905
export FUNANNOTATE_DB='$database.fields.path'

&&

funannotate annotate

## Input is either one GenBank file, or GFF + genome FASTA + species name.
#if $input.input_type == 'gbk'
    --genbank '${input.genbank}'
#else
    --gff '${input.gff}'
    --fasta '${input.fasta}'
    --species '${input.species}'
#end if

--out output
## Backslash-escaped so the shell, not Cheetah, expands the variable at run time.
--tmpdir "\${_GALAXY_JOB_TMP_DIR:-/tmp}"

--database '$database.fields.path'

## Optional datasets: each flag is emitted only when a dataset was supplied.
#if $sbt:
    --sbt '${sbt}'
#end if

#if $annotations:
    --annotations '${annotations}'
#end if

#if $eggnog:
    --eggnog '${eggnog}'
#end if

#if $antismash:
    --antismash '${antismash}'
#end if

#if $iprscan:
    --iprscan '${iprscan}'
#end if

#if $phobius:
    --phobius '${phobius}'
#end if

--busco_db '${busco_db}'

## Optional free-text values: skipped when left blank (same guard as --rename)
## so that funannotate is never handed an empty argument.
#if $isolate:
    --isolate '${isolate}'
#end if
#if $strain:
    --strain '${strain}'
#end if

#if $rename:
    --rename '${rename}'
#end if
#if $fix:
    --fix '${fix}'
#end if
#if $remove:
    --remove '${remove}'
#end if
--header_length $header_length
--cpus \${GALAXY_SLOTS:-2}

&&

## Funannotate sometimes leaves multiple *part.tbl and *part.sqn files:
## https://github.com/nextgenusfs/funannotate/issues/777
## The partial tbl files are combined by funannotate and are deleted below.
## The sqn files are discrete and are collected with discover_datasets.
find output/annotate_results
-regex ".*part_[0-9]+\.\(tbl\)$"
-delete

    ]]></command>
    <inputs>

        <!-- Annotation source: a single GenBank file, or GFF3 plus genome FASTA plus species name. -->
        <conditional name="input">
            <param name="input_type" type="select" label="Input format">
                <option value="gbk" selected="True">GenBank (from 'Funannotate predict annotation' tool)</option>
                <option value="gff">GFF</option>
            </param>
            <when value="gbk">
                <param argument="--genbank" type="data" format="genbank"
                       label="Genome annotation in genbank format"
                       help="Output from 'Funannotate predict annotation' tool"/>
            </when>
            <when value="gff">
                <param argument="--gff" type="data" format="gff3"
                       label="Genome annotation in gff format"/>
                <param argument="--fasta" type="data" format="fasta"
                       label="Genome sequence"/>
                <!-- Mandatory in this branch: the validator rejects an empty value. -->
                <param argument="--species" type="text" optional="false"
                       label="Name of the species to annotate"
                       help="e.g. Genus species">
                    <validator type="empty_field"/>
                </param>
            </when>
        </conditional>

        <!-- Choices come from the "funannotate" data table: column 0 is the value, column 1 the
             displayed name, column 3 the path used on the command line. Rows are sorted on
             column 0 and only those whose column 2 equals "1.0" are offered
             (presumably a database format version - confirm against the data manager). -->
        <param name="database" type="select" label="Funannotate database">
            <options from_data_table="funannotate">
                <column name="value" index="0"/>
                <column name="name" index="1"/>
                <column name="path" index="3"/>
                <filter type="sort_by" column="0"/>
                <filter type="static_value" column="2" value="1.0"/>
            </options>
        </param>

        <!-- Optional NCBI submission template. -->
        <param argument="--sbt" type="data" format="txt" optional="true"
               label="NCBI submission template file"
               help="Create it on https://submit.ncbi.nlm.nih.gov/genbank/template/submission/ (or leave empty to use a default one, not suitable for submission at NCBI)"/>

        <!-- Optional pre-computed results produced by other tools. -->
        <param argument="--eggnog" type="data" format="tabular" optional="true"
               label="Eggnog-mapper annotations file"
               help="'annotations' output from 'eggNOG Mapper' tool"/>
        <param argument="--antismash" type="data" format="genbank" optional="true"
               label="antiSMASH secondary metabolism results"
               help="Genbank output from 'Antismash' tool"/>
        <param argument="--iprscan" type="data" format="xml" optional="true"
               label="InterProScan5 XML file"
               help="XML output from InterProScan"/>
        <param argument="--phobius" type="data" format="tabular" optional="true"
               label="Phobius pre-computed results"/>

        <!-- BUSCO lineage; the option list is provided by the busco_species macro. -->
        <param argument="--busco_db" type="select" label="BUSCO models">
            <expand macro="busco_species"/>
        </param>

        <param argument="--annotations" type="data" format="tabular" optional="true"
               label="Custom annotations"
               help="3 column tsv file"/>

        <!-- Free-text naming details. -->
        <param argument="--isolate" type="text"
               label="Isolate name"
               help="If relevant (e.g. Af293)"/>
        <param argument="--strain" type="text"
               label="Strain name"
               help="If relevant (e.g. FGSCA4)"/>

        <param argument="--rename" type="text"
               label="locus_tag from NCBI to rename GFF gene models with"/>
        <param argument="--fix" type="data" format="tabular" optional="true"
               label="Gene/Product names fixed"
               help="TSV: GeneID	Name	Product"/>
        <param argument="--remove" type="data" format="tabular" optional="true"
               label="Gene/Product names to remove"
               help="TSV: Gene	Product"/>

        <param argument="--header_length" type="integer" value="16" min="1"
               label="Maximum length of FASTA headers"
               help="The NCBI max FASTA header length is 16. Increase if you don't submit to NCBI."/>
        <!-- Multi-select controlling which result datasets are created. The option values are
             matched literally by the <filter> expressions in the <outputs> section, so they
             must not be renamed (including the historical spelling "discrepency"). -->
        <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated">
            <option value="gbk" selected="true">Annotated genome (genbank)</option>
            <option value="annotations">TSV file of all annotations added to genome. (i.e. import into excel)</option>
            <option value="contigs_fsa">Multi-fasta file of contigs, split at gaps (use for NCBI submission)</option>
            <option value="agp">AGP file; showing linkage/location of contigs (use for NCBI submission)</option>
            <option value="tbl">NCBI tbl annotation file (use for NCBI submission)</option>
            <option value="sqn">NCBI Sequin genome file (use for NCBI submission)</option>
            <option value="scaffolds_fa">Multi-fasta file of scaffolds</option>
            <option value="proteins_fa">Multi-fasta file of protein coding genes</option>
            <option value="mrna_transcripts_fa">Multi-fasta file of transcripts (mRNA)</option>
            <option value="cds_transcripts_fa">Multi-fasta file of transcripts (CDS)</option>
            <option value="gff3">Annotation in GFF3 format</option>
            <option value="discrepency">tbl2asn summary report of annotated genome</option>
            <option value="stats">Statistics</option>
            <option value="must_fix">TSV file of Gene Name/Product deflines that failed to pass tbl2asn checks and must be fixed</option>
            <option value="need_curating">TSV file of Gene Name/Product deflines that need to be curated</option>
            <option value="new_names_passed">TSV file of Gene Name/Product deflines that passed tbl2asn but are not in Gene2Products database.</option>
        </param>
    </inputs>
    <outputs>
        <!-- Each dataset below is created only when its key is ticked in the "outputs" select. -->
        <data name="gbk" format="genbank"
              label="${tool.name} on ${on_string}: annotated genome (genbank)"
              from_work_dir="output/annotate_results/*.gbk">
            <filter>outputs and 'gbk' in outputs</filter>
        </data>
        <data name="annot" format="tabular"
              label="${tool.name} on ${on_string}: all annotations"
              from_work_dir="output/annotate_results/*.annotations.txt">
            <filter>outputs and 'annotations' in outputs</filter>
        </data>
        <data name="contigs_fsa" format="fasta"
              label="${tool.name} on ${on_string}: contigs fasta, split at gaps"
              from_work_dir="output/annotate_results/*.contigs.fsa">
            <filter>outputs and 'contigs_fsa' in outputs</filter>
        </data>
        <data name="agp" format="tabular"
              label="${tool.name} on ${on_string}: AGP file"
              from_work_dir="output/annotate_results/*.agp">
            <filter>outputs and 'agp' in outputs</filter>
        </data>
        <data name="tbl" format="txt"
              label="${tool.name} on ${on_string}: NCBI tbl annotation file"
              from_work_dir="output/annotate_results/*.tbl">
            <filter>outputs and 'tbl' in outputs</filter>
        </data>
        <!-- All *.sqn files in the results directory are gathered into one list collection. -->
        <collection name="sqn" type="list"
                    label="${tool.name} on ${on_string}: NCBI Sequin genome files">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.sqn" directory="output/annotate_results" format="txt" recurse="false"/>
            <filter>outputs and 'sqn' in outputs</filter>
        </collection>
        <data name="fa_scaffolds" format="fasta"
              label="${tool.name} on ${on_string}: scaffolds sequences"
              from_work_dir="output/annotate_results/*.scaffolds.fa">
            <filter>outputs and 'scaffolds_fa' in outputs</filter>
        </data>
        <data name="fa_proteins" format="fasta"
              label="${tool.name} on ${on_string}: protein sequences"
              from_work_dir="output/annotate_results/*.proteins.fa">
            <filter>outputs and 'proteins_fa' in outputs</filter>
        </data>
        <data name="fa_transcripts_mrna" format="fasta"
              label="${tool.name} on ${on_string}: transcript mRNA sequences"
              from_work_dir="output/annotate_results/*.mrna-transcripts.fa">
            <filter>outputs and 'mrna_transcripts_fa' in outputs</filter>
        </data>
        <data name="fa_transcripts_cds" format="fasta"
              label="${tool.name} on ${on_string}: transcript CDS sequences"
              from_work_dir="output/annotate_results/*.cds-transcripts.fa">
            <filter>outputs and 'cds_transcripts_fa' in outputs</filter>
        </data>
        <data name="gff3" format="gff3"
              label="${tool.name} on ${on_string}: annotation (GFF3)"
              from_work_dir="output/annotate_results/*.gff3">
            <filter>outputs and 'gff3' in outputs</filter>
        </data>
        <data name="tbl2asn_report" format="txt"
              label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome"
              from_work_dir="output/annotate_results/*.discrepency.report.txt">
            <filter>outputs and 'discrepency' in outputs</filter>
        </data>
        <!-- Created only when "Statistics" (value 'stats') is ticked in the "outputs" select. -->
        <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="output/annotate_results/*.stats.json">
            <filter>outputs and 'stats' in outputs</filter>
        </data>
        <!-- Gene2Products reports: plain-text tables (described as TSV in the "outputs" select),
             so they are typed as tabular rather than json. -->
        <data name='must_fix' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product must-fix" from_work_dir="output/annotate_results/Gene2Products.must-fix.txt">
            <filter>outputs and 'must_fix' in outputs</filter>
        </data>
        <data name='need_curating' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product need-curating" from_work_dir="output/annotate_results/Gene2Products.need-curating.txt">
            <filter>outputs and 'need_curating' in outputs</filter>
        </data>
        <data name='new_names_passed' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product new-names-passed" from_work_dir="output/annotate_results/Gene2Products.new-names-passed.txt">
            <filter>outputs and 'new_names_passed' in outputs</filter>
        </data>
    </outputs>
    <tests>
        <!-- GenBank input with every output requested: 15 datasets plus the sqn collection. -->
        <test expect_num_outputs="16">
            <conditional name="input">
                <param name="input_type" value="gbk"/>
                <param name="genbank" value="predict_augustus/Genus_species.gbk"/>
            </conditional>
            <param name="database" value="2021-07-20-120000"/>
            <param name="busco_db" value="insecta"/>
            <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed"/>
            <output name="gbk">
                <assert_contents><has_text text="DEFINITION  Genus species."/></assert_contents>
            </output>
            <output name="annot">
                <assert_contents>
                    <has_text text="EC_number"/>
                    <has_text text="EOG090W0T3K"/>
                </assert_contents>
            </output>
            <output name="contigs_fsa">
                <assert_contents><has_text text=">contig_1"/></assert_contents>
            </output>
            <output name="agp">
                <assert_contents><has_text text="contig_1"/></assert_contents>
            </output>
            <output name="tbl">
                <assert_contents><has_text text="locus_tag"/></assert_contents>
            </output>
            <output_collection name="sqn" type="list">
                <element name="Genus_species">
                    <assert_contents><has_text text="Seq-submit"/></assert_contents>
                </element>
            </output_collection>
            <output name="fa_scaffolds">
                <assert_contents><has_text text=">sample"/></assert_contents>
            </output>
            <output name="fa_proteins">
                <assert_contents><has_text text=">FUN_000001-T1 FUN_000001"/></assert_contents>
            </output>
            <output name="fa_transcripts_mrna">
                <assert_contents><has_text text=">FUN_000001-T1 FUN_000001"/></assert_contents>
            </output>
            <output name="fa_transcripts_cds">
                <assert_contents><has_text text=">FUN_000001-T1 FUN_000001"/></assert_contents>
            </output>
            <output name="gff3">
                <assert_contents><has_text text="ID=FUN_000001;"/></assert_contents>
            </output>
            <output name="tbl2asn_report">
                <assert_contents><has_text text="Discrepancy Report Results"/></assert_contents>
            </output>
            <output name="stats">
                <assert_contents><has_text text="avg_gene_length"/></assert_contents>
            </output>
            <output name="must_fix">
                <assert_contents><has_text text="tbl2asn Error"/></assert_contents>
            </output>
            <output name="need_curating">
                <assert_contents><has_text text="Original Description"/></assert_contents>
            </output>
            <output name="new_names_passed">
                <assert_contents><has_text text="Passed Description"/></assert_contents>
            </output>
        </test>
        <!-- GFF3 + genome FASTA + species input with every output requested; same assertions as the GenBank test. -->
        <test expect_num_outputs="16">
            <conditional name="input">
                <param name="input_type" value="gff"/>
                <param name="gff" value="predict_augustus/Genus_species.gff3"/>
                <param name="fasta" value="genome.fa"/>
                <param name="species" value="Genus species"/>
            </conditional>
            <param name="database" value="2021-07-20-120000"/>
            <param name="busco_db" value="insecta"/>
            <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed"/>
            <output name="gbk">
                <assert_contents><has_text text="DEFINITION  Genus species."/></assert_contents>
            </output>
            <output name="annot">
                <assert_contents>
                    <has_text text="EC_number"/>
                    <has_text text="EOG090W0T3K"/>
                </assert_contents>
            </output>
            <output name="contigs_fsa">
                <assert_contents><has_text text=">contig_1"/></assert_contents>
            </output>
            <output name="agp">
                <assert_contents><has_text text="contig_1"/></assert_contents>
            </output>
            <output name="tbl">
                <assert_contents><has_text text="locus_tag"/></assert_contents>
            </output>
            <output_collection name="sqn" type="list">
                <element name="Genus_species">
                    <assert_contents><has_text text="Seq-submit"/></assert_contents>
                </element>
            </output_collection>
            <output name="fa_scaffolds">
                <assert_contents><has_text text=">sample"/></assert_contents>
            </output>
            <output name="fa_proteins">
                <assert_contents><has_text text=">FUN_000001-T1 FUN_000001"/></assert_contents>
            </output>
            <output name="fa_transcripts_mrna">
                <assert_contents><has_text text=">FUN_000001-T1 FUN_000001"/></assert_contents>
            </output>
            <output name="fa_transcripts_cds">
                <assert_contents><has_text text=">FUN_000001-T1 FUN_000001"/></assert_contents>
            </output>
            <output name="gff3">
                <assert_contents><has_text text="ID=FUN_000001;"/></assert_contents>
            </output>
            <output name="tbl2asn_report">
                <assert_contents><has_text text="Discrepancy Report Results"/></assert_contents>
            </output>
            <output name="stats">
                <assert_contents><has_text text="avg_gene_length"/></assert_contents>
            </output>
            <output name="must_fix">
                <assert_contents><has_text text="tbl2asn Error"/></assert_contents>
            </output>
            <output name="need_curating">
                <assert_contents><has_text text="Original Description"/></assert_contents>
            </output>
            <output name="new_names_passed">
                <assert_contents><has_text text="Passed Description"/></assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Funannotate_ annotate
---------------------

Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes).

This script functionally annotates the results from funannotate predict. It pulls
annotations from PFAM, InterPro, EggNog, UniProtKB, MEROPS, CAZyme, and Gene Ontology (GO).

.. _Funannotate: http://funannotate.readthedocs.io
    ]]></help>
    <expand macro="citations" />
</tool>