view funannotate_annotate.xml @ 7:b6b5cd37f2a2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/funannotate commit 90acc0e23230b2e29e1d38a90bca4009c84c7e67
author iuc
date Fri, 20 Oct 2023 10:07:28 +0000
parents 582fd74dc2a1
children 756376aeb1ae
line wrap: on
line source

<tool id="funannotate_annotate" name="Funannotate functional" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>annotation</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <expand macro="requirements" />
    </requirements>
    <version_command>funannotate check --show-versions</version_command>
    <command><![CDATA[
## Needed by some subprocess invoked internally
## https://github.com/nextgenusfs/funannotate/issues/905
export FUNANNOTATE_DB='$database.fields.path'

&&

funannotate annotate

#if $input.input_type == 'gbk'
    --genbank '${input.genbank}'
#else
    --gff '${input.gff}'
    --fasta '${input.fasta}'
    --species '${input.species}'
#end if

--out output

--database '$database.fields.path'

#if $sbt:
    --sbt '${sbt}'
#end if

#if $annotations:
    --annotations '${annotations}'
#end if

#if $eggnog:
    --eggnog '${eggnog}'
#end if

#if $antismash:
    --antismash '${antismash}'
#end if

#if $iprscan:
    --iprscan '${iprscan}'
#end if

#if $phobius:
    --phobius '${phobius}'
#end if

--busco_db '${busco_db}'

--isolate '${isolate}'
--strain '${strain}'

#if $rename:
    --rename '${rename}'
#end if
#if $fix:
    --fix '${fix}'
#end if
#if $remove:
    --remove '${remove}'
#end if

--cpus \${GALAXY_SLOTS:-2}

&&

## funannotate sometimes leaves multiple *part.tbl and *part.sqn files
## https://github.com/nextgenusfs/funannotate/issues/777
find output/annotate_results 
-regex ".*part_[0-9]\+\.\(sqn\|tbl\)$"
-delete

&&

mv output/annotate_results/*.gbk out.gbk &&
mv output/annotate_results/*.annotations.txt out.annotations.txt &&
mv output/annotate_results/*.contigs.fsa out.contigs.fsa &&
mv output/annotate_results/*.agp out.agp &&
mv output/annotate_results/*.tbl out.tbl &&
mv output/annotate_results/*.sqn out.sqn &&
mv output/annotate_results/*.scaffolds.fa out.scaffolds.fa &&
mv output/annotate_results/*.proteins.fa out.proteins.fa &&
mv output/annotate_results/*.mrna-transcripts.fa out.mrna-transcripts.fa &&
mv output/annotate_results/*.cds-transcripts.fa out.cds-transcripts.fa &&
mv output/annotate_results/*.gff3 out.gff3 &&
mv output/annotate_results/*.discrepency.report.txt out.discrepency.report.txt &&
mv output/annotate_results/*.stats.json out.stats.json
    ]]></command>
    <inputs>

        <conditional name="input">
            <param name="input_type" type="select" label="Input format">
                <option value="gbk" selected="True">GenBank (from 'Funannotate predict annotation' tool)</option>
                <option value="gff">GFF</option>
            </param>
            <when value="gbk">
                <param argument="--genbank" type="data" format="genbank" label="Genome annotation in genbank format" help="Output from 'Funannotate predict annotation' tool" />
            </when>
            <when value="gff">
                <param argument="--gff" type="data" format="gff3" label="Genome annotation in gff format" />
                <param argument="--fasta" type="data" format="fasta" label="Genome sequence" />
                <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species">
                    <validator type="empty_field" />
                </param>
            </when>
        </conditional>



        <param name="database" label="Funannotate database" type="select">
            <options from_data_table="funannotate">
                <column name="value" index="0" />
                <column name="name" index="1" />
                <column name="path" index="3" />
                <filter type="sort_by" column="0" />
                <filter type="static_value" column="2" value="1.0" />
            </options>
        </param>

        <param argument="--sbt" type="data" format="txt" optional="true" label="NCBI submission template file" help="Create it on https://submit.ncbi.nlm.nih.gov/genbank/template/submission/ (or leave empty to use a default one, not suitable for submission at NCBI)" />

        <param argument="--eggnog" type="data" format="tabular" optional="true" label="Eggnog-mapper annotations file" help="'annotations' output from 'eggNOG Mapper' tool" />
        <param argument="--antismash" type="data" format="genbank" optional="true" label="antiSMASH secondary metabolism results" help="Genbank output from 'Antismash' tool" />
        <param argument="--iprscan" type="data" format="xml" optional="true" label="InterProScan5 XML file" help="XML output from InterProScan" />
        <param argument="--phobius" type="data" format="tabular" optional="true" label="Phobius pre-computed results" />

        <param argument="--busco_db" type="select" label="BUSCO models">
            <expand macro="busco_species"/>
        </param>

        <param argument="--annotations" type="data" format="tabular" optional="true" label="Custom annotations" help="3 column tsv file" />

        <param argument="--isolate" type="text" label="Isolate name" help="If relevant (e.g. Af293)" />
        <param argument="--strain" type="text" label="Strain name" help="If relevant (e.g. FGSCA4)" />

        <param argument="--rename" type="text" label="locus_tag from NCBI to rename GFF gene models with" />
        <param argument="--fix" type="data" format="tabular" optional="true" label="Gene/Product names fixed" help="TSV: GeneID	Name	Product" />
        <param argument="--remove" type="data" format="tabular" optional="true" label="Gene/Product names to remove" help="TSV: Gene	Product" />

        <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated">
            <option value="gbk" selected="true">Annotated genome (genbank)</option>
            <option value="annotations">TSV file of all annotations added to genome. (i.e. import into excel)</option>
            <option value="contigs_fsa">Multi-fasta file of contigs, split at gaps (use for NCBI submission)</option>
            <option value="agp">AGP file; showing linkage/location of contigs (use for NCBI submission)</option>
            <option value="tbl">NCBI tbl annotation file (use for NCBI submission)</option>
            <option value="sqn">NCBI Sequin genome file (use for NCBI submission)</option>
            <option value="scaffolds_fa">Multi-fasta file of scaffolds</option>
            <option value="proteins_fa">Multi-fasta file of protein coding genes</option>
            <option value="mrna_transcripts_fa">Multi-fasta file of transcripts (mRNA)</option>
            <option value="cds_transcripts_fa">Multi-fasta file of transcripts (CDS)</option>
            <option value="gff3">Annotation in GFF3 format</option>
            <option value="discrepency">tbl2asn summary report of annotated genome</option>
            <option value="stats">Statistics</option>
            <option value="must_fix">TSV file of Gene Name/Product deflines that failed to pass tbl2asn checks and must be fixed</option>
            <option value="need_curating">TSV file of Gene Name/Product defines that need to be curated</option>
            <option value="new_names_passed">TSV file of Gene Name/Product deflines that passed tbl2asn but are not in Gene2Products database.</option>
        </param>
    </inputs>
    <outputs>
        <data name='gbk' format='genbank' label="${tool.name} on ${on_string}: annotated genome (genbank)" from_work_dir="out.gbk">
            <filter>outputs and 'gbk' in outputs</filter>
        </data>
        <data name='annot' format='tabular' label="${tool.name} on ${on_string}: all annotations" from_work_dir="out.annotations.txt">
            <filter>outputs and 'annotations' in outputs</filter>
        </data>
        <data name='contigs_fsa' format='fasta' label="${tool.name} on ${on_string}: contigs fasta, split at gaps" from_work_dir="out.contigs.fsa">
            <filter>outputs and 'contigs_fsa' in outputs</filter>
        </data>
        <data name='agp' format='tabular' label="${tool.name} on ${on_string}: AGP file" from_work_dir="out.agp">
            <filter>outputs and 'agp' in outputs</filter>
        </data>
        <data name='tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl">
            <filter>outputs and 'tbl' in outputs</filter>
        </data>
        <data name='sqn' format='txt' label="${tool.name} on ${on_string}: NCBI Sequin genome" from_work_dir="out.sqn">
            <filter>outputs and 'sqn' in outputs</filter>
        </data>
        <data name='fa_scaffolds' format='fasta' label="${tool.name} on ${on_string}: scaffolds sequences" from_work_dir="out.scaffolds.fa">
            <filter>outputs and 'scaffolds_fa' in outputs</filter>
        </data>
        <data name='fa_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa">
            <filter>outputs and 'proteins_fa' in outputs</filter>
        </data>
        <data name='fa_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa">
            <filter>outputs and 'mrna_transcripts_fa' in outputs</filter>
        </data>
        <data name='fa_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa">
            <filter>outputs and 'cds_transcripts_fa' in outputs</filter>
        </data>
        <data name='gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3">
            <filter>outputs and 'gff3' in outputs</filter>
        </data>
        <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt">
            <filter>outputs and 'discrepency' in outputs</filter>
        </data>
        <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json">
            <filter>outputs and 'gbk' in outputs</filter>
        </data>
        <data name='must_fix' format='json' label="${tool.name} on ${on_string}: Gene Name/Product must-fix" from_work_dir="output/annotate_results/Gene2Products.must-fix.txt">
            <filter>outputs and 'must_fix' in outputs</filter>
        </data>
        <data name='need_curating' format='json' label="${tool.name} on ${on_string}: Gene Name/Product need-curating" from_work_dir="output/annotate_results/Gene2Products.need-curating.txt">
            <filter>outputs and 'need_curating' in outputs</filter>
        </data>
        <data name='new_names_passed' format='json' label="${tool.name} on ${on_string}: Gene Name/Product new-names-passed" from_work_dir="output/annotate_results/Gene2Products.new-names-passed.txt">
            <filter>outputs and 'new_names_passed' in outputs</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="16">
            <conditional name="input">
                <param name="input_type" value="gbk" />
                <param name="genbank" value="predict_augustus/Genus_species.gbk" />
            </conditional>
            <param name="database" value="2021-07-20-120000" />
            <param name="busco_db" value="insecta" />
            <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" />
            <output name="gbk">
                <assert_contents>
                    <has_text text="DEFINITION  Genus species." />
                </assert_contents>
            </output>
            <output name="annot">
                <assert_contents>
                    <has_text text="EC_number" />
                    <has_text text="EOG090W0T3K" />
                </assert_contents>
            </output>
            <output name="contigs_fsa">
                <assert_contents>
                    <has_text text=">contig_1" />
                </assert_contents>
            </output>
            <output name="agp">
                <assert_contents>
                    <has_text text="contig_1" />
                </assert_contents>
            </output>
            <output name="tbl">
                <assert_contents>
                    <has_text text="locus_tag" />
                </assert_contents>
            </output>
            <output name="sqn">
                <assert_contents>
                    <has_text text="Seq-submit" />
                </assert_contents>
            </output>
            <output name="fa_scaffolds">
                <assert_contents>
                    <has_text text=">sample" />
                </assert_contents>
            </output>
            <output name="fa_proteins">
                <assert_contents>
                    <has_text text=">FUN_000001-T1 FUN_000001" />
                </assert_contents>
            </output>
            <output name="fa_transcripts_mrna">
                <assert_contents>
                    <has_text text=">FUN_000001-T1 FUN_000001" />
                </assert_contents>
            </output>
            <output name="fa_transcripts_cds">
                <assert_contents>
                    <has_text text=">FUN_000001-T1 FUN_000001" />
                </assert_contents>
            </output>
            <output name="gff3">
                <assert_contents>
                    <has_text text="ID=FUN_000001;" />
                </assert_contents>
            </output>
            <output name="tbl2asn_report">
                <assert_contents>
                    <has_text text="Discrepancy Report Results" />
                </assert_contents>
            </output>
            <output name="stats">
                <assert_contents>
                    <has_text text="avg_gene_length" />
                </assert_contents>
            </output>
            <output name="must_fix">
                <assert_contents>
                    <has_text text="tbl2asn Error" />
                </assert_contents>
            </output>
            <output name="need_curating">
                <assert_contents>
                    <has_text text="Original Description" />
                </assert_contents>
            </output>
            <output name="new_names_passed">
                <assert_contents>
                    <has_text text="Passed Description" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="16">
            <conditional name="input">
                <param name="input_type" value="gff" />
                <param name="gff" value="predict_augustus/Genus_species.gff3" />
                <param name="fasta" value="genome.fa" />
                <param name="species" value="Genus species" />
            </conditional>
            <param name="database" value="2021-07-20-120000" />
            <param name="busco_db" value="insecta" />
            <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" />
            <output name="gbk">
                <assert_contents>
                    <has_text text="DEFINITION  Genus species." />
                </assert_contents>
            </output>
            <output name="annot">
                <assert_contents>
                    <has_text text="EC_number" />
                    <has_text text="EOG090W0T3K" />
                </assert_contents>
            </output>
            <output name="contigs_fsa">
                <assert_contents>
                    <has_text text=">contig_1" />
                </assert_contents>
            </output>
            <output name="agp">
                <assert_contents>
                    <has_text text="contig_1" />
                </assert_contents>
            </output>
            <output name="tbl">
                <assert_contents>
                    <has_text text="locus_tag" />
                </assert_contents>
            </output>
            <output name="sqn">
                <assert_contents>
                    <has_text text="Seq-submit" />
                </assert_contents>
            </output>
            <output name="fa_scaffolds">
                <assert_contents>
                    <has_text text=">sample" />
                </assert_contents>
            </output>
            <output name="fa_proteins">
                <assert_contents>
                    <has_text text=">FUN_000001-T1 FUN_000001" />
                </assert_contents>
            </output>
            <output name="fa_transcripts_mrna">
                <assert_contents>
                    <has_text text=">FUN_000001-T1 FUN_000001" />
                </assert_contents>
            </output>
            <output name="fa_transcripts_cds">
                <assert_contents>
                    <has_text text=">FUN_000001-T1 FUN_000001" />
                </assert_contents>
            </output>
            <output name="gff3">
                <assert_contents>
                    <has_text text="ID=FUN_000001;" />
                </assert_contents>
            </output>
            <output name="tbl2asn_report">
                <assert_contents>
                    <has_text text="Discrepancy Report Results" />
                </assert_contents>
            </output>
            <output name="stats">
                <assert_contents>
                    <has_text text="avg_gene_length" />
                </assert_contents>
            </output>
            <output name="must_fix">
                <assert_contents>
                    <has_text text="tbl2asn Error" />
                </assert_contents>
            </output>
            <output name="need_curating">
                <assert_contents>
                    <has_text text="Original Description" />
                </assert_contents>
            </output>
            <output name="new_names_passed">
                <assert_contents>
                    <has_text text="Passed Description" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Funannotate_ annotate
---------------------

Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes).

This script functionally annotates the results from funannotate predict. It pulls
annotation from PFAM, InterPro, EggNog, UniProtKB, MEROPS, CAZyme, and GO ontology.

.. _Funannotate: http://funannotate.readthedocs.io
    ]]></help>
    <expand macro="citations" />
</tool>