diff funannotate_annotate.xml @ 0:a5baa4ff168d draft

"planemo upload commit 87560553f1dbbd3e0ab7d7157fa5a7f32f61dca1"
author iuc
date Mon, 04 Oct 2021 19:39:38 +0000
parents
children aa19eaac7d4b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/funannotate_annotate.xml	Mon Oct 04 19:39:38 2021 +0000
@@ -0,0 +1,413 @@
+<tool id="funannotate_annotate" name="Funannotate functional" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>annotation</description>
+    <macros><!-- shared macro definitions are imported from macros.xml -->
+        <import>macros.xml</import>
+    </macros>
+    <requirements><!-- filled in by expanding the "requirements" macro (presumably defined in macros.xml) -->
+        <expand macro="requirements" />
+    </requirements>
+    <version_command>funannotate check --show-versions</version_command><!-- command used to report the installed versions -->
+    <command><![CDATA[
+#if $uglyTestingHack == "true":
+    ## funannotate_db contains some hard coded path, need to rewrite one for tests (not in real life when using data manager)
+    ## Need to copy too as the test_data is read only on CI
+    cp -r '${database.fields.path}' './hacked_database' &&
+    sed -i.bak "s|/tmp/prout|`pwd`/hacked_database|" './hacked_database/trained_species/fly/info.json' &&
+#end if
+
+funannotate annotate
+
+#if $input.input_type == 'gbk'
+    --genbank '${input.genbank}'
+#else
+    --gff '${input.gff}'
+    --fasta '${input.fasta}'
+    --species '${input.species}'
+#end if
+
+--out output
+
+#if $uglyTestingHack == "true":
+    --database "`pwd`/hacked_database"
+#else
+    --database '$database.fields.path'
+#end if
+
+#if $sbt:
+    --sbt '${sbt}'
+#end if
+
+#if $annotations:
+    --annotations '${annotations}'
+#end if
+
+#if $eggnog:
+    --eggnog '${eggnog}'
+#end if
+
+#if $antismash:
+    --antismash '${antismash}'
+#end if
+
+#if $iprscan:
+    --iprscan '${iprscan}'
+#end if
+
+#if $phobius:
+    --phobius '${phobius}'
+#end if
+
+--busco_db '${busco_db}'
+## isolate and strain are free text that may be left empty: only pass them when set (same as rename)
+#if $isolate:
+    --isolate '${isolate}'
+#end if
+#if $strain:
+    --strain '${strain}'
+#end if
+#if $rename:
+    --rename '${rename}'
+#end if
+#if $fix:
+    --fix '${fix}'
+#end if
+#if $remove:
+    --remove '${remove}'
+#end if
+
+--cpus \${GALAXY_SLOTS:-2} &&
+## Give every result a fixed name so that the outputs section can collect it with from_work_dir
+mv output/annotate_results/*.gbk out.gbk &&
+mv output/annotate_results/*.annotations.txt out.annotations.txt &&
+mv output/annotate_results/*.contigs.fsa out.contigs.fsa &&
+mv output/annotate_results/*.agp out.agp &&
+mv output/annotate_results/*.tbl out.tbl &&
+mv output/annotate_results/*.sqn out.sqn &&
+mv output/annotate_results/*.scaffolds.fa out.scaffolds.fa &&
+mv output/annotate_results/*.proteins.fa out.proteins.fa &&
+mv output/annotate_results/*.mrna-transcripts.fa out.mrna-transcripts.fa &&
+mv output/annotate_results/*.cds-transcripts.fa out.cds-transcripts.fa &&
+mv output/annotate_results/*.gff3 out.gff3 &&
+mv output/annotate_results/*.discrepency.report.txt out.discrepency.report.txt &&
+mv output/annotate_results/*.stats.json out.stats.json
+    ]]></command>
+    <inputs>
+        <!-- Gene models come either from a GenBank file or from a GFF3 file plus the genome FASTA and a species name -->
+        <conditional name="input">
+            <param name="input_type" type="select" label="Input format">
+                <option value="gbk" selected="True">GenBank (from 'Funannotate predict annotation' tool)</option>
+                <option value="gff">GFF</option>
+            </param>
+            <when value="gbk">
+                <param argument="--genbank" type="data" format="genbank" label="Genome annotation in genbank format" help="Output from 'Funannotate predict annotation' tool" />
+            </when>
+            <when value="gff">
+                <param argument="--gff" type="data" format="gff3" label="Genome annotation in gff format" />
+                <param argument="--fasta" type="data" format="fasta" label="Genome sequence" />
+                <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species">
+                    <validator type="empty_field" />
+                </param>
+            </when>
+        </conditional>
+
+
+        <!-- Databases are listed from the "funannotate" data table; only rows whose column 2 equals "1.0" are offered -->
+        <param name="database" label="Funannotate database" type="select">
+            <options from_data_table="funannotate">
+                <column name="value" index="0" />
+                <column name="name" index="1" />
+                <column name="path" index="3" />
+                <filter type="sort_by" column="0" />
+                <filter type="static_value" column="2" value="1.0" />
+            </options>
+        </param>
+
+        <param argument="--sbt" type="data" format="sbt" optional="true" label="NCBI submission template file" help="Create it on https://submit.ncbi.nlm.nih.gov/genbank/template/submission/ (or leave empty to use a default one, not suitable for submission at NCBI)" />
+        <!-- Optional pre-computed results produced by other tools -->
+        <param argument="--eggnog" type="data" format="tabular" optional="true" label="Eggnog-mapper annotations file" help="'annotations' output from 'eggNOG Mapper' tool" />
+        <param argument="--antismash" type="data" format="genbank" optional="true" label="antiSMASH secondary metabolism results" help="Genbank output from 'Antismash' tool" />
+        <param argument="--iprscan" type="data" format="xml" optional="true" label="InterProScan5 XML file" help="XML output from InterProScan" />
+        <param argument="--phobius" type="data" format="tabular" optional="true" label="Phobius pre-computed results" />
+
+        <param argument="--busco_db" type="select" label="BUSCO models">
+            <expand macro="busco_species"/>
+        </param>
+
+        <param argument="--annotations" type="data" format="tabular" optional="true" label="Custom annotations" help="3 column tsv file" />
+
+        <param argument="--isolate" type="text" label="Isolate name" help="If relevant (e.g. Af293)" />
+        <param argument="--strain" type="text" label="Strain name" help="If relevant (e.g. FGSCA4)" />
+
+        <param argument="--rename" type="text" label="locus_tag from NCBI to rename GFF gene models with" />
+        <param argument="--fix" type="data" format="tabular" optional="true" label="Gene/Product names fixed" help="TSV: GeneID	Name	Product" />
+        <param argument="--remove" type="data" format="tabular" optional="true" label="Gene/Product names to remove" help="TSV: Gene	Product" />
+        <!-- Each value selected here enables the dataset whose filter tests for it in the outputs section -->
+        <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated">
+            <option value="gbk" selected="true">Annotated genome (genbank)</option>
+            <option value="annotations">TSV file of all annotations added to genome. (i.e. import into excel)</option>
+            <option value="contigs_fsa">Multi-fasta file of contigs, split at gaps (use for NCBI submission)</option>
+            <option value="agp">AGP file; showing linkage/location of contigs (use for NCBI submission)</option>
+            <option value="tbl">NCBI tbl annotation file (use for NCBI submission)</option>
+            <option value="sqn">NCBI Sequin genome file (use for NCBI submission)</option>
+            <option value="scaffolds_fa">Multi-fasta file of scaffolds</option>
+            <option value="proteins_fa">Multi-fasta file of protein coding genes</option>
+            <option value="mrna_transcripts_fa">Multi-fasta file of transcripts (mRNA)</option>
+            <option value="cds_transcripts_fa">Multi-fasta file of transcripts (CDS)</option>
+            <option value="gff3">Annotation in GFF3 format</option>
+            <option value="discrepency">tbl2asn summary report of annotated genome</option>
+            <option value="stats">Statistics</option>
+            <option value="must_fix">TSV file of Gene Name/Product deflines that failed to pass tbl2asn checks and must be fixed</option>
+            <option value="need_curating">TSV file of Gene Name/Product deflines that need to be curated</option>
+            <option value="new_names_passed">TSV file of Gene Name/Product deflines that passed tbl2asn but are not in Gene2Products database.</option>
+        </param>
+
+        <!-- Need this to change path in the test funannotate_db -->
+        <param type="hidden" name="uglyTestingHack" value="" />
+    </inputs>
+    <outputs><!-- every dataset below is created only when its key is selected in the "outputs" parameter -->
+        <data name='gbk' format='genbank' label="${tool.name} on ${on_string}: annotated genome (genbank)" from_work_dir="out.gbk">
+            <filter>outputs and 'gbk' in outputs</filter>
+        </data>
+        <data name='annot' format='tabular' label="${tool.name} on ${on_string}: all annotations" from_work_dir="out.annotations.txt">
+            <filter>outputs and 'annotations' in outputs</filter>
+        </data>
+        <data name='contigs_fsa' format='fasta' label="${tool.name} on ${on_string}: contigs fasta, split at gaps" from_work_dir="out.contigs.fsa">
+            <filter>outputs and 'contigs_fsa' in outputs</filter>
+        </data>
+        <data name='agp' format='tabular' label="${tool.name} on ${on_string}: AGP file (linkage/location of contigs)" from_work_dir="out.agp">
+            <filter>outputs and 'agp' in outputs</filter>
+        </data>
+        <data name='tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl">
+            <filter>outputs and 'tbl' in outputs</filter>
+        </data>
+        <data name='sqn' format='txt' label="${tool.name} on ${on_string}: NCBI Sequin genome" from_work_dir="out.sqn">
+            <filter>outputs and 'sqn' in outputs</filter>
+        </data>
+        <data name='fa_scaffolds' format='fasta' label="${tool.name} on ${on_string}: scaffolds sequences" from_work_dir="out.scaffolds.fa">
+            <filter>outputs and 'scaffolds_fa' in outputs</filter>
+        </data>
+        <data name='fa_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa">
+            <filter>outputs and 'proteins_fa' in outputs</filter>
+        </data>
+        <data name='fa_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa">
+            <filter>outputs and 'mrna_transcripts_fa' in outputs</filter>
+        </data>
+        <data name='fa_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa">
+            <filter>outputs and 'cds_transcripts_fa' in outputs</filter>
+        </data>
+        <data name='gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3">
+            <filter>outputs and 'gff3' in outputs</filter>
+        </data>
+        <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt">
+            <filter>outputs and 'discrepency' in outputs</filter>
+        </data>
+        <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json">
+            <filter>outputs and 'stats' in outputs</filter>
+        </data>
+        <data name='must_fix' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product must-fix" from_work_dir="output/annotate_results/Gene2Products.must-fix.txt">
+            <filter>outputs and 'must_fix' in outputs</filter>
+        </data>
+        <data name='need_curating' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product need-curating" from_work_dir="output/annotate_results/Gene2Products.need-curating.txt">
+            <filter>outputs and 'need_curating' in outputs</filter>
+        </data>
+        <data name='new_names_passed' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product new-names-passed" from_work_dir="output/annotate_results/Gene2Products.new-names-passed.txt">
+            <filter>outputs and 'new_names_passed' in outputs</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="16"><!-- GenBank input; all 16 output keys are requested, so exactly 16 datasets are expected -->
+            <conditional name="input">
+                <param name="input_type" value="gbk" />
+                <param name="genbank" value="predict_augustus/Genus_species.gbk" />
+            </conditional>
+            <param name="database" value="2021-07-20-120000" />
+            <param name="busco_db" value="insecta" />
+            <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" />
+            <output name="gbk">
+                <assert_contents>
+                    <has_text text="DEFINITION  Genus species." />
+                </assert_contents>
+            </output>
+            <output name="annot">
+                <assert_contents>
+                    <has_text text="EC_number" />
+                    <has_text text="EOG090W0T3K" />
+                </assert_contents>
+            </output>
+            <output name="contigs_fsa">
+                <assert_contents>
+                    <has_text text=">contig_1" />
+                </assert_contents>
+            </output>
+            <output name="agp">
+                <assert_contents>
+                    <has_text text="contig_1" />
+                </assert_contents>
+            </output>
+            <output name="tbl">
+                <assert_contents>
+                    <has_text text="locus_tag" />
+                </assert_contents>
+            </output>
+            <output name="sqn">
+                <assert_contents>
+                    <has_text text="Seq-submit" />
+                </assert_contents>
+            </output>
+            <output name="fa_scaffolds">
+                <assert_contents>
+                    <has_text text=">sample" />
+                </assert_contents>
+            </output>
+            <output name="fa_proteins">
+                <assert_contents>
+                    <has_text text=">FUN_000001-T1 FUN_000001" />
+                </assert_contents>
+            </output>
+            <output name="fa_transcripts_mrna">
+                <assert_contents>
+                    <has_text text=">FUN_000001-T1 FUN_000001" />
+                </assert_contents>
+            </output>
+            <output name="fa_transcripts_cds">
+                <assert_contents>
+                    <has_text text=">FUN_000001-T1 FUN_000001" />
+                </assert_contents>
+            </output>
+            <output name="gff3">
+                <assert_contents>
+                    <has_text text="ID=FUN_000001;" />
+                </assert_contents>
+            </output>
+            <output name="tbl2asn_report">
+                <assert_contents>
+                    <has_text text="Discrepancy Report Results" />
+                </assert_contents>
+            </output>
+            <output name="stats">
+                <assert_contents>
+                    <has_text text="avg_gene_length" />
+                </assert_contents>
+            </output>
+            <output name="must_fix">
+                <assert_contents>
+                    <has_text text="tbl2asn Error" />
+                </assert_contents>
+            </output>
+            <output name="need_curating">
+                <assert_contents>
+                    <has_text text="Original Description" />
+                </assert_contents>
+            </output>
+            <output name="new_names_passed">
+                <assert_contents>
+                    <has_text text="Passed Description" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="16"><!-- GFF3 + FASTA input; all 16 output keys are requested, so exactly 16 datasets are expected -->
+            <conditional name="input">
+                <param name="input_type" value="gff" />
+                <param name="gff" value="predict_augustus/Genus_species.gff3" />
+                <param name="fasta" value="genome.fa" />
+                <param name="species" value="Genus species" />
+            </conditional>
+            <param name="database" value="2021-07-20-120000" />
+            <param name="busco_db" value="insecta" />
+            <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" />
+            <output name="gbk">
+                <assert_contents>
+                    <has_text text="DEFINITION  Genus species." />
+                </assert_contents>
+            </output>
+            <output name="annot">
+                <assert_contents>
+                    <has_text text="EC_number" />
+                    <has_text text="EOG090W0T3K" />
+                </assert_contents>
+            </output>
+            <output name="contigs_fsa">
+                <assert_contents>
+                    <has_text text=">contig_1" />
+                </assert_contents>
+            </output>
+            <output name="agp">
+                <assert_contents>
+                    <has_text text="contig_1" />
+                </assert_contents>
+            </output>
+            <output name="tbl">
+                <assert_contents>
+                    <has_text text="locus_tag" />
+                </assert_contents>
+            </output>
+            <output name="sqn">
+                <assert_contents>
+                    <has_text text="Seq-submit" />
+                </assert_contents>
+            </output>
+            <output name="fa_scaffolds">
+                <assert_contents>
+                    <has_text text=">sample" />
+                </assert_contents>
+            </output>
+            <output name="fa_proteins">
+                <assert_contents>
+                    <has_text text=">FUN_000001-T1 FUN_000001" />
+                </assert_contents>
+            </output>
+            <output name="fa_transcripts_mrna">
+                <assert_contents>
+                    <has_text text=">FUN_000001-T1 FUN_000001" />
+                </assert_contents>
+            </output>
+            <output name="fa_transcripts_cds">
+                <assert_contents>
+                    <has_text text=">FUN_000001-T1 FUN_000001" />
+                </assert_contents>
+            </output>
+            <output name="gff3">
+                <assert_contents>
+                    <has_text text="ID=FUN_000001;" />
+                </assert_contents>
+            </output>
+            <output name="tbl2asn_report">
+                <assert_contents>
+                    <has_text text="Discrepancy Report Results" />
+                </assert_contents>
+            </output>
+            <output name="stats">
+                <assert_contents>
+                    <has_text text="avg_gene_length" />
+                </assert_contents>
+            </output>
+            <output name="must_fix">
+                <assert_contents>
+                    <has_text text="tbl2asn Error" />
+                </assert_contents>
+            </output>
+            <output name="need_curating">
+                <assert_contents>
+                    <has_text text="Original Description" />
+                </assert_contents>
+            </output>
+            <output name="new_names_passed">
+                <assert_contents>
+                    <has_text text="Passed Description" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Funannotate_ annotate
+---------------------
+
+Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes).
+
+This script functionally annotates the results from funannotate predict. It pulls
+annotation from PFAM, InterPro, EggNog, UniProtKB, MEROPS, CAZyme, and GO ontology.
+
+.. _Funannotate: http://funannotate.readthedocs.io
+    ]]></help>
+    <expand macro="citations" />
+</tool>