Mercurial > repos > iuc > funannotate_annotate
diff funannotate_annotate.xml @ 0:a5baa4ff168d draft
"planemo upload commit 87560553f1dbbd3e0ab7d7157fa5a7f32f61dca1"
author | iuc |
---|---|
date | Mon, 04 Oct 2021 19:39:38 +0000 |
parents | |
children | aa19eaac7d4b |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/funannotate_annotate.xml Mon Oct 04 19:39:38 2021 +0000 @@ -0,0 +1,413 @@ +<tool id="funannotate_annotate" name="Funannotate functional" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> + <description>annotation</description> + <macros> + <import>macros.xml</import> + </macros> + <requirements> + <expand macro="requirements" /> + </requirements> + <version_command>funannotate check --show-versions</version_command> + <command><![CDATA[ + +#if $uglyTestingHack == "true": + ## funannotate_db contains some hard coded path, need to rewrite one for tests (not in real life when using data manager) + ## Need to copy too as the test_data is read only on CI + cp -r '${database.fields.path}' './hacked_database' && + sed -i.bak 's|/tmp/prout|'`pwd`'/hacked_database|' './hacked_database/trained_species/fly/info.json' && +#end if + +funannotate annotate + +#if $input.input_type == 'gbk' + --genbank '${input.genbank}' +#else + --gff '${input.gff}' + --fasta '${input.fasta}' + --species '${input.species}' +#end if + +--out output + +#if $uglyTestingHack == "true": + --database `pwd`'/hacked_database' +#else + --database '$database.fields.path' +#end if + +#if $sbt: + --sbt '${sbt}' +#end if + +#if $annotations: + --annotations '${annotations}' +#end if + +#if $eggnog: + --eggnog '${eggnog}' +#end if + +#if $antismash: + --antismash '${antismash}' +#end if + +#if $iprscan: + --iprscan '${iprscan}' +#end if + +#if $phobius: + --phobius '${phobius}' +#end if + +--busco_db '${busco_db}' + +--isolate '${isolate}' +--strain '${strain}' + +#if $rename: + --rename '${rename}' +#end if +#if $fix: + --fix '${fix}' +#end if +#if $remove: + --remove '${remove}' +#end if + +--cpus \${GALAXY_SLOTS:-2} + +&& + +mv output/annotate_results/*.gbk out.gbk && +mv output/annotate_results/*.annotations.txt out.annotations.txt && +mv output/annotate_results/*.contigs.fsa out.contigs.fsa && +mv output/annotate_results/*.agp out.agp && +mv output/annotate_results/*.tbl out.tbl && +mv output/annotate_results/*.sqn out.sqn && +mv output/annotate_results/*.scaffolds.fa out.scaffolds.fa && +mv output/annotate_results/*.proteins.fa out.proteins.fa && +mv output/annotate_results/*.mrna-transcripts.fa out.mrna-transcripts.fa && +mv output/annotate_results/*.cds-transcripts.fa out.cds-transcripts.fa && +mv output/annotate_results/*.gff3 out.gff3 && +mv output/annotate_results/*.discrepency.report.txt out.discrepency.report.txt && +mv output/annotate_results/*.stats.json out.stats.json + ]]></command> + <inputs> + + <conditional name="input"> + <param name="input_type" type="select" label="Input format"> + <option value="gbk" selected="True">GenBank (from 'Funannotate predict annotation' tool)</option> + <option value="gff">GFF</option> + </param> + <when value="gbk"> + <param argument="--genbank" type="data" format="genbank" label="Genome annotation in genbank format" help="Output from 'Funannotate predict annotation' tool" /> + </when> + <when value="gff"> + <param argument="--gff" type="data" format="gff3" label="Genome annotation in gff format" /> + <param argument="--fasta" type="data" format="fasta" label="Genome sequence" /> + <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species"> + <validator type="empty_field" /> + </param> + </when> + </conditional> + + + + <param name="database" label="Funannotate database" type="select"> + <options from_data_table="funannotate"> + <column name="value" index="0" /> + <column name="name" index="1" /> + <column name="path" index="3" /> + <filter type="sort_by" column="0" /> + <filter type="static_value" column="2" value="1.0" /> + </options> + </param> + + <param argument="--sbt" type="data" format="sbt" optional="true" label="NCBI submission template file" help="Create it on https://submit.ncbi.nlm.nih.gov/genbank/template/submission/ (or leave empty to use a default one, not suitable for submission at NCBI)" /> + + <param argument="--eggnog" type="data" format="tabular" optional="true" label="Eggnog-mapper annotations file" help="'annotations' output from 'eggNOG Mapper' tool" /> + <param argument="--antismash" type="data" format="genbank" optional="true" label="antiSMASH secondary metabolism results" help="Genbank output from 'Antismash' tool" /> + <param argument="--iprscan" type="data" format="xml" optional="true" label="InterProScan5 XML file" help="XML output from InterProScan" /> + <param argument="--phobius" type="data" format="tabular" optional="true" label="Phobius pre-computed results" /> + + <param argument="--busco_db" type="select" label="BUSCO models"> + <expand macro="busco_species"/> + </param> + + <param argument="--annotations" type="data" format="tabular" optional="true" label="Custom annotations" help="3 column tsv file" /> + + <param argument="--isolate" type="text" label="Isolate name" help="If relevant (e.g. Af293)" /> + <param argument="--strain" type="text" label="Strain name" help="If relevant (e.g. FGSCA4)" /> + + <param argument="--rename" type="text" label="locus_tag from NCBI to rename GFF gene models with" /> + <param argument="--fix" type="data" format="tabular" optional="true" label="Gene/Product names fixed" help="TSV: GeneID Name Product" /> + <param argument="--remove" type="data" format="tabular" optional="true" label="Gene/Product names to remove" help="TSV: Gene Product" /> + + <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated"> + <option value="gbk" selected="true">Annotated genome (genbank)</option> + <option value="annotations">TSV file of all annotations added to genome. (i.e. import into excel)</option> + <option value="contigs_fsa">Multi-fasta file of contigs, split at gaps (use for NCBI submission)</option> + <option value="agp">AGP file; showing linkage/location of contigs (use for NCBI submission)</option> + <option value="tbl">NCBI tbl annotation file (use for NCBI submission)</option> + <option value="sqn">NCBI Sequin genome file (use for NCBI submission)</option> + <option value="scaffolds_fa">Multi-fasta file of scaffolds</option> + <option value="proteins_fa">Multi-fasta file of protein coding genes</option> + <option value="mrna_transcripts_fa">Multi-fasta file of transcripts (mRNA)</option> + <option value="cds_transcripts_fa">Multi-fasta file of transcripts (CDS)</option> + <option value="gff3">Annotation in GFF3 format</option> + <option value="discrepency">tbl2asn summary report of annotated genome</option> + <option value="stats">Statistics</option> + <option value="must_fix">TSV file of Gene Name/Product deflines that failed to pass tbl2asn checks and must be fixed</option> + <option value="need_curating">TSV file of Gene Name/Product defines that need to be curated</option> + <option value="new_names_passed">TSV file of Gene Name/Product deflines that passed tbl2asn but are not in Gene2Products database.</option> + </param> + + <!-- Need this to change path in the test funannotate_db --> + <param type="hidden" name="uglyTestingHack" value="" /> + </inputs> + <outputs> + <data name='gbk' format='genbank' label="${tool.name} on ${on_string}: annotated genome (genbank)" from_work_dir="out.gbk"> + <filter>outputs and 'gbk' in outputs</filter> + </data> + <data name='annot' format='tabular' label="${tool.name} on ${on_string}: all annotations" from_work_dir="out.annotations.txt"> + <filter>outputs and 'annotations' in outputs</filter> + </data> + <data name='contigs_fsa' format='fasta' label="${tool.name} on ${on_string}: contigs fasta, split at gaps" from_work_dir="out.contigs.fsa"> + <filter>outputs and 'contigs_fsa' in outputs</filter> + </data> + <data name='agp' format='tabular' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.agp"> + <filter>outputs and 'agp' in outputs</filter> + </data> + <data name='tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl"> + <filter>outputs and 'tbl' in outputs</filter> + </data> + <data name='sqn' format='txt' label="${tool.name} on ${on_string}: NCBI Sequin genome" from_work_dir="out.sqn"> + <filter>outputs and 'sqn' in outputs</filter> + </data> + <data name='fa_scaffolds' format='fasta' label="${tool.name} on ${on_string}: scaffolds sequences" from_work_dir="out.scaffolds.fa"> + <filter>outputs and 'scaffolds_fa' in outputs</filter> + </data> + <data name='fa_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa"> + <filter>outputs and 'proteins_fa' in outputs</filter> + </data> + <data name='fa_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa"> + <filter>outputs and 'mrna_transcripts_fa' in outputs</filter> + </data> + <data name='fa_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa"> + <filter>outputs and 'cds_transcripts_fa' in outputs</filter> + </data> + <data name='gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3"> + <filter>outputs and 'gff3' in outputs</filter> + </data> + <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt"> + <filter>outputs and 'discrepency' in outputs</filter> + </data> + <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json"> + <filter>outputs and 'gbk' in outputs</filter> + </data> + <data name='must_fix' format='json' label="${tool.name} on ${on_string}: Gene Name/Product must-fix" from_work_dir="output/annotate_results/Gene2Products.must-fix.txt"> + <filter>outputs and 'must_fix' in outputs</filter> + </data> + <data name='need_curating' format='json' label="${tool.name} on ${on_string}: Gene Name/Product need-curating" from_work_dir="output/annotate_results/Gene2Products.need-curating.txt"> + <filter>outputs and 'need_curating' in outputs</filter> + </data> + <data name='new_names_passed' format='json' label="${tool.name} on ${on_string}: Gene Name/Product new-names-passed" from_work_dir="output/annotate_results/Gene2Products.new-names-passed.txt"> + <filter>outputs and 'new_names_passed' in outputs</filter> + </data> + </outputs> + <tests> + <test> + <conditional name="input"> + <param name="input_type" value="gbk" /> + <param name="genbank" value="predict_augustus/Genus_species.gbk" /> + </conditional> + <param name="database" value="2021-07-20-120000" /> + <param name="busco_db" value="insecta" /> + <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" /> + <output name="gbk"> + <assert_contents> + <has_text text="DEFINITION Genus species." /> + </assert_contents> + </output> + <output name="annot"> + <assert_contents> + <has_text text="EC_number" /> + <has_text text="EOG090W0T3K" /> + </assert_contents> + </output> + <output name="contigs_fsa"> + <assert_contents> + <has_text text=">contig_1" /> + </assert_contents> + </output> + <output name="agp"> + <assert_contents> + <has_text text="contig_1" /> + </assert_contents> + </output> + <output name="tbl"> + <assert_contents> + <has_text text="locus_tag" /> + </assert_contents> + </output> + <output name="sqn"> + <assert_contents> + <has_text text="Seq-submit" /> + </assert_contents> + </output> + <output name="fa_scaffolds"> + <assert_contents> + <has_text text=">sample" /> + </assert_contents> + </output> + <output name="fa_proteins"> + <assert_contents> + <has_text text=">FUN_000001-T1 FUN_000001" /> + </assert_contents> + </output> + <output name="fa_transcripts_mrna"> + <assert_contents> + <has_text text=">FUN_000001-T1 FUN_000001" /> + </assert_contents> + </output> + <output name="fa_transcripts_cds"> + <assert_contents> + <has_text text=">FUN_000001-T1 FUN_000001" /> + </assert_contents> + </output> + <output name="gff3"> + <assert_contents> + <has_text text="ID=FUN_000001;" /> + </assert_contents> + </output> + <output name="tbl2asn_report"> + <assert_contents> + <has_text text="Discrepancy Report Results" /> + </assert_contents> + </output> + <output name="stats"> + <assert_contents> + <has_text text="avg_gene_length" /> + </assert_contents> + </output> + <output name="must_fix"> + <assert_contents> + <has_text text="tbl2asn Error" /> + </assert_contents> + </output> + <output name="need_curating"> + <assert_contents> + <has_text text="Original Description" /> + </assert_contents> + </output> + <output name="new_names_passed"> + <assert_contents> + <has_text text="Passed Description" /> + </assert_contents> + </output> + </test> + <test> + <conditional name="input"> + <param name="input_type" value="gff" /> + <param name="gff" value="predict_augustus/Genus_species.gff3" /> + <param name="fasta" value="genome.fa" /> + <param name="species" value="Genus species" /> + </conditional> + <param name="database" value="2021-07-20-120000" /> + <param name="busco_db" value="insecta" /> + <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" /> + <output name="gbk"> + <assert_contents> + <has_text text="DEFINITION Genus species." /> + </assert_contents> + </output> + <output name="annot"> + <assert_contents> + <has_text text="EC_number" /> + <has_text text="EOG090W0T3K" /> + </assert_contents> + </output> + <output name="contigs_fsa"> + <assert_contents> + <has_text text=">contig_1" /> + </assert_contents> + </output> + <output name="agp"> + <assert_contents> + <has_text text="contig_1" /> + </assert_contents> + </output> + <output name="tbl"> + <assert_contents> + <has_text text="locus_tag" /> + </assert_contents> + </output> + <output name="sqn"> + <assert_contents> + <has_text text="Seq-submit" /> + </assert_contents> + </output> + <output name="fa_scaffolds"> + <assert_contents> + <has_text text=">sample" /> + </assert_contents> + </output> + <output name="fa_proteins"> + <assert_contents> + <has_text text=">FUN_000001-T1 FUN_000001" /> + </assert_contents> + </output> + <output name="fa_transcripts_mrna"> + <assert_contents> + <has_text text=">FUN_000001-T1 FUN_000001" /> + </assert_contents> + </output> + <output name="fa_transcripts_cds"> + <assert_contents> + <has_text text=">FUN_000001-T1 FUN_000001" /> + </assert_contents> + </output> + <output name="gff3"> + <assert_contents> + <has_text text="ID=FUN_000001;" /> + </assert_contents> + </output> + <output name="tbl2asn_report"> + <assert_contents> + <has_text text="Discrepancy Report Results" /> + </assert_contents> + </output> + <output name="stats"> + <assert_contents> + <has_text text="avg_gene_length" /> + </assert_contents> + </output> + <output name="must_fix"> + <assert_contents> + <has_text text="tbl2asn Error" /> + </assert_contents> + </output> + <output name="need_curating"> + <assert_contents> + <has_text text="Original Description" /> + </assert_contents> + </output> + <output name="new_names_passed"> + <assert_contents> + <has_text text="Passed Description" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +Funannotate_ annotate +--------------------- + +Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes). + +This script functionally annotates the results from funannotate predict. It pulls +annotation from PFAM, InterPro, EggNog, UniProtKB, MEROPS, CAZyme, and GO ontology. + +.. _Funannotate: http://funannotate.readthedocs.io + ]]></help> + <expand macro="citations" /> +</tool>