Mercurial > repos > iuc > funannotate_annotate
comparison funannotate_annotate.xml @ 0:a5baa4ff168d draft
"planemo upload commit 87560553f1dbbd3e0ab7d7157fa5a7f32f61dca1"
| author | iuc | 
|---|---|
| date | Mon, 04 Oct 2021 19:39:38 +0000 | 
| parents | |
| children | aa19eaac7d4b | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:a5baa4ff168d | 
|---|---|
| 1 <tool id="funannotate_annotate" name="Funannotate functional" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | |
| 2 <description>annotation</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <requirements> | |
| 7 <expand macro="requirements" /> | |
| 8 </requirements> | |
| 9 <version_command>funannotate check --show-versions</version_command> | |
| 10 <command><![CDATA[ | |
| 11 | |
| 12 #if $uglyTestingHack == "true": | |
| 13 ## funannotate_db contains some hard coded path, need to rewrite one for tests (not in real life when using data manager) | |
| 14 ## Need to copy too as the test_data is read only on CI. The `pwd` substitution below is double-quoted so a working directory containing whitespace does not split the sed expression into several arguments. | |
| 15 cp -r '${database.fields.path}' './hacked_database' && | |
| 16 sed -i.bak 's|/tmp/prout|'"`pwd`"'/hacked_database|' './hacked_database/trained_species/fly/info.json' && | |
| 17 #end if | |
| 18 | |
| 19 funannotate annotate | |
| 20 | |
| 21 #if $input.input_type == 'gbk' | |
| 22 --genbank '${input.genbank}' | |
| 23 #else | |
| 24 --gff '${input.gff}' | |
| 25 --fasta '${input.fasta}' | |
| 26 --species '${input.species}' | |
| 27 #end if | |
| 28 | |
| 29 --out output | |
| 30 | |
| 31 #if $uglyTestingHack == "true": | |
| 32 --database "`pwd`"'/hacked_database' | |
| 33 #else | |
| 34 --database '$database.fields.path' | |
| 35 #end if | |
| 36 | |
| 37 #if $sbt: | |
| 38 --sbt '${sbt}' | |
| 39 #end if | |
| 40 | |
| 41 #if $annotations: | |
| 42 --annotations '${annotations}' | |
| 43 #end if | |
| 44 | |
| 45 #if $eggnog: | |
| 46 --eggnog '${eggnog}' | |
| 47 #end if | |
| 48 | |
| 49 #if $antismash: | |
| 50 --antismash '${antismash}' | |
| 51 #end if | |
| 52 | |
| 53 #if $iprscan: | |
| 54 --iprscan '${iprscan}' | |
| 55 #end if | |
| 56 | |
| 57 #if $phobius: | |
| 58 --phobius '${phobius}' | |
| 59 #end if | |
| 60 | |
| 61 --busco_db '${busco_db}' | |
| 62 | |
| 63 --isolate '${isolate}' | |
| 64 --strain '${strain}' | |
| 65 | |
| 66 #if $rename: | |
| 67 --rename '${rename}' | |
| 68 #end if | |
| 69 #if $fix: | |
| 70 --fix '${fix}' | |
| 71 #end if | |
| 72 #if $remove: | |
| 73 --remove '${remove}' | |
| 74 #end if | |
| 75 | |
| 76 --cpus \${GALAXY_SLOTS:-2} | |
| 77 | |
| 78 && | |
| 79 | |
| 80 mv output/annotate_results/*.gbk out.gbk && | |
| 81 mv output/annotate_results/*.annotations.txt out.annotations.txt && | |
| 82 mv output/annotate_results/*.contigs.fsa out.contigs.fsa && | |
| 83 mv output/annotate_results/*.agp out.agp && | |
| 84 mv output/annotate_results/*.tbl out.tbl && | |
| 85 mv output/annotate_results/*.sqn out.sqn && | |
| 86 mv output/annotate_results/*.scaffolds.fa out.scaffolds.fa && | |
| 87 mv output/annotate_results/*.proteins.fa out.proteins.fa && | |
| 88 mv output/annotate_results/*.mrna-transcripts.fa out.mrna-transcripts.fa && | |
| 89 mv output/annotate_results/*.cds-transcripts.fa out.cds-transcripts.fa && | |
| 90 mv output/annotate_results/*.gff3 out.gff3 && | |
| 91 mv output/annotate_results/*.discrepency.report.txt out.discrepency.report.txt && | |
| 92 mv output/annotate_results/*.stats.json out.stats.json | |
| 93 ]]></command> | |
| 94 <inputs> | |
| 95 | |
| 96 <conditional name="input"> | |
| 97 <param name="input_type" type="select" label="Input format"> | |
| 98 <option value="gbk" selected="True">GenBank (from 'Funannotate predict annotation' tool)</option> | |
| 99 <option value="gff">GFF</option> | |
| 100 </param> | |
| 101 <when value="gbk"> | |
| 102 <param argument="--genbank" type="data" format="genbank" label="Genome annotation in genbank format" help="Output from 'Funannotate predict annotation' tool" /> | |
| 103 </when> | |
| 104 <when value="gff"> | |
| 105 <param argument="--gff" type="data" format="gff3" label="Genome annotation in gff format" /> | |
| 106 <param argument="--fasta" type="data" format="fasta" label="Genome sequence" /> | |
| 107 <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species"> | |
| 108 <validator type="empty_field" /> | |
| 109 </param> | |
| 110 </when> | |
| 111 </conditional> | |
| 112 | |
| 113 | |
| 114 | |
| 115 <param name="database" label="Funannotate database" type="select"> <!-- options are read from the "funannotate" data table; the command template uses the selected row's "path" field --> | |
| 116 <options from_data_table="funannotate"> | |
| 117 <column name="value" index="0" /> | |
| 118 <column name="name" index="1" /> | |
| 119 <column name="path" index="3" /> | |
| 120 <filter type="sort_by" column="0" /> | |
| 121 <filter type="static_value" column="2" value="1.0" /> <!-- keeps only rows whose column 2 equals "1.0" (presumably a database format version; confirm against the data manager) --> | |
| 122 </options> | |
| 123 </param> | |
| 124 | |
| 125 <param argument="--sbt" type="data" format="sbt" optional="true" label="NCBI submission template file" help="Create it on https://submit.ncbi.nlm.nih.gov/genbank/template/submission/ (or leave empty to use a default one, not suitable for submission at NCBI)" /> | |
| 126 | |
| 127 <param argument="--eggnog" type="data" format="tabular" optional="true" label="Eggnog-mapper annotations file" help="'annotations' output from 'eggNOG Mapper' tool" /> | |
| 128 <param argument="--antismash" type="data" format="genbank" optional="true" label="antiSMASH secondary metabolism results" help="Genbank output from 'Antismash' tool" /> | |
| 129 <param argument="--iprscan" type="data" format="xml" optional="true" label="InterProScan5 XML file" help="XML output from InterProScan" /> | |
| 130 <param argument="--phobius" type="data" format="tabular" optional="true" label="Phobius pre-computed results" /> | |
| 131 | |
| 132 <param argument="--busco_db" type="select" label="BUSCO models"> | |
| 133 <expand macro="busco_species"/> | |
| 134 </param> | |
| 135 | |
| 136 <param argument="--annotations" type="data" format="tabular" optional="true" label="Custom annotations" help="3 column tsv file" /> | |
| 137 | |
| 138 <param argument="--isolate" type="text" label="Isolate name" help="If relevant (e.g. Af293)" /> | |
| 139 <param argument="--strain" type="text" label="Strain name" help="If relevant (e.g. FGSCA4)" /> | |
| 140 | |
| 141 <param argument="--rename" type="text" label="locus_tag from NCBI to rename GFF gene models with" /> | |
| 142 <param argument="--fix" type="data" format="tabular" optional="true" label="Gene/Product names fixed" help="TSV: GeneID Name Product" /> | |
| 143 <param argument="--remove" type="data" format="tabular" optional="true" label="Gene/Product names to remove" help="TSV: Gene Product" /> | |
| 144 | |
| 145 <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated"> <!-- each option value is tested by the <filter> of the matching <data> output --> | |
| 146 <option value="gbk" selected="true">Annotated genome (genbank)</option> | |
| 147 <option value="annotations">TSV file of all annotations added to genome. (i.e. import into excel)</option> | |
| 148 <option value="contigs_fsa">Multi-fasta file of contigs, split at gaps (use for NCBI submission)</option> | |
| 149 <option value="agp">AGP file; showing linkage/location of contigs (use for NCBI submission)</option> | |
| 150 <option value="tbl">NCBI tbl annotation file (use for NCBI submission)</option> | |
| 151 <option value="sqn">NCBI Sequin genome file (use for NCBI submission)</option> | |
| 152 <option value="scaffolds_fa">Multi-fasta file of scaffolds</option> | |
| 153 <option value="proteins_fa">Multi-fasta file of protein coding genes</option> | |
| 154 <option value="mrna_transcripts_fa">Multi-fasta file of transcripts (mRNA)</option> | |
| 155 <option value="cds_transcripts_fa">Multi-fasta file of transcripts (CDS)</option> | |
| 156 <option value="gff3">Annotation in GFF3 format</option> | |
| 157 <option value="discrepency">tbl2asn summary report of annotated genome</option> | |
| 158 <option value="stats">Statistics</option> | |
| 159 <option value="must_fix">TSV file of Gene Name/Product deflines that failed to pass tbl2asn checks and must be fixed</option> | |
| 160 <option value="need_curating">TSV file of Gene Name/Product deflines that need to be curated</option> | |
| 161 <option value="new_names_passed">TSV file of Gene Name/Product deflines that passed tbl2asn but are not in Gene2Products database.</option> | |
| 162 </param> | |
| 163 | |
| 164 <!-- Need this to change path in the test funannotate_db --> | |
| 165 <param type="hidden" name="uglyTestingHack" value="" /> | |
| 166 </inputs> | |
| 167 <outputs> | |
| 168 <data name='gbk' format='genbank' label="${tool.name} on ${on_string}: annotated genome (genbank)" from_work_dir="out.gbk"> | |
| 169 <filter>outputs and 'gbk' in outputs</filter> | |
| 170 </data> | |
| 171 <data name='annot' format='tabular' label="${tool.name} on ${on_string}: all annotations" from_work_dir="out.annotations.txt"> | |
| 172 <filter>outputs and 'annotations' in outputs</filter> | |
| 173 </data> | |
| 174 <data name='contigs_fsa' format='fasta' label="${tool.name} on ${on_string}: contigs fasta, split at gaps" from_work_dir="out.contigs.fsa"> | |
| 175 <filter>outputs and 'contigs_fsa' in outputs</filter> | |
| 176 </data> | |
| 177 <data name='agp' format='tabular' label="${tool.name} on ${on_string}: AGP file (linkage/location of contigs)" from_work_dir="out.agp"> | |
| 178 <filter>outputs and 'agp' in outputs</filter> <!-- only created when "agp" is selected in the "outputs" parameter --> | |
| 179 </data> | |
| 180 <data name='tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl"> | |
| 181 <filter>outputs and 'tbl' in outputs</filter> | |
| 182 </data> | |
| 183 <data name='sqn' format='txt' label="${tool.name} on ${on_string}: NCBI Sequin genome" from_work_dir="out.sqn"> | |
| 184 <filter>outputs and 'sqn' in outputs</filter> | |
| 185 </data> | |
| 186 <data name='fa_scaffolds' format='fasta' label="${tool.name} on ${on_string}: scaffolds sequences" from_work_dir="out.scaffolds.fa"> | |
| 187 <filter>outputs and 'scaffolds_fa' in outputs</filter> | |
| 188 </data> | |
| 189 <data name='fa_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa"> | |
| 190 <filter>outputs and 'proteins_fa' in outputs</filter> | |
| 191 </data> | |
| 192 <data name='fa_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa"> | |
| 193 <filter>outputs and 'mrna_transcripts_fa' in outputs</filter> | |
| 194 </data> | |
| 195 <data name='fa_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa"> | |
| 196 <filter>outputs and 'cds_transcripts_fa' in outputs</filter> | |
| 197 </data> | |
| 198 <data name='gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3"> | |
| 199 <filter>outputs and 'gff3' in outputs</filter> | |
| 200 </data> | |
| 201 <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt"> | |
| 202 <filter>outputs and 'discrepency' in outputs</filter> | |
| 203 </data> | |
| 204 <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json"> | |
| 205 <filter>outputs and 'stats' in outputs</filter> <!-- tied to the "stats" option of the "outputs" parameter, not to "gbk" --> | |
| 206 </data> | |
| 207 <data name='must_fix' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product must-fix" from_work_dir="output/annotate_results/Gene2Products.must-fix.txt"> | |
| 208 <filter>outputs and 'must_fix' in outputs</filter> <!-- collected file is a TSV text file, hence the tabular datatype rather than json --> | |
| 209 </data> | |
| 210 <data name='need_curating' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product need-curating" from_work_dir="output/annotate_results/Gene2Products.need-curating.txt"> | |
| 211 <filter>outputs and 'need_curating' in outputs</filter> <!-- collected file is a TSV text file, hence the tabular datatype rather than json --> | |
| 212 </data> | |
| 213 <data name='new_names_passed' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product new-names-passed" from_work_dir="output/annotate_results/Gene2Products.new-names-passed.txt"> | |
| 214 <filter>outputs and 'new_names_passed' in outputs</filter> <!-- collected file is a TSV text file, hence the tabular datatype rather than json --> | |
| 215 </data> | |
| 216 </outputs> | |
| 217 <tests> | |
| 218 <test> | |
| 219 <conditional name="input"> | |
| 220 <param name="input_type" value="gbk" /> | |
| 221 <param name="genbank" value="predict_augustus/Genus_species.gbk" /> | |
| 222 </conditional> | |
| 223 <param name="database" value="2021-07-20-120000" /> | |
| 224 <param name="busco_db" value="insecta" /> | |
| 225 <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" /> | |
| 226 <output name="gbk"> | |
| 227 <assert_contents> | |
| 228 <has_text text="DEFINITION Genus species." /> | |
| 229 </assert_contents> | |
| 230 </output> | |
| 231 <output name="annot"> | |
| 232 <assert_contents> | |
| 233 <has_text text="EC_number" /> | |
| 234 <has_text text="EOG090W0T3K" /> | |
| 235 </assert_contents> | |
| 236 </output> | |
| 237 <output name="contigs_fsa"> | |
| 238 <assert_contents> | |
| 239 <has_text text=">contig_1" /> | |
| 240 </assert_contents> | |
| 241 </output> | |
| 242 <output name="agp"> | |
| 243 <assert_contents> | |
| 244 <has_text text="contig_1" /> | |
| 245 </assert_contents> | |
| 246 </output> | |
| 247 <output name="tbl"> | |
| 248 <assert_contents> | |
| 249 <has_text text="locus_tag" /> | |
| 250 </assert_contents> | |
| 251 </output> | |
| 252 <output name="sqn"> | |
| 253 <assert_contents> | |
| 254 <has_text text="Seq-submit" /> | |
| 255 </assert_contents> | |
| 256 </output> | |
| 257 <output name="fa_scaffolds"> | |
| 258 <assert_contents> | |
| 259 <has_text text=">sample" /> | |
| 260 </assert_contents> | |
| 261 </output> | |
| 262 <output name="fa_proteins"> | |
| 263 <assert_contents> | |
| 264 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 265 </assert_contents> | |
| 266 </output> | |
| 267 <output name="fa_transcripts_mrna"> | |
| 268 <assert_contents> | |
| 269 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 270 </assert_contents> | |
| 271 </output> | |
| 272 <output name="fa_transcripts_cds"> | |
| 273 <assert_contents> | |
| 274 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 275 </assert_contents> | |
| 276 </output> | |
| 277 <output name="gff3"> | |
| 278 <assert_contents> | |
| 279 <has_text text="ID=FUN_000001;" /> | |
| 280 </assert_contents> | |
| 281 </output> | |
| 282 <output name="tbl2asn_report"> | |
| 283 <assert_contents> | |
| 284 <has_text text="Discrepancy Report Results" /> | |
| 285 </assert_contents> | |
| 286 </output> | |
| 287 <output name="stats"> | |
| 288 <assert_contents> | |
| 289 <has_text text="avg_gene_length" /> | |
| 290 </assert_contents> | |
| 291 </output> | |
| 292 <output name="must_fix"> | |
| 293 <assert_contents> | |
| 294 <has_text text="tbl2asn Error" /> | |
| 295 </assert_contents> | |
| 296 </output> | |
| 297 <output name="need_curating"> | |
| 298 <assert_contents> | |
| 299 <has_text text="Original Description" /> | |
| 300 </assert_contents> | |
| 301 </output> | |
| 302 <output name="new_names_passed"> | |
| 303 <assert_contents> | |
| 304 <has_text text="Passed Description" /> | |
| 305 </assert_contents> | |
| 306 </output> | |
| 307 </test> | |
| 308 <test> | |
| 309 <conditional name="input"> | |
| 310 <param name="input_type" value="gff" /> | |
| 311 <param name="gff" value="predict_augustus/Genus_species.gff3" /> | |
| 312 <param name="fasta" value="genome.fa" /> | |
| 313 <param name="species" value="Genus species" /> | |
| 314 </conditional> | |
| 315 <param name="database" value="2021-07-20-120000" /> | |
| 316 <param name="busco_db" value="insecta" /> | |
| 317 <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" /> | |
| 318 <output name="gbk"> | |
| 319 <assert_contents> | |
| 320 <has_text text="DEFINITION Genus species." /> | |
| 321 </assert_contents> | |
| 322 </output> | |
| 323 <output name="annot"> | |
| 324 <assert_contents> | |
| 325 <has_text text="EC_number" /> | |
| 326 <has_text text="EOG090W0T3K" /> | |
| 327 </assert_contents> | |
| 328 </output> | |
| 329 <output name="contigs_fsa"> | |
| 330 <assert_contents> | |
| 331 <has_text text=">contig_1" /> | |
| 332 </assert_contents> | |
| 333 </output> | |
| 334 <output name="agp"> | |
| 335 <assert_contents> | |
| 336 <has_text text="contig_1" /> | |
| 337 </assert_contents> | |
| 338 </output> | |
| 339 <output name="tbl"> | |
| 340 <assert_contents> | |
| 341 <has_text text="locus_tag" /> | |
| 342 </assert_contents> | |
| 343 </output> | |
| 344 <output name="sqn"> | |
| 345 <assert_contents> | |
| 346 <has_text text="Seq-submit" /> | |
| 347 </assert_contents> | |
| 348 </output> | |
| 349 <output name="fa_scaffolds"> | |
| 350 <assert_contents> | |
| 351 <has_text text=">sample" /> | |
| 352 </assert_contents> | |
| 353 </output> | |
| 354 <output name="fa_proteins"> | |
| 355 <assert_contents> | |
| 356 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 357 </assert_contents> | |
| 358 </output> | |
| 359 <output name="fa_transcripts_mrna"> | |
| 360 <assert_contents> | |
| 361 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 362 </assert_contents> | |
| 363 </output> | |
| 364 <output name="fa_transcripts_cds"> | |
| 365 <assert_contents> | |
| 366 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
| 367 </assert_contents> | |
| 368 </output> | |
| 369 <output name="gff3"> | |
| 370 <assert_contents> | |
| 371 <has_text text="ID=FUN_000001;" /> | |
| 372 </assert_contents> | |
| 373 </output> | |
| 374 <output name="tbl2asn_report"> | |
| 375 <assert_contents> | |
| 376 <has_text text="Discrepancy Report Results" /> | |
| 377 </assert_contents> | |
| 378 </output> | |
| 379 <output name="stats"> | |
| 380 <assert_contents> | |
| 381 <has_text text="avg_gene_length" /> | |
| 382 </assert_contents> | |
| 383 </output> | |
| 384 <output name="must_fix"> | |
| 385 <assert_contents> | |
| 386 <has_text text="tbl2asn Error" /> | |
| 387 </assert_contents> | |
| 388 </output> | |
| 389 <output name="need_curating"> | |
| 390 <assert_contents> | |
| 391 <has_text text="Original Description" /> | |
| 392 </assert_contents> | |
| 393 </output> | |
| 394 <output name="new_names_passed"> | |
| 395 <assert_contents> | |
| 396 <has_text text="Passed Description" /> | |
| 397 </assert_contents> | |
| 398 </output> | |
| 399 </test> | |
| 400 </tests> | |
| 401 <help><![CDATA[ | |
| 402 Funannotate_ annotate | |
| 403 --------------------- | |
| 404 | |
| 405 Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes). | |
| 406 | |
| 407 This script functionally annotates the results from funannotate predict. It pulls | |
| 408 annotation from PFAM, InterPro, EggNog, UniProtKB, MEROPS, CAZyme, and GO ontology. | |
| 409 | |
| 410 .. _Funannotate: http://funannotate.readthedocs.io | |
| 411 ]]></help> | |
| 412 <expand macro="citations" /> | |
| 413 </tool> | 
