comparison funannotate_annotate.xml @ 0:a5baa4ff168d draft

"planemo upload commit 87560553f1dbbd3e0ab7d7157fa5a7f32f61dca1"
author iuc
date Mon, 04 Oct 2021 19:39:38 +0000
parents
children aa19eaac7d4b
comparison
equal deleted inserted replaced
-1:000000000000 0:a5baa4ff168d
1 <tool id="funannotate_annotate" name="Funannotate functional" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>annotation</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <requirements>
7 <expand macro="requirements" />
8 </requirements>
9 <version_command>funannotate check --show-versions</version_command>
<!-- Cheetah template for the job command line.
     1. When the hidden uglyTestingHack parameter equals "true", the selected database is copied to
        ./hacked_database and the /tmp/prout path inside trained_species/fly/info.json is replaced by
        the working-directory copy (see the inline notes in the template).
     2. "funannotate annotate" is run with either a GenBank input, or a GFF + FASTA + species triple.
        The optional flags guarded by an #if are emitted only when the matching parameter is set;
        busco_db, isolate and strain are always passed.
     3. The result files in output/annotate_results are moved to fixed out.* names, which the outputs
        section picks up through from_work_dir. All steps are chained with the shell AND operator, so
        a failing step (including an mv whose glob matches nothing) stops the chain. -->
10 <command><![CDATA[
11
12 #if $uglyTestingHack == "true":
13 ## funannotate_db contains some hard coded path, need to rewrite one for tests (not in real life when using data manager)
14 ## Need to copy too as the test_data is read only on CI
15 cp -r '${database.fields.path}' './hacked_database' &&
16 sed -i.bak 's|/tmp/prout|'`pwd`'/hacked_database|' './hacked_database/trained_species/fly/info.json' &&
17 #end if
18
19 funannotate annotate
20
21 #if $input.input_type == 'gbk'
22 --genbank '${input.genbank}'
23 #else
24 --gff '${input.gff}'
25 --fasta '${input.fasta}'
26 --species '${input.species}'
27 #end if
28
29 --out output
30
31 #if $uglyTestingHack == "true":
32 --database `pwd`'/hacked_database'
33 #else
34 --database '$database.fields.path'
35 #end if
36
37 #if $sbt:
38 --sbt '${sbt}'
39 #end if
40
41 #if $annotations:
42 --annotations '${annotations}'
43 #end if
44
45 #if $eggnog:
46 --eggnog '${eggnog}'
47 #end if
48
49 #if $antismash:
50 --antismash '${antismash}'
51 #end if
52
53 #if $iprscan:
54 --iprscan '${iprscan}'
55 #end if
56
57 #if $phobius:
58 --phobius '${phobius}'
59 #end if
60
61 --busco_db '${busco_db}'
62
63 --isolate '${isolate}'
64 --strain '${strain}'
65
66 #if $rename:
67 --rename '${rename}'
68 #end if
69 #if $fix:
70 --fix '${fix}'
71 #end if
72 #if $remove:
73 --remove '${remove}'
74 #end if
75
76 --cpus \${GALAXY_SLOTS:-2}
77
78 &&
79
80 mv output/annotate_results/*.gbk out.gbk &&
81 mv output/annotate_results/*.annotations.txt out.annotations.txt &&
82 mv output/annotate_results/*.contigs.fsa out.contigs.fsa &&
83 mv output/annotate_results/*.agp out.agp &&
84 mv output/annotate_results/*.tbl out.tbl &&
85 mv output/annotate_results/*.sqn out.sqn &&
86 mv output/annotate_results/*.scaffolds.fa out.scaffolds.fa &&
87 mv output/annotate_results/*.proteins.fa out.proteins.fa &&
88 mv output/annotate_results/*.mrna-transcripts.fa out.mrna-transcripts.fa &&
89 mv output/annotate_results/*.cds-transcripts.fa out.cds-transcripts.fa &&
90 mv output/annotate_results/*.gff3 out.gff3 &&
91 mv output/annotate_results/*.discrepency.report.txt out.discrepency.report.txt &&
92 mv output/annotate_results/*.stats.json out.stats.json
93 ]]></command>
94 <inputs>
95
<!-- Annotation source: either a GenBank file, or a GFF3 file together with the genome FASTA and a species name. -->
96 <conditional name="input">
97 <param name="input_type" type="select" label="Input format">
98 <option value="gbk" selected="True">GenBank (from 'Funannotate predict annotation' tool)</option>
99 <option value="gff">GFF</option>
100 </param>
101 <when value="gbk">
102 <param argument="--genbank" type="data" format="genbank" label="Genome annotation in genbank format" help="Output from 'Funannotate predict annotation' tool" />
103 </when>
104 <when value="gff">
105 <param argument="--gff" type="data" format="gff3" label="Genome annotation in gff format" />
106 <param argument="--fasta" type="data" format="fasta" label="Genome sequence" />
107 <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species">
108 <validator type="empty_field" />
109 </param>
110 </when>
111 </conditional>
112
113
114
<!-- Funannotate database, listed from the "funannotate" data table; the static_value filter keeps only rows whose column 2 is "1.0". -->
115 <param name="database" label="Funannotate database" type="select">
116 <options from_data_table="funannotate">
117 <column name="value" index="0" />
118 <column name="name" index="1" />
119 <column name="path" index="3" />
120 <filter type="sort_by" column="0" />
121 <filter type="static_value" column="2" value="1.0" />
122 </options>
123 </param>
124
125 <param argument="--sbt" type="data" format="sbt" optional="true" label="NCBI submission template file" help="Create it on https://submit.ncbi.nlm.nih.gov/genbank/template/submission/ (or leave empty to use a default one, not suitable for submission at NCBI)" />
126
<!-- Optional pre-computed annotation results; each one is passed to funannotate only when a dataset is supplied. -->
127 <param argument="--eggnog" type="data" format="tabular" optional="true" label="Eggnog-mapper annotations file" help="'annotations' output from 'eggNOG Mapper' tool" />
128 <param argument="--antismash" type="data" format="genbank" optional="true" label="antiSMASH secondary metabolism results" help="Genbank output from 'Antismash' tool" />
129 <param argument="--iprscan" type="data" format="xml" optional="true" label="InterProScan5 XML file" help="XML output from InterProScan" />
130 <param argument="--phobius" type="data" format="tabular" optional="true" label="Phobius pre-computed results" />
131
132 <param argument="--busco_db" type="select" label="BUSCO models">
133 <expand macro="busco_species"/>
134 </param>
135
136 <param argument="--annotations" type="data" format="tabular" optional="true" label="Custom annotations" help="3 column tsv file" />
137
138 <param argument="--isolate" type="text" label="Isolate name" help="If relevant (e.g. Af293)" />
139 <param argument="--strain" type="text" label="Strain name" help="If relevant (e.g. FGSCA4)" />
140
141 <param argument="--rename" type="text" label="locus_tag from NCBI to rename GFF gene models with" />
142 <param argument="--fix" type="data" format="tabular" optional="true" label="Gene/Product names fixed" help="TSV: GeneID Name Product" />
143 <param argument="--remove" type="data" format="tabular" optional="true" label="Gene/Product names to remove" help="TSV: Gene Product" />
144
<!-- Selects which result datasets are created; each option value is tested by a filter in the outputs section. -->
145 <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated">
146 <option value="gbk" selected="true">Annotated genome (genbank)</option>
147 <option value="annotations">TSV file of all annotations added to genome. (i.e. import into excel)</option>
148 <option value="contigs_fsa">Multi-fasta file of contigs, split at gaps (use for NCBI submission)</option>
149 <option value="agp">AGP file; showing linkage/location of contigs (use for NCBI submission)</option>
150 <option value="tbl">NCBI tbl annotation file (use for NCBI submission)</option>
151 <option value="sqn">NCBI Sequin genome file (use for NCBI submission)</option>
152 <option value="scaffolds_fa">Multi-fasta file of scaffolds</option>
153 <option value="proteins_fa">Multi-fasta file of protein coding genes</option>
154 <option value="mrna_transcripts_fa">Multi-fasta file of transcripts (mRNA)</option>
155 <option value="cds_transcripts_fa">Multi-fasta file of transcripts (CDS)</option>
156 <option value="gff3">Annotation in GFF3 format</option>
157 <option value="discrepency">tbl2asn summary report of annotated genome</option>
158 <option value="stats">Statistics</option>
159 <option value="must_fix">TSV file of Gene Name/Product deflines that failed to pass tbl2asn checks and must be fixed</option>
160 <option value="need_curating">TSV file of Gene Name/Product deflines that need to be curated</option>
161 <option value="new_names_passed">TSV file of Gene Name/Product deflines that passed tbl2asn but are not in Gene2Products database.</option>
162 </param>
163
164 <!-- Need this to change path in the test funannotate_db -->
165 <param type="hidden" name="uglyTestingHack" value="" />
166 </inputs>
167 <outputs>
<!-- Every dataset below is created only when its key is selected in the "outputs" parameter.
     The out.* files are the results renamed at the end of the command; the three Gene2Products
     reports are read directly from output/annotate_results. -->
168 <data name='gbk' format='genbank' label="${tool.name} on ${on_string}: annotated genome (genbank)" from_work_dir="out.gbk">
169 <filter>outputs and 'gbk' in outputs</filter>
170 </data>
171 <data name='annot' format='tabular' label="${tool.name} on ${on_string}: all annotations" from_work_dir="out.annotations.txt">
172 <filter>outputs and 'annotations' in outputs</filter>
173 </data>
174 <data name='contigs_fsa' format='fasta' label="${tool.name} on ${on_string}: contigs fasta, split at gaps" from_work_dir="out.contigs.fsa">
175 <filter>outputs and 'contigs_fsa' in outputs</filter>
176 </data>
<!-- Label describes the AGP file; it previously duplicated the tbl label. -->
177 <data name='agp' format='tabular' label="${tool.name} on ${on_string}: AGP file (linkage/location of contigs)" from_work_dir="out.agp">
178 <filter>outputs and 'agp' in outputs</filter>
179 </data>
180 <data name='tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl">
181 <filter>outputs and 'tbl' in outputs</filter>
182 </data>
183 <data name='sqn' format='txt' label="${tool.name} on ${on_string}: NCBI Sequin genome" from_work_dir="out.sqn">
184 <filter>outputs and 'sqn' in outputs</filter>
185 </data>
186 <data name='fa_scaffolds' format='fasta' label="${tool.name} on ${on_string}: scaffolds sequences" from_work_dir="out.scaffolds.fa">
187 <filter>outputs and 'scaffolds_fa' in outputs</filter>
188 </data>
189 <data name='fa_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa">
190 <filter>outputs and 'proteins_fa' in outputs</filter>
191 </data>
192 <data name='fa_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa">
193 <filter>outputs and 'mrna_transcripts_fa' in outputs</filter>
194 </data>
195 <data name='fa_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa">
196 <filter>outputs and 'cds_transcripts_fa' in outputs</filter>
197 </data>
198 <data name='gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3">
199 <filter>outputs and 'gff3' in outputs</filter>
200 </data>
201 <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt">
202 <filter>outputs and 'discrepency' in outputs</filter>
203 </data>
<!-- Filter on the "stats" option; it previously tested 'gbk', so the Statistics choice was ignored. -->
204 <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json">
205 <filter>outputs and 'stats' in outputs</filter>
206 </data>
<!-- The Gene2Products reports are described as TSV files in the "outputs" options, hence tabular rather than json. -->
207 <data name='must_fix' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product must-fix" from_work_dir="output/annotate_results/Gene2Products.must-fix.txt">
208 <filter>outputs and 'must_fix' in outputs</filter>
209 </data>
210 <data name='need_curating' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product need-curating" from_work_dir="output/annotate_results/Gene2Products.need-curating.txt">
211 <filter>outputs and 'need_curating' in outputs</filter>
212 </data>
213 <data name='new_names_passed' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product new-names-passed" from_work_dir="output/annotate_results/Gene2Products.new-names-passed.txt">
214 <filter>outputs and 'new_names_passed' in outputs</filter>
215 </data>
216 </outputs>
217 <tests>
<!-- Test 1: GenBank input with all sixteen output keys requested; each resulting dataset is checked for a characteristic string. -->
218 <test>
219 <conditional name="input">
220 <param name="input_type" value="gbk" />
221 <param name="genbank" value="predict_augustus/Genus_species.gbk" />
222 </conditional>
223 <param name="database" value="2021-07-20-120000" />
224 <param name="busco_db" value="insecta" />
225 <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" />
226 <output name="gbk">
227 <assert_contents>
228 <has_text text="DEFINITION Genus species." />
229 </assert_contents>
230 </output>
231 <output name="annot">
232 <assert_contents>
233 <has_text text="EC_number" />
234 <has_text text="EOG090W0T3K" />
235 </assert_contents>
236 </output>
237 <output name="contigs_fsa">
238 <assert_contents>
239 <has_text text=">contig_1" />
240 </assert_contents>
241 </output>
242 <output name="agp">
243 <assert_contents>
244 <has_text text="contig_1" />
245 </assert_contents>
246 </output>
247 <output name="tbl">
248 <assert_contents>
249 <has_text text="locus_tag" />
250 </assert_contents>
251 </output>
252 <output name="sqn">
253 <assert_contents>
254 <has_text text="Seq-submit" />
255 </assert_contents>
256 </output>
257 <output name="fa_scaffolds">
258 <assert_contents>
259 <has_text text=">sample" />
260 </assert_contents>
261 </output>
262 <output name="fa_proteins">
263 <assert_contents>
264 <has_text text=">FUN_000001-T1 FUN_000001" />
265 </assert_contents>
266 </output>
267 <output name="fa_transcripts_mrna">
268 <assert_contents>
269 <has_text text=">FUN_000001-T1 FUN_000001" />
270 </assert_contents>
271 </output>
272 <output name="fa_transcripts_cds">
273 <assert_contents>
274 <has_text text=">FUN_000001-T1 FUN_000001" />
275 </assert_contents>
276 </output>
277 <output name="gff3">
278 <assert_contents>
279 <has_text text="ID=FUN_000001;" />
280 </assert_contents>
281 </output>
282 <output name="tbl2asn_report">
283 <assert_contents>
284 <has_text text="Discrepancy Report Results" />
285 </assert_contents>
286 </output>
287 <output name="stats">
288 <assert_contents>
289 <has_text text="avg_gene_length" />
290 </assert_contents>
291 </output>
292 <output name="must_fix">
293 <assert_contents>
294 <has_text text="tbl2asn Error" />
295 </assert_contents>
296 </output>
297 <output name="need_curating">
298 <assert_contents>
299 <has_text text="Original Description" />
300 </assert_contents>
301 </output>
302 <output name="new_names_passed">
303 <assert_contents>
304 <has_text text="Passed Description" />
305 </assert_contents>
306 </output>
307 </test>
<!-- Test 2: same output selection and assertions as test 1, but starting from GFF3 + genome FASTA + species name. -->
308 <test>
309 <conditional name="input">
310 <param name="input_type" value="gff" />
311 <param name="gff" value="predict_augustus/Genus_species.gff3" />
312 <param name="fasta" value="genome.fa" />
313 <param name="species" value="Genus species" />
314 </conditional>
315 <param name="database" value="2021-07-20-120000" />
316 <param name="busco_db" value="insecta" />
317 <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" />
318 <output name="gbk">
319 <assert_contents>
320 <has_text text="DEFINITION Genus species." />
321 </assert_contents>
322 </output>
323 <output name="annot">
324 <assert_contents>
325 <has_text text="EC_number" />
326 <has_text text="EOG090W0T3K" />
327 </assert_contents>
328 </output>
329 <output name="contigs_fsa">
330 <assert_contents>
331 <has_text text=">contig_1" />
332 </assert_contents>
333 </output>
334 <output name="agp">
335 <assert_contents>
336 <has_text text="contig_1" />
337 </assert_contents>
338 </output>
339 <output name="tbl">
340 <assert_contents>
341 <has_text text="locus_tag" />
342 </assert_contents>
343 </output>
344 <output name="sqn">
345 <assert_contents>
346 <has_text text="Seq-submit" />
347 </assert_contents>
348 </output>
349 <output name="fa_scaffolds">
350 <assert_contents>
351 <has_text text=">sample" />
352 </assert_contents>
353 </output>
354 <output name="fa_proteins">
355 <assert_contents>
356 <has_text text=">FUN_000001-T1 FUN_000001" />
357 </assert_contents>
358 </output>
359 <output name="fa_transcripts_mrna">
360 <assert_contents>
361 <has_text text=">FUN_000001-T1 FUN_000001" />
362 </assert_contents>
363 </output>
364 <output name="fa_transcripts_cds">
365 <assert_contents>
366 <has_text text=">FUN_000001-T1 FUN_000001" />
367 </assert_contents>
368 </output>
369 <output name="gff3">
370 <assert_contents>
371 <has_text text="ID=FUN_000001;" />
372 </assert_contents>
373 </output>
374 <output name="tbl2asn_report">
375 <assert_contents>
376 <has_text text="Discrepancy Report Results" />
377 </assert_contents>
378 </output>
379 <output name="stats">
380 <assert_contents>
381 <has_text text="avg_gene_length" />
382 </assert_contents>
383 </output>
384 <output name="must_fix">
385 <assert_contents>
386 <has_text text="tbl2asn Error" />
387 </assert_contents>
388 </output>
389 <output name="need_curating">
390 <assert_contents>
391 <has_text text="Original Description" />
392 </assert_contents>
393 </output>
394 <output name="new_names_passed">
395 <assert_contents>
396 <has_text text="Passed Description" />
397 </assert_contents>
398 </output>
399 </test>
400 </tests>
401 <help><![CDATA[
402 Funannotate_ annotate
403 ---------------------
404
405 Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes).
406
407 This tool functionally annotates the results from funannotate predict. It pulls
408 annotations from PFAM, InterPro, eggNOG, UniProtKB, MEROPS, CAZyme, and the Gene Ontology (GO).
409
410 .. _Funannotate: http://funannotate.readthedocs.io
411 ]]></help>
412 <expand macro="citations" />
413 </tool>