Mercurial > repos > iuc > funannotate_annotate
comparison funannotate_annotate.xml @ 0:a5baa4ff168d draft
"planemo upload commit 87560553f1dbbd3e0ab7d7157fa5a7f32f61dca1"
author | iuc |
---|---|
date | Mon, 04 Oct 2021 19:39:38 +0000 |
parents | |
children | aa19eaac7d4b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a5baa4ff168d |
---|---|
1 <tool id="funannotate_annotate" name="Funannotate functional" profile="20.01" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | |
2 <description>annotation</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <requirements> | |
7 <expand macro="requirements" /> | |
8 </requirements> | |
9 <version_command>funannotate check --show-versions</version_command> | |
10 <command><![CDATA[ | |
11 ## Runs 'funannotate annotate' into ./output, then moves the result files to fixed out.* names read by the outputs via from_work_dir. | |
12 #if $uglyTestingHack == "true": | |
13 ## funannotate_db contains some hard coded path, need to rewrite one for tests (not in real life when using data manager) | |
14 ## Need to copy too as the test_data is read only on CI | |
15 cp -r '${database.fields.path}' './hacked_database' && | |
16 sed -i.bak 's|/tmp/prout|'`pwd`'/hacked_database|' './hacked_database/trained_species/fly/info.json' && | |
17 #end if | |
18 | |
19 funannotate annotate | |
20 ## Annotation source: either a GenBank file, or a GFF3 file plus genome FASTA and species name | |
21 #if $input.input_type == 'gbk' | |
22 --genbank '${input.genbank}' | |
23 #else | |
24 --gff '${input.gff}' | |
25 --fasta '${input.fasta}' | |
26 --species '${input.species}' | |
27 #end if | |
28 | |
29 --out output | |
30 ## Point at the rewritten database copy when the testing hack is enabled, otherwise at the data table path | |
31 #if $uglyTestingHack == "true": | |
32 --database `pwd`'/hacked_database' | |
33 #else | |
34 --database '$database.fields.path' | |
35 #end if | |
36 ## Optional datasets below are only passed on the command line when one was supplied | |
37 #if $sbt: | |
38 --sbt '${sbt}' | |
39 #end if | |
40 | |
41 #if $annotations: | |
42 --annotations '${annotations}' | |
43 #end if | |
44 | |
45 #if $eggnog: | |
46 --eggnog '${eggnog}' | |
47 #end if | |
48 | |
49 #if $antismash: | |
50 --antismash '${antismash}' | |
51 #end if | |
52 | |
53 #if $iprscan: | |
54 --iprscan '${iprscan}' | |
55 #end if | |
56 | |
57 #if $phobius: | |
58 --phobius '${phobius}' | |
59 #end if | |
60 | |
61 --busco_db '${busco_db}' | |
62 ## NOTE(review): isolate and strain are always passed, even when left empty, unlike rename below; confirm this is intended | |
63 --isolate '${isolate}' | |
64 --strain '${strain}' | |
65 | |
66 #if $rename: | |
67 --rename '${rename}' | |
68 #end if | |
69 #if $fix: | |
70 --fix '${fix}' | |
71 #end if | |
72 #if $remove: | |
73 --remove '${remove}' | |
74 #end if | |
75 | |
76 --cpus \${GALAXY_SLOTS:-2} | |
77 | |
78 && | |
79 ## Rename each result file to a fixed name; the steps are chained with && so a failing mv fails the job | |
80 mv output/annotate_results/*.gbk out.gbk && | |
81 mv output/annotate_results/*.annotations.txt out.annotations.txt && | |
82 mv output/annotate_results/*.contigs.fsa out.contigs.fsa && | |
83 mv output/annotate_results/*.agp out.agp && | |
84 mv output/annotate_results/*.tbl out.tbl && | |
85 mv output/annotate_results/*.sqn out.sqn && | |
86 mv output/annotate_results/*.scaffolds.fa out.scaffolds.fa && | |
87 mv output/annotate_results/*.proteins.fa out.proteins.fa && | |
88 mv output/annotate_results/*.mrna-transcripts.fa out.mrna-transcripts.fa && | |
89 mv output/annotate_results/*.cds-transcripts.fa out.cds-transcripts.fa && | |
90 mv output/annotate_results/*.gff3 out.gff3 && | |
91 mv output/annotate_results/*.discrepency.report.txt out.discrepency.report.txt && | |
92 mv output/annotate_results/*.stats.json out.stats.json | |
93 ]]></command> | |
94 <inputs> <!-- Tool form: annotation source, funannotate database, optional evidence files, naming options and the list of outputs to keep --> | |
95 | |
96 <conditional name="input"> | |
97 <param name="input_type" type="select" label="Input format"> | |
98 <option value="gbk" selected="true">GenBank (from 'Funannotate predict annotation' tool)</option> | |
99 <option value="gff">GFF</option> | |
100 </param> | |
101 <when value="gbk"> | |
102 <param argument="--genbank" type="data" format="genbank" label="Genome annotation in genbank format" help="Output from 'Funannotate predict annotation' tool" /> | |
103 </when> | |
104 <when value="gff"> | |
105 <param argument="--gff" type="data" format="gff3" label="Genome annotation in gff format" /> | |
106 <param argument="--fasta" type="data" format="fasta" label="Genome sequence" /> | |
107 <param argument="--species" type="text" optional="false" label="Name of the species to annotate" help="e.g. Genus species"> | |
108 <validator type="empty_field" /> | |
109 </param> | |
110 </when> | |
111 </conditional> | |
112 | |
113 | |
114 | |
115 <param name="database" label="Funannotate database" type="select"> | |
116 <options from_data_table="funannotate"> | |
117 <column name="value" index="0" /> | |
118 <column name="name" index="1" /> | |
119 <column name="path" index="3" /> | |
120 <filter type="sort_by" column="0" /> | |
121 <filter type="static_value" column="2" value="1.0" /> | |
122 </options> | |
123 </param> | |
124 | |
125 <param argument="--sbt" type="data" format="sbt" optional="true" label="NCBI submission template file" help="Create it on https://submit.ncbi.nlm.nih.gov/genbank/template/submission/ (or leave empty to use a default one, not suitable for submission at NCBI)" /> | |
126 | |
127 <param argument="--eggnog" type="data" format="tabular" optional="true" label="Eggnog-mapper annotations file" help="'annotations' output from 'eggNOG Mapper' tool" /> | |
128 <param argument="--antismash" type="data" format="genbank" optional="true" label="antiSMASH secondary metabolism results" help="Genbank output from 'Antismash' tool" /> | |
129 <param argument="--iprscan" type="data" format="xml" optional="true" label="InterProScan5 XML file" help="XML output from InterProScan" /> | |
130 <param argument="--phobius" type="data" format="tabular" optional="true" label="Phobius pre-computed results" /> | |
131 | |
132 <param argument="--busco_db" type="select" label="BUSCO models"> | |
133 <expand macro="busco_species"/> | |
134 </param> | |
135 | |
136 <param argument="--annotations" type="data" format="tabular" optional="true" label="Custom annotations" help="3 column tsv file" /> | |
137 | |
138 <param argument="--isolate" type="text" label="Isolate name" help="If relevant (e.g. Af293)" /> | |
139 <param argument="--strain" type="text" label="Strain name" help="If relevant (e.g. FGSCA4)" /> | |
140 | |
141 <param argument="--rename" type="text" label="locus_tag from NCBI to rename GFF gene models with" /> | |
142 <param argument="--fix" type="data" format="tabular" optional="true" label="Gene/Product names fixed" help="TSV: GeneID Name Product" /> | |
143 <param argument="--remove" type="data" format="tabular" optional="true" label="Gene/Product names to remove" help="TSV: Gene Product" /> | |
144 | |
145 <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated"> <!-- option values are the keys tested by the filters in the outputs section --> | |
146 <option value="gbk" selected="true">Annotated genome (genbank)</option> | |
147 <option value="annotations">TSV file of all annotations added to genome. (i.e. import into excel)</option> | |
148 <option value="contigs_fsa">Multi-fasta file of contigs, split at gaps (use for NCBI submission)</option> | |
149 <option value="agp">AGP file; showing linkage/location of contigs (use for NCBI submission)</option> | |
150 <option value="tbl">NCBI tbl annotation file (use for NCBI submission)</option> | |
151 <option value="sqn">NCBI Sequin genome file (use for NCBI submission)</option> | |
152 <option value="scaffolds_fa">Multi-fasta file of scaffolds</option> | |
153 <option value="proteins_fa">Multi-fasta file of protein coding genes</option> | |
154 <option value="mrna_transcripts_fa">Multi-fasta file of transcripts (mRNA)</option> | |
155 <option value="cds_transcripts_fa">Multi-fasta file of transcripts (CDS)</option> | |
156 <option value="gff3">Annotation in GFF3 format</option> | |
157 <option value="discrepency">tbl2asn summary report of annotated genome</option> | |
158 <option value="stats">Statistics</option> | |
159 <option value="must_fix">TSV file of Gene Name/Product deflines that failed to pass tbl2asn checks and must be fixed</option> | |
160 <option value="need_curating">TSV file of Gene Name/Product deflines that need to be curated</option> | |
161 <option value="new_names_passed">TSV file of Gene Name/Product deflines that passed tbl2asn but are not in Gene2Products database.</option> | |
162 </param> | |
163 | |
164 <!-- Need this to change path in the test funannotate_db --> | |
165 <param type="hidden" name="uglyTestingHack" value="" /> | |
166 </inputs> | |
167 <outputs> <!-- Each dataset is created only when its key is selected in the 'outputs' param; files are collected from the job directory via from_work_dir --> | |
168 <data name='gbk' format='genbank' label="${tool.name} on ${on_string}: annotated genome (genbank)" from_work_dir="out.gbk"> | |
169 <filter>outputs and 'gbk' in outputs</filter> | |
170 </data> | |
171 <data name='annot' format='tabular' label="${tool.name} on ${on_string}: all annotations" from_work_dir="out.annotations.txt"> | |
172 <filter>outputs and 'annotations' in outputs</filter> | |
173 </data> | |
174 <data name='contigs_fsa' format='fasta' label="${tool.name} on ${on_string}: contigs fasta, split at gaps" from_work_dir="out.contigs.fsa"> | |
175 <filter>outputs and 'contigs_fsa' in outputs</filter> | |
176 </data> | |
177 <data name='agp' format='tabular' label="${tool.name} on ${on_string}: AGP file" from_work_dir="out.agp"> | |
178 <filter>outputs and 'agp' in outputs</filter> | |
179 </data> | |
180 <data name='tbl' format='txt' label="${tool.name} on ${on_string}: NCBI tbl annotation file" from_work_dir="out.tbl"> | |
181 <filter>outputs and 'tbl' in outputs</filter> | |
182 </data> | |
183 <data name='sqn' format='txt' label="${tool.name} on ${on_string}: NCBI Sequin genome" from_work_dir="out.sqn"> | |
184 <filter>outputs and 'sqn' in outputs</filter> | |
185 </data> | |
186 <data name='fa_scaffolds' format='fasta' label="${tool.name} on ${on_string}: scaffolds sequences" from_work_dir="out.scaffolds.fa"> | |
187 <filter>outputs and 'scaffolds_fa' in outputs</filter> | |
188 </data> | |
189 <data name='fa_proteins' format='fasta' label="${tool.name} on ${on_string}: protein sequences" from_work_dir="out.proteins.fa"> | |
190 <filter>outputs and 'proteins_fa' in outputs</filter> | |
191 </data> | |
192 <data name='fa_transcripts_mrna' format='fasta' label="${tool.name} on ${on_string}: transcript mRNA sequences" from_work_dir="out.mrna-transcripts.fa"> | |
193 <filter>outputs and 'mrna_transcripts_fa' in outputs</filter> | |
194 </data> | |
195 <data name='fa_transcripts_cds' format='fasta' label="${tool.name} on ${on_string}: transcript CDS sequences" from_work_dir="out.cds-transcripts.fa"> | |
196 <filter>outputs and 'cds_transcripts_fa' in outputs</filter> | |
197 </data> | |
198 <data name='gff3' format='gff3' label="${tool.name} on ${on_string}: annotation (GFF3)" from_work_dir="out.gff3"> | |
199 <filter>outputs and 'gff3' in outputs</filter> | |
200 </data> | |
201 <data name='tbl2asn_report' format='txt' label="${tool.name} on ${on_string}: tbl2asn summary report of annotated genome" from_work_dir="out.discrepency.report.txt"> | |
202 <filter>outputs and 'discrepency' in outputs</filter> | |
203 </data> | |
204 <data name='stats' format='json' label="${tool.name} on ${on_string}: stats" from_work_dir="out.stats.json"> | |
205 <filter>outputs and 'stats' in outputs</filter> | |
206 </data> | |
207 <data name='must_fix' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product must-fix" from_work_dir="output/annotate_results/Gene2Products.must-fix.txt"> | |
208 <filter>outputs and 'must_fix' in outputs</filter> | |
209 </data> | |
210 <data name='need_curating' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product need-curating" from_work_dir="output/annotate_results/Gene2Products.need-curating.txt"> | |
211 <filter>outputs and 'need_curating' in outputs</filter> | |
212 </data> | |
213 <data name='new_names_passed' format='tabular' label="${tool.name} on ${on_string}: Gene Name/Product new-names-passed" from_work_dir="output/annotate_results/Gene2Products.new-names-passed.txt"> | |
214 <filter>outputs and 'new_names_passed' in outputs</filter> | |
215 </data> | |
216 </outputs> | |
217 <tests> | |
218 <test> <!-- GenBank input: requests every output and checks that each produced dataset contains an expected marker string --> | |
219 <conditional name="input"> | |
220 <param name="input_type" value="gbk" /> | |
221 <param name="genbank" value="predict_augustus/Genus_species.gbk" /> | |
222 </conditional> | |
223 <param name="database" value="2021-07-20-120000" /> | |
224 <param name="busco_db" value="insecta" /> | |
225 <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" /> | |
226 <output name="gbk"> | |
227 <assert_contents> | |
228 <has_text text="DEFINITION Genus species." /> | |
229 </assert_contents> | |
230 </output> | |
231 <output name="annot"> | |
232 <assert_contents> | |
233 <has_text text="EC_number" /> | |
234 <has_text text="EOG090W0T3K" /> | |
235 </assert_contents> | |
236 </output> | |
237 <output name="contigs_fsa"> | |
238 <assert_contents> | |
239 <has_text text=">contig_1" /> | |
240 </assert_contents> | |
241 </output> | |
242 <output name="agp"> | |
243 <assert_contents> | |
244 <has_text text="contig_1" /> | |
245 </assert_contents> | |
246 </output> | |
247 <output name="tbl"> | |
248 <assert_contents> | |
249 <has_text text="locus_tag" /> | |
250 </assert_contents> | |
251 </output> | |
252 <output name="sqn"> | |
253 <assert_contents> | |
254 <has_text text="Seq-submit" /> | |
255 </assert_contents> | |
256 </output> | |
257 <output name="fa_scaffolds"> | |
258 <assert_contents> | |
259 <has_text text=">sample" /> | |
260 </assert_contents> | |
261 </output> | |
262 <output name="fa_proteins"> | |
263 <assert_contents> | |
264 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
265 </assert_contents> | |
266 </output> | |
267 <output name="fa_transcripts_mrna"> | |
268 <assert_contents> | |
269 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
270 </assert_contents> | |
271 </output> | |
272 <output name="fa_transcripts_cds"> | |
273 <assert_contents> | |
274 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
275 </assert_contents> | |
276 </output> | |
277 <output name="gff3"> | |
278 <assert_contents> | |
279 <has_text text="ID=FUN_000001;" /> | |
280 </assert_contents> | |
281 </output> | |
282 <output name="tbl2asn_report"> | |
283 <assert_contents> | |
284 <has_text text="Discrepancy Report Results" /> | |
285 </assert_contents> | |
286 </output> | |
287 <output name="stats"> | |
288 <assert_contents> | |
289 <has_text text="avg_gene_length" /> | |
290 </assert_contents> | |
291 </output> | |
292 <output name="must_fix"> | |
293 <assert_contents> | |
294 <has_text text="tbl2asn Error" /> | |
295 </assert_contents> | |
296 </output> | |
297 <output name="need_curating"> | |
298 <assert_contents> | |
299 <has_text text="Original Description" /> | |
300 </assert_contents> | |
301 </output> | |
302 <output name="new_names_passed"> | |
303 <assert_contents> | |
304 <has_text text="Passed Description" /> | |
305 </assert_contents> | |
306 </output> | |
307 </test> | |
308 <test> <!-- GFF input: supplies GFF3, genome FASTA and species name, requests every output and checks a marker string in each --> | |
309 <conditional name="input"> | |
310 <param name="input_type" value="gff" /> | |
311 <param name="gff" value="predict_augustus/Genus_species.gff3" /> | |
312 <param name="fasta" value="genome.fa" /> | |
313 <param name="species" value="Genus species" /> | |
314 </conditional> | |
315 <param name="database" value="2021-07-20-120000" /> | |
316 <param name="busco_db" value="insecta" /> | |
317 <param name="outputs" value="gbk,annotations,contigs_fsa,agp,tbl,sqn,scaffolds_fa,proteins_fa,mrna_transcripts_fa,cds_transcripts_fa,gff3,discrepency,stats,must_fix,need_curating,new_names_passed" /> | |
318 <output name="gbk"> | |
319 <assert_contents> | |
320 <has_text text="DEFINITION Genus species." /> | |
321 </assert_contents> | |
322 </output> | |
323 <output name="annot"> | |
324 <assert_contents> | |
325 <has_text text="EC_number" /> | |
326 <has_text text="EOG090W0T3K" /> | |
327 </assert_contents> | |
328 </output> | |
329 <output name="contigs_fsa"> | |
330 <assert_contents> | |
331 <has_text text=">contig_1" /> | |
332 </assert_contents> | |
333 </output> | |
334 <output name="agp"> | |
335 <assert_contents> | |
336 <has_text text="contig_1" /> | |
337 </assert_contents> | |
338 </output> | |
339 <output name="tbl"> | |
340 <assert_contents> | |
341 <has_text text="locus_tag" /> | |
342 </assert_contents> | |
343 </output> | |
344 <output name="sqn"> | |
345 <assert_contents> | |
346 <has_text text="Seq-submit" /> | |
347 </assert_contents> | |
348 </output> | |
349 <output name="fa_scaffolds"> | |
350 <assert_contents> | |
351 <has_text text=">sample" /> | |
352 </assert_contents> | |
353 </output> | |
354 <output name="fa_proteins"> | |
355 <assert_contents> | |
356 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
357 </assert_contents> | |
358 </output> | |
359 <output name="fa_transcripts_mrna"> | |
360 <assert_contents> | |
361 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
362 </assert_contents> | |
363 </output> | |
364 <output name="fa_transcripts_cds"> | |
365 <assert_contents> | |
366 <has_text text=">FUN_000001-T1 FUN_000001" /> | |
367 </assert_contents> | |
368 </output> | |
369 <output name="gff3"> | |
370 <assert_contents> | |
371 <has_text text="ID=FUN_000001;" /> | |
372 </assert_contents> | |
373 </output> | |
374 <output name="tbl2asn_report"> | |
375 <assert_contents> | |
376 <has_text text="Discrepancy Report Results" /> | |
377 </assert_contents> | |
378 </output> | |
379 <output name="stats"> | |
380 <assert_contents> | |
381 <has_text text="avg_gene_length" /> | |
382 </assert_contents> | |
383 </output> | |
384 <output name="must_fix"> | |
385 <assert_contents> | |
386 <has_text text="tbl2asn Error" /> | |
387 </assert_contents> | |
388 </output> | |
389 <output name="need_curating"> | |
390 <assert_contents> | |
391 <has_text text="Original Description" /> | |
392 </assert_contents> | |
393 </output> | |
394 <output name="new_names_passed"> | |
395 <assert_contents> | |
396 <has_text text="Passed Description" /> | |
397 </assert_contents> | |
398 </output> | |
399 </test> | |
400 </tests> | |
401 <help><![CDATA[ | |
402 Funannotate_ annotate | |
403 --------------------- | |
404 | |
405 Funannotate_ is a pipeline for genome annotation (built specifically for fungi, but will also work with higher eukaryotes). | |
406 | |
407 This script functionally annotates the results from funannotate predict. It pulls | |
408 annotation from PFAM, InterPro, EggNog, UniProtKB, MEROPS, CAZyme, and Gene Ontology (GO). | |
409 | |
410 .. _Funannotate: http://funannotate.readthedocs.io | |
411 ]]></help> | |
412 <expand macro="citations" /> | |
413 </tool> |