Mercurial > repos > iuc > bakta
comparison bakta.xml @ 0:1a27ad3d0cdf draft
planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/bakta commit 7d595b05b8d595f554b57dadbf1beb0b39733af3
author | iuc |
---|---|
date | Thu, 01 Sep 2022 17:28:43 +0000 |
parents | |
children | da5f1924bb2e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1a27ad3d0cdf |
---|---|
1 <tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description> | |
3 genome annotation via alignment-free sequence identification | |
4 </description> | |
5 <macros> | |
6 <import>macro.xml</import> | |
7 </macros> | |
8 <expand macro='edam'/> | |
9 <expand macro='xrefs'/> | |
10 <expand macro="requirements"/> | |
11 <expand macro="version_command"/> | |
12 | |
13 <command detect_errors="aggressive"><![CDATA[ | |
14 set -o pipefail; bakta | |
15 #*====================================== | |
16 CPU option (Galaxy-allocated slots, 1 when unset) | |
17 ======================================*# | |
18 --threads \${GALAXY_SLOTS:-1} | |
19 #*====================================== | |
20 Bakta database and minimum contig length (explicit value wins, otherwise 200 in compliant mode, else 1) | |
21 ======================================*# | |
22 --db $input_option.db_select.fields.path | |
23 #if $input_option.min_contig_length | |
24 --min-contig-length $input_option.min_contig_length | |
25 #else if $annotation.compliant | |
26 --min-contig-length 200 | |
27 #else | |
28 --min-contig-length 1 | |
29 #end if | |
30 --prefix bakta_output | |
31 #*====================================== | |
32 Organism options | |
33 genus/species/strain/plasmid | |
34 ======================================*# | |
35 #if $organism.genus | |
36 --genus '$organism.genus' | |
37 #end if | |
38 #if $organism.species | |
39 --species '$organism.species' | |
40 #end if | |
41 #if $organism.strain | |
42 --strain '$organism.strain' | |
43 #end if | |
44 #if $organism.plasmid | |
45 --plasmid '$organism.plasmid' | |
46 #end if | |
47 #*====================================== | |
48 Annotation options | |
49 gram type, prodigal/protein file | |
50 ======================================*# | |
51 $annotation.complete | |
52 #if $annotation.prodigal | |
53 --prodigal-tf '$annotation.prodigal' | |
54 #end if | |
55 #if $annotation.translation_table | |
56 --translation-table '$annotation.translation_table' | |
57 #end if | |
58 #if $annotation.gram | |
59 --gram '$annotation.gram' | |
60 #end if | |
61 $annotation.keep_contig_headers | |
62 #if $annotation.replicons | |
63 --replicons '$annotation.replicons' | |
64 #end if | |
65 $annotation.compliant | |
66 #if $annotation.proteins | |
67 --proteins '$annotation.proteins' | |
68 #end if | |
69 #*====================================== | |
70 Workflow OPTIONS | |
71 skip some steps of the bakta analysis | |
72 ======================================*# | |
73 | |
74 #if "skip_trna" in $workflow.skip_analysis | |
75 --skip-trna | |
76 #end if | |
77 #if "skip_tmrna" in $workflow.skip_analysis | |
78 --skip-tmrna | |
79 #end if | |
80 #if "skip_rrna" in $workflow.skip_analysis | |
81 --skip-rrna | |
82 #end if | |
83 #if "skip_ncrna" in $workflow.skip_analysis | |
84 --skip-ncrna | |
85 #end if | |
86 #if "skip_ncrna_region" in $workflow.skip_analysis | |
87 --skip-ncrna-region | |
88 #end if | |
89 #if "skip_crispr" in $workflow.skip_analysis | |
90 --skip-crispr | |
91 #end if | |
92 #if "skip_cds" in $workflow.skip_analysis | |
93 --skip-cds | |
94 #end if | |
95 #if "skip_sorf" in $workflow.skip_analysis | |
96 --skip-sorf | |
97 #end if | |
98 #if "skip_gap" in $workflow.skip_analysis | |
99 --skip-gap | |
100 #end if | |
101 #if "skip_ori" in $workflow.skip_analysis | |
102 --skip-ori | |
103 #end if | |
104 | |
105 #*====================================== | |
106 Genome file | |
107 ======================================*# | |
108 '$input_option.input_file' | |
109 #*====================================== | |
110 LOG file: stdout is copied into the log dataset by tee, and the pipefail option set before bakta keeps a bakta failure visible in the exit status of the pipe | |
111 ======================================*# | |
112 | tee '$logfile' | |
113 ]]></command> | |
114 <inputs> | |
115 <!-- Database selection, genome FASTA input and minimum contig length; the length starts at 1 because the command template treats 0 as unset --> | |
116 <section name="input_option" title="Input/Output options" expanded="true"> | |
117 <param name="db_select" type="select" label="The bakta database"> | |
118 <options from_data_table="bakta_database"> | |
119 <validator message="No bakta database is available" type="no_options"/> | |
120 </options> | |
121 </param> | |
122 <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/> | |
123 <param name="min_contig_length" type="integer" optional="true" min="1" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/> | |
124 </section> | |
125 <!-- Optional organism metadata passed to bakta as quoted text; the strain pattern matches the species/plasmid pattern so names with digits or hyphens (e.g. K-12) are accepted --> | |
126 <section name="organism" title="Optional organism options" expanded="false"> | |
127 <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia"> | |
128 <validator type="regex">^[a-zA-Z]+$</validator> | |
129 </param> | |
130 <param argument="--species" type="text" optional="true" label="Specify species name" help="ex. 'coli O157:H7'"> | |
131 <validator type="regex">^[a-zA-Z0-9\s(:\-/)]+$</validator> | |
132 </param> | |
133 <param argument="--strain" type="text" optional="true" label="Specify strain name" help="ex. Sakai"> | |
134 <validator type="regex">^[a-zA-Z0-9\s(:\-/)]+$</validator> | |
135 </param> | |
136 <param argument="--plasmid" type="text" optional="true" label="Specify plasmid name" help="ex. pOSAK1"> | |
137 <validator type="regex">^[a-zA-Z0-9\s(:\-/)]+$</validator> | |
138 </param> | |
139 </section> | |
140 <!-- Optional annotation settings: flags are emitted verbatim through truevalue, data inputs are only passed when a dataset is selected; the replicon table also accepts plain tabular datasets --> | |
141 <section name="annotation" title="Optional annotation"> | |
142 <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/> | |
143 <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/> | |
144 <param name="translation_table" type="select" optional="true" label="Translation table" help="Default is the bacterial table 11"> | |
145 <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> | |
146 <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option> | |
147 </param> | |
148 <param argument="--gram" type="select" optional="true" label="Gram type for signal peptide predictions" help="Gram type +/- or unknown. Default: unknown"> | |
149 <option value="+">Gram+</option> | |
150 <option value="-">Gram-</option> | |
151 <option value="?" selected="true">Unknown</option> | |
152 </param> | |
153 <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/> | |
154 <param argument="--replicons" type="data" format="tabular,tsv,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/> | |
155 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDBJ compliance"/> | |
156 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/> | |
157 </section> | |
158 <!-- Analysis steps to skip: each ticked value is tested by name in the command template and mapped to the matching bakta skip flag --> | |
159 <section name="workflow" title="Workflow option to skip steps"> | |
160 <param name="skip_analysis" type="select" display="checkboxes" multiple="true" label="Select steps to skip"> | |
161 <option value="skip_trna"> Skip tRNA detection and annotation </option> | |
162 <option value="skip_tmrna"> Skip tmRNA detection and annotation </option> | |
163 <option value="skip_rrna"> Skip rRNA detection and annotation </option> | |
164 <option value="skip_ncrna"> Skip ncRNA detection and annotation </option> | |
165 <option value="skip_ncrna_region"> Skip ncRNA region detection and annotation </option> | |
166 <option value="skip_crispr"> Skip CRISPR array detection and annotation </option> | |
167 <option value="skip_cds"> Skip CDS detection and annotation </option> | |
168 <option value="skip_sorf"> Skip sORF detection and annotation </option> | |
169 <option value="skip_gap"> Skip gap detection and annotation </option> | |
170 <option value="skip_ori"> Skip oriC/oriT detection and annotation </option> | |
171 </param> | |
172 </section> | |
173 <!-- Choice of which result files become Galaxy datasets; every option is selected by default and the output filters test these values --> <section name="output_files" title="Selection of the output files"> | |
174 <param name="output_selection" type="select" display="checkboxes" multiple="true" label="Output files selection"> | |
175 <option value="file_tsv" selected="true"> Annotation file in TSV </option> | |
176 <option value="file_gff3" selected="true"> Annotation and sequence in GFF3 </option> | |
177 <option value="file_gbff" selected="true"> Annotations and sequences in GenBank format </option> | |
178 <option value="file_embl" selected="true"> Annotations and sequences in EMBL format </option> | |
179 <option value="file_fna" selected="true"> Replicon/contig DNA sequences as FASTA </option> | |
180 <option value="file_ffn" selected="true"> Feature nucleotide sequences as FASTA </option> | |
181 <option value="file_faa" selected="true"> CDS/sORF amino acid sequences as FASTA </option> | |
182 <option value="hypo_tsv" selected="true"> Hypothetical protein CDS in TSV</option> | |
183 <option value="hypo_fa" selected="true"> Hypothetical protein CDS amino sequences as FASTA</option> | |
184 <option value="sum_txt" selected="true"> Summary as TXT</option> | |
185 <option value="file_json" selected="true"> Information on each annotated feature as JSON </option> | |
186 <option value="log_txt" selected="true"> Log file as TXT </option> | |
187 </param> | |
188 </section> | |
189 | |
190 </inputs> | |
191 <outputs> <!-- One dataset per bakta result file, collected from the working directory; each is only created when its key is ticked in output_files/output_selection. GenBank and EMBL flat files use their own datatypes rather than tabular. --> | |
192 <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: bakta_output.tsv"> | |
193 <filter> output_files['output_selection'] and "file_tsv" in output_files['output_selection'] </filter> | |
194 </data> | |
195 <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: bakta_output.gff3"> | |
196 <filter> output_files['output_selection'] and "file_gff3" in output_files['output_selection'] </filter> | |
197 </data> | |
198 <data name="annotation_gbff" format="genbank" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff"> | |
199 <filter> output_files['output_selection'] and "file_gbff" in output_files['output_selection'] </filter> | |
200 </data> | |
201 <data name="annotation_embl" format="embl" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl"> | |
202 <filter> output_files['output_selection'] and "file_embl" in output_files['output_selection'] </filter> | |
203 </data> | |
204 <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: bakta_output.fna"> | |
205 <filter> output_files['output_selection'] and "file_fna" in output_files['output_selection'] </filter> | |
206 </data> | |
207 <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: bakta_output.ffn"> | |
208 <filter> output_files['output_selection'] and "file_ffn" in output_files['output_selection'] </filter> | |
209 </data> | |
210 <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: bakta_output.faa"> | |
211 <filter> output_files['output_selection'] and "file_faa" in output_files['output_selection'] </filter> | |
212 </data> | |
213 <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.tsv"> | |
214 <filter> output_files['output_selection'] and "hypo_tsv" in output_files['output_selection'] </filter> | |
215 </data> | |
216 <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.faa"> | |
217 <filter> output_files['output_selection'] and "hypo_fa" in output_files['output_selection'] </filter> | |
218 </data> | |
219 <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: bakta_summary.txt"> | |
220 <filter> output_files['output_selection'] and "sum_txt" in output_files['output_selection'] </filter> | |
221 </data> | |
222 <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: bakta_output.json"> | |
223 <filter> output_files['output_selection'] and "file_json" in output_files['output_selection'] </filter> | |
224 </data> | |
225 <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"> | |
226 <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter> | |
227 </data> | |
228 </outputs> | |
229 | |
230 <tests> | |
231 <test expect_num_outputs="12"> <!-- TEST_1 default parameters: only the database and the genome are set, and all 12 outputs are compared with reference files --> | |
232 <section name="input_option" > | |
233 <param name="db_select" value="test-db-bakta"/> | |
234 <param name="input_file" value="NC_002127.1.fna"/> | |
235 </section> | |
236 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="4"> | |
237 <assert_contents> | |
238 <has_text_matching n="1" expression="Genome size: 1,330 bp"/> | |
239 <has_n_lines n="90" delta="1"/> | |
240 </assert_contents> | |
241 </output> | |
242 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="1"/> | |
243 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"> | |
244 <assert_contents> | |
245 <has_text_matching expression="TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC"/> | |
246 </assert_contents> | |
247 </output> | |
248 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="4"> | |
249 <assert_contents> | |
250 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/> | |
251 </assert_contents> | |
252 </output> | |
253 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="2"/> | |
254 <output name="annotation_fna" value="TEST_1/TEST_1.fna"/> | |
255 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/> | |
256 <output name="annotation_faa" value="TEST_1/TEST_1.faa"/> | |
257 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv"/> | |
258 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/> | |
259 <output name="summary_txt" value="TEST_1/TEST_1.txt"> | |
260 <assert_contents> | |
261 <has_text_matching expression="N50: 1330"/> | |
262 </assert_contents> | |
263 </output> | |
264 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="4"> | |
265 <assert_contents> | |
266 <has_text_matching expression="0.6203007518796992"/> | |
267 </assert_contents> | |
268 </output> | |
269 </test> | |
270 <test expect_num_outputs="12"> <!-- TEST_2 organism info, gram-negative with kept contig headers, a 250 bp minimum contig length and two skipped steps (tRNA, tmRNA); the gram param is addressed by its real name "gram" so the setting is actually applied --> | |
271 <section name="input_option" > | |
272 <param name="db_select" value="test-db-bakta"/> | |
273 <param name="input_file" value="NC_002127.1.fna"/> | |
274 <param name="min_contig_length" value="250"/> | |
275 </section> | |
276 <section name="organism"> | |
277 <param name="genus" value="Escherichia"/> | |
278 <param name="species" value="coli O157:H7"/> | |
279 <param name="strain" value="Sakai"/> | |
280 <param name="plasmid" value="pOSAK1"/> | |
281 </section> | |
282 <section name="annotation"> | |
283 <param name="gram" value="-"/> | |
284 <param name="keep_contig_headers" value="true"/> | |
285 </section> | |
286 <section name="workflow"> | |
287 <param name="skip_analysis" value="skip_trna,skip_tmrna"/> | |
288 </section> | |
289 <output name="logfile" value="TEST_2/TEST_2.log" lines_diff="4"> | |
290 <assert_contents> | |
291 <has_text_matching expression="Genome size: 1,330 bp"/> | |
292 </assert_contents> | |
293 </output> | |
294 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="2"> | |
295 <assert_contents> | |
296 <has_text_matching expression="IHHALP_00005"/> | |
297 </assert_contents> | |
298 </output> | |
299 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="2"> | |
300 <assert_contents> | |
301 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/> | |
302 </assert_contents> | |
303 </output> | |
304 <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="5"> | |
305 <assert_contents> | |
306 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/> | |
307 </assert_contents> | |
308 </output> | |
309 <output name="annotation_embl" value="TEST_2/TEST_2.embl" lines_diff="4"> | |
310 <assert_contents> | |
311 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/> | |
312 </assert_contents> | |
313 </output> | |
314 <output name="annotation_fna" value="TEST_2/TEST_2.fna"/> | |
315 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/> | |
316 <output name="annotation_faa" value="TEST_2/TEST_2.faa"/> | |
317 <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv"/> | |
318 <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa"/> | |
319 <output name="summary_txt" value="TEST_2/TEST_2.txt"> | |
320 <assert_contents> | |
321 <has_text_matching expression="N50: 1330"/> | |
322 </assert_contents> | |
323 </output> | |
324 <output name="annotation_json" value="TEST_2/TEST_2.json" lines_diff="4"> | |
325 <assert_contents> | |
326 <has_text_matching expression="0.4518796992481203"/> | |
327 </assert_contents> | |
328 </output> | |
329 </test> | |
330 <test expect_num_outputs="12"> <!-- TEST_3 all ten skip options with a 350 bp minimum contig length; the hypothetical-protein and summary outputs are produced but not compared here --> | |
331 <section name="input_option" > | |
332 <param name="db_select" value="test-db-bakta"/> | |
333 <param name="input_file" value="NC_002127.1.fna"/> | |
334 <param name="min_contig_length" value="350"/> | |
335 </section> | |
336 <section name="workflow"> | |
337 <param name="skip_analysis" value="skip_trna,skip_tmrna,skip_rrna,skip_ncrna,skip_ncrna_region,skip_crispr,skip_cds,skip_sorf,skip_gap,skip_ori"/> | |
338 </section> | |
339 <output name="logfile" value="TEST_3/TEST_3.log" lines_diff="4"> | |
340 <assert_contents> | |
341 <has_text_matching expression="Genome size: 1,330 bp"/> | |
342 </assert_contents> | |
343 </output> | |
344 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="1"/> | |
345 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="2"/> | |
346 <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="10"/> | |
347 <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4"/> | |
348 <output name="annotation_fna" value="TEST_3/TEST_3.fna"/> | |
349 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/> | |
350 <output name="annotation_faa" value="TEST_3/TEST_3.faa"/> | |
351 <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4"/> | |
352 </test> | |
353 <test expect_num_outputs="12"> <!-- TEST_4 annotation options: complete replicons, translation table 4, prodigal training file, replicon table, compliant mode and user-supplied proteins --> | |
354 <section name="input_option" > | |
355 <param name="db_select" value="test-db-bakta"/> | |
356 <param name="input_file" value="NC_002127.1.fna"/> | |
357 </section> | |
358 <section name="annotation"> | |
359 <param name="complete" value="true"/> | |
360 <param name="translation_table" value="4"/> | |
361 <param name="prodigal" value="prodigal.tf"/> | |
362 <param name="replicons" value="replicons.tsv"/> | |
363 <param name="compliant" value="true"/> | |
364 <param name="proteins" value="user-proteins.faa"/> | |
365 </section> | |
366 <output name="logfile" value="TEST_4/TEST_4.log" lines_diff="4"> | |
367 <assert_contents> | |
368 <has_text_matching expression="potential: 16"/> | |
369 </assert_contents> | |
370 </output> | |
371 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="2"/> | |
372 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="2"> | |
373 <assert_contents> | |
374 <has_text_matching expression="ID=IHHALP_00005_gene;locus_tag=IHHALP_00005"/> | |
375 </assert_contents> | |
376 </output> | |
377 <output name="annotation_gbff" value="TEST_4/TEST_4.gbff" lines_diff="4"> | |
378 <assert_contents> | |
379 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/> | |
380 </assert_contents> | |
381 </output> | |
382 <output name="annotation_embl" value="TEST_4/TEST_4.embl" lines_diff="4"> | |
383 <assert_contents> | |
384 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/> | |
385 </assert_contents> | |
386 </output> | |
387 <output name="annotation_fna" value="TEST_4/TEST_4.fna"/> | |
388 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/> | |
389 <output name="annotation_faa" value="TEST_4/TEST_4.faa"/> | |
390 <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv"/> | |
391 <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa"/> | |
392 <output name="summary_txt" value="TEST_4/TEST_4.txt"> | |
393 <assert_contents> | |
394 <has_text_matching expression="GC: 45.2"/> | |
395 </assert_contents> | |
396 </output> | |
397 <output name="annotation_json" value="TEST_4/TEST_4.json" lines_diff="4"> | |
398 <assert_contents> | |
399 <has_text_matching expression="0.4518796992481203"/> | |
400 </assert_contents> | |
401 </output> | |
402 </test> | |
403 <test expect_num_outputs="2"> <!-- TEST_5 skip every analysis step and request only the log and summary outputs, so exactly two datasets are expected --> | |
404 <section name="input_option" > | |
405 <param name="db_select" value="test-db-bakta"/> | |
406 <param name="input_file" value="NC_002127.1.fna"/> | |
407 </section> | |
408 <section name="annotation"> | |
409 <param name="complete" value="true"/> | |
410 <param name="translation_table" value="4"/> | |
411 </section> | |
412 <section name="workflow"> | |
413 <param name="skip_analysis" value="skip_trna,skip_tmrna,skip_rrna,skip_ncrna,skip_ncrna_region,skip_crispr,skip_cds,skip_sorf,skip_gap,skip_ori"/> | |
414 </section> | |
415 <section name="output_files"> | |
416 <param name="output_selection" value="log_txt,sum_txt"/> | |
417 </section> | |
418 <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="4"/> | |
419 <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/> | |
420 </test> | |
421 </tests> | |
422 <help><![CDATA[**What it does** | |
423 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs. | |
424 | |
425 *Comprehensive & taxonomy-independent database* | |
426 Bakta provides a large and taxonomy-independent database using UniProt's entire UniRef protein sequence cluster universe. | |
427 | |
428 *Protein sequence identification* | |
429 Bakta exactly identifies known identical protein sequences (IPS) from RefSeq and UniProt | |
430 allowing the fine-grained annotation of gene alleles (AMR) or closely related but distinct protein families. | |
431 This is achieved via an alignment-free sequence identification (AFSI) approach | |
432 using full-length MD5 protein sequence hash digests. | |
433 *Small proteins/short open reading frames* | |
434 Bakta detects and annotates small proteins/short open reading frames (sORF). | |
435 | |
436 *Expert annotation systems* | |
437 To provide high quality annotations for certain proteins of higher interest, e.g. AMR & VF genes, | |
438 Bakta includes & merges different expert annotation systems. | |
439 Currently, Bakta uses NCBI's AMRFinderPlus for AMR gene annotations | |
440 as well as a generalized protein sequence expert system with distinct | |
441 coverage, identity and priority values for each sequence, currently comprising the VFDB as well as NCBI's BlastRules. | |
442 | |
443 *Comprehensive workflow* | |
444 Bakta annotates ncRNA cis-regulatory regions, oriC/oriV/oriT | |
445 and assembly gaps as well as standard feature types: tRNA, tmRNA, rRNA, ncRNA genes, CRISPR, CDS. | |
446 | |
447 *GFF3 & INSDC conform annotations* | |
448 Bakta writes GFF3 and INSDC-compliant (Genbank & EMBL) annotation files ready for submission | |
449 (checked via GenomeTools GFF3Validator, table2asn_GFF and ENA Webin-CLI for GFF3 and EMBL file formats, | |
450 respectively for representative genomes of all ESKAPE species). | |
451 | |
452 *Bacteria & plasmids* | |
453 Bakta was designed to annotate bacteria (isolates & MAGs) and plasmids, only. | |
454 | |
455 **Input options** | |
456 1. Choose a genome or assembly in fasta format to use bakta annotations | |
457 2. Choose a version of the Bakta database | |
458 | |
459 **Organism options** | |
460 You can specify information about the analysed FASTA file as text input for: | |
461 - genus | |
462 - species | |
463 - strain | |
464 - plasmid | |
465 | |
466 **Annotation options** | |
467 1. You can specify if all sequences (chromosome or plasmids) are complete or not | |
468 2. You can add your own prodigal training file for CDS prediction | |
469 3. The translation table could be modified, default is the 11th for bacteria | |
470 4. You can specify whether the bacterium is gram -/+ or unknown (default value: unknown) | |
471 5. You can keep the name of contig present in the input file | |
472 6. You can specify your own replicon table as a TSV/CSV file | |
473 7. The compliance option produces annotation files that are ready to submit to public databases | |
474 such as ENA, GenBank and EMBL | |
475 8. You can specify a protein sequence file for annotation in GenBank or fasta formats | |
476 Using the Fasta format, each reference sequence can be provided in a short or long format: | |
477 | |
478 # short: | |
479 >id gene~~~product~~~dbxrefs | |
480 MAQ... | |
481 | |
482 # long: | |
483 >id min_identity~~~min_query_cov~~~min_subject_cov~~~gene~~~product~~~dbxrefs | |
484 MAQ... | |
485 | |
486 **Skip steps** | |
487 Some steps can be skipped: | |
488 - skip-trna Skip tRNA detection & annotation | |
489 - skip-tmrna Skip tmRNA detection & annotation | |
490 - skip-rrna Skip rRNA detection & annotation | |
491 - skip-ncrna Skip ncRNA detection & annotation | |
492 - skip-ncrna-region Skip ncRNA region detection & annotation | |
493 - skip-crispr Skip CRISPR array detection & annotation | |
494 - skip-cds Skip CDS detection & annotation | |
495 - skip-pseudo Skip pseudogene detection & annotation (not offered in the "Select steps to skip" list of this wrapper version) | |
496 - skip-sorf Skip sORF detection & annotation | |
497 - skip-gap Skip gap detection & annotation | |
498 - skip-ori Skip oriC/oriT detection & annotation | |
499 | |
500 **Output options** | |
501 Bakta produces a number of output files; you can select which types of file you want: | |
502 - Summary of the annotation | |
503 - Annotated files | |
504 - Sequence files for nucleotide and/or amino acid | |
505 ]]></help> | |
506 <expand macro="citations"/> | |
507 </tool> |