comparison bakta.xml @ 0:1a27ad3d0cdf draft

planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/bakta commit 7d595b05b8d595f554b57dadbf1beb0b39733af3
author iuc
date Thu, 01 Sep 2022 17:28:43 +0000
parents
children da5f1924bb2e
comparison
equal deleted inserted replaced
-1:000000000000 0:1a27ad3d0cdf
1 <tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>
3 genome annotation via alignment-free sequence identification
4 </description>
5 <macros>
6 <import>macro.xml</import>
7 </macros>
8 <expand macro="edam"/> <!-- this and the following expansions are expected to come from the imported macro.xml -->
9 <expand macro="xrefs"/>
10 <expand macro="requirements"/>
11 <expand macro="version_command"/>
12
13 <command detect_errors="aggressive"><![CDATA[
14 set -o pipefail && bakta
15 #*======================================
16 CPU option: thread count comes from GALAXY_SLOTS, defaulting to 1
17 ======================================*#
18 --threads \${GALAXY_SLOTS:-1}
19 #*======================================
20 Bakta database path (quoted; it is read from the bakta_database data table) and minimum contig length: explicit value if given, otherwise 200 in compliant mode and 1 in every other case
21 ======================================*#
22 --db '$input_option.db_select.fields.path'
23 #if $input_option.min_contig_length
24 --min-contig-length $input_option.min_contig_length
25 #else if $annotation.compliant
26 --min-contig-length 200
27 #else
28 --min-contig-length 1
29 #end if
30 --prefix bakta_output
31 #*======================================
32 Organism options
33 genus/species/strain/plasmid (each one is only passed when filled in)
34 ======================================*#
35 #if $organism.genus
36 --genus '$organism.genus'
37 #end if
38 #if $organism.species
39 --species '$organism.species'
40 #end if
41 #if $organism.strain
42 --strain '$organism.strain'
43 #end if
44 #if $organism.plasmid
45 --plasmid '$organism.plasmid'
46 #end if
47 #*======================================
48 Annotation options
49 gram type, prodigal/protein file; boolean parameters expand to their flag or to an empty string
50 ======================================*#
51 $annotation.complete
52 #if $annotation.prodigal
53 --prodigal-tf '$annotation.prodigal'
54 #end if
55 #if $annotation.translation_table
56 --translation-table '$annotation.translation_table'
57 #end if
58 #if $annotation.gram
59 --gram '$annotation.gram'
60 #end if
61 $annotation.keep_contig_headers
62 #if $annotation.replicons
63 --replicons '$annotation.replicons'
64 #end if
65 $annotation.compliant
66 #if $annotation.proteins
67 --proteins '$annotation.proteins'
68 #end if
69 #*======================================
70 Workflow OPTIONS
71 skip some steps of the bakta analysis: one flag per value checked in workflow.skip_analysis
72 ======================================*#
73
74 #if "skip_trna" in $workflow.skip_analysis
75 --skip-trna
76 #end if
77 #if "skip_tmrna" in $workflow.skip_analysis
78 --skip-tmrna
79 #end if
80 #if "skip_rrna" in $workflow.skip_analysis
81 --skip-rrna
82 #end if
83 #if "skip_ncrna" in $workflow.skip_analysis
84 --skip-ncrna
85 #end if
86 #if "skip_ncrna_region" in $workflow.skip_analysis
87 --skip-ncrna-region
88 #end if
89 #if "skip_crispr" in $workflow.skip_analysis
90 --skip-crispr
91 #end if
92 #if "skip_cds" in $workflow.skip_analysis
93 --skip-cds
94 #end if
95 #if "skip_sorf" in $workflow.skip_analysis
96 --skip-sorf
97 #end if
98 #if "skip_gap" in $workflow.skip_analysis
99 --skip-gap
100 #end if
101 #if "skip_ori" in $workflow.skip_analysis
102 --skip-ori
103 #end if
104
105 #*======================================
106 Genome file (positional argument, passed last)
107 ======================================*#
108 '$input_option.input_file'
109 #*======================================
110 LOG file: stdout is copied into the log dataset; pipefail (set on the first command line) keeps a failing bakta exit status from being replaced by the exit status of tee. NOTE(review): the logfile output is filtered by output_selection, confirm the variable still resolves when log_txt is unchecked
111 ======================================*#
112 | tee '$logfile'
113 ]]></command>
114 <inputs>
115 <!-- Database and genome input: data-table backed database choice, FASTA genome and an optional minimum contig length -->
116 <section name="input_option" title="Input/Output options" expanded="true">
117 <param name="db_select" type="select" label="The bakta database">
118 <options from_data_table="bakta_database">
119 <validator message="No bakta database is available" type="no_options"/>
120 </options>
121 </param>
122 <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/>
123 <param name="min_contig_length" type="integer" optional="true" min="1" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/>
124 </section>
125 <!-- Organism information: optional free text, each value checked by a regex validator and single-quoted in the command -->
126 <section name="organism" title="Optional organism options" expanded="false">
127 <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia">
128 <validator type="regex">^[a-zA-Z]+$</validator>
129 </param>
130 <param argument="--species" type="text" optional="true" label="Specify species name" help="ex. 'coli O157:H7'">
131 <validator type="regex">^[a-zA-Z0-9\s(:\-/)]+$</validator>
132 </param>
133 <param argument="--strain" type="text" optional="true" label="Specify strain name" help="ex. Sakai">
134 <validator type="regex">^[a-zA-Z0-9\s(:\-/)._]+$</validator>
135 </param>
136 <param argument="--plasmid" type="text" optional="true" label="Specify plasmid name" help="ex. pOSAK1">
137 <validator type="regex">^[a-zA-Z0-9\s(:\-/)]+$</validator>
138 </param>
139 </section>
140 <!-- Annotation options: boolean flags and optional files that the command template forwards to bakta -->
141 <section name="annotation" title="Optional annotation">
142 <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/>
143 <param name="prodigal" argument="--prodigal-tf" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/>
144 <param name="translation_table" type="select" optional="true" label="Translation table" help="Default is the bacterial table 11">
145 <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
146 <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option>
147 </param>
148 <param argument="--gram" type="select" optional="true" label="Gram type for signal peptide predictions" help="Gram type +/- or unknown. Default: unknown">
149 <option value="+">Gram+</option>
150 <option value="-">Gram-</option>
151 <option value="?" selected="true">Unknown</option>
152 </param>
153 <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/>
154 <param argument="--replicons" type="data" format="tabular,tsv,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/>
155 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDBJ compliance"/>
156 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/>
157 </section>
158 <!-- Analysis steps that can be switched off: each checked value is turned into one skip flag by the command template -->
159 <section name="workflow" title="Workflow option to skip steps">
160 <param name="skip_analysis" type="select" display="checkboxes" multiple="true" label="Select steps to skip">
161 <option value="skip_trna"> Skip tRNA detection and annotation </option>
162 <option value="skip_tmrna"> Skip tmRNA detection and annotation </option>
163 <option value="skip_rrna"> Skip rRNA detection and annotation </option>
164 <option value="skip_ncrna"> Skip ncRNA detection and annotation </option>
165 <option value="skip_ncrna_region"> Skip ncRNA region detection and annotation </option>
166 <option value="skip_crispr"> Skip CRISPR array detection and annotation </option>
167 <option value="skip_cds"> Skip CDS detection and annotation </option>
168 <option value="skip_sorf"> Skip sORF detection and annotation </option>
169 <option value="skip_gap"> Skip gap detection and annotation </option>
170 <option value="skip_ori"> Skip oriC/oriT detection and annotation </option>
171 </param>
172 </section>
173 <section name="output_files" title="Selection of the output files"> <!-- the checked values are read by the output filters to decide which datasets are created -->
174 <param name="output_selection" type="select" display="checkboxes" multiple="true" label="Output files selection">
175 <option value="file_tsv" selected="true"> Annotation file in TSV </option>
176 <option value="file_gff3" selected="true"> Annotation and sequence in GFF3 </option>
177 <option value="file_gbff" selected="true"> Annotations and sequences in GenBank format </option>
178 <option value="file_embl" selected="true"> Annotations and sequences in EMBL format </option>
179 <option value="file_fna" selected="true"> Replicon/contig DNA sequences as FASTA </option>
180 <option value="file_ffn" selected="true"> Feature nucleotide sequences as FASTA </option>
181 <option value="file_faa" selected="true"> CDS/sORF amino acid sequences as FASTA </option>
182 <option value="hypo_tsv" selected="true"> Hypothetical protein CDS in TSV</option>
183 <option value="hypo_fa" selected="true"> Hypothetical protein CDS amino sequences as FASTA</option>
184 <option value="sum_txt" selected="true"> Summary as TXT</option>
185 <option value="file_json" selected="true"> Information on each annotated feature as JSON </option>
186 <option value="log_txt" selected="true"> Log file as TXT </option>
187 </param>
188 </section>
189
190 </inputs>
191 <outputs> <!-- one dataset per bakta result file; each is created only when its value is checked in output_files/output_selection -->
192 <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: bakta_output.tsv">
193 <filter> output_files['output_selection'] and "file_tsv" in output_files['output_selection'] </filter>
194 </data>
195 <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: bakta_output.gff3">
196 <filter> output_files['output_selection'] and "file_gff3" in output_files['output_selection'] </filter>
197 </data>
198 <data name="annotation_gbff" format="genbank" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff">
199 <filter> output_files['output_selection'] and "file_gbff" in output_files['output_selection'] </filter>
200 </data>
201 <data name="annotation_embl" format="embl" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl">
202 <filter> output_files['output_selection'] and "file_embl" in output_files['output_selection'] </filter>
203 </data>
204 <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: bakta_output.fna">
205 <filter> output_files['output_selection'] and "file_fna" in output_files['output_selection'] </filter>
206 </data>
207 <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: bakta_output.ffn">
208 <filter> output_files['output_selection'] and "file_ffn" in output_files['output_selection'] </filter>
209 </data>
210 <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: bakta_output.faa">
211 <filter> output_files['output_selection'] and "file_faa" in output_files['output_selection'] </filter>
212 </data>
213 <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.tsv">
214 <filter> output_files['output_selection'] and "hypo_tsv" in output_files['output_selection'] </filter>
215 </data>
216 <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.faa">
217 <filter> output_files['output_selection'] and "hypo_fa" in output_files['output_selection'] </filter>
218 </data>
219 <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: bakta_summary.txt">
220 <filter> output_files['output_selection'] and "sum_txt" in output_files['output_selection'] </filter>
221 </data>
222 <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: bakta_output.json">
223 <filter> output_files['output_selection'] and "file_json" in output_files['output_selection'] </filter>
224 </data>
225 <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"> <!-- no from_work_dir: the command writes this dataset directly through tee -->
226 <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter>
227 </data>
228 </outputs>
229
230 <tests>
231 <test expect_num_outputs="12"> <!-- TEST_1: database + input only, every other option left at its default -->
232 <section name="input_option">
233 <param name="db_select" value="test-db-bakta"/>
234 <param name="input_file" value="NC_002127.1.fna"/>
235 </section>
236 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="4">
237 <assert_contents>
238 <has_text_matching n="1" expression="Genome size: 1,330 bp"/>
239 <has_n_lines n="90" delta="1"/>
240 </assert_contents>
241 </output>
242 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="1"/>
243 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2">
244 <assert_contents>
245 <has_text_matching expression="TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC"/>
246 </assert_contents>
247 </output>
248 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="4">
249 <assert_contents>
250 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
251 </assert_contents>
252 </output>
253 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="2"/>
254 <output name="annotation_fna" value="TEST_1/TEST_1.fna"/>
255 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/>
256 <output name="annotation_faa" value="TEST_1/TEST_1.faa"/>
257 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv"/>
258 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/>
259 <output name="summary_txt" value="TEST_1/TEST_1.txt">
260 <assert_contents>
261 <has_text_matching expression="N50: 1330"/>
262 </assert_contents>
263 </output>
264 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="4">
265 <assert_contents>
266 <has_text_matching expression="0.6203007518796992"/>
267 </assert_contents>
268 </output>
269 </test>
270 <test expect_num_outputs="12"> <!-- TEST_2: same genome with a minimum contig length, organism info, gram type, kept contig headers and two skipped steps -->
271 <section name="input_option">
272 <param name="db_select" value="test-db-bakta"/>
273 <param name="input_file" value="NC_002127.1.fna"/>
274 <param name="min_contig_length" value="250"/>
275 </section>
276 <section name="organism">
277 <param name="genus" value="Escherichia"/>
278 <param name="species" value="coli O157:H7"/>
279 <param name="strain" value="Sakai"/>
280 <param name="plasmid" value="pOSAK1"/>
281 </section>
282 <section name="annotation">
283 <param name="gram" value="-"/> <!-- the parameter name is "gram" (derived from its argument attribute); NOTE(review): confirm the TEST_2 expected files were generated with gram "-" -->
284 <param name="keep_contig_headers" value="true"/>
285 </section>
286 <section name="workflow">
287 <param name="skip_analysis" value="skip_trna,skip_tmrna"/>
288 </section>
289 <output name="logfile" value="TEST_2/TEST_2.log" lines_diff="4">
290 <assert_contents>
291 <has_text_matching expression="Genome size: 1,330 bp"/>
292 </assert_contents>
293 </output>
294 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="2">
295 <assert_contents>
296 <has_text_matching expression="IHHALP_00005"/>
297 </assert_contents>
298 </output>
299 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="2">
300 <assert_contents>
301 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/>
302 </assert_contents>
303 </output>
304 <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="5">
305 <assert_contents>
306 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
307 </assert_contents>
308 </output>
309 <output name="annotation_embl" value="TEST_2/TEST_2.embl" lines_diff="4">
310 <assert_contents>
311 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
312 </assert_contents>
313 </output>
314 <output name="annotation_fna" value="TEST_2/TEST_2.fna"/>
315 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
316 <output name="annotation_faa" value="TEST_2/TEST_2.faa"/>
317 <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv"/>
318 <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa"/>
319 <output name="summary_txt" value="TEST_2/TEST_2.txt">
320 <assert_contents>
321 <has_text_matching expression="N50: 1330"/>
322 </assert_contents>
323 </output>
324 <output name="annotation_json" value="TEST_2/TEST_2.json" lines_diff="4">
325 <assert_contents>
326 <has_text_matching expression="0.4518796992481203"/>
327 </assert_contents>
328 </output>
329 </test>
330 <test expect_num_outputs="12"> <!-- TEST_3: every analysis step skipped, minimum contig length set -->
331 <section name="input_option">
332 <param name="db_select" value="test-db-bakta"/>
333 <param name="input_file" value="NC_002127.1.fna"/>
334 <param name="min_contig_length" value="350"/>
335 </section>
336 <section name="workflow">
337 <param name="skip_analysis" value="skip_trna,skip_tmrna,skip_rrna,skip_ncrna,skip_ncrna_region,skip_crispr,skip_cds,skip_sorf,skip_gap,skip_ori"/>
338 </section>
339 <output name="logfile" value="TEST_3/TEST_3.log" lines_diff="4">
340 <assert_contents>
341 <has_text_matching expression="Genome size: 1,330 bp"/>
342 </assert_contents>
343 </output>
344 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="1"/>
345 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="2"/>
346 <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="10"/>
347 <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4"/>
348 <output name="annotation_fna" value="TEST_3/TEST_3.fna"/>
349 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
350 <output name="annotation_faa" value="TEST_3/TEST_3.faa"/>
351 <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4"/>
352 </test>
353 <test expect_num_outputs="12"> <!-- TEST_4: annotation options (complete, translation table, prodigal file, replicons, compliant, proteins) -->
354 <section name="input_option">
355 <param name="db_select" value="test-db-bakta"/>
356 <param name="input_file" value="NC_002127.1.fna"/>
357 </section>
358 <section name="annotation">
359 <param name="complete" value="true"/>
360 <param name="translation_table" value="4"/>
361 <param name="prodigal" value="prodigal.tf"/>
362 <param name="replicons" value="replicons.tsv"/>
363 <param name="compliant" value="true"/>
364 <param name="proteins" value="user-proteins.faa"/>
365 </section>
366 <output name="logfile" value="TEST_4/TEST_4.log" lines_diff="4">
367 <assert_contents>
368 <has_text_matching expression="potential: 16"/>
369 </assert_contents>
370 </output>
371 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="2"/>
372 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="2">
373 <assert_contents>
374 <has_text_matching expression="ID=IHHALP_00005_gene;locus_tag=IHHALP_00005"/>
375 </assert_contents>
376 </output>
377 <output name="annotation_gbff" value="TEST_4/TEST_4.gbff" lines_diff="4">
378 <assert_contents>
379 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
380 </assert_contents>
381 </output>
382 <output name="annotation_embl" value="TEST_4/TEST_4.embl" lines_diff="4">
383 <assert_contents>
384 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
385 </assert_contents>
386 </output>
387 <output name="annotation_fna" value="TEST_4/TEST_4.fna"/>
388 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
389 <output name="annotation_faa" value="TEST_4/TEST_4.faa"/>
390 <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv"/>
391 <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa"/>
392 <output name="summary_txt" value="TEST_4/TEST_4.txt">
393 <assert_contents>
394 <has_text_matching expression="GC: 45.2"/>
395 </assert_contents>
396 </output>
397 <output name="annotation_json" value="TEST_4/TEST_4.json" lines_diff="4">
398 <assert_contents>
399 <has_text_matching expression="0.4518796992481203"/>
400 </assert_contents>
401 </output>
402 </test>
403 <test expect_num_outputs="2"> <!-- TEST_5: skip all steps and keep only the logfile and summary outputs -->
404 <section name="input_option">
405 <param name="db_select" value="test-db-bakta"/>
406 <param name="input_file" value="NC_002127.1.fna"/>
407 </section>
408 <section name="annotation">
409 <param name="complete" value="true"/>
410 <param name="translation_table" value="4"/>
411 </section>
412 <section name="workflow">
413 <param name="skip_analysis" value="skip_trna,skip_tmrna,skip_rrna,skip_ncrna,skip_ncrna_region,skip_crispr,skip_cds,skip_sorf,skip_gap,skip_ori"/>
414 </section>
415 <section name="output_files">
416 <param name="output_selection" value="log_txt,sum_txt"/>
417 </section>
418 <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="4"/>
419 <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/>
420 </test>
421 </tests>
422 <help><![CDATA[**What it does**
423 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs.
424
425 *Comprehensive & taxonomy-independent database*
426 Bakta provides a large and taxonomy-independent database using UniProt's entire UniRef protein sequence cluster universe.
427
428 *Protein sequence identification*
429 Bakta exactly identifies known identical protein sequences (IPS) from RefSeq and UniProt
430 allowing the fine-grained annotation of gene alleles (AMR) or closely related but distinct protein families.
431 This is achieved via an alignment-free sequence identification (AFSI) approach
432 using full-length MD5 protein sequence hash digests.
433 *Small proteins/short open reading frames*
434 Bakta detects and annotates small proteins/short open reading frames (sORF).
435
436 *Expert annotation systems*
437 To provide high quality annotations for certain proteins of higher interest, e.g. AMR & VF genes,
438 Bakta includes & merges different expert annotation systems.
439 Currently, Bakta uses NCBI's AMRFinderPlus for AMR gene annotations
440 as well as a generalized protein sequence expert system with distinct
441 coverage, identity and priority values for each sequence, currently comprising the VFDB as well as NCBI's BlastRules.
442
443 *Comprehensive workflow*
444 Bakta annotates ncRNA cis-regulatory regions, oriC/oriV/oriT
445 and assembly gaps as well as standard feature types: tRNA, tmRNA, rRNA, ncRNA genes, CRISPR, CDS.
446
447 *GFF3 & INSDC conform annotations*
448 Bakta writes GFF3 and INSDC-compliant (Genbank & EMBL) annotation files ready for submission
449 (checked via GenomeTools GFF3Validator, table2asn_GFF and ENA Webin-CLI for GFF3 and EMBL file formats,
450 respectively for representative genomes of all ESKAPE species).
451
452 *Bacteria & plasmids*
453 Bakta was designed to annotate bacteria (isolates & MAGs) and plasmids, only.
454
455 **Input options**
456 1. Choose a genome or assembly in fasta format to use bakta annotations
457 2. Choose a version of the Bakta database
458
459 **Organism options**
460 You can specify information about the analysed FASTA as text input for:
461 - genus
462 - species
463 - strain
464 - plasmid
465
466 **Annotation options**
467 1. You can specify if all sequences (chromosome or plasmids) are complete or not
468 2. You can add your own prodigal training file for CDS prediction
469 3. The translation table could be modified, default is the 11th for bacteria
470 4. You can specify if the bacterium is gram -/+ or unknown (default value: unknown)
471 5. You can keep the name of contig present in the input file
472 6. You can specify your own replicon table as a TSV/CSV file
473 7. The compliance option produces annotation files that are ready for submission to public databases
474 such as ENA, GenBank or EMBL
475 8. You can specify a protein sequence file for annotation in GenBank or fasta formats
476 Using the Fasta format, each reference sequence can be provided in a short or long format:
477
478 # short:
479 >id gene~~~product~~~dbxrefs
480 MAQ...
481
482 # long:
483 >id min_identity~~~min_query_cov~~~min_subject_cov~~~gene~~~product~~~dbxrefs
484 MAQ...
485
486 **Skip steps**
487 Some steps can be skipped:
488 - skip-trna Skip tRNA detection & annotation
489 - skip-tmrna Skip tmRNA detection & annotation
490 - skip-rrna Skip rRNA detection & annotation
491 - skip-ncrna Skip ncRNA detection & annotation
492 - skip-ncrna-region Skip ncRNA region detection & annotation
493 - skip-crispr Skip CRISPR array detection & annotation
494 - skip-cds Skip CDS detection & annotation
495 - skip-pseudo Skip pseudogene detection & annotation
496 - skip-sorf Skip sORF detection & annotation
497 - skip-gap Skip gap detection & annotation
498 - skip-ori Skip oriC/oriT detection & annotation
499
500 **Output options**
501 Bakta produces a number of output files; you can select which types of file you want:
502 - Summary of the annotation
503 - Annotated files
504 - Sequence files for nucleotide and/or amino acid
505 ]]></help>
506 <expand macro="citations"/>
507 </tool>