Mercurial > repos > iuc > bakta
comparison bakta.xml @ 7:ba6990f72184 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/bakta commit e277883fca66013904bae930f04e7f3be5fcb1a2
author | iuc |
---|---|
date | Wed, 05 Jun 2024 14:22:02 +0000 |
parents | 92eee5f31117 |
children |
comparison
equal
deleted
inserted
replaced
6:92eee5f31117 | 7:ba6990f72184 |
---|---|
1 <tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | 1 <tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> |
2 <description> | 2 <description> |
3 Genome annotation via alignment-free sequence identification | 3 Rapid and standardized annotation of bacterial genomes, MAGs and plasmids |
4 </description> | 4 </description> |
5 <macros> | 5 <macros> |
6 <import>macro.xml</import> | 6 <import>macro.xml</import> |
7 </macros> | 7 </macros> |
8 <expand macro='edam'/> | 8 <expand macro="xrefs"/> |
9 <expand macro='xrefs'/> | |
10 <expand macro="requirements"/> | 9 <expand macro="requirements"/> |
11 <expand macro="version_command"/> | 10 <expand macro="version_command"/> |
12 | |
13 <command detect_errors="aggressive"><![CDATA[ | 11 <command detect_errors="aggressive"><![CDATA[ |
14 | 12 mkdir -p ./database_path/amrfinderplus-db && |
15 mkdir -p ./database_path/amrfinderplus-db && | 13 ln -s '$(input_option.bakta_db_select.fields.path)'/* database_path && |
16 ln -s '$(input_option.bakta_db_select.fields.path)'/* database_path && | 14 ln -s '$(input_option.amrfinder_db_select.fields.path)/' database_path/amrfinderplus-db/latest && |
17 ln -s '$(input_option.amrfinder_db_select.fields.path)/' database_path/amrfinderplus-db/latest && | 15 |
18 bakta --verbose | 16 bakta |
19 | 17 --verbose |
20 #*====================================== | 18 #*====================================== |
21 CPU option | 19 CPU option |
22 ======================================*# | 20 ======================================*# |
23 --threads \${GALAXY_SLOTS:-1} | 21 --threads \${GALAXY_SLOTS:-1} |
24 #*====================================== | 22 #*====================================== |
25 Bakta database | 23 Bakta database |
26 ======================================*# | 24 ======================================*# |
27 --db './database_path' | 25 --db './database_path' |
28 --output 'bakta_output' | 26 --output 'bakta_output' |
29 #if $input_option.min_contig_length | 27 #if $input_option.min_contig_length |
30 --min-contig-length $input_option.min_contig_length | 28 --min-contig-length $input_option.min_contig_length |
31 #else if $annotation.compliant | 29 #else if $annotation.compliant |
32 --min-contig-length 200 | 30 --min-contig-length 200 |
33 #else | 31 #else |
34 --min-contig-length 1 | 32 --min-contig-length 1 |
35 #end if | 33 #end if |
36 --prefix bakta_output | 34 --prefix bakta_output |
37 #*====================================== | 35 #*====================================== |
38 Organism options | 36 Organism options |
39 genus/species/strain/plasmid | 37 genus/species/strain/plasmid |
40 ======================================*# | 38 ======================================*# |
41 #if $organism.genus | 39 #if $organism.genus |
42 --genus '$organism.genus' | 40 --genus '$organism.genus' |
43 #end if | 41 #end if |
44 #if $organism.species | 42 #if $organism.species |
45 --species '$organism.species' | 43 --species '$organism.species' |
46 #end if | 44 #end if |
47 #if $organism.strain | 45 #if $organism.strain |
48 --strain '$organism.strain' | 46 --strain '$organism.strain' |
49 #end if | 47 #end if |
50 #if $organism.plasmid | 48 #if $organism.plasmid |
51 --plasmid '$organism.plasmid' | 49 --plasmid '$organism.plasmid' |
52 #end if | 50 #end if |
53 #*====================================== | 51 #*====================================== |
54 Annotation options | 52 Annotation options |
55 gram type, prodigal/protein file | 53 gram type, prodigal/protein file |
56 ======================================*# | 54 ======================================*# |
57 $annotation.complete | 55 $annotation.complete |
58 #if $annotation.prodigal | 56 #if $annotation.prodigal |
59 --prodigal-tf '$annotation.prodigal' | 57 --prodigal-tf '$annotation.prodigal' |
60 #end if | 58 #end if |
61 #if $annotation.translation_table | 59 #if $annotation.translation_table |
62 --translation-table '$annotation.translation_table' | 60 --translation-table '$annotation.translation_table' |
63 #end if | 61 #end if |
64 --gram '?' | 62 --gram '?' |
65 $annotation.keep_contig_headers | 63 $annotation.keep_contig_headers |
66 #if $annotation.replicons | 64 #if $annotation.replicons |
67 --replicons '$annotation.replicons' | 65 --replicons '$annotation.replicons' |
68 #end if | 66 #end if |
69 $annotation.compliant | 67 $annotation.compliant |
70 #if $annotation.proteins | 68 #if $annotation.proteins |
71 --proteins '$annotation.proteins' | 69 --proteins '$annotation.proteins' |
72 #end if | 70 #end if |
73 #if $annotation.regions | 71 #if $annotation.regions |
74 --regions '$annotation.regions' | 72 --regions '$annotation.regions' |
75 #end if | 73 #end if |
76 #*====================================== | 74 #*====================================== |
77 Workflow OPTIONS | 75 Workflow OPTIONS |
78 skip some steps of the bakta analysis | 76 skip some steps of the bakta analysis |
79 ======================================*# | 77 ======================================*# |
80 | 78 |
81 #echo " ".join($workflow.skip_analysis) | 79 #echo " ".join($workflow.skip_analysis) |
82 | 80 |
83 #*====================================== | 81 #*====================================== |
84 Genome file | 82 Genome file |
85 ======================================*# | 83 ======================================*# |
86 '$input_option.input_file' | 84 '$input_option.input_file' |
87 #*====================================== | 85 #*====================================== |
88 LOG file | 86 LOG file |
89 ======================================*# | 87 ======================================*# |
90 | tee '$logfile' | 88 | tee '$logfile' |
91 ]]></command> | 89 ]]></command> |
92 <inputs> | 90 <inputs> |
93 <!-- DB and file INPUT --> | 91 <!-- DB and file INPUT --> |
94 <section name="input_option" title="Input/Output options" expanded="true"> | 92 <section name="input_option" title="Input/Output options" expanded="true"> |
95 <param name="bakta_db_select" type="select" label="The bakta database"> | 93 <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/> |
94 <param argument="--min-contig-length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode)"/> | |
95 <param name="bakta_db_select" type="select" label="Bakta database"> | |
96 <options from_data_table="bakta_database"> | 96 <options from_data_table="bakta_database"> |
97 <filter type="static_value" value="@COMPATIBLE_BAKTA_VERSION@" column="bakta_version"/> | 97 <filter type="static_value" value="@COMPATIBLE_BAKTA_VERSION@" column="bakta_version"/> |
98 <validator message="No bakta database is available" type="no_options"/> | 98 <validator message="No bakta database is available" type="no_options"/> |
99 </options> | 99 </options> |
100 </param> | 100 </param> |
101 <param name="amrfinder_db_select" type="select" label="The amrfinderplus database"> | 101 <param name="amrfinder_db_select" type="select" optional="true" label="AMRFinderPlus database" help="The selection of this database is not needed if the Bakta database version is higher than 5.0"> |
102 <options from_data_table="amrfinderplus_database"> | 102 <options from_data_table="amrfinderplus_versioned_database"> |
103 <validator message="No amrfinderplus database is available" type="no_options"/> | 103 <filter type="static_value" value="3.12" column="db_version"/> |
104 <validator message="No AMRFinderPlus database is available" type="no_options"/> | |
104 </options> | 105 </options> |
105 </param> | 106 </param> |
106 | |
107 <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/> | |
108 <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/> | |
109 </section> | 107 </section> |
110 <!-- Organism INFORMATION OPTIONS --> | 108 <!-- Organism INFORMATION OPTIONS --> |
111 <section name="organism" title="Optional organism options" expanded="false"> | 109 <section name="organism" title="Optional organism options" expanded="false"> |
112 <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia"> | 110 <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia"> |
113 <validator type="regex">^[a-zA-Z]+$</validator> | 111 <validator type="regex">^[a-zA-Z]+$</validator> |
128 <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/> | 126 <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/> |
129 <param name="translation_table" type="select" optional="true" label="Translation table" help="Default is the bacterial table 11"> | 127 <param name="translation_table" type="select" optional="true" label="Translation table" help="Default is the bacterial table 11"> |
130 <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> | 128 <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> |
131 <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option> | 129 <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option> |
132 </param> | 130 </param> |
133 <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/> | 131 <param argument="--keep-contig-headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header"/> |
134 <param argument="--replicons" type="data" format="tabular,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/> | 132 <param argument="--replicons" type="data" format="tabular,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/> |
135 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDBJ compliance"/> | 133 <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDBJ compliance"/> |
136 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/> | 134 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/> |
137 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" label="Metagenome mode" help="Run in metagenome mode. This only affects CDS prediction"/> | 135 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" label="Metagenome mode" help="Run in metagenome mode. This only affects CDS prediction"/> |
138 <param argument="--regions" type="data" format="gff,genbank" optional="true" label="Pre-annotated regions" help="Regions only, no functional annotations."/> | 136 <param argument="--regions" type="data" format="gff,genbank" optional="true" label="Pre-annotated regions" help="Regions only, no functional annotations."/> |
169 <option value="file_json" selected="false">Information on each annotated feature as JSON</option> | 167 <option value="file_json" selected="false">Information on each annotated feature as JSON</option> |
170 <option value="file_plot" selected="true">Plot of the annotation result as SVG</option> | 168 <option value="file_plot" selected="true">Plot of the annotation result as SVG</option> |
171 <option value="log_txt" selected="false">Log file as TXT</option> | 169 <option value="log_txt" selected="false">Log file as TXT</option> |
172 </param> | 170 </param> |
173 </section> | 171 </section> |
174 | |
175 </inputs> | 172 </inputs> |
176 <outputs> | 173 <outputs> |
177 <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output/bakta_output.tsv" label="${tool.name} on ${on_string}: annotation_summary"> | 174 <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output/bakta_output.tsv" label="${tool.name} on ${on_string}: Summary"> |
178 <filter>output_files['output_selection'] and "file_tsv" in output_files['output_selection']</filter> | 175 <filter>output_files['output_selection'] and "file_tsv" in output_files['output_selection']</filter> |
179 </data> | 176 </data> |
180 <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output/bakta_output.gff3" label="${tool.name} on ${on_string}: Annotation_and_sequences"> | 177 <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output/bakta_output.gff3" label="${tool.name} on ${on_string}: Annotation and sequences (GFF3)"> |
181 <filter>output_files['output_selection'] and "file_gff3" in output_files['output_selection']</filter> | 178 <filter>output_files['output_selection'] and "file_gff3" in output_files['output_selection']</filter> |
182 </data> | 179 </data> |
183 <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output/bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff"> | 180 <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output/bakta_output.gbff" label="${tool.name} on ${on_string}: Annotations and sequences (GenBank format)"> |
184 <filter>output_files['output_selection'] and "file_gbff" in output_files['output_selection']</filter> | 181 <filter>output_files['output_selection'] and "file_gbff" in output_files['output_selection']</filter> |
185 </data> | 182 </data> |
186 <data name="annotation_embl" format="tabular" from_work_dir="bakta_output/bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl"> | 183 <data name="annotation_embl" format="tabular" from_work_dir="bakta_output/bakta_output.embl" label="${tool.name} on ${on_string}: Annotations and sequences (EMBL format)"> |
187 <filter>output_files['output_selection'] and "file_embl" in output_files['output_selection']</filter> | 184 <filter>output_files['output_selection'] and "file_embl" in output_files['output_selection']</filter> |
188 </data> | 185 </data> |
189 <data name="annotation_fna" format="fasta" from_work_dir="bakta_output/bakta_output.fna" label="${tool.name} on ${on_string}: Contig_sequences"> | 186 <data name="annotation_fna" format="fasta" from_work_dir="bakta_output/bakta_output.fna" label="${tool.name} on ${on_string}: Replicon/contig DNA sequences"> |
190 <filter>output_files['output_selection'] and "file_fna" in output_files['output_selection']</filter> | 187 <filter>output_files['output_selection'] and "file_fna" in output_files['output_selection']</filter> |
191 </data> | 188 </data> |
192 <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output/bakta_output.ffn" label="${tool.name} on ${on_string}: Nucleotide_sequences"> | 189 <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output/bakta_output.ffn" label="${tool.name} on ${on_string}: Feature nucleotide sequences"> |
193 <filter>output_files['output_selection'] and "file_ffn" in output_files['output_selection']</filter> | 190 <filter>output_files['output_selection'] and "file_ffn" in output_files['output_selection']</filter> |
194 </data> | 191 </data> |
195 <data name="annotation_faa" format="fasta" from_work_dir="bakta_output/bakta_output.faa" label="${tool.name} on ${on_string}: Amino_acid_sequences"> | 192 <data name="annotation_faa" format="fasta" from_work_dir="bakta_output/bakta_output.faa" label="${tool.name} on ${on_string}: CDS/sORF amino acid sequences"> |
196 <filter>output_files['output_selection'] and "file_faa" in output_files['output_selection']</filter> | 193 <filter>output_files['output_selection'] and "file_faa" in output_files['output_selection']</filter> |
197 </data> | 194 </data> |
198 <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output/bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: hypothetical_annotation_summary"> | 195 <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output/bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: Hypothetical protein CDS summary"> |
199 <filter>output_files['output_selection'] and "hypo_tsv" in output_files['output_selection']</filter> | 196 <filter>output_files['output_selection'] and "hypo_tsv" in output_files['output_selection']</filter> |
200 </data> | 197 </data> |
201 <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output/bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: hypothetical_amino_acid_sequences"> | 198 <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output/bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: Hypothetical protein CDS amino acid sequences"> |
202 <filter>output_files['output_selection'] and "hypo_fa" in output_files['output_selection']</filter> | 199 <filter>output_files['output_selection'] and "hypo_fa" in output_files['output_selection']</filter> |
203 </data> | 200 </data> |
204 <data name="summary_txt" format="txt" from_work_dir="bakta_output/bakta_output.txt" label="${tool.name} on ${on_string}: Analysis_summary"> | 201 <data name="summary_txt" format="txt" from_work_dir="bakta_output/bakta_output.txt" label="${tool.name} on ${on_string}: Summary (TXT)"> |
205 <filter>output_files['output_selection'] and "sum_txt" in output_files['output_selection']</filter> | 202 <filter>output_files['output_selection'] and "sum_txt" in output_files['output_selection']</filter> |
206 </data> | 203 </data> |
207 <data name="annotation_json" format="json" from_work_dir="bakta_output/bakta_output.json" label="${tool.name} on ${on_string}: annotation_machine_readable"> | 204 <data name="annotation_json" format="json" from_work_dir="bakta_output/bakta_output.json" label="${tool.name} on ${on_string}: Information on each annotated feature (JSON)"> |
208 <filter>output_files['output_selection'] and "file_json" in output_files['output_selection']</filter> | 205 <filter>output_files['output_selection'] and "file_json" in output_files['output_selection']</filter> |
209 </data> | 206 </data> |
210 <data name="annotation_plot" format="svg" from_work_dir="bakta_output/bakta_output.svg" label="${tool.name} on ${on_string}: Plot of the annotation"> | 207 <data name="annotation_plot" format="svg" from_work_dir="bakta_output/bakta_output.svg" label="${tool.name} on ${on_string}: Plot of the annotation"> |
211 <filter>output_files['output_selection'] and "file_plot" in output_files['output_selection']</filter> | 208 <filter>output_files['output_selection'] and "file_plot" in output_files['output_selection']</filter> |
212 </data> | 209 </data> |
213 <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"> | 210 <data name="logfile" format="txt" label="${tool.name} on ${on_string}: Log file"> |
214 <filter>output_files['output_selection'] and "log_txt" in output_files['output_selection']</filter> | 211 <filter>output_files['output_selection'] and "log_txt" in output_files['output_selection']</filter> |
215 </data> | 212 </data> |
216 </outputs> | 213 </outputs> |
217 <tests> | 214 <tests> |
218 <test expect_num_outputs="13"> <!-- TEST_1 database + input --> | 215 <test expect_num_outputs="13"> <!-- TEST_1 database + input --> |
219 <section name="input_option" > | 216 <section name="input_option" > |
220 <param name="bakta_db_select" value="V5.0_2022-08-19"/> | |
221 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> | |
222 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> | 217 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> |
223 <param name="min_contig_length" value="250"/> | 218 <param name="min_contig_length" value="250"/> |
219 <param name="bakta_db_select" value="V5.1_light_2024-01-19"/> | |
220 <param name="amrfinder_db_select" value="V3.12-2024-05-02.2"/> | |
224 </section> | 221 </section> |
225 <section name="output_files"> | 222 <section name="output_files"> |
226 <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/> | 223 <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/> |
227 </section> | 224 </section> |
228 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2"/> | 225 <output name="annotation_tsv" ftype="tabular"> |
229 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"/> | 226 <assert_contents> |
230 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="8"/> | 227 <has_n_lines n="8"/> |
231 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="6"/> | 228 <has_text text="IHHALP_00005"/> |
232 <output name="annotation_fna" value="TEST_1/TEST_1.fna"/> | 229 <has_text text="hypothetical protein"/> |
233 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/> | 230 </assert_contents> |
234 <output name="annotation_faa" value="TEST_1/TEST_1.faa"/> | 231 </output> |
235 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv" lines_diff="4"/> | 232 <output name="annotation_gff3" ftype="gff3"> |
236 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/> | 233 <assert_contents> |
237 <output name="summary_txt" value="TEST_1/TEST_1.txt" lines_diff="4"/> | 234 <has_n_lines n="36"/> |
238 <output name="annotation_plot" value="TEST_1/TEST_1_plot.svg" ftype="svg" compare="sim_size"/> | 235 <has_text text="Prodigal"/> |
239 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="6"/> | 236 <has_text text="ID=IHHALP_00005;Name=hypothetical protein;locus_tag=IHHALP_00005;product=hypothetical protein"/> |
237 </assert_contents> | |
238 </output> | |
239 <output name="annotation_gbff" ftype="tabular"> | |
240 <assert_contents> | |
241 <has_n_lines n="81"/> | |
242 <has_text text="DEFINITION"/> | |
243 <has_text text="/inference"/> | |
244 </assert_contents> | |
245 </output> | |
246 <output name="annotation_embl" ftype="tabular"> | |
247 <assert_contents> | |
248 <has_text text="##Genome Annotation Summary:##"/> | |
249 <has_text text="/product="/> | |
250 </assert_contents> | |
251 </output> | |
252 <output name="annotation_fna" ftype="fasta"> | |
253 <assert_contents> | |
254 <has_text text=">contig_1"/> | |
255 <has_text text="TCTTCTGCGAG"/> | |
256 </assert_contents> | |
257 </output> | |
258 <output name="annotation_ffn" ftype="fasta"> | |
259 <assert_contents> | |
260 <has_text text=">IHHALP_00005 hypothetical protein"/> | |
261 <has_text text="ATGACAAAACGAAGTG"/> | |
262 </assert_contents> | |
263 </output> | |
264 <output name="annotation_faa" ftype="fasta"> | |
265 <assert_contents> | |
266 <has_text text=">IHHALP_00005 hypothetical protein"/> | |
267 <has_text text="MTKRSGSNTR"/> | |
268 </assert_contents> | |
269 </output> | |
270 <output name="hypotheticals_tsv" ftype="tabular"> | |
271 <assert_contents> | |
272 <has_n_lines n="5"/> | |
273 <has_text text="IHHALP_00010"/> | |
274 <has_text text="Sequence Id"/> | |
275 </assert_contents> | |
276 </output> | |
277 <output name="hypotheticals_faa" ftype="fasta"> | |
278 <assert_contents> | |
279 <has_text text=">IHHALP_00010 hypothetical protein"/> | |
280 <has_text text="MNKQQQTALNM"/> | |
281 </assert_contents> | |
282 </output> | |
283 <output name="summary_txt" ftype="txt"> | |
284 <assert_contents> | |
285 <has_text text="coding density: 62.0"/> | |
286 <has_text text="CDSs: 2"/> | |
287 </assert_contents> | |
288 </output> | |
289 <output name="annotation_plot" ftype="svg"> | |
290 <assert_contents> | |
291 <has_size value="418990" delta="1000"/> | |
292 </assert_contents> | |
293 </output> | |
294 <output name="annotation_json" ftype="json"> | |
295 <assert_contents> | |
296 <has_text text="coding_ratio"/> | |
297 <has_text text="n50"/> | |
298 <has_text text="hypothetical protein"/> | |
299 </assert_contents> | |
300 </output> | |
240 <output name="logfile" ftype="txt"> | 301 <output name="logfile" ftype="txt"> |
241 <expand macro="assert_content_test"/> | 302 <expand macro="assert_content_test"/> |
242 </output> | 303 </output> |
243 </test> | 304 </test> |
244 <test expect_num_outputs="4"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps --> | 305 <test expect_num_outputs="4"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps --> |
245 <section name="input_option" > | 306 <section name="input_option" > |
246 <param name="bakta_db_select" value="V5.0_2022-08-19"/> | |
247 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> | |
248 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> | 307 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> |
249 <param name="min_contig_length" value="250"/> | 308 <param name="min_contig_length" value="250"/> |
309 <param name="bakta_db_select" value="V5.1_light_2024-01-19"/> | |
310 <param name="amrfinder_db_select" value="V3.12-2024-05-02.2"/> | |
250 </section> | 311 </section> |
251 <section name="organism"> | 312 <section name="organism"> |
252 <param name="genus" value="Escherichia"/> | 313 <param name="genus" value="Escherichia"/> |
253 <param name="species" value="coli O157:H7"/> | 314 <param name="species" value="coli O157:H7"/> |
254 <param name="strain" value="Sakai"/> | 315 <param name="strain" value="Sakai"/> |
258 <param name="keep_contig_headers" value="true"/> | 319 <param name="keep_contig_headers" value="true"/> |
259 </section> | 320 </section> |
260 <section name="workflow"> | 321 <section name="workflow"> |
261 <param name="skip_analysis" value="--skip-trna,--skip-tmrna"/> | 322 <param name="skip_analysis" value="--skip-trna,--skip-tmrna"/> |
262 </section> | 323 </section> |
263 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="4"> | 324 <output name="annotation_tsv"> |
264 <assert_contents> | 325 <assert_contents> |
326 <has_n_lines n="8"/> | |
265 <has_text_matching expression="IHHALP_00005"/> | 327 <has_text_matching expression="IHHALP_00005"/> |
266 </assert_contents> | 328 </assert_contents> |
267 </output> | 329 </output> |
268 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="4"> | 330 <output name="annotation_gff3"> |
269 <assert_contents> | 331 <assert_contents> |
332 <has_n_lines n="37"/> | |
270 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/> | 333 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/> |
271 </assert_contents> | 334 </assert_contents> |
272 </output> | 335 </output> |
273 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/> | 336 <output name="annotation_ffn" ftype="fasta"> |
274 <output name="annotation_plot" value="TEST_2/TEST_2_plot.svg" ftype="svg" compare="sim_size"/> | 337 <assert_contents> |
338 <has_text text=">IHHALP_00005 hypothetical protein"/> | |
339 <has_text text="ATGACAAAACGAAGTG"/> | |
340 </assert_contents> | |
341 </output> | |
342 <output name="annotation_plot" ftype="svg"> | |
343 <assert_contents> | |
344 <has_size value="418990" delta="1000"/> | |
345 </assert_contents> | |
346 </output> | |
275 </test> | 347 </test> |
276 <test expect_num_outputs="4"> <!-- TEST_3 test all skip steps --> | 348 <test expect_num_outputs="4"> <!-- TEST_3 test all skip steps and test previous bakta version --> |
277 <section name="input_option" > | 349 <section name="input_option" > |
278 <param name="bakta_db_select" value="V5.0_2022-08-19"/> | |
279 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> | |
280 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> | 350 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> |
281 <param name="min_contig_length" value="350"/> | 351 <param name="min_contig_length" value="350"/> |
352 <param name="bakta_db_select" value="V5.0_2022-08-19"/> | |
353 <param name="amrfinder_db_select" value="V3.12-2024-05-02.2"/> | |
282 </section> | 354 </section> |
283 <section name="workflow"> | 355 <section name="workflow"> |
284 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori,--skip-plot"/> | 356 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori,--skip-plot"/> |
285 </section> | 357 </section> |
286 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="4"/> | 358 <output name="annotation_tsv"> |
287 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="4"/> | 359 <assert_contents> |
288 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/> | 360 <has_n_lines n="6"/> |
289 </test> | 361 <has_text_matching expression="DbXrefs"/> |
290 <test expect_num_outputs="4"> <!-- TEST_4 annotations --> | 362 </assert_contents> |
291 <section name="input_option" > | 363 </output> |
292 <param name="bakta_db_select" value="V5.0_2022-08-19"/> | 364 <output name="annotation_gff3" ftype="gff3"> |
293 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> | 365 <assert_contents> |
294 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> | 366 <has_n_lines n="34"/> |
295 </section> | 367 <has_text text=">contig_1"/> |
296 <section name="annotation"> | 368 </assert_contents> |
297 <param name="complete" value="true"/> | 369 </output> |
298 <param name="prodigal" value="prodigal.tf"/> | 370 <output name="annotation_ffn" ftype="fasta"> |
299 <param name="translation_table" value="4"/> | 371 <assert_contents> |
300 <param name="replicons" value="replicons.tsv" ftype="tabular"/> | 372 <has_size value="0"/> |
301 <param name="compliant" value="true"/> | 373 </assert_contents> |
302 <param name="proteins" value="user-proteins.faa" ftype="fasta"/> | 374 </output> |
303 </section> | 375 </test> |
304 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="4"/> | 376 <test expect_num_outputs="4"> <!-- TEST_4 annotations --> |
305 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="4"/> | 377 <section name="input_option" > |
306 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/> | 378 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> |
307 <output name="annotation_plot" value="TEST_4/TEST_4_plot.svg" ftype="svg" compare="sim_size"/> | 379 <param name="bakta_db_select" value="V5.1_light_2024-01-19"/> |
308 </test> | 380 <param name="amrfinder_db_select" value="V3.12-2024-05-02.2"/> |
309 <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary --> | 381 </section> |
310 <section name="input_option" > | 382 <section name="annotation"> |
311 <param name="bakta_db_select" value="V5.0_2022-08-19"/> | 383 <param name="complete" value="true"/> |
312 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> | 384 <param name="prodigal" value="prodigal.tf"/> |
313 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> | 385 <param name="translation_table" value="4"/> |
314 </section> | 386 <param name="replicons" value="replicons.tsv" ftype="tabular"/> |
315 <section name="annotation"> | 387 <param name="compliant" value="true"/> |
316 <param name="complete" value="true"/> | 388 <param name="proteins" value="user-proteins.faa" ftype="fasta"/> |
317 <param name="translation_table" value="4"/> | 389 </section> |
318 </section> | 390 <output name="annotation_tsv"> |
319 <section name="workflow"> | 391 <assert_contents> |
320 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori,--skip-plot"/> | 392 <has_n_lines n="8"/> |
321 </section> | 393 <has_text_matching expression="IHHALP_00005"/> |
322 <section name="output_files"> | 394 </assert_contents> |
323 <param name="output_selection" value="log_txt,sum_txt"/> | 395 </output> |
324 </section> | 396 <output name="annotation_gff3" ftype="gff3"> |
325 <output name="logfile" ftype="txt"> | 397 <assert_contents> |
326 <expand macro="assert_content_test"/> | 398 <has_n_lines n="13"/> |
327 </output> | 399 <has_text text="Prodigal"/> |
328 <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/> | 400 <has_text text="ID=IHHALP_00010_gene;locus_tag=IHHALP_00010"/> |
329 </test> | 401 </assert_contents> |
330 <test expect_num_outputs="13"> <!-- TEST_6 metagenome option --> | 402 </output> |
331 <section name="input_option" > | 403 <output name="annotation_ffn" ftype="fasta"> |
332 <param name="bakta_db_select" value="V5.0_2022-08-19"/> | 404 <assert_contents> |
333 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/> | 405 <has_text text=">IHHALP_00005 hypothetical protein"/> |
334 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> | 406 <has_text text="ATGACAAAACGAAGTG"/> |
335 </section> | 407 </assert_contents> |
336 <section name="annotation"> | 408 </output> |
337 <param name="meta" value="true"/> | 409 <output name="annotation_plot" ftype="svg"> |
338 </section> | 410 <assert_contents> |
339 <section name="output_files"> | 411 <has_size value="418990" delta="1000"/> |
340 <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/> | 412 </assert_contents> |
341 </section> | 413 </output> |
342 <output name="annotation_tsv" value="TEST_6/TEST_6.tsv" lines_diff="2"/> | 414 </test> |
343 <output name="annotation_gff3" value="TEST_6/TEST_6.gff3" lines_diff="2"/> | 415 <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary --> |
344 <output name="annotation_gbff" value="TEST_6/TEST_6.gbff" lines_diff="8"/> | 416 <section name="input_option" > |
345 <output name="annotation_embl" value="TEST_6/TEST_6.embl" lines_diff="6"/> | 417 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> |
346 <output name="annotation_fna" value="TEST_6/TEST_6.fna"/> | 418 <param name="bakta_db_select" value="V5.1_light_2024-01-19"/> |
347 <output name="annotation_ffn" value="TEST_6/TEST_6.ffn"/> | 419 <param name="amrfinder_db_select" value="V3.12-2024-05-02.2"/> |
348 <output name="annotation_faa" value="TEST_6/TEST_6.faa"/> | 420 </section> |
349 <output name="hypotheticals_tsv" value="TEST_6/TEST_6.hypotheticals.tsv" lines_diff="4"/> | 421 <section name="annotation"> |
350 <output name="hypotheticals_faa" value="TEST_6/TEST_6.hypotheticals.faa"/> | 422 <param name="complete" value="true"/> |
351 <output name="summary_txt" value="TEST_6/TEST_6.txt" lines_diff="4"/> | 423 <param name="translation_table" value="4"/> |
352 <output name="annotation_plot" value="TEST_6/TEST_6_plot.svg" ftype="svg" compare="sim_size"/> | 424 </section> |
353 <output name="annotation_json" value="TEST_6/TEST_6.json" lines_diff="6"/> | 425 <section name="workflow"> |
354 <output name="logfile" ftype="txt"> | 426 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori,--skip-plot"/> |
355 <expand macro="assert_content_test"/> | 427 </section> |
356 </output> | 428 <section name="output_files"> |
357 </test> | 429 <param name="output_selection" value="log_txt,sum_txt"/> |
430 </section> | |
431 <output name="logfile" ftype="txt"> | |
432 <expand macro="assert_content_test"/> | |
433 </output> | |
434 <output name="summary_txt" ftype="txt"> | |
435 <assert_contents> | |
436 <has_n_lines n="30"/> | |
437 <has_text text="N50: 1330"/> | |
438 <has_text text="oriTs: 0"/> | |
439 </assert_contents> | |
440 </output> | |
441 </test> | |
442 <test expect_num_outputs="13"> <!-- TEST_6 metagenome option --> | |
443 <section name="input_option" > | |
444 <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/> | |
445 <param name="bakta_db_select" value="V5.1_light_2024-01-19"/> | |
446 <param name="amrfinder_db_select" value="V3.12-2024-05-02.2"/> | |
447 </section> | |
448 <section name="annotation"> | |
449 <param name="meta" value="true"/> | |
450 </section> | |
451 <section name="output_files"> | |
452 <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/> | |
453 </section> | |
454 <output name="annotation_tsv" ftype="tabular"> | |
455 <assert_contents> | |
456 <has_n_lines n="8"/> | |
457 <has_text text="IHHALP_00005"/> | |
458 <has_text text="hypothetical protein"/> | |
459 </assert_contents> | |
460 </output> | |
461 <output name="annotation_gff3" ftype="gff3"> | |
462 <assert_contents> | |
463 <has_n_lines n="36"/> | |
464 <has_text text="Prodigal"/> | |
465 <has_text text="ID=IHHALP_00005;Name=hypothetical protein;locus_tag=IHHALP_00005;product=hypothetical protein"/> | |
466 </assert_contents> | |
467 </output> | |
468 <output name="annotation_gbff" ftype="tabular"> | |
469 <assert_contents> | |
470 <has_n_lines n="81"/> | |
471 <has_text text="DEFINITION"/> | |
472 <has_text text="/inference"/> | |
473 </assert_contents> | |
474 </output> | |
475 <output name="annotation_embl" ftype="tabular"> | |
476 <assert_contents> | |
477 <has_text text="##Genome Annotation Summary:##"/> | |
478 <has_text text="/product="/> | |
479 </assert_contents> | |
480 </output> | |
481 <output name="annotation_fna" ftype="fasta"> | |
482 <assert_contents> | |
483 <has_text text=">contig_1"/> | |
484 <has_text text="TCTTCTGCGAG"/> | |
485 </assert_contents> | |
486 </output> | |
487 <output name="annotation_ffn" ftype="fasta"> | |
488 <assert_contents> | |
489 <has_text text=">IHHALP_00005 hypothetical protein"/> | |
490 <has_text text="ATGACAAAACGAAGTG"/> | |
491 </assert_contents> | |
492 </output> | |
493 <output name="annotation_faa" ftype="fasta"> | |
494 <assert_contents> | |
495 <has_text text=">IHHALP_00005 hypothetical protein"/> | |
496 <has_text text="MTKRSGSNTR"/> | |
497 </assert_contents> | |
498 </output> | |
499 <output name="hypotheticals_tsv" ftype="tabular"> | |
500 <assert_contents> | |
501 <has_n_lines n="5"/> | |
502 <has_text text="IHHALP_00010"/> | |
503 <has_text text="Sequence Id"/> | |
504 </assert_contents> | |
505 </output> | |
506 <output name="hypotheticals_faa" ftype="fasta"> | |
507 <assert_contents> | |
508 <has_text text=">IHHALP_00010 hypothetical protein"/> | |
509 <has_text text="MNKQQQTALNM"/> | |
510 </assert_contents> | |
511 </output> | |
512 <output name="summary_txt" ftype="txt"> | |
513 <assert_contents> | |
514 <has_n_lines n="30"/> | |
515 <has_text text="coding density: 62.0"/> | |
516 <has_text text="CDSs: 2"/> | |
517 </assert_contents> | |
518 </output> | |
519 <output name="annotation_plot" ftype="svg"> | |
520 <assert_contents> | |
521 <has_size value="418990" delta="1000"/> | |
522 </assert_contents> | |
523 </output> | |
524 <output name="annotation_json" ftype="json"> | |
525 <assert_contents> | |
526 <has_text text="coding_ratio"/> | |
527 <has_text text="n50"/> | |
528 <has_text text="hypothetical protein"/> | |
529 </assert_contents> | |
530 </output> | |
531 <output name="logfile" ftype="txt"> | |
532 <expand macro="assert_content_test"/> | |
533 </output> | |
534 </test> | |
358 </tests> | 535 </tests> |
359 <help><![CDATA[**What it does** | 536 <help><![CDATA[ |
360 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs. | 537 **What it does** |
361 | 538 |
362 *Comprehensive & taxonomy-independent database* | 539 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs. |
363 Bakta provides a large and taxonomy-independent database using UniProt's entire UniRef protein sequence cluster universe. | 540 |
364 | 541 *Comprehensive & taxonomy-independent database* |
365 *Protein sequence identification* | 542 Bakta provides a large and taxonomy-independent database using UniProt's entire UniRef protein sequence cluster universe. |
366 Bakta exactly identifies known identical protein sequences (IPS) from RefSeq and UniProt | 543 |
367 allowing the fine-grained annotation of gene alleles (AMR) or closely related but distinct protein families. | 544 *Protein sequence identification* |
368 This is achieved via an alignment-free sequence identification (AFSI) approach | 545 Bakta exactly identifies known identical protein sequences (IPS) from RefSeq and UniProt |
369 using full-length MD5 protein sequence hash digests. | 546 allowing the fine-grained annotation of gene alleles (AMR) or closely related but distinct protein families. |
370 *Small proteins/short open reading frames* | 547 This is achieved via an alignment-free sequence identification (AFSI) approach |
371 Bakta detects and annotates small proteins/short open reading frames (sORF). | 548 using full-length MD5 protein sequence hash digests. |
372 | 549 *Small proteins/short open reading frames* |
373 *Expert annotation systems* | 550 Bakta detects and annotates small proteins/short open reading frames (sORF). |
374 To provide high quality annotations for certain proteins of higher interest, e.g. AMR & VF genes, | 551 |
375 Bakta includes & merges different expert annotation systems. | 552 *Expert annotation systems* |
376 Currently, Bakta uses NCBI's AMRFinderPlus for AMR gene annotations | 553 To provide high quality annotations for certain proteins of higher interest, e.g. AMR & VF genes, |
377     as well as a generalized protein sequence expert system with distinct | 554     Bakta includes & merges different expert annotation systems. |
378     coverage, identity and priority values for each sequence, currently comprising the VFDB as well as NCBI's BlastRules. | 555     Currently, Bakta uses NCBI's AMRFinderPlus for AMR gene annotations |
379 | 556     as well as a generalized protein sequence expert system with distinct |
380     *Comprehensive workflow* | 557     coverage, identity and priority values for each sequence, currently comprising the VFDB as well as NCBI's BlastRules. |
381 Bakta annotates ncRNA cis-regulatory regions, oriC/oriV/oriT | 558 |
382 and assembly gaps as well as standard feature types: tRNA, tmRNA, rRNA, ncRNA genes, CRISPR, CDS. | 559 *Comprehensive workflow* |
383 | 560 Bakta annotates ncRNA cis-regulatory regions, oriC/oriV/oriT |
384 *GFF3 & INSDC conform annotations* | 561 and assembly gaps as well as standard feature types: tRNA, tmRNA, rRNA, ncRNA genes, CRISPR, CDS. |
385 Bakta writes GFF3 and INSDC-compliant (Genbank & EMBL) annotation files ready for submission | 562 |
386 (checked via GenomeTools GFF3Validator, table2asn_GFF and ENA Webin-CLI for GFF3 and EMBL file formats, | 563 *GFF3 & INSDC conform annotations* |
387 respectively for representative genomes of all ESKAPE species). | 564 Bakta writes GFF3 and INSDC-compliant (Genbank & EMBL) annotation files ready for submission |
388 | 565 (checked via GenomeTools GFF3Validator, table2asn_GFF and ENA Webin-CLI for GFF3 and EMBL file formats, |
389 *Bacteria & plasmids* | 566 respectively for representative genomes of all ESKAPE species). |
390 Bakta was designed to annotate bacteria (isolates & MAGs) and plasmids, only. | 567 |
391 | 568 *Bacteria & plasmids* |
392 **Input options** | 569 Bakta was designed to annotate bacteria (isolates & MAGs) and plasmids, only. |
393 1. Choose a genome or assembly in fasta format to use bakta annotations | 570 |
394 2. Choose A version of the Bakta database | 571 **Input options** |
395 | 572 |
396 **Organism options** | 573 1. Choose a genome or assembly in fasta format to use bakta annotations |
397     You can specify information about analysed fasta as text input for: | 574         2. Choose A version of the Bakta database |
398         - genus | 575 |
399         - species | 576     **Organism options** |
400         - strain | 577     You can specify information about analysed fasta as text input for: |
401 - plasmid | 578 - genus |
402 | 579 - species |
403 **Annotation options** | 580 - strain |
404 1. You can specify if all sequences (chromosome or plasmids) are complete or not | 581 - plasmid |
405         2. You can add your own prodigal training file for CDS prediction | 582 |
406         3. The translation table could be modified, default is the 11th for bacteria | 583     **Annotation options** |
407         4. You can specify if bacteria is gram -/+ or unknown (default value is unknown) | 584         1. You can specify if all sequences (chromosome or plasmids) are complete or not |
408         5. You can keep the name of contig present in the input file | 585         2. You can add your own prodigal training file for CDS prediction |
409         6. You can specify your own replicon table as a TSV/CSV file | 586         3. The translation table could be modified, default is the 11th for bacteria |
410         7. The compliance option is for ready to submit annotation file to Public database | 587         4. You can specify if bacteria is gram -/+ or unknown (default value is unknown) |
411 as ENA, Genbank EMBL | 588 5. You can keep the name of contig present in the input file |
412 8. You can specify a protein sequence file for annotation in GenBank or fasta formats | 589 6. You can specify your own replicon table as a TSV/CSV file |
413 Using the Fasta format, each reference sequence can be provided in a short or long format: | 590 7. The compliance option is for ready to submit annotation file to Public database |
414 | 591 as ENA, Genbank EMBL |
415 # short: | 592 8. You can specify a protein sequence file for annotation in GenBank or fasta formats |
416 >id gene~~~product~~~dbxrefs | 593 Using the Fasta format, each reference sequence can be provided in a short or long format: |
417 MAQ... | 594 |
418 | 595 # short: |
419 # long: | 596 >id gene~~~product~~~dbxrefs |
420 >id min_identity~~~min_query_cov~~~min_subject_cov~~~gene~~~product~~~dbxrefs | 597 MAQ... |
421 MAQ... | 598 |
422 | 599 # long: |
423 **Skip steps** | 600 >id min_identity~~~min_query_cov~~~min_subject_cov~~~gene~~~product~~~dbxrefs |
424 Some steps could be skiped: | 601 MAQ... |
425 - skip-trna Skip tRNA detection & annotation | 602 |
426 - skip-tmrna Skip tmRNA detection & annotation | 603 **Skip steps** |
427 - skip-rrna Skip rRNA detection & annotation | 604 Some steps could be skiped: |
428 - skip-ncrna Skip ncRNA detection & annotation | 605 - skip-trna Skip tRNA detection & annotation |
429 - skip-ncrna-region Skip ncRNA region detection & annotation | 606 - skip-tmrna Skip tmRNA detection & annotation |
430 - skip-crispr Skip CRISPR array detection & annotation | 607 - skip-rrna Skip rRNA detection & annotation |
431 - skip-cds Skip CDS detection & annotation | 608 - skip-ncrna Skip ncRNA detection & annotation |
432 - skip-pseudo Skip pseudogene detection & annotation | 609 - skip-ncrna-region Skip ncRNA region detection & annotation |
433 - skip-sorf Skip sORF detection & annotation | 610 - skip-crispr Skip CRISPR array detection & annotation |
434 - skip-gap Skip gap detection & annotation | 611 - skip-cds Skip CDS detection & annotation |
435 - skip-ori Skip oriC/oriT detection & annotation | 612 - skip-pseudo Skip pseudogene detection & annotation |
436 | 613 - skip-sorf Skip sORF detection & annotation |
437 **Output options** | 614 - skip-gap Skip gap detection & annotation |
438 Bakta produce numbers of output files, you can select what type of file you want: | 615 - skip-ori Skip oriC/oriT detection & annotation |
439 - Summary of the annotation | 616 |
440 - Annotated files | 617 **Output options** |
441 - Sequence files for nucleotide and/or amino acid | 618 Bakta produce numbers of output files, you can select what type of file you want: |
619 - Summary of the annotation | |
620 - Annotated files | |
621 - Sequence files for nucleotide and/or amino acid | |
442 ]]></help> | 622 ]]></help> |
443 <expand macro="citations"/> | 623 <expand macro="citations"/> |
444 </tool> | 624 </tool> |