comparison bakta.xml @ 2:debdc1469b41 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/bakta commit 74f8fe2e7de713920026d372b28c73adb04ee97c
author iuc
date Wed, 21 Dec 2022 20:59:54 +0000
parents da5f1924bb2e
children 865ece5ca178
comparison
equal deleted inserted replaced
1:da5f1924bb2e 2:debdc1469b41
9 <expand macro='xrefs'/> 9 <expand macro='xrefs'/>
10 <expand macro="requirements"/> 10 <expand macro="requirements"/>
11 <expand macro="version_command"/> 11 <expand macro="version_command"/>
12 12
13 <command detect_errors="aggressive"><![CDATA[ 13 <command detect_errors="aggressive"><![CDATA[
14 mkdir ./database_path &&
15 ln -s '$(input_option.bakta_db_select.fields.path)/'* database_path &&
16 ln -s '$(input_option.amrfinder_db_select.fields.path)' database_path &&
17
14 bakta 18 bakta
15 #*====================================== 19 #*======================================
16 CPU option 20 CPU option
17 ======================================*# 21 ======================================*#
18 --threads \${GALAXY_SLOTS:-1} 22 --threads \${GALAXY_SLOTS:-1}
19 #*====================================== 23 #*======================================
20 Bakta database 24 Bakta database
21 ======================================*# 25 ======================================*#
22 --db $input_option.db_select.fields.path 26 --db ./database_path
23 #if $input_option.min_contig_length 27 #if $input_option.min_contig_length
24 --min-contig-length $input_option.min_contig_length 28 --min-contig-length $input_option.min_contig_length
25 #else if $annotation.compliant 29 #else if $annotation.compliant
26 --min-contig-length 200 30 --min-contig-length 200
27 #else 31 #else
69 #*====================================== 73 #*======================================
70 Workflow OPTIONS 74 Workflow OPTIONS
71 skip some step of the bakta analysis 75 skip some step of the bakta analysis
72 ======================================*# 76 ======================================*#
73 77
74 #if "skip_trna" in $workflow.skip_analysis 78 #echo " ".join($workflow.skip_analysis)
75 --skip-trna
76 #end if
77 #if "skip_tmrna" in $workflow.skip_analysis
78 --skip-tmrna
79 #end if
80 #if "skip_rrna" in $workflow.skip_analysis
81 --skip-rrna
82 #end if
83 #if "skip_ncrna" in $workflow.skip_analysis
84 --skip-ncrna
85 #end if
86 #if "skip_ncrna_region" in $workflow.skip_analysis
87 --skip-ncrna-region
88 #end if
89 #if "skip_crispr" in $workflow.skip_analysis
90 --skip-crispr
91 #end if
92 #if "skip_cds" in $workflow.skip_analysis
93 --skip-cds
94 #end if
95 #if "skip_sorf" in $workflow.skip_analysis
96 --skip-sorf
97 #end if
98 #if "skip_gap" in $workflow.skip_analysis
99 --skip-gap
100 #end if
101 #if "skip_ori" in $workflow.skip_analysis
102 --skip-ori
103 #end if
104 79
105 #*====================================== 80 #*======================================
106 Genome file 81 Genome file
107 ======================================*# 82 ======================================*#
108 '$input_option.input_file' 83 '$input_option.input_file'
112 | tee '$logfile' 87 | tee '$logfile'
113 ]]></command> 88 ]]></command>
114 <inputs> 89 <inputs>
115 <!-- DB and file INPUT --> 90 <!-- DB and file INPUT -->
116 <section name="input_option" title="Input/Output options" expanded="true"> 91 <section name="input_option" title="Input/Output options" expanded="true">
117 <param name="db_select" type="select" label="The bakta database"> 92 <param name="bakta_db_select" type="select" label="The bakta database">
118 <options from_data_table="bakta_database"> 93 <options from_data_table="bakta_database">
119 <validator message="No bakta database is available" type="no_options"/> 94 <filter type="static_value" value="@BAKTA_VERSION@" column="bakta_version"/>
95 <validator message="No bakta database is available" type="no_options"/>
120 </options> 96 </options>
121 </param> 97 </param>
98 <param name="amrfinder_db_select" type="select" label="The amrfinderplus database">
99 <options from_data_table="amrfinderplus_database">
100 <validator message="No amrfinderplus database is available" type="no_options"/>
101 </options>
102 </param>
103
122 <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/> 104 <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/>
123 <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/> 105 <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/>
124 </section> 106 </section>
125 <!-- Organism INFORMATION OPTIONS --> 107 <!-- Organism INFORMATION OPTIONS -->
126 <section name="organism" title="Optional organism options" expanded="false"> 108 <section name="organism" title="Optional organism options" expanded="false">
156 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/> 138 <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/>
157 </section> 139 </section>
158 <!-- PARAMETER FOR WORKFLOW ANALYSIS --> 140 <!-- PARAMETER FOR WORKFLOW ANALYSIS -->
159 <section name="workflow" title="Workflow option to skip steps"> 141 <section name="workflow" title="Workflow option to skip steps">
160 <param name="skip_analysis" type="select" display="checkboxes" multiple="true" label="Select steps to skip"> 142 <param name="skip_analysis" type="select" display="checkboxes" multiple="true" label="Select steps to skip">
161 <option value="skip_trna"> Skip tRNA detection and annotation </option> 143 <option value="--skip-trna"> Skip tRNA detection and annotation </option>
162 <option value="skip_tmrna"> Skip tmRNA detection and annotation </option> 144 <option value="--skip-tmrna"> Skip tmRNA detection and annotation </option>
163 <option value="skip_rrna"> Skip rRNA detection and annotation </option> 145 <option value="--skip-rrna"> Skip rRNA detection and annotation </option>
164 <option value="skip_ncrna"> Skip ncRNA detection and annotation </option> 146 <option value="--skip-ncrna"> Skip ncRNA detection and annotation </option>
165 <option value="skip_ncrna_region"> Skip ncRNA region detection and annotation </option> 147 <option value="--skip-ncrna-region"> Skip ncRNA region detection and annotation </option>
166 <option value="skip_crispr"> Skip CRISPR array detection and annotation </option> 148 <option value="--skip-crispr"> Skip CRISPR array detection and annotation </option>
167 <option value="skip_cds"> Skip CDS detection and annotation </option> 149 <option value="--skip-cds"> Skip CDS detection and annotation </option>
168 <option value="skip_sorf"> Skip sORF detection and annotation </option> 150 <option value="--skip-pseudo"> Skip pseudogene detection and annotation </option>
169 <option value="skip_gap"> Skip gap detection and annotation </option> 151 <option value="--skip-sorf"> Skip sORF detection and annotation </option>
170 <option value="skip_ori"> Skip oriC/oriT detection and annotation </option> 152 <option value="--skip-gap"> Skip gap detection and annotation </option>
153 <option value="--skip-ori"> Skip oriC/oriT detection and annotation </option>
171 </param> 154 </param>
172 </section> 155 </section>
173 <section name="output_files" title="Selection of the output files"> 156 <section name="output_files" title="Selection of the output files">
174 <param name="output_selection" type="select" display="checkboxes" multiple="true" label="Output files selection"> 157 <param name="output_selection" type="select" display="checkboxes" multiple="true" label="Output files selection">
175 <option value="file_tsv" selected="true"> Annotation file in TSV </option> 158 <option value="file_tsv" selected="true"> Annotation file in TSV </option>
176 <option value="file_gff3" selected="true"> Annotation and sequence in GFF3 </option> 159 <option value="file_gff3" selected="true"> Annotation and sequence in GFF3 </option>
177 <option value="file_gbff" selected="true"> Annotations and sequences in GenBank format </option> 160 <option value="file_gbff" selected="false"> Annotations and sequences in GenBank format </option>
178 <option value="file_embl" selected="true"> Annotations and sequences in EMBL format </option> 161 <option value="file_embl" selected="false"> Annotations and sequences in EMBL format </option>
179 <option value="file_fna" selected="true"> Replicon/contig DNA sequences as FASTA </option> 162 <option value="file_fna" selected="false"> Replicon/contig DNA sequences as FASTA </option>
180 <option value="file_ffn" selected="true"> Feature nucleotide sequences as FASTA </option> 163 <option value="file_ffn" selected="true"> Feature nucleotide sequences as FASTA </option>
181 <option value="file_faa" selected="true"> CDS/sORF amino acid sequences as FASTA </option> 164 <option value="file_faa" selected="false"> CDS/sORF amino acid sequences as FASTA </option>
182 <option value="hypo_tsv" selected="true"> Hypothetical protein CDS in TSV</option> 165 <option value="hypo_tsv" selected="false"> Hypothetical protein CDS in TSV</option>
183 <option value="hypo_fa" selected="true"> Hypothetical protein CDS amino sequences as FASTA</option> 166 <option value="hypo_fa" selected="false"> Hypothetical protein CDS amino sequences as FASTA</option>
184 <option value="sum_txt" selected="true"> Summary as TXT</option> 167 <option value="sum_txt" selected="false"> Summary as TXT</option>
185 <option value="file_json" selected="true"> Information on each annotated feature as JSON </option> 168 <option value="file_json" selected="false"> Information on each annotated feature as JSON </option>
186 <option value="log_txt" selected="true"> Log file as TXT </option> 169 <option value="file_plot" selected="true"> Plot of the annotation result as SVG </option>
170 <option value="log_txt" selected="false"> Log file as TXT </option>
187 </param> 171 </param>
188 </section> 172 </section>
189 173
190 </inputs> 174 </inputs>
191 <outputs> 175 <outputs>
192 <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: bakta_output.tsv"> 176 <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: annotation_summary">
193 <filter> output_files['output_selection'] and "file_tsv" in output_files['output_selection'] </filter> 177 <filter> output_files['output_selection'] and "file_tsv" in output_files['output_selection'] </filter>
194 </data> 178 </data>
195 <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: bakta_output.gff3"> 179 <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: Annotation_and_sequences">
196 <filter> output_files['output_selection'] and "file_gff3" in output_files['output_selection'] </filter> 180 <filter> output_files['output_selection'] and "file_gff3" in output_files['output_selection'] </filter>
197 </data> 181 </data>
198 <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff"> 182 <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff">
199 <filter> output_files['output_selection'] and "file_gbff" in output_files['output_selection'] </filter> 183 <filter> output_files['output_selection'] and "file_gbff" in output_files['output_selection'] </filter>
200 </data> 184 </data>
201 <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl"> 185 <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl">
202 <filter> output_files['output_selection'] and "file_embl" in output_files['output_selection'] </filter> 186 <filter> output_files['output_selection'] and "file_embl" in output_files['output_selection'] </filter>
203 </data> 187 </data>
204 <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: bakta_output.fna"> 188 <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: Contig_sequences">
205 <filter> output_files['output_selection'] and "file_fna" in output_files['output_selection'] </filter> 189 <filter> output_files['output_selection'] and "file_fna" in output_files['output_selection'] </filter>
206 </data> 190 </data>
207 <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: bakta_output.ffn"> 191 <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: Nucleotide_sequences">
208 <filter> output_files['output_selection'] and "file_ffn" in output_files['output_selection'] </filter> 192 <filter> output_files['output_selection'] and "file_ffn" in output_files['output_selection'] </filter>
209 </data> 193 </data>
210 <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: bakta_output.faa"> 194 <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: Amino_acid_sequences">
211 <filter> output_files['output_selection'] and "file_faa" in output_files['output_selection'] </filter> 195 <filter> output_files['output_selection'] and "file_faa" in output_files['output_selection'] </filter>
212 </data> 196 </data>
213 <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.tsv"> 197 <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: hypothetical_annotation_summary">
214 <filter> output_files['output_selection'] and "hypo_tsv" in output_files['output_selection'] </filter> 198 <filter> output_files['output_selection'] and "hypo_tsv" in output_files['output_selection'] </filter>
215 </data> 199 </data>
216 <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.faa"> 200 <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: hypothetical_amino_acid_sequences">
217 <filter> output_files['output_selection'] and "hypo_fa" in output_files['output_selection'] </filter> 201 <filter> output_files['output_selection'] and "hypo_fa" in output_files['output_selection'] </filter>
218 </data> 202 </data>
219 <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: bakta_summary.txt"> 203 <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: Analysis_summary">
220 <filter> output_files['output_selection'] and "sum_txt" in output_files['output_selection'] </filter> 204 <filter> output_files['output_selection'] and "sum_txt" in output_files['output_selection'] </filter>
221 </data> 205 </data>
222 <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: bakta_output.json"> 206 <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: annotation_machine_readable">
223 <filter> output_files['output_selection'] and "file_json" in output_files['output_selection'] </filter> 207 <filter> output_files['output_selection'] and "file_json" in output_files['output_selection'] </filter>
208 </data>
209 <data name="annotation_plot" format="svg" from_work_dir="bakta_output.svg" label="${tool.name} on ${on_string}: Plot of the annotation">
210 <filter> output_files['output_selection'] and "file_plot" in output_files['output_selection'] </filter>
224 </data> 211 </data>
225 <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file"> 212 <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file">
226 <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter> 213 <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter>
227 </data> 214 </data>
228 </outputs> 215 </outputs>
229
230 <tests> 216 <tests>
231 <test expect_num_outputs="12"> <!-- TEST_1 database + input --> 217 <test expect_num_outputs="13"> <!-- TEST_1 database + input -->
232 <section name="input_option" > 218 <section name="input_option" >
233 <param name="db_select" value="test-db-bakta"/> 219 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
234 <param name="input_file" value="NC_002127.1.fna"/> 220 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
235 </section> 221 <param name="input_file" value="NC_002127.1.fna"/>
236 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="10"> 222 <param name="min_contig_length" value="250"/>
237 <assert_contents> 223 </section>
238 <has_text_matching n="1" expression="Genome size: 1,330 bp"/> 224 <section name="output_files">
239 <has_n_lines n="94" delta="1"/> 225 <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/>
240 </assert_contents> 226 </section>
241 </output> 227 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="2"/>
242 <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="1"/> 228 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"/>
243 <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2"> 229 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="8"/>
244 <assert_contents> 230 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="6"/>
245 <has_text_matching expression="TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC"/> 231 <output name="annotation_fna" value="TEST_1/TEST_1.fna"/>
246 </assert_contents> 232 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/>
247 </output> 233 <output name="annotation_faa" value="TEST_1/TEST_1.faa"/>
248 <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="4"> 234 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv" lines_diff="4"/>
249 <assert_contents> 235 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/>
250 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/> 236 <output name="summary_txt" value="TEST_1/TEST_1.txt" lines_diff="4"/>
251 </assert_contents> 237 <output name="annotation_plot">
252 </output> 238 <assert_contents>
253 <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="2"/> 239 <has_size value="418991" delta="1000"/>
254 <output name="annotation_fna" value="TEST_1/TEST_1.fna"/> 240 </assert_contents>
255 <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/> 241 </output>
256 <output name="annotation_faa" value="TEST_1/TEST_1.faa"/> 242
257 <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv"/> 243 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="6"/>
258 <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/> 244 <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="6"/>
259 <output name="summary_txt" value="TEST_1/TEST_1.txt">
260 <assert_contents>
261 <has_text_matching expression="N50: 1330"/>
262 </assert_contents>
263 </output>
264 <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="4">
265 <assert_contents>
266 <has_text_matching expression="0.6203007518796992"/>
267 </assert_contents>
268 </output>
269 </test>
270 <test expect_num_outputs="12"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps -->
271 <section name="input_option" >
272 <param name="db_select" value="test-db-bakta"/>
273 <param name="input_file" value="NC_002127.1.fna"/>
274 <param name="min_contig_length" value="250"/>
275 </section>
276 <section name="organism">
277 <param name="genus" value="Escherichia"/>
278 <param name="species" value="coli O157:H7"/>
279 <param name="strain" value="Sakai"/>
280 <param name="plasmid" value="pOSAK1"/>
281 </section>
282 <section name="annotation">
283 <param name="--gram" value="-"/>
284 <param name="keep_contig_headers" value="true"/>
285 </section>
286 <section name="workflow">
287 <param name="skip_analysis" value="skip_trna,skip_tmrna"/>
288 </section>
289 <output name="logfile" value="TEST_2/TEST_2.log" lines_diff="4">
290 <assert_contents>
291 <has_text_matching expression="Genome size: 1,330 bp"/>
292 </assert_contents>
293 </output>
294 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="2">
295 <assert_contents>
296 <has_text_matching expression="IHHALP_00005"/>
297 </assert_contents>
298 </output>
299 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="2">
300 <assert_contents>
301 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/>
302 </assert_contents>
303 </output>
304 <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="5">
305 <assert_contents>
306 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
307 </assert_contents>
308 </output>
309 <output name="annotation_embl" value="TEST_2/TEST_2.embl" lines_diff="4">
310 <assert_contents>
311 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
312 </assert_contents>
313 </output>
314 <output name="annotation_fna" value="TEST_2/TEST_2.fna"/>
315 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
316 <output name="annotation_faa" value="TEST_2/TEST_2.faa"/>
317 <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv"/>
318 <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa"/>
319 <output name="summary_txt" value="TEST_2/TEST_2.txt">
320 <assert_contents>
321 <has_text_matching expression="N50: 1330"/>
322 </assert_contents>
323 </output>
324 <output name="annotation_json" value="TEST_2/TEST_2.json" lines_diff="4">
325 <assert_contents>
326 <has_text_matching expression="0.4518796992481203"/>
327 </assert_contents>
328 </output>
329 </test>
330 <test expect_num_outputs="12"> <!-- TEST_3 test all skip steps -->
331 <section name="input_option" >
332 <param name="db_select" value="test-db-bakta"/>
333 <param name="input_file" value="NC_002127.1.fna"/>
334 <param name="min_contig_length" value="350"/>
335 </section>
336 <section name="workflow">
337 <param name="skip_analysis" value="skip_trna,skip_tmrna,skip_rrna,skip_ncrna,skip_ncrna_region,skip_crispr,skip_cds,skip_sorf,skip_gap,skip_ori"/>
338 </section>
339 <output name="logfile" value="TEST_3/TEST_3.log" lines_diff="4">
340 <assert_contents>
341 <has_text_matching expression="Genome size: 1,330 bp"/>
342 </assert_contents>
343 </output>
344 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="1"/>
345 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="2"/>
346 <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="10"/>
347 <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4"/>
348 <output name="annotation_fna" value="TEST_3/TEST_3.fna"/>
349 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
350 <output name="annotation_faa" value="TEST_3/TEST_3.faa"/>
351 <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4"/>
352 </test> 245 </test>
353 <test expect_num_outputs="12"> <!-- TEST_4 annotations --> 246 <test expect_num_outputs="4"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps -->
354 <section name="input_option" > 247 <section name="input_option" >
355 <param name="db_select" value="test-db-bakta"/> 248 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
249 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
356 <param name="input_file" value="NC_002127.1.fna"/> 250 <param name="input_file" value="NC_002127.1.fna"/>
251 <param name="min_contig_length" value="250"/>
252 </section>
253 <section name="organism">
254 <param name="genus" value="Escherichia"/>
255 <param name="species" value="coli O157:H7"/>
256 <param name="strain" value="Sakai"/>
257 <param name="plasmid" value="pOSAK1"/>
357 </section> 258 </section>
358 <section name="annotation"> 259 <section name="annotation">
359 <param name="complete" value="true"/> 260 <param name="--gram" value="-"/>
360 <param name="translation_table" value="4"/> 261 <param name="keep_contig_headers" value="true"/>
361 <param name="prodigal" value="prodigal.tf"/> 262 </section>
362 <param name="replicons" value="replicons.tsv"/> 263 <section name="workflow">
363 <param name="compliant" value="true"/> 264 <param name="skip_analysis" value="--skip-trna,--skip-tmrna"/>
364 <param name="proteins" value="user-proteins.faa"/> 265 </section>
365 </section> 266 <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="4">
366 <output name="logfile" value="TEST_4/TEST_4.log" lines_diff="4"> 267 <assert_contents>
367 <assert_contents> 268 <has_text_matching expression="IHHALP_00005"/>
368 <has_text_matching expression="potential: 16"/> 269 </assert_contents>
369 </assert_contents> 270 </output>
370 </output> 271 <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="4">
371 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="2"/> 272 <assert_contents>
372 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="2"> 273 <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/>
373 <assert_contents> 274 </assert_contents>
374 <has_text_matching expression="ID=IHHALP_00005_gene;locus_tag=IHHALP_00005"/> 275 </output>
375 </assert_contents> 276 <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
376 </output> 277 <output name="annotation_plot">
377 <output name="annotation_gbff" value="TEST_4/TEST_4.gbff" lines_diff="4"> 278 <assert_contents>
378 <assert_contents> 279 <has_size value="418991" delta="1000"/>
379 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
380 </assert_contents>
381 </output>
382 <output name="annotation_embl" value="TEST_4/TEST_4.embl" lines_diff="4">
383 <assert_contents>
384 <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
385 </assert_contents>
386 </output>
387 <output name="annotation_fna" value="TEST_4/TEST_4.fna"/>
388 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
389 <output name="annotation_faa" value="TEST_4/TEST_4.faa"/>
390 <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv"/>
391 <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa"/>
392 <output name="summary_txt" value="TEST_4/TEST_4.txt">
393 <assert_contents>
394 <has_text_matching expression="GC: 45.2"/>
395 </assert_contents>
396 </output>
397 <output name="annotation_json" value="TEST_4/TEST_4.json" lines_diff="4">
398 <assert_contents>
399 <has_text_matching expression="0.4518796992481203"/>
400 </assert_contents> 280 </assert_contents>
401 </output> 281 </output>
402 </test> 282 </test>
403 <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary --> 283 <test expect_num_outputs="4"> <!-- TEST_3 test all skip steps -->
404 <section name="input_option" > 284 <section name="input_option" >
405 <param name="db_select" value="test-db-bakta"/> 285 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
286 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
406 <param name="input_file" value="NC_002127.1.fna"/> 287 <param name="input_file" value="NC_002127.1.fna"/>
407 </section> 288 <param name="min_contig_length" value="350"/>
408 <section name="annotation">
409 <param name="complete" value="true"/>
410 <param name="translation_table" value="4"/>
411 </section> 289 </section>
412 <section name="workflow"> 290 <section name="workflow">
413 <param name="skip_analysis" value="skip_trna,skip_tmrna,skip_rrna,skip_ncrna,skip_ncrna_region,skip_crispr,skip_cds,skip_sorf,skip_gap,skip_ori"/> 291 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
414 </section> 292 </section>
415 <section name="output_files"> 293 <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="4"/>
416 <param name="output_selection" value="log_txt,sum_txt"/> 294 <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="4"/>
417 </section> 295 <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
418 <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="4"/> 296 <output name="annotation_plot">
419 <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/> 297 <assert_contents>
420 </test> 298 <has_size value="418399" delta="1000"/>
299 </assert_contents>
300 </output>
301 </test>
302 <test expect_num_outputs="4"> <!-- TEST_4 annotations -->
303 <section name="input_option" >
304 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
305 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
306 <param name="input_file" value="NC_002127.1.fna"/>
307 </section>
308 <section name="annotation">
309 <param name="complete" value="true"/>
310 <param name="prodigal" value="prodigal.tf"/>
311 <param name="translation_table" value="4"/>
312 <param name="replicons" value="replicons.tsv"/>
313 <param name="compliant" value="true"/>
314 <param name="proteins" value="user-proteins.faa"/>
315 </section>
316 <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="4"/>
317 <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="4"/>
318 <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
319 <output name="annotation_plot">
320 <assert_contents>
321 <has_size value="418399" delta="1000"/>
322 </assert_contents>
323 </output>
324 </test>
325 <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary -->
326 <section name="input_option" >
327 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
328 <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
329 <param name="input_file" value="NC_002127.1.fna"/>
330 </section>
331 <section name="annotation">
332 <param name="complete" value="true"/>
333 <param name="translation_table" value="4"/>
334 </section>
335 <section name="workflow">
336 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
337 </section>
338 <section name="output_files">
339 <param name="output_selection" value="log_txt,sum_txt"/>
340 </section>
341 <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="6"/>
342 <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/>
343 </test>
421 </tests> 344 </tests>
422 <help><![CDATA[**What it does** 345 <help><![CDATA[**What it does**
423 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs. 346 Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs.
424 347
425 *Comprehensive & taxonomy-independent database* 348 *Comprehensive & taxonomy-independent database*
463 - strain 386 - strain
464 - plasmid 387 - plasmid
465 388
466 **Annotation options** 389 **Annotation options**
467 1. You can specify if all sequences (chromosome or plasmids) are complete or not 390 1. You can specify if all sequences (chromosome or plasmids) are complete or not
468 2. You can add your own prodigal traingin file for CDS prediction 391 2. You can add your own prodigal training file for CDS prediction
469 3. The translation table could be modified, default is the 11th for bacteria 392 3. The translation table could be modified, default is the 11th for bacteria
470 4. You can specify if bacteria is gram -/+ or unknown (default value unknown) 393 4. You can specify if bacteria is gram -/+ or unknown (default value unknown)
471 5. You can keep the name of contig present in the input file 394 5. You can keep the name of contig present in the input file
472 6. You can specify your own replicon table as a TSV/CSV file 395 6. You can specify your own replicon table as a TSV/CSV file
473 7. The compliance option produces annotation files that are ready to submit to public databases 396 7. The compliance option produces annotation files that are ready to submit to public databases