diff bakta.xml @ 4:3f0aa1b3e816 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/bakta commit e2c4ab5546b135ff903545073c3223aab166d8f5
author iuc
date Mon, 24 Jul 2023 11:22:16 +0000
parents 865ece5ca178
children 728dacaf08a9
line wrap: on
line diff
--- a/bakta.xml	Fri Feb 10 14:20:09 2023 +0000
+++ b/bakta.xml	Mon Jul 24 11:22:16 2023 +0000
@@ -11,10 +11,12 @@
     <expand macro="version_command"/>
 
     <command detect_errors="aggressive"><![CDATA[
+
         mkdir -p ./database_path/amrfinderplus-db &&
         ln -s '$(input_option.bakta_db_select.fields.path)'/* database_path &&
         ln -s '$(input_option.amrfinder_db_select.fields.path)/' database_path/amrfinderplus-db/latest &&
-        bakta
+        bakta --verbose
+
         #*======================================
                     CPU option
         ======================================*#
@@ -22,7 +24,8 @@
         #*======================================
                     Bakta database
         ======================================*#
-        --db ./database_path
+        --db './database_path'
+        --output 'bakta_output'
         #if $input_option.min_contig_length
             --min-contig-length $input_option.min_contig_length
         #else if $annotation.compliant
@@ -88,8 +91,8 @@
         <section name="input_option" title="Input/Output options" expanded="true">
             <param name="bakta_db_select" type="select" label="The bakta database">
                 <options from_data_table="bakta_database">
-                  <filter type="static_value" value="@BAKTA_VERSION@" column="bakta_version"/>
-                  <validator message="No bakta database is available" type="no_options"/>
+                    <filter type="static_value" value="@COMPATIBLE_BAKTA_VERSION@" column="bakta_version"/>
+                    <validator message="No bakta database is available" type="no_options"/>
                 </options>
             </param>
             <param name="amrfinder_db_select" type="select" label="The amrfinderplus database">
@@ -128,87 +131,89 @@
             <param argument="--replicons" type="data" format="tabular,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/>
             <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/>
             <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/>
+            <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" label="Metagenome mode" help="Run in metagenome mode. This only affects CDS prediction"/>
         </section>
         <!-- PARAMETER FOR WORKFLOW ANALYSIS -->
         <section name="workflow" title="Workflow option to skip steps">
             <param name="skip_analysis" type="select" display="checkboxes" multiple="true"  label="Select steps to skip">
-                <option value="--skip-trna"> Skip tRNA detection and annotation </option>
-                <option value="--skip-tmrna"> Skip tmRNA detection and annotation </option>
-                <option value="--skip-rrna"> Skip rRNA detection and annotation </option>
-                <option value="--skip-ncrna"> Skip ncRNA detection and annotation </option>
-                <option value="--skip-ncrna-region"> Skip ncRNA region detection and annotation </option>
-                <option value="--skip-crispr"> Skip CRISPR array detection and annotation </option>
-                <option value="--skip-cds"> Skip CDS detection and annotation </option>
-                <option value="--skip-pseudo"> Skip pseudogene detection and annotation </option>
-                <option value="--skip-sorf"> Skip sORF detection and annotation </option>
-                <option value="--skip-gap"> Skip gap detection and annotation </option>
-                <option value="--skip-ori"> Skip oriC/oriT detection and annotation </option>
+                <option value="--skip-trna">Skip tRNA detection and annotation</option>
+                <option value="--skip-tmrna">Skip tmRNA detection and annotation</option>
+                <option value="--skip-rrna">Skip rRNA detection and annotation</option>
+                <option value="--skip-ncrna">Skip ncRNA detection and annotation</option>
+                <option value="--skip-ncrna-region">Skip ncRNA region detection and annotation</option>
+                <option value="--skip-crispr">Skip CRISPR array detection and annotation</option>
+                <option value="--skip-cds">Skip CDS detection and annotation</option>
+                <option value="--skip-pseudo">Skip pseudogene detection and annotation</option>
+                <option value="--skip-sorf">Skip sORF detection and annotation</option>
+                <option value="--skip-gap">Skip gap detection and annotation</option>
+                <option value="--skip-ori">Skip oriC/oriT detection and annotation</option>
+                <option value="--skip-plot">Skip generation of circular genome plots</option>
             </param>
         </section>
         <section name="output_files" title="Selection of the output files">
           <param name="output_selection" type="select" display="checkboxes" multiple="true"  label="Output files selection">
-              <option value="file_tsv" selected="true"> Annotation file in TSV </option>
-              <option value="file_gff3" selected="true"> Annotation and sequence in GFF3 </option>
-              <option value="file_gbff" selected="false"> Annotations and sequences in GenBank format </option>
-              <option value="file_embl" selected="false"> Annotations and sequences in EMBL format </option>
-              <option value="file_fna" selected="false"> Replicon/contig DNA sequences as FASTA  </option>
-              <option value="file_ffn" selected="true"> Feature nucleotide sequences as FASTA </option>
-              <option value="file_faa" selected="false"> CDS/sORF amino acid sequences as FASTA </option>
-              <option value="hypo_tsv" selected="false"> Hypothetical protein CDS in TSV</option>
-              <option value="hypo_fa" selected="false"> Hypothetical protein CDS amino sequences as FASTA</option>
-              <option value="sum_txt" selected="false"> Summary as TXT</option>
-              <option value="file_json" selected="false"> Information on each annotated feature as JSON </option>
-              <option value="file_plot" selected="true"> Plot of the annotation result as SVG </option>
-              <option value="log_txt" selected="false"> Log file as TXT </option>
+              <option value="file_tsv" selected="true">Annotation file in TSV</option>
+              <option value="file_gff3" selected="true">Annotation and sequence in GFF3</option>
+              <option value="file_gbff" selected="false">Annotations and sequences in GenBank format</option>
+              <option value="file_embl" selected="false">Annotations and sequences in EMBL format</option>
+              <option value="file_fna" selected="false">Replicon/contig DNA sequences as FASTA</option>
+              <option value="file_ffn" selected="true">Feature nucleotide sequences as FASTA</option>
+              <option value="file_faa" selected="false">CDS/sORF amino acid sequences as FASTA</option>
+              <option value="hypo_tsv" selected="false">Hypothetical protein CDS in TSV</option>
+              <option value="hypo_fa" selected="false">Hypothetical protein CDS amino sequences as FASTA</option>
+              <option value="sum_txt" selected="false">Summary as TXT</option>
+              <option value="file_json" selected="false">Information on each annotated feature as JSON</option>
+              <option value="file_plot" selected="true">Plot of the annotation result as SVG</option>
+              <option value="log_txt" selected="false">Log file as TXT</option>
           </param>
         </section>
 
     </inputs>
     <outputs>
-        <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: annotation_summary">
-            <filter> output_files['output_selection'] and "file_tsv" in output_files['output_selection'] </filter>
+        <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output/bakta_output.tsv" label="${tool.name} on ${on_string}: annotation_summary">
+            <filter>output_files['output_selection'] and "file_tsv" in output_files['output_selection']</filter>
         </data>
-        <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: Annotation_and_sequences">
-            <filter> output_files['output_selection'] and "file_gff3" in output_files['output_selection'] </filter>
+        <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output/bakta_output.gff3" label="${tool.name} on ${on_string}: Annotation_and_sequences">
+            <filter>output_files['output_selection'] and "file_gff3" in output_files['output_selection']</filter>
         </data>
-        <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff">
-            <filter> output_files['output_selection'] and "file_gbff" in output_files['output_selection'] </filter>
+        <data name="annotation_gbff" format="genbank" from_work_dir="bakta_output/bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff"> <!-- GenBank flat file, so typed as genbank rather than tabular -->
+            <filter>output_files['output_selection'] and "file_gbff" in output_files['output_selection']</filter>
         </data>
-        <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl">
-            <filter> output_files['output_selection'] and "file_embl" in output_files['output_selection'] </filter>
+        <data name="annotation_embl" format="embl" from_work_dir="bakta_output/bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl"> <!-- EMBL flat file, so typed as embl rather than tabular -->
+            <filter>output_files['output_selection'] and "file_embl" in output_files['output_selection']</filter>
         </data>
-        <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: Contig_sequences">
-            <filter> output_files['output_selection'] and "file_fna" in output_files['output_selection'] </filter>
+        <data name="annotation_fna" format="fasta" from_work_dir="bakta_output/bakta_output.fna" label="${tool.name} on ${on_string}: Contig_sequences">
+            <filter>output_files['output_selection'] and "file_fna" in output_files['output_selection']</filter>
         </data>
-        <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: Nucleotide_sequences">
-            <filter> output_files['output_selection'] and "file_ffn" in output_files['output_selection'] </filter>
+        <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output/bakta_output.ffn" label="${tool.name} on ${on_string}: Nucleotide_sequences">
+            <filter>output_files['output_selection'] and "file_ffn" in output_files['output_selection']</filter>
         </data>
-        <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: Amino_acid_sequences">
-            <filter> output_files['output_selection'] and "file_faa" in output_files['output_selection'] </filter>
+        <data name="annotation_faa" format="fasta" from_work_dir="bakta_output/bakta_output.faa" label="${tool.name} on ${on_string}: Amino_acid_sequences">
+            <filter>output_files['output_selection'] and "file_faa" in output_files['output_selection']</filter>
         </data>
-        <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: hypothetical_annotation_summary">
-            <filter> output_files['output_selection'] and "hypo_tsv" in output_files['output_selection'] </filter>
+        <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output/bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: hypothetical_annotation_summary">
+            <filter>output_files['output_selection'] and "hypo_tsv" in output_files['output_selection']</filter>
         </data>
-        <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: hypothetical_amino_acid_sequences">
-          <filter> output_files['output_selection'] and "hypo_fa" in output_files['output_selection'] </filter>
+        <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output/bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: hypothetical_amino_acid_sequences">
+          <filter>output_files['output_selection'] and "hypo_fa" in output_files['output_selection']</filter>
         </data>
-        <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: Analysis_summary">
-            <filter> output_files['output_selection'] and "sum_txt" in output_files['output_selection'] </filter>
+        <data name="summary_txt" format="txt" from_work_dir="bakta_output/bakta_output.txt" label="${tool.name} on ${on_string}: Analysis_summary">
+            <filter>output_files['output_selection'] and "sum_txt" in output_files['output_selection']</filter>
         </data>
-        <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: annotation_machine_readable">
-            <filter> output_files['output_selection'] and "file_json" in output_files['output_selection'] </filter>
+        <data name="annotation_json" format="json" from_work_dir="bakta_output/bakta_output.json" label="${tool.name} on ${on_string}: annotation_machine_readable">
+            <filter>output_files['output_selection'] and "file_json" in output_files['output_selection']</filter>
         </data>
-        <data name="annotation_plot" format="svg" from_work_dir="bakta_output.svg" label="${tool.name} on ${on_string}: Plot of the annotation">
-            <filter> output_files['output_selection'] and "file_plot" in output_files['output_selection'] </filter>
+        <data name="annotation_plot" format="svg" from_work_dir="bakta_output/bakta_output.svg" label="${tool.name} on ${on_string}: Plot of the annotation">
+            <filter>output_files['output_selection'] and "file_plot" in output_files['output_selection']</filter>
         </data>
         <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file">
-            <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter>
+            <filter>output_files['output_selection'] and "log_txt" in output_files['output_selection']</filter>
         </data>
     </outputs>
     <tests>
         <test expect_num_outputs="13"> <!-- TEST_1 database + input -->
            <section name="input_option" >
-               <param name="bakta_db_select" value="V0.1_2022-08-29"/>
+               <param name="bakta_db_select" value="V5.0_2022-08-19"/>
                <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
                <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/>
                <param name="min_contig_length" value="250"/>
@@ -226,17 +231,13 @@
            <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv" lines_diff="4"/>
            <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/>
            <output name="summary_txt" value="TEST_1/TEST_1.txt" lines_diff="4"/>
-           <output name="annotation_plot">
-               <assert_contents>
-                   <has_size value="418991" delta="1000"/>
-               </assert_contents>
-           </output>
+           <output name="annotation_plot" value="TEST_1/TEST_1_plot.svg" ftype="svg" compare="sim_size"/>
           <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="6"/>
-          <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="6"/>
+          <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="15"/>
        </test>
        <test expect_num_outputs="4"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps  -->
            <section name="input_option" >
-               <param name="bakta_db_select" value="V0.1_2022-08-29"/>
+               <param name="bakta_db_select" value="V5.0_2022-08-19"/>
                <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
                <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/>
                <param name="min_contig_length" value="250"/>
@@ -264,34 +265,25 @@
                </assert_contents>
            </output>
            <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
-           <output name="annotation_plot">
-               <assert_contents>
-                   <has_size value="418991" delta="1000"/>
-               </assert_contents>
-           </output>
+           <output name="annotation_plot" value="TEST_2/TEST_2_plot.svg" ftype="svg" compare="sim_size"/>
        </test>
        <test expect_num_outputs="4"> <!-- TEST_3 test all skip steps  -->
            <section name="input_option" >
-               <param name="bakta_db_select" value="V0.1_2022-08-29"/>
+               <param name="bakta_db_select" value="V5.0_2022-08-19"/>
                <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
                <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/>
                <param name="min_contig_length" value="350"/>
            </section>
            <section name="workflow">
-               <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
+               <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori,--skip-plot"/>
            </section>
            <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="4"/>
            <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="4"/>
            <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
-           <output name="annotation_plot">
-               <assert_contents>
-                   <has_size value="418399" delta="1000"/>
-               </assert_contents>
-           </output>
          </test>
          <test expect_num_outputs="4"> <!-- TEST_4 annotations   -->
              <section name="input_option" >
-                 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
+                 <param name="bakta_db_select" value="V5.0_2022-08-19"/>
                  <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
                  <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/>
              </section>
@@ -306,15 +298,11 @@
              <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="4"/>
              <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="4"/>
              <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
-            <output name="annotation_plot">
-               <assert_contents>
-                   <has_size value="418399" delta="1000"/>
-               </assert_contents>
-           </output>
+            <output name="annotation_plot" value="TEST_4/TEST_4_plot.svg" ftype="svg" compare="sim_size"/>
          </test>
          <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary -->
              <section name="input_option" >
-                 <param name="bakta_db_select" value="V0.1_2022-08-29"/>
+                 <param name="bakta_db_select" value="V5.0_2022-08-19"/>
                  <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
                  <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/>
              </section>
@@ -323,14 +311,40 @@
                  <param name="translation_table" value="4"/>
              </section>
              <section name="workflow">
-                 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori"/>
+                 <param name="skip_analysis" value="--skip-trna,--skip-tmrna,--skip-rrna,--skip-ncrna,--skip-ncrna-region,--skip-crispr,--skip-cds,--skip-sorf,--skip-gap,--skip-ori,--skip-plot"/>
              </section>
              <section name="output_files">
                  <param name="output_selection" value="log_txt,sum_txt"/>
              </section>
-             <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="6"/>
+             <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="13"/>
              <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/>
          </test>
+        <test expect_num_outputs="13"> <!-- TEST_6: metagenome mode (meta flag enabled) with all 13 outputs selected -->
+            <section name="input_option">
+                <param name="bakta_db_select" value="V5.0_2022-08-19"/>
+                <param name="amrfinder_db_select" value="V3.6-2020-03-20.1"/>
+                <param name="input_file" value="NC_002127.1.fna" ftype="fasta"/>
+            </section>
+            <section name="annotation">
+                <param name="meta" value="true"/>
+            </section>
+            <section name="output_files">
+                <param name="output_selection" value="file_tsv,file_gff3,file_gbff,file_embl,file_fna,file_ffn,file_faa,hypo_tsv,hypo_fa,sum_txt,file_json,file_plot,log_txt"/>
+            </section>
+            <output name="annotation_tsv" value="TEST_6/TEST_6.tsv" lines_diff="2"/>
+            <output name="annotation_gff3" value="TEST_6/TEST_6.gff3" lines_diff="2"/>
+            <output name="annotation_gbff" value="TEST_6/TEST_6.gbff" lines_diff="8"/>
+            <output name="annotation_embl" value="TEST_6/TEST_6.embl" lines_diff="6"/>
+            <output name="annotation_fna" value="TEST_6/TEST_6.fna"/>
+            <output name="annotation_ffn" value="TEST_6/TEST_6.ffn"/>
+            <output name="annotation_faa" value="TEST_6/TEST_6.faa"/>
+            <output name="hypotheticals_tsv" value="TEST_6/TEST_6.hypotheticals.tsv" lines_diff="4"/>
+            <output name="hypotheticals_faa" value="TEST_6/TEST_6.hypotheticals.faa"/>
+            <output name="summary_txt" value="TEST_6/TEST_6.txt" lines_diff="4"/>
+            <output name="annotation_plot" value="TEST_6/TEST_6_plot.svg" ftype="svg" compare="sim_size"/>
+            <output name="annotation_json" value="TEST_6/TEST_6.json" lines_diff="6"/>
+            <output name="logfile" value="TEST_6/TEST_6.log" lines_diff="15"/>
+        </test>
     </tests>
     <help><![CDATA[**What it does**
           Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs.