Mercurial > repos > iuc > bakta

diff bakta.xml @ 0:1a27ad3d0cdf draft
planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/bakta commit 7d595b05b8d595f554b57dadbf1beb0b39733af3
author: iuc
date: Thu, 01 Sep 2022 17:28:43 +0000
children: da5f1924bb2e
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bakta.xml	Thu Sep 01 17:28:43 2022 +0000
@@ -0,0 +1,507 @@
+<tool id="bakta" name="Bakta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>
+        genome annotation via alignment-free sequence identification
+    </description>
+    <macros>
+        <import>macro.xml</import>
+    </macros>
+    <expand macro='edam'/>
+    <expand macro='xrefs'/>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+
+    <command detect_errors="aggressive"><![CDATA[
+        bakta
+        #*======================================
+                    CPU option
+        ======================================*#
+        --threads \${GALAXY_SLOTS:-1}
+        #*======================================
+                    Bakta database
+        ======================================*#
+        --db $input_option.db_select.fields.path
+        #if $input_option.min_contig_length
+            --min-contig-length $input_option.min_contig_length
+        #else if $annotation.compliant
+            --min-contig-length 200
+        #else
+            --min-contig-length 1
+        #end if
+        --prefix bakta_output
+        #*======================================
+                  Organism options
+              genus/species/strain/plasmid
+        ======================================*#
+        #if $organism.genus
+            --genus '$organism.genus'
+        #end if
+        #if $organism.species
+            --species '$organism.species'
+        #end if
+        #if $organism.strain
+            --strain '$organism.strain'
+        #end if
+        #if $organism.plasmid
+            --plasmid '$organism.plasmid'
+        #end if
+        #*======================================
+                    Annotation options
+            gram type, prodigal/protein file
+        ======================================*#
+        $annotation.complete
+        #if $annotation.prodigal
+            --prodigal-tf '$annotation.prodigal'
+        #end if
+        #if $annotation.translation_table
+            --translation-table '$annotation.translation_table'
+        #end if
+        #if $annotation.gram
+            --gram '$annotation.gram'
+        #end if
+        $annotation.keep_contig_headers
+        #if $annotation.replicons
+            --replicons '$annotation.replicons'
+        #end if
+        $annotation.compliant
+        #if $annotation.proteins
+            --proteins '$annotation.proteins'
+        #end if
+        #*======================================
+                    Workflow OPTIONS
+         skip some step of the bakta analysis
+        ======================================*#
+
+        #if "skip_trna" in $workflow.skip_analysis
+            --skip-trna
+        #end if
+        #if "skip_tmrna" in $workflow.skip_analysis
+            --skip-tmrna
+        #end if
+        #if "skip_rrna" in $workflow.skip_analysis
+            --skip-rrna
+        #end if
+        #if "skip_ncrna" in $workflow.skip_analysis
+            --skip-ncrna
+        #end if
+        #if "skip_ncrna_region" in $workflow.skip_analysis
+            --skip-ncrna-region
+        #end if
+        #if "skip_crispr" in $workflow.skip_analysis
+            --skip-crispr
+        #end if
+        #if "skip_cds" in $workflow.skip_analysis
+            --skip-cds
+        #end if
+        #if "skip_sorf" in $workflow.skip_analysis
+            --skip-sorf
+        #end if
+        #if "skip_gap" in $workflow.skip_analysis
+            --skip-gap
+        #end if
+        #if "skip_ori" in $workflow.skip_analysis
+            --skip-ori
+        #end if
+
+        #*======================================
+                    Genome file
+        ======================================*#
+        '$input_option.input_file'
+        #*======================================
+                    LOG file
+        ======================================*#
+        | tee '$logfile'
+        ]]></command>
+    <inputs>
+      <!-- DB and file INPUT -->
+        <section name="input_option" title="Input/Output options" expanded="true">
+            <param name="db_select" type="select" label="The bakta database">
+                <options from_data_table="bakta_database">
+                    <validator message="No bakta database is available" type="no_options"/>
+                </options>
+            </param>
+            <param name="input_file" type="data" format="fasta,fasta.gz" label="Select genome in fasta format"/>
+            <param name="min_contig_length" type="integer" optional="true" min="0" label="Minimum contig size" help="Minimum contig size (default = 1; 200 in compliant mode) (--min-contig-length)"/>
+        </section>
+        <!-- Organism INFORMATION OPTIONS -->
+        <section name="organism" title="Optional organism options" expanded="false">
+            <param argument="--genus" type="text" optional="true" label="Specify genus name" help="ex. Escherichia">
+                <validator type="regex">^[a-zA-Z]+$</validator>
+            </param>
+            <param argument="--species" type="text" optional="true" label="Specify species name" help="ex. 'coli O157:H7'">
+                <validator type="regex">^[a-zA-Z0-9\s(:\-/)]+$</validator>
+            </param>
+            <param argument="--strain" type="text" optional="true" label="Specify strain name" help="ex. Sakai">
+                <validator type="regex">^[a-zA-Z]+$</validator>
+            </param>
+            <param argument="--plasmid" type="text" optional="true" label="Specify plasmid name" help="ex. pOSAK1">
+                <validator type="regex">^[a-zA-Z0-9\s(:\-/)]+$</validator>
+            </param>
+        </section>
+        <!-- ANNOTATION -->
+        <section name="annotation" title="Optional annotation">
+            <param argument="--complete" type="boolean" truevalue="--complete" falsevalue="" label="Complete replicons" help="All sequences are complete replicons (chromosome/plasmid[s])"/>
+            <param argument="--prodigal" type="data" format="txt" optional="true" label="Prodigal file" help="Prodigal training file for CDS prediction"/>
+            <param name="translation_table"  type="select" optional="true" label="Translation table" help="Default is the bacterial table 11">
+                <option value="4">4 Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
+                <option value="11" selected="true">11 Bacterial, Archaeal and Plant Plastid Code</option>
+            </param>
+            <param argument="--gram" type="select" optional="true" label="Gram type for signal peptide predictions" help="Gram type +/- or unknown. Default: unknown">
+                <option value="+">Gram+</option>
+                <option value="-">Gram-</option>
+                <option value="?" selected="true">Unknown</option>
+            </param>
+            <param name="keep_contig_headers" type="boolean" truevalue="--keep-contig-headers" falsevalue="" label="Keep original contig header (--keep-contig-headers)"/>
+            <param argument="--replicons" type="data" format="tsv,csv" optional="true" label="Replicon information table (tsv/csv)" help=""/>
+            <param argument="--compliant" type="boolean" truevalue="--compliant" falsevalue="" label="Force Genbank/ENA/DDJB compliance"/>
+            <param argument="--proteins" type="data" format="fasta" optional="true" label="Protein fasta file" help="Fasta file of trusted protein sequences for CDS annotation"/>
+        </section>
+        <!-- PARAMETER FOR WORKFLOW ANALYSIS -->
+        <section name="workflow" title="Workflow option to skip steps">
+            <param name="skip_analysis" type="select" display="checkboxes" multiple="true"  label="Select steps to skip">
+                <option value="skip_trna"> Skip tRNA detection and annotation </option>
+                <option value="skip_tmrna"> Skip tmRNA detection and annotation </option>
+                <option value="skip_rrna"> Skip rRNA detection and annotation </option>
+                <option value="skip_ncrna"> Skip ncRNA detection and annotation </option>
+                <option value="skip_ncrna_region"> Skip ncRNA region detection and annotation </option>
+                <option value="skip_crispr"> Skip CRISPR array detection and annotation </option>
+                <option value="skip_cds"> Skip CDS detection and annotation </option>
+                <option value="skip_sorf"> Skip sORF detection and annotation </option>
+                <option value="skip_gap"> Skip gap detection and annotation </option>
+                <option value="skip_ori"> Skip oriC/oriT detection and annotation </option>
+            </param>
+        </section>
+        <section name="output_files" title="Selection of the output files">
+          <param name="output_selection" type="select" display="checkboxes" multiple="true"  label="Output files selection">
+              <option value="file_tsv" selected="true"> Annotation file in TSV </option>
+              <option value="file_gff3" selected="true"> Annotation and sequence in GFF3 </option>
+              <option value="file_gbff" selected="true"> Annotations and sequences in GenBank format </option>
+              <option value="file_embl" selected="true"> Annotations and sequences in EMBL format </option>
+              <option value="file_fna" selected="true"> Replicon/contig DNA sequences as FASTA  </option>
+              <option value="file_ffn" selected="true"> Feature nucleotide sequences as FASTA </option>
+              <option value="file_faa" selected="true"> CDS/sORF amino acid sequences as FASTA </option>
+              <option value="hypo_tsv" selected="true"> Hypothetical protein CDS in TSV</option>
+              <option value="hypo_fa" selected="true"> Hypothetical protein CDS amino sequences as FASTA</option>
+              <option value="sum_txt" selected="true"> Summary as TXT</option>
+              <option value="file_json" selected="true"> Information on each annotated feature as JSON </option>
+              <option value="log_txt" selected="true"> Log file as TXT </option>
+          </param>
+        </section>
+
+    </inputs>
+    <outputs>
+        <data name="annotation_tsv" format="tabular" from_work_dir="bakta_output.tsv" label="${tool.name} on ${on_string}: bakta_output.tsv">
+            <filter> output_files['output_selection'] and "file_tsv" in output_files['output_selection'] </filter>
+        </data>
+        <data name="annotation_gff3" format="gff3" from_work_dir="bakta_output.gff3" label="${tool.name} on ${on_string}: bakta_output.gff3">
+            <filter> output_files['output_selection'] and "file_gff3" in output_files['output_selection'] </filter>
+        </data>
+        <data name="annotation_gbff" format="tabular" from_work_dir="bakta_output.gbff" label="${tool.name} on ${on_string}: bakta_output.gbff">
+            <filter> output_files['output_selection'] and "file_gbff" in output_files['output_selection'] </filter>
+        </data>
+        <data name="annotation_embl" format="tabular" from_work_dir="bakta_output.embl" label="${tool.name} on ${on_string}: bakta_output.embl">
+            <filter> output_files['output_selection'] and "file_embl" in output_files['output_selection'] </filter>
+        </data>
+        <data name="annotation_fna" format="fasta" from_work_dir="bakta_output.fna" label="${tool.name} on ${on_string}: bakta_output.fna">
+            <filter> output_files['output_selection'] and "file_fna" in output_files['output_selection'] </filter>
+        </data>
+        <data name="annotation_ffn" format="fasta" from_work_dir="bakta_output.ffn" label="${tool.name} on ${on_string}: bakta_output.ffn">
+            <filter> output_files['output_selection'] and "file_ffn" in output_files['output_selection'] </filter>
+        </data>
+        <data name="annotation_faa" format="fasta" from_work_dir="bakta_output.faa" label="${tool.name} on ${on_string}: bakta_output.faa">
+            <filter> output_files['output_selection'] and "file_faa" in output_files['output_selection'] </filter>
+        </data>
+        <data name="hypotheticals_tsv" format="tabular" from_work_dir="bakta_output.hypotheticals.tsv" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.tsv">
+            <filter> output_files['output_selection'] and "hypo_tsv" in output_files['output_selection'] </filter>
+        </data>
+        <data name="hypotheticals_faa" format="fasta" from_work_dir="bakta_output.hypotheticals.faa" label="${tool.name} on ${on_string}: bakta_output.hypotheticals.faa">
+          <filter> output_files['output_selection'] and "hypo_fa" in output_files['output_selection'] </filter>
+        </data>
+        <data name="summary_txt" format="txt" from_work_dir="bakta_output.txt" label="${tool.name} on ${on_string}: bakta_summary.txt">
+            <filter> output_files['output_selection'] and "sum_txt" in output_files['output_selection'] </filter>
+        </data>
+        <data name="annotation_json" format="json" from_work_dir="bakta_output.json" label="${tool.name} on ${on_string}: bakta_output.json">
+            <filter> output_files['output_selection'] and "file_json" in output_files['output_selection'] </filter>
+        </data>
+        <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file">
+            <filter> output_files['output_selection'] and "log_txt" in output_files['output_selection'] </filter>
+        </data>
+    </outputs>
+
+    <tests>
+      <test expect_num_outputs="12"> <!-- TEST_1 database + input -->
+          <section name="input_option" >
+              <param name="db_select" value="test-db-bakta"/>
+              <param name="input_file" value="NC_002127.1.fna"/>
+          </section>
+          <output name="logfile" value="TEST_1/TEST_1.log" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching n="1" expression="Genome size: 1,330 bp"/>
+                  <has_n_lines n="90" delta="1"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_tsv" value="TEST_1/TEST_1.tsv" lines_diff="1"/>
+          <output name="annotation_gff3" value="TEST_1/TEST_1.gff3" lines_diff="2">
+              <assert_contents>
+                  <has_text_matching expression="TTCTTCTGCGAGTTCGTGCAGCTTCTCACACATGGTGGCCTGCTCGTCAGCATCGAGTGC"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_gbff" value="TEST_1/TEST_1.gbff" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_embl" value="TEST_1/TEST_1.embl" lines_diff="2"/>
+          <output name="annotation_fna" value="TEST_1/TEST_1.fna"/>
+          <output name="annotation_ffn" value="TEST_1/TEST_1.ffn"/>
+          <output name="annotation_faa" value="TEST_1/TEST_1.faa"/>
+          <output name="hypotheticals_tsv" value="TEST_1/TEST_1.hypotheticals.tsv"/>
+          <output name="hypotheticals_faa" value="TEST_1/TEST_1.hypotheticals.faa"/>
+          <output name="summary_txt" value="TEST_1/TEST_1.txt">
+              <assert_contents>
+                  <has_text_matching expression="N50: 1330"/>
+              </assert_contents>
+            </output>
+          <output name="annotation_json" value="TEST_1/TEST_1.json" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="0.6203007518796992"/>
+              </assert_contents>
+          </output>
+      </test>
+      <test expect_num_outputs="12"> <!-- TEST_2 another input, add organism info some annotations and skip 2 steps  -->
+          <section name="input_option" >
+              <param name="db_select" value="test-db-bakta"/>
+              <param name="input_file" value="NC_002127.1.fna"/>
+              <param name="min_contig_length" value="250"/>
+          </section>
+          <section name="organism">
+              <param name="genus" value="Escherichia"/>
+              <param name="species" value="coli O157:H7"/>
+              <param name="strain" value="Sakai"/>
+              <param name="plasmid" value="pOSAK1"/>
+          </section>
+          <section name="annotation">
+              <param name="--gram" value="-"/>
+              <param name="keep_contig_headers" value="true"/>
+          </section>
+          <section name="workflow">
+              <param name="skip_analysis" value="skip_trna,skip_tmrna"/>
+          </section>
+          <output name="logfile" value="TEST_2/TEST_2.log" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="Genome size: 1,330 bp"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_tsv" value="TEST_2/TEST_2.tsv" lines_diff="2">
+              <assert_contents>
+                  <has_text_matching expression="IHHALP_00005"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_gff3" value="TEST_2/TEST_2.gff3" lines_diff="2">
+              <assert_contents>
+                  <has_text_matching expression="ID=NC_002127.1;Name=NC_002127.1;Is_circular=true"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_gbff" value="TEST_2/TEST_2.gbff" lines_diff="5">
+              <assert_contents>
+                  <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_embl" value="TEST_2/TEST_2.embl" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_fna" value="TEST_2/TEST_2.fna"/>
+          <output name="annotation_ffn" value="TEST_2/TEST_2.ffn"/>
+          <output name="annotation_faa" value="TEST_2/TEST_2.faa"/>
+          <output name="hypotheticals_tsv" value="TEST_2/TEST_2.hypotheticals.tsv"/>
+          <output name="hypotheticals_faa" value="TEST_2/TEST_2.hypotheticals.faa"/>
+          <output name="summary_txt" value="TEST_2/TEST_2.txt">
+              <assert_contents>
+                  <has_text_matching expression="N50: 1330"/>
+              </assert_contents>
+            </output>
+          <output name="annotation_json" value="TEST_2/TEST_2.json" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="0.4518796992481203"/>
+              </assert_contents>
+          </output>
+      </test>
+      <test expect_num_outputs="12"> <!-- TEST_3 test all skip steps  -->
+          <section name="input_option" >
+              <param name="db_select" value="test-db-bakta"/>
+              <param name="input_file" value="NC_002127.1.fna"/>
+              <param name="min_contig_length" value="350"/>
+          </section>
+          <section name="workflow">
+              <param name="skip_analysis" value="skip_trna,skip_tmrna,skip_rrna,skip_ncrna,skip_ncrna_region,skip_crispr,skip_cds,skip_sorf,skip_gap,skip_ori"/>
+          </section>
+          <output name="logfile" value="TEST_3/TEST_3.log" lines_diff="4">
+              <assert_contents>
+                  <has_text_matching expression="Genome size: 1,330 bp"/>
+              </assert_contents>
+          </output>
+          <output name="annotation_tsv" value="TEST_3/TEST_3.tsv" lines_diff="1"/>
+          <output name="annotation_gff3" value="TEST_3/TEST_3.gff3" lines_diff="2"/>
+          <output name="annotation_gbff" value="TEST_3/TEST_3.gbff" lines_diff="10"/>
+          <output name="annotation_embl" value="TEST_3/TEST_3.embl" lines_diff="4"/>
+          <output name="annotation_fna" value="TEST_3/TEST_3.fna"/>
+          <output name="annotation_ffn" value="TEST_3/TEST_3.ffn"/>
+          <output name="annotation_faa" value="TEST_3/TEST_3.faa"/>
+          <output name="annotation_json" value="TEST_3/TEST_3.json" lines_diff="4"/>
+        </test>
+        <test expect_num_outputs="12"> <!-- TEST_4 annotations   -->
+            <section name="input_option" >
+                <param name="db_select" value="test-db-bakta"/>
+                <param name="input_file" value="NC_002127.1.fna"/>
+            </section>
+            <section name="annotation">
+                <param name="complete" value="true"/>
+                <param name="translation_table" value="4"/>
+                <param name="prodigal" value="prodigal.tf"/>
+                <param name="replicons" value="replicons.tsv"/>
+                <param name="compliant" value="true"/>
+                <param name="proteins" value="user-proteins.faa"/>
+            </section>
+            <output name="logfile" value="TEST_4/TEST_4.log" lines_diff="4">
+                <assert_contents>
+                    <has_text_matching expression="potential: 16"/>
+                </assert_contents>
+            </output>
+            <output name="annotation_tsv" value="TEST_4/TEST_4.tsv" lines_diff="2"/>
+            <output name="annotation_gff3" value="TEST_4/TEST_4.gff3" lines_diff="2">
+                <assert_contents>
+                    <has_text_matching expression="ID=IHHALP_00005_gene;locus_tag=IHHALP_00005"/>
+                </assert_contents>
+            </output>
+            <output name="annotation_gbff" value="TEST_4/TEST_4.gbff" lines_diff="4">
+                <assert_contents>
+                    <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
+                </assert_contents>
+            </output>
+            <output name="annotation_embl" value="TEST_4/TEST_4.embl" lines_diff="4">
+                <assert_contents>
+                    <has_text_matching expression="MTKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRA"/>
+                </assert_contents>
+            </output>
+            <output name="annotation_fna" value="TEST_4/TEST_4.fna"/>
+            <output name="annotation_ffn" value="TEST_4/TEST_4.ffn"/>
+            <output name="annotation_faa" value="TEST_4/TEST_4.faa"/>
+            <output name="hypotheticals_tsv" value="TEST_4/TEST_4.hypotheticals.tsv"/>
+            <output name="hypotheticals_faa" value="TEST_4/TEST_4.hypotheticals.faa"/>
+            <output name="summary_txt" value="TEST_4/TEST_4.txt">
+                <assert_contents>
+                    <has_text_matching expression="GC: 45.2"/>
+                </assert_contents>
+              </output>
+            <output name="annotation_json" value="TEST_4/TEST_4.json" lines_diff="4">
+                <assert_contents>
+                    <has_text_matching expression="0.4518796992481203"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2"> <!-- TEST_5 skip all steps and keep only the logfile and summary -->
+            <section name="input_option" >
+                <param name="db_select" value="test-db-bakta"/>
+                <param name="input_file" value="NC_002127.1.fna"/>
+            </section>
+            <section name="annotation">
+                <param name="complete" value="true"/>
+                <param name="translation_table" value="4"/>
+            </section>
+            <section name="workflow">
+                <param name="skip_analysis" value="skip_trna,skip_tmrna,skip_rrna,skip_ncrna,skip_ncrna_region,skip_crispr,skip_cds,skip_sorf,skip_gap,skip_ori"/>
+            </section>
+            <section name="output_files">
+                <param name="output_selection" value="log_txt,sum_txt"/>
+            </section>
+            <output name="logfile" value="TEST_5/TEST_5.log" lines_diff="4"/>
+            <output name="summary_txt" value="TEST_5/TEST_5.txt" lines_diff="4"/>
+        </test>
+    </tests>
+    <help><![CDATA[**What it does**
+          Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs.
+
+        *Comprehensive & taxonomy-independent database*
+          Bakta provides a large and taxonomy-independent database using UniProt's entire UniRef protein sequence cluster universe.
+
+        *Protein sequence identification*
+          Bakta exactly identifies known identical protein sequences (IPS) from RefSeq and UniProt
+          allowing the fine-grained annotation of gene alleles (AMR) or closely related but distinct protein families.
+          This is achieved via an alignment-free sequence identification (AFSI) approach
+          using full-length MD5 protein sequence hash digests.
+        *Small proteins/short open reading frames*
+          Bakta detects and annotates small proteins/short open reading frames (sORF).
+
+        *Expert annotation systems*
+          To provide high quality annotations for certain proteins of higher interest, e.g. AMR & VF genes,
+          Bakta includes & merges different expert annotation systems.
+          Currently, Bakta uses NCBI's AMRFinderPlus for AMR gene annotations
+          as well as an generalized protein sequence expert system with distinct
+          coverage, identity and priority values for each sequence, currenlty comprising the VFDB as well as NCBI's BlastRules.
+
+        *Comprehensive workflow*
+          Bakta annotates ncRNA cis-regulatory regions, oriC/oriV/oriT
+          and assembly gaps as well as standard feature types: tRNA, tmRNA, rRNA, ncRNA genes, CRISPR, CDS.
+
+        *GFF3 & INSDC conform annotations*
+          Bakta writes GFF3 and INSDC-compliant (Genbank & EMBL) annotation files ready for submission
+          (checked via GenomeTools GFF3Validator, table2asn_GFF and ENA Webin-CLI for GFF3 and EMBL file formats,
+          respectively for representative genomes of all ESKAPE species).
+
+        *Bacteria & plasmids*
+          Bakta was designed to annotate bacteria (isolates & MAGs) and plasmids, only.
+
+        **Input options**
+          1. Choose a genome or assembly in fasta format to use bakta annotations
+          2. Choose A version of the Bakta database
+
+        **Organism options**
+        You can specify informations about analysed fasta as text input for:
+        - genus
+        - species
+        - strain
+        - plasmid
+
+        **Annotation options**
+        1. You can specify if all sequences (chromosome or plasmids) are complete or not
+        2. You can add your own prodigal traingin file for CDS predictionœ
+        3. The translation table could be modified, default is the 11th for bacteria
+        4. You can specify if bacteria is gram -/+ or unknonw (default value unknow)
+        5. You can keep the name of contig present in the input file
+        6. You can specify your own replicon table as a TSV/CSV file
+        7. The compliance option is for ready to submit annotation file to Public database
+        as ENA, Genbank EMBL
+        8. You can specify a protein sequence file for annotation in GenBank or fasta formats
+        Using the Fasta format, each reference sequence can be provided in a short or long format:
+
+        # short:
+        >id gene~~~product~~~dbxrefs
+        MAQ...
+
+        # long:
+        >id min_identity~~~min_query_cov~~~min_subject_cov~~~gene~~~product~~~dbxrefs
+        MAQ...
+
+        **Skip steps**
+        Some steps could be skiped:
+        - skip-trna           Skip tRNA detection & annotation
+        - skip-tmrna          Skip tmRNA detection & annotation
+        - skip-rrna           Skip rRNA detection & annotation
+        - skip-ncrna          Skip ncRNA detection & annotation
+        - skip-ncrna-region   Skip ncRNA region detection & annotation
+        - skip-crispr         Skip CRISPR array detection & annotation
+        - skip-cds            Skip CDS detection & annotation
+        - skip-pseudo         Skip pseudogene detection & annotation
+        - skip-sorf           Skip sORF detection & annotation
+        - skip-gap            Skip gap detection & annotation
+        - skip-ori            Skip oriC/oriT detection & annotation
+
+        **Output options**
+        Bakta produce numbers of output files, you can select what type of file you want:
+        - Summary of the annotation
+        - Annotated files
+        - Sequence files for nucleotide and/or amino acid
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
author	iuc
date	Thu, 01 Sep 2022 17:28:43 +0000
parents
children	da5f1924bb2e