diff est-abundance.xml @ 7:978ae4147c29 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit db62b99fe2c0e77e7ee63da5fb315f4b6d95170b
author iuc
date Mon, 22 May 2023 19:24:46 +0000
parents 79450f7fd718
children 1d4bd12f01cf
line wrap: on
line diff
--- a/est-abundance.xml	Fri Aug 26 07:21:39 2022 +0000
+++ b/est-abundance.xml	Mon May 22 19:24:46 2023 +0000
@@ -1,33 +1,38 @@
-<tool id="est_abundance" name="Estimate Abundance at Taxonomic Level" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
-    <description>Bayesian Reestimation of Abundance with Kraken</description>
+<tool id="est_abundance" name="Bracken" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>to re-estimate abundance at a taxonomic level from kraken output</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="edam" />
-    <expand macro="xref" />
-    <expand macro="requirements" />
+    <expand macro="edam"/>
+    <expand macro="xref"/>
+    <expand macro="requirements"/>
     <command detect_errors="exit_code"><![CDATA[
+        ## Prevent logfile pipe below from quenching the raised error.
+        set -o pipefail &&
+
         est_abundance.py
-        -i '$input'
-        -k '$kmer_distr.fields.path'
-        -l $level
-        -t $threshold
-        -o '$report'
+            -i '$input'
+            -k '$kmer_distr.fields.path'
+            -l $level
+            -t $threshold
+            -o '$report'
+
         ##  --out-report needs to be set always, since it is written next to the input file
         --out-report bracken.report
+
         #if $logfile_output == "True"
             | tee '$logfile'
         #end if
     ]]>
     </command>
     <inputs>
-        <param name="input" type="data" format="tabular" label="Kraken report file" />
+        <param name="input" type="data" format="tabular" label="Kraken report file"/>
         <param label="Select a Kmer distribution" name="kmer_distr" type="select">
             <options from_data_table="bracken_databases">
-                <validator message="No database is available" type="no_options" />
+                <validator message="No database is available" type="no_options"/>
             </options>
         </param>
-        <param name="level" type="select" label="Level" help="Level to push all reads to" >
+        <param name="level" type="select" label="Level" help="Level to push all reads to">
             <option value="S" selected="true">Species</option>
             <option value="G">Genus</option>
             <option value="F">Family</option>
@@ -38,12 +43,12 @@
         </param>
         <param name="threshold" type="integer" value="10" label="Number of mismatches allowed when matching tag"
                help="Threshold for the minimum number of reads kraken must assign to a classification for that
-                     classification to be considered in the final abundance estimation." />
+                     classification to be considered in the final abundance estimation."/>
         <param argument="--out-report" type="boolean" checked="false" label="Produce Kraken-Style Bracken report"/>
         <param name="logfile_output" type="boolean" truevalue="True" falsevalue="False" label="Add log file output"/>
     </inputs>
     <outputs>
-        <data name="report" format="tabular" label="${tool.name} on ${on_string}: Report" />
+        <data name="report" format="tabular" label="${tool.name} on ${on_string}: Report"/>
         <data name="kraken_report" format="tabular" from_work_dir="bracken.report" label="${tool.name} on ${on_string}: Kraken style report">
             <filter>out_report</filter>
         </data>
@@ -54,25 +59,36 @@
     <tests>
         <test expect_num_outputs="1">
             <param name="input" value="NC_003198.1_simulated_kraken_report.txt" ftype="tabular"/>
-            <param name="level" value="S" />
+            <param name="level" value="S"/>
             <param name="kmer_distr" value="test_entry"/>
             <param name="logfile_output" value="False"/>
             <output name="report" file="NC_003198.1_simulated_bracken_report.txt" ftype="tabular"/>
         </test>
         <test expect_num_outputs="3">
             <param name="input" value="NC_011750.1_simulated_kraken_report.txt" ftype="tabular"/>
-            <param name="level" value="S" />
+            <param name="level" value="S"/>
             <param name="kmer_distr" value="test_entry"/>
             <param name="out_report" value="true"/>
             <param name="logfile_output" value="True"/>
             <output name="report" file="NC_011750.1_simulated_bracken_report.txt" ftype="tabular"/>
             <output name="kraken_report" file="NC_011750.1_simulated_kraken_style_bracken_report.txt" ftype="tabular"/>
-            <output name="logfile" file="test2.log" lines_diff="8" />
+            <output name="logfile" file="test2.log" lines_diff="8"/>
         </test>
     </tests>
     <help>
     <![CDATA[
-        Documentation can be found at `site <http://ccb.jhu.edu/software/bracken/index.shtml?t=manual>`
+        Bracken relies on Bayesian probabilities that derive from the knowledge about the Kraken classification of each read-length kmer from all genomes within the provided Kraken database. It takes the tabular report output of kraken/kraken2 representing abundance of all detected taxa and provides as output a table representing the re-estimated abundances of different taxa at the taxonomy level pre-determined by the user. For more information about the operation behind the scene, visit http://ccb.jhu.edu/software/bracken/index.shtml?t=manual.
+
+        Prior to abundance estimation with bracken, we must divide each genome in the Kraken database into read-length kmers, classify each of those kmers and store as a data structure. This indexing step has already been performed for you and it suffices to select the name of the correct kraken DB that you used for read classification.
+
+        Bracken output file format (tabular):
+           * Taxon name
+           * Taxonomy ID
+           * Level ID (S=Species, G=Genus, O=Order, F=Family, P=Phylum, K=Kingdom)
+           * Kraken assigned reads
+           * Added reads with abundance re-estimation
+           * Total reads after abundance re-estimation
+           * Fraction of total reads
     ]]></help>
-    <expand macro="citations" />
+    <expand macro="citations"/>
 </tool>