Mercurial > repos > iuc > bracken
diff est-abundance.xml @ 7:978ae4147c29 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit db62b99fe2c0e77e7ee63da5fb315f4b6d95170b
author | iuc |
---|---|
date | Mon, 22 May 2023 19:24:46 +0000 |
parents | 79450f7fd718 |
children | 1d4bd12f01cf |
line wrap: on
line diff
--- a/est-abundance.xml Fri Aug 26 07:21:39 2022 +0000 +++ b/est-abundance.xml Mon May 22 19:24:46 2023 +0000 @@ -1,33 +1,38 @@ -<tool id="est_abundance" name="Estimate Abundance at Taxonomic Level" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> - <description>Bayesian Reestimation of Abundance with Kraken</description> +<tool id="est_abundance" name="Bracken" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>to re-estimate abundance at a taxonomic level from kraken output</description> <macros> <import>macros.xml</import> </macros> - <expand macro="edam" /> - <expand macro="xref" /> - <expand macro="requirements" /> + <expand macro="edam"/> + <expand macro="xref"/> + <expand macro="requirements"/> <command detect_errors="exit_code"><![CDATA[ + ## Prevent logfile pipe below from quenching the raised error. + set -o pipefail && + est_abundance.py - -i '$input' - -k '$kmer_distr.fields.path' - -l $level - -t $threshold - -o '$report' + -i '$input' + -k '$kmer_distr.fields.path' + -l $level + -t $threshold + -o '$report' + ## --out-report needs to be set always, since it is written next to the input file --out-report bracken.report + #if $logfile_output == "True" | tee '$logfile' #end if ]]> </command> <inputs> - <param name="input" type="data" format="tabular" label="Kraken report file" /> + <param name="input" type="data" format="tabular" label="Kraken report file"/> <param label="Select a Kmer distribution" name="kmer_distr" type="select"> <options from_data_table="bracken_databases"> - <validator message="No database is available" type="no_options" /> + <validator message="No database is available" type="no_options"/> </options> </param> - <param name="level" type="select" label="Level" help="Level to push all reads to" > + <param name="level" type="select" label="Level" help="Level to push all reads to"> <option value="S" selected="true">Species</option> <option value="G">Genus</option> <option value="F">Family</option> @@ -38,12 +43,12 @@ </param> <param name="threshold" type="integer" value="10" label="Number of mismatches allowed when matching tag" help="Threshold for the minimum number of reads kraken must assign to a classification for that - classification to be considered in the final abundance estimation." /> + classification to be considered in the final abundance estimation."/> <param argument="--out-report" type="boolean" checked="false" label="Produce Kraken-Style Bracken report"/> <param name="logfile_output" type="boolean" truevalue="True" falsevalue="False" label="Add log file output"/> </inputs> <outputs> - <data name="report" format="tabular" label="${tool.name} on ${on_string}: Report" /> + <data name="report" format="tabular" label="${tool.name} on ${on_string}: Report"/> <data name="kraken_report" format="tabular" from_work_dir="bracken.report" label="${tool.name} on ${on_string}: Kraken style report"> <filter>out_report</filter> </data> @@ -54,25 +59,36 @@ <tests> <test expect_num_outputs="1"> <param name="input" value="NC_003198.1_simulated_kraken_report.txt" ftype="tabular"/> - <param name="level" value="S" /> + <param name="level" value="S"/> <param name="kmer_distr" value="test_entry"/> <param name="logfile_output" value="False"/> <output name="report" file="NC_003198.1_simulated_bracken_report.txt" ftype="tabular"/> </test> <test expect_num_outputs="3"> <param name="input" value="NC_011750.1_simulated_kraken_report.txt" ftype="tabular"/> - <param name="level" value="S" /> + <param name="level" value="S"/> <param name="kmer_distr" value="test_entry"/> <param name="out_report" value="true"/> <param name="logfile_output" value="True"/> <output name="report" file="NC_011750.1_simulated_bracken_report.txt" ftype="tabular"/> <output name="kraken_report" file="NC_011750.1_simulated_kraken_style_bracken_report.txt" ftype="tabular"/> - <output name="logfile" file="test2.log" lines_diff="8" /> + <output name="logfile" file="test2.log" lines_diff="8"/> </test> </tests> <help> <![CDATA[ - Documentation can be found at `site <http://ccb.jhu.edu/software/bracken/index.shtml?t=manual>` + Bracken relies on Bayesian probabilities that derive from the knowledge about the Kraken classification of each read-length kmer from all genomes within the provided Kraken database. It takes the tabular report output of kraken/kraken2 representing abundance of all detected taxa and provides as output a table representing the re-estimated abundances of different taxa at the taxonomy level pre-determined by the user. For more information about the operation behind the scene, visit http://ccb.jhu.edu/software/bracken/index.shtml?t=manual. + + Prior to abundance estimation with bracken, we must divide each genome in the Kraken database into read-length kmers, classify each of those kmers and store as a data structure. This indexing step has already been performed for you and it suffices to select the name of the correct kraken DB that you used for read classification. + + Bracken output file format (tabular): + * Taxon name + * Taxonomy ID + * Level ID (S=Species, G=Genus, O=Order, F=Family, P=Phylum, K=Kingdom) + * Kraken assigned reads + * Added reads with abundance re-estimation + * Total reads after abundance re-estimation + * Fraction of total reads ]]></help> - <expand macro="citations" /> + <expand macro="citations"/> </tool>