view est-abundance.xml @ 7:978ae4147c29 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit db62b99fe2c0e77e7ee63da5fb315f4b6d95170b
author iuc
date Mon, 22 May 2023 19:24:46 +0000
parents 79450f7fd718
children 1d4bd12f01cf
line wrap: on
line source

<tool id="est_abundance" name="Bracken" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>to re-estimate abundance at a taxonomic level from kraken output</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edam"/>
    <expand macro="xref"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## With pipefail, a failing est_abundance.py still fails the job even
        ## when its stdout is piped into the optional tee at the end.
        set -o pipefail &&

        est_abundance.py
            -i '$input'
            -k '$kmer_distr.fields.path'
            -l '$level'
            -t '$threshold'
            -o '$report'

        ## --out-report is passed unconditionally with a fixed name in the
        ## working directory (left unset, it is written next to the input
        ## file). Whether it is kept is decided by the kraken_report output.
        --out-report bracken.report

        #if $logfile_output
            | tee '$logfile'
        #end if
    ]]></command>
    <inputs>
        <!-- Kraken report that est_abundance.py reads via -i. -->
        <param name="input" type="data" format="tabular" label="Kraken report file"/>
        <!-- Choices come from the bracken_databases data table; the selected
             entry's "path" field is what the command passes to -k. -->
        <param name="kmer_distr" type="select" label="Select a Kmer distribution">
            <options from_data_table="bracken_databases">
                <validator type="no_options" message="No database is available"/>
            </options>
        </param>
        <!-- Taxonomic level passed to -l. -->
        <param name="level" type="select" label="Level" help="Level to push all reads to">
            <option value="S" selected="true">Species</option>
            <option value="G">Genus</option>
            <option value="F">Family</option>
            <option value="O">Order</option>
            <option value="C">Class</option>
            <option value="P">Phylum</option>
            <option value="D">Domain</option>
        </param>
        <!-- Passed to -t. The label describes a read-count threshold, in
             agreement with the help text. -->
        <param name="threshold"
               type="integer"
               value="10"
               label="Minimum number of reads required for a classification"
               help="Threshold for the minimum number of reads Kraken must assign to a classification for that classification to be considered in the final abundance estimation."/>
        <!-- The command always writes bracken.report; this switch only decides
             whether it is exposed as the kraken_report output. -->
        <param argument="--out-report" type="boolean" checked="false" label="Produce Kraken-Style Bracken report"/>
        <!-- When enabled, the command pipes stdout through tee into the logfile output. -->
        <param name="logfile_output" type="boolean" truevalue="True" falsevalue="False" label="Add log file output"/>
    </inputs>
    <outputs>
        <!-- Main Bracken table; the command writes it through -o. -->
        <data name="report" format="tabular" label="${tool.name} on ${on_string}: Report"/>
        <!-- Collected from the working directory only when out_report is enabled. -->
        <data name="kraken_report" from_work_dir="bracken.report" format="tabular" label="${tool.name} on ${on_string}: Kraken style report">
            <filter>out_report</filter>
        </data>
        <!-- Copy of stdout captured by tee; present only when logfile_output is enabled. -->
        <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file">
            <filter>logfile_output</filter>
        </data>
    </outputs>
    <tests>
        <!-- Optional outputs switched off: only the main report is expected. -->
        <test expect_num_outputs="1">
            <param name="input" value="NC_003198.1_simulated_kraken_report.txt" ftype="tabular"/>
            <param name="kmer_distr" value="test_entry"/>
            <param name="level" value="S"/>
            <param name="logfile_output" value="False"/>
            <output name="report" file="NC_003198.1_simulated_bracken_report.txt" ftype="tabular"/>
        </test>
        <!-- Both optional outputs switched on: report, Kraken-style report and log file. -->
        <test expect_num_outputs="3">
            <param name="input" value="NC_011750.1_simulated_kraken_report.txt" ftype="tabular"/>
            <param name="kmer_distr" value="test_entry"/>
            <param name="level" value="S"/>
            <param name="out_report" value="true"/>
            <param name="logfile_output" value="True"/>
            <output name="report" file="NC_011750.1_simulated_bracken_report.txt" ftype="tabular"/>
            <output name="kraken_report" file="NC_011750.1_simulated_kraken_style_bracken_report.txt" ftype="tabular"/>
            <output name="logfile" file="test2.log" lines_diff="8"/>
        </test>
    </tests>
    <help>
    <![CDATA[
        Bracken relies on Bayesian probabilities derived from the Kraken classification of each read-length k-mer from all genomes within the provided Kraken database. It takes the tabular report output of Kraken/Kraken2, which represents the abundance of all detected taxa, and provides as output a table of the re-estimated abundances of the different taxa at the taxonomic level chosen by the user. For more information about what happens behind the scenes, visit http://ccb.jhu.edu/software/bracken/index.shtml?t=manual.

        Prior to abundance estimation with Bracken, each genome in the Kraken database must be divided into read-length k-mers, each of those k-mers classified, and the results stored in a data structure (the k-mer distribution). This indexing step has already been performed for you; it suffices to select the k-mer distribution that corresponds to the Kraken database you used for read classification.

        Bracken output file format (tabular):
           * Taxon name
           * Taxonomy ID
           * Level ID (S=Species, G=Genus, F=Family, O=Order, C=Class, P=Phylum, K=Kingdom, D=Domain)
           * Kraken assigned reads
           * Added reads with abundance re-estimation
           * Total reads after abundance re-estimation
           * Fraction of total reads
    ]]></help>
    <expand macro="citations"/>
</tool>