view est-abundance.xml @ 8:1d4bd12f01cf draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bracken commit 2d68ebc8d29e6046811970c6ef04f683e9916857
author iuc
date Thu, 18 Jan 2024 15:38:05 +0000
parents 978ae4147c29
children
line wrap: on
line source

<tool id="est_abundance" name="Bracken" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- One-line summary of what the tool does. -->
    <description>to re-estimate abundance at a taxonomic level from kraken output</description>
    <!-- Imports macros.xml. The @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens used on the
         tool element and the macros expanded below are presumably defined there (TODO confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Macro expansions; judging by their names these supply EDAM annotations, external
         references and software requirements (verify against macros.xml). -->
    <expand macro="edam"/>
    <expand macro="xref"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## With pipefail the pipeline's exit status reflects a failing est_abundance.py
        ## even when its output is piped into tee for the optional log file.
        set -o pipefail &&

        ## --out-report is always passed: without it the Kraken-style report would be
        ## written next to the input file instead of into the working directory.
        est_abundance.py
            -i '$input' -k '$kmer_distr.fields.path'
            -l $level -t $threshold
            -o '$report'
            --out-report bracken.report

        ## Optionally duplicate the tool's standard output into the log file dataset.
        #if $logfile_output == "True"
            | tee '$logfile'
        #end if
    ]]></command>
    <inputs>
        <!-- Kraken report to re-estimate; passed to est_abundance.py as -i. -->
        <param name="input" type="data" format="tabular" label="Kraken report file"/>
        <!-- Entry of the bracken_databases data table; its "path" field is passed as -k. -->
        <param name="kmer_distr" type="select" label="Select a Kmer distribution">
            <options from_data_table="bracken_databases">
                <validator type="no_options" message="No database is available"/>
            </options>
        </param>
        <!-- Taxonomic rank code passed as -l; Species is the default. -->
        <param name="level" type="select" label="Level" help="Level to push all reads to">
            <option value="S2">Subspecies 2</option>
            <option value="S1">Subspecies 1</option>
            <option value="S" selected="true">Species</option>
            <option value="G">Genus</option>
            <option value="F">Family</option>
            <option value="O">Order</option>
            <option value="C">Class</option>
            <option value="P">Phylum</option>
            <option value="D">Domain</option>
        </param>
        <!-- Passed as -t. The label now describes the read-count threshold explained in the help
             text (the previous label talked about tag mismatches, which this option is not).
             The help is kept on one line so no indentation whitespace ends up in the attribute value. -->
        <param name="threshold" type="integer" value="10" label="Minimum number of reads required for a classification"
               help="Threshold for the minimum number of reads kraken must assign to a classification for that classification to be considered in the final abundance estimation."/>
        <!-- Only decides whether the kraken_report output is kept (see its filter); the command
             itself always writes bracken.report. -->
        <param argument="--out-report" type="boolean" checked="false" label="Produce Kraken-Style Bracken report"/>
        <!-- When enabled, the command pipes the tool's standard output through tee into the logfile output. -->
        <param name="logfile_output" type="boolean" truevalue="True" falsevalue="False" label="Add log file output"/>
    </inputs>
    <outputs>
        <!-- Bracken abundance table written by est_abundance.py via -o; always produced. -->
        <data name="report" format="tabular" label="${tool.name} on ${on_string}: Report"/>
        <!-- Kraken-style report picked up from the working directory; kept only when the
             out_report parameter is enabled. -->
        <data name="kraken_report" format="tabular" label="${tool.name} on ${on_string}: Kraken style report" from_work_dir="bracken.report">
            <filter>out_report</filter>
        </data>
        <!-- Copy of the tool's standard output; kept only when logfile_output is enabled. -->
        <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log file">
            <filter>logfile_output</filter>
        </data>
    </outputs>
    <tests>
        <!-- 1: species level, neither optional output requested, so only the report is expected. -->
        <test expect_num_outputs="1">
            <param name="input" value="NC_003198.1_simulated_kraken_report.txt" ftype="tabular"/>
            <param name="kmer_distr" value="test_entry"/>
            <param name="level" value="S"/>
            <param name="logfile_output" value="False"/>
            <output name="report" file="NC_003198.1_simulated_bracken_report.txt" ftype="tabular"/>
        </test>
        <!-- 2: species level with both the Kraken-style report and the log file enabled. -->
        <test expect_num_outputs="3">
            <param name="input" value="NC_011750.1_simulated_kraken_report.txt" ftype="tabular"/>
            <param name="kmer_distr" value="test_entry"/>
            <param name="level" value="S"/>
            <param name="out_report" value="true"/>
            <param name="logfile_output" value="True"/>
            <output name="report" file="NC_011750.1_simulated_bracken_report.txt" ftype="tabular"/>
            <output name="kraken_report" file="NC_011750.1_simulated_kraken_style_bracken_report.txt" ftype="tabular"/>
            <output name="logfile" file="test2.log" lines_diff="8"/>
        </test>
        <!-- 3: same input as test 1, re-estimated at subspecies level S1. -->
        <test expect_num_outputs="1">
            <param name="input" value="NC_003198.1_simulated_kraken_report.txt" ftype="tabular"/>
            <param name="kmer_distr" value="test_entry"/>
            <param name="level" value="S1"/>
            <param name="logfile_output" value="False"/>
            <output name="report" file="NC_003198.1_simulated_bracken_report_S1.txt" ftype="tabular"/>
        </test>
        <!-- 4: same input as test 1, re-estimated at subspecies level S2. -->
        <test expect_num_outputs="1">
            <param name="input" value="NC_003198.1_simulated_kraken_report.txt" ftype="tabular"/>
            <param name="kmer_distr" value="test_entry"/>
            <param name="level" value="S2"/>
            <param name="logfile_output" value="False"/>
            <output name="report" file="NC_003198.1_simulated_bracken_report_S2.txt" ftype="tabular"/>
        </test>
    </tests>
    <help>
    <![CDATA[
        Bracken relies on Bayesian probabilities derived from knowledge of how Kraken classifies each read-length kmer from all genomes within the provided Kraken database. It takes the tabular report output of Kraken/Kraken2, representing the abundance of all detected taxa, and provides as output a table with the re-estimated abundances of the taxa at the taxonomic level chosen by the user. For more information about the operation behind the scenes, visit http://ccb.jhu.edu/software/bracken/index.shtml?t=manual.

        Prior to abundance estimation with Bracken, each genome in the Kraken database must be divided into read-length kmers, each of those kmers classified, and the results stored as a data structure. This indexing step has already been performed for you, so it suffices to select the entry that corresponds to the Kraken database you used for read classification.

        Bracken output file format (tabular):
           * Taxon name
           * Taxonomy ID
           * Level ID (D=Domain, P=Phylum, C=Class, O=Order, F=Family, G=Genus, S=Species, S1/S2=Subspecies)
           * Kraken assigned reads
           * Added reads with abundance re-estimation
           * Total reads after abundance re-estimation
           * Fraction of total reads
    ]]></help>
    <expand macro="citations"/>
</tool>