view humann_renorm_table.xml @ 2:d7e9a3c41657 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 0966faf8782e9043772acfa32f4a4281687a19dd
author iuc
date Tue, 07 Feb 2023 19:46:09 +0000
parents e3391ccf21a0
children
line wrap: on
line source

<tool id="humann_renorm_table" name="Renormalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>a HUMAnN generated table</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edam_ontology"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <!--
        Command line for humann_renorm_table.
        * Job failure is decided by the process exit code (detect_errors="exit_code").
        * $special renders as 'y' or 'n' (truevalue/falsevalue of the boolean input).
        * $update_snames renders as the bare flag "--update-snames" or as nothing,
          which is why it is the only substitution left unquoted.
    -->
    <command detect_errors="exit_code"><![CDATA[
humann_renorm_table
    --input '$input' -o '$output'
    --units '$units' --mode '$mode'
    --special '$special'
    $update_snames
    ]]></command>
    <inputs>
        <!-- Table to renormalize; passed to the command via "input". -->
        <param argument="--input"
               type="data"
               format="tsv,tabular"
               label="Gene/pathway table"/>

        <!-- Target unit of the renormalized values. -->
        <param argument="--units" type="select" label="Normalization scheme">
            <option value="cpm" selected="true">Copies per million</option>
            <option value="relab">Relative abundance</option>
        </param>

        <!-- Which total each stratification level is divided by. -->
        <param argument="--mode" type="select" label="Normalization level">
            <option value="community" selected="true">Normalization of all levels by community total</option>
            <option value="levelwise">Normalization of all levels by levelwise total</option>
        </param>

        <!-- Rendered as "y"/"n" and handed to the special option as its value. -->
        <param argument="--special"
               type="boolean"
               truevalue="y"
               falsevalue="n"
               checked="true"
               label="Include the special features UNMAPPED, UNINTEGRATED, and UNGROUPED?"/>

        <!-- Rendered as the bare flag when checked, as an empty string otherwise. -->
        <param argument="--update-snames"
               type="boolean"
               truevalue="--update-snames"
               falsevalue=""
               checked="true"
               label="Update '-RPK' in sample names to appropriate suffix?"/>
    </inputs>
    <outputs>
        <!-- Single tabular dataset; the command writes it through -o '$output'. -->
        <data name="output" format="tabular"/>
    </outputs>
    <tests>
        <!--
            Boolean inputs are set with true/false rather than the wrapper's raw
            truevalue/falsevalue strings ("y", "n", ""), so the test cases stay readable
            and do not depend on how each flag happens to be rendered on the command line.
        -->

        <!-- CPM units, community-level totals, special features excluded, sample names left untouched. -->
        <test expect_num_outputs="1">
            <param name="input" value="demo_pathabundance.tsv"/>
            <param name="units" value="cpm"/>
            <param name="mode" value="community"/>
            <param name="special" value="false"/>
            <param name="update_snames" value="false"/>
            <output name="output" ftype="tabular" file="cpm_community_renormalized_pathway_abundance.tsv">
                <assert_contents>
                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis" />
                    <has_text text="578694" />
                </assert_contents>
            </output>
        </test>

        <!-- Relative abundance, level-wise totals, special features included, sample names left untouched. -->
        <test expect_num_outputs="1">
            <param name="input" value="demo_pathabundance.tsv"/>
            <param name="units" value="relab"/>
            <param name="mode" value="levelwise"/>
            <param name="special" value="true"/>
            <param name="update_snames" value="false"/>
            <output name="output" ftype="tabular" file="relab_levelwise_renormalized_pathway_abundance.tsv">
                <assert_contents>
                    <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified" />
                    <has_text text="0.630281" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

HUMAnN quantifies genes and pathways in units of RPKs (reads per kilobase). These account for gene length but not sample sequencing depth. 
While there are some applications, e.g. strain profiling, where RPK units are superior to depth-normalized units, most of the 
time a user will renormalize their samples prior to downstream analysis.

This tool provides the choice to normalize to relative abundance or copies per million (CPM) units. Both of these represent 
"total sum scaling (TSS)"-style normalization: in the former case, each sample is constrained to sum to 1, whereas in the 
latter case (CPMs) samples are constrained to sum to 1 million. Units out of 1 million are often more convenient for tables
with many, many features (such as genefamilies.tsv tables).

Note: CPM as used here does not refer to unnormalized COUNTS per million, but rather copies per million.
CPMs as used here are a generic analog of the TPM (transcripts per million) unit in RNA-seq. You may wish to use the 
abbreviation CoPM for added clarity.

By default, this tool normalizes all stratification levels to the sum of all community feature totals, but other options 
(such as level-wise normalization) are supported. "Special" features (such as UNMAPPED) can be included or excluded in the 
normalization process.
    ]]></help>
    <expand macro="citations"/>
</tool>