view humann_regroup_table.xml @ 3:b7d324b18be1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 32d4c566afe55179531fdd106dc5a996cc09bbb1
author iuc
date Wed, 29 Mar 2023 19:59:06 +0000
parents 26ce946f4da9
children
line wrap: on
line source

<tool id="humann_regroup_table" name="Regroup" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>HUMAnN table features</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edam_ontology"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <command detect_errors="exit_code"><![CDATA[
## Regroup the features of the input table with humann_regroup_table.
humann_regroup_table
    --input '$input'
    --output '$output'
    --function '$function'
#if $grouping.type == "standard":
    ## Built-in mapping, selected by name.
    --groups '$grouping.groups'
#else:
    ## The two remaining modes both hand a mapping file to --custom;
    ## they only differ in where that file comes from.
    #if $grouping.type == "large":
        #set $mapping_file = $grouping.grouping.fields.path
    #else:
        #set $mapping_file = $grouping.custom
    #end if
    --custom '$mapping_file'
    $grouping.reversed
#end if
    --precision $precision
    --ungrouped '$ungrouped'
    --protected '$protected'
    ]]></command>
    <inputs>
        <!-- Feature table to regroup. -->
        <param argument="--input" type="data" format="tsv,tabular" label="Gene families table"/>
        <!-- Aggregation applied to the features that end up in the same group. -->
        <param argument="--function" type="select" label="How to combine grouped features?">
            <option value="sum" selected="true">Sum</option>
            <option value="mean">Mean</option>
        </param>
        <!-- Source of the mapping: a built-in group, an entry of the humann_utility_mapping
             data table, or a dataset supplied by the user. -->
        <conditional name="grouping">
            <param name="type" type="select" label="Grouping">
                <option value="standard">Basic grouping (to MetaCyc reactions)</option>
                <option value="large">Grouping with larger mapping</option>
                <option value="custom">Grouping with custom mapping</option>
            </param>
            <when value="standard">
                <!-- The *_rxn groups yield MetaCyc reaction identifiers (e.g. ACETYLGLUTKIN-RXN),
                     hence "reactions" rather than "pathways" in the labels. -->
                <param argument="--groups" type="select" label="Grouping">
                    <option value="uniref90_rxn">UniRef90 to MetaCyc reactions</option>
                    <option value="uniref50_rxn">UniRef50 to MetaCyc reactions</option>
                </param>
            </when>
            <when value="large">
                <param name="grouping" type="select" label="Mapping to use for the grouping">
                    <options from_data_table="humann_utility_mapping">
                        <validator message="No utility mapping is available" type="no_options" />
                        <!-- Keep entries containing "map" but reject those containing "_name"
                             (presumably the feature-name maps meant for renaming; confirm against
                             the data table). The negative lookahead must come first: placed after
                             a greedy ".*" it is only evaluated at the end of the string, where it
                             can never fail, so nothing would be filtered out. -->
                        <filter type="regexp" column="0" value="^(?!.*_name).*map.*" keep="true"/>
                    </options>
                </param>
                <param argument="--reversed" type="boolean" checked="false" truevalue="--reversed" falsevalue="" label="Mapping from features to groups?"/>
            </when>
            <when value="custom">
                <param argument="--custom" type="data" format="tsv,txt,tabular" label="Custom mapping file for grouping" help="The format is explained in the help"/>
                <param argument="--reversed" type="boolean" checked="false" truevalue="--reversed" falsevalue="" label="Is the groups file reversed?" help="Mapping from features to groups"/>
            </when>
        </conditional>
        <param argument="--precision" type="integer" value="3" label="Decimal places to round to after applying function"/>
        <!-- Both flags are passed to the command as the literal values Y or N. -->
        <param argument="--ungrouped" type="boolean" checked="true" truevalue="Y" falsevalue="N" label="Include an 'UNGROUPED' group to capture features that did not belong to other groups?"/>
        <param argument="--protected" type="boolean" checked="true" truevalue="Y" falsevalue="N" label="Carry through protected features, such as 'UNMAPPED'?"/>
    </inputs>
    <outputs>
        <!-- Regrouped table; the command writes it to the path given to its output option. -->
        <data format="tabular" name="output"/>
    </outputs>
    <tests>
        <!-- Built-in UniRef90 grouping, combined with sum. -->
        <test expect_num_outputs="1">
            <param name="input" value="demo_genefamilies.tsv"/>
            <conditional name="grouping">
                <param name="type" value="standard"/>
                <param name="groups" value="uniref90_rxn"/>
            </conditional>
            <param name="function" value="sum"/>
            <param name="precision" value="3"/>
            <param name="ungrouped" value="Y"/>
            <param name="protected" value="Y"/>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="2.5.1.19-RXN|g__Bacteroides.s__Bacteroides_vulgatus"/>
                    <has_text text="ACETYLGLUTKIN-RXN|g__Bacteroides.s__Bacteroides_vulgatus"/>
                </assert_contents>
            </output>
        </test>
        <!-- Mapping picked from the humann_utility_mapping data table, combined with mean. -->
        <test expect_num_outputs="1">
            <param name="input" value="demo_genefamilies.tsv"/>
            <conditional name="grouping">
                <param name="type" value="large"/>
                <param name="grouping" value="utility_mapping-full-map_go_uniref90-3.0.0-29042021"/>
            </conditional>
            <param name="function" value="mean"/>
            <param name="precision" value="3"/>
            <param name="ungrouped" value="Y"/>
            <param name="protected" value="Y"/>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="GO:0000049|g__Bacteroides.s__Bacteroides_vulgatus"/>
                    <has_text text="GO:0003866|g__Bacteroides.s__Bacteroides_vulgatus"/>
                </assert_contents>
            </output>
        </test>
        <!-- Mapping supplied as a dataset, combined with sum. -->
        <test expect_num_outputs="1">
            <param name="input" value="demo_genefamilies.tsv"/>
            <conditional name="grouping">
                <param name="type" value="custom"/>
                <!-- file generated using customizemapping file -->
                <param name="custom" value="test-db/utility_mapping/map_go_uniref90.txt"/>
            </conditional>
            <param name="function" value="sum"/>
            <param name="precision" value="3"/>
            <param name="ungrouped" value="Y"/>
            <param name="protected" value="Y"/>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="GO:0000049|g__Bacteroides.s__Bacteroides_vulgatus"/>
                    <has_text text="GO:0003866|g__Bacteroides.s__Bacteroides_vulgatus"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

This tool takes a table of feature values and a mapping of groups to component features to produce 
a new table with group values in place of feature values.

HUMAnN gene family output can contain a very large number of features depending on the complexity 
of your underlying sample. One way to explore this information in a simplified manner is via HUMAnN's 
own pathway coverage and abundance, which summarize the values of their member genes. However, this 
approach does not apply to gene families that are not associated with metabolic pathways.

To further simplify the exploration of gene family abundance data, users can regroup gene families 
into other functional categories using the current tool. This tool takes as arguments a gene 
family abundance table and a mapping (groups) file that indicates which gene families belong to which 
groups.

Out of the box, HUMAnN can regroup gene families to MetaCyc reactions (a step which is also 
used internally as part of MetaCyc pathway quantification). Users can use additional mapping files 
for both UniRef90 and UniRef50 gene families to the following systems:

- MetaCyc Reactions
- KEGG Orthogroups (KOs)
- Pfam domains
- Level-4 enzyme commission (EC) categories
- EggNOG (including COGs)
- Gene Ontology (GO)
- Informative GO

In most cases, mappings are directly inferred from the annotation of the corresponding UniRef centroid sequence in UniProt.

The one exception is the "informative GO" (infogo1000) maps: these are informative subsets of GO computed from UniProt's
annotations and the structure of the GO hierarchy specifically for HUMAnN (each informative GO term has >1,000 UniRef centroids
annotated to it, but none of its progeny terms have >1,000 centroids so annotated).

If the "UNMAPPED" gene abundance feature is included in a user's input, it will automatically be carried forward to the final output. 
In addition, genes that do not group with a non-trivial feature are combined as an "UNGROUPED" group. By default, UNGROUPED reflects 
the total abundance of genes that did not belong to another group (similar in spirit to the "UNINTEGRATED" value reported in the pathway 
abundance file).


Some groups are not associated by default with human-readable names. To attach names to a regrouped table, use the HUMAnN rename tool.
(The "GO" name map can be used for both raw GO and informative GO.)

Inputs
======

Users are free to create and use additional mapping files and pass them to this tool. The format of a mapping file is::

    group1  uniref1  uniref2  uniref3  ...
    group2  uniref1  uniref5  ...

Where spaces between items above denote TABS. By default, feature abundances (such as gene families) are summed to produce group abundances.



]]></help>
    <expand macro="citations"/>
</tool>