diff humann_regroup_table.xml @ 0:26ce946f4da9 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
author iuc
date Wed, 12 May 2021 08:59:30 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/humann_regroup_table.xml	Wed May 12 08:59:30 2021 +0000
@@ -0,0 +1,182 @@
+<tool id="humann_regroup_table" name="Regroup" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>HUMAnN table features</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements"/>
+    <expand macro="version"/>
+    <command detect_errors="exit_code"><![CDATA[
+humann_regroup_table
+    --input '$input'
+    --output '$output'
+    --function '$function'
+#if $grouping.type == "standard"
+    --groups '$grouping.groups'
+#else if $grouping.type == "large"
+    --custom '$grouping.grouping.fields.path'
+    $grouping.reversed
+#else:
+    --custom '$grouping.custom'
+    $grouping.reversed
+#end if
+    --precision $precision
+    --ungrouped '$ungrouped'
+    --protected '$protected'
+    ]]></command>
+    <inputs>
+        <param argument="--input" type="data" format="tsv,tabular" label="Gene families table"/>
+        <param argument="--function" type="select" label="How to combine grouped features?">
+            <option value="sum" selected="true">Sum</option>
+            <option value="mean">Mean</option>
+        </param>
+        <conditional name="grouping">
+            <param name="type" type="select" label="Grouping">
+                <option value="standard">Basic grouping (to MetaCyc pathways)</option>
+                <option value="large">Grouping with larger mapping</option>
+                <option value="custom">Grouping with custom mapping</option>
+            </param>
+            <when value="standard">
+                <param argument="--groups" type="select" label="Grouping">
+                    <option value="uniref90_rxn">UniRef90 to MetaCyc pathways</option>
+                    <option value="uniref50_rxn">UniRef50 to MetaCyc pathways</option>
+                </param>
+            </when>
+            <when value="large">
+                <param name="grouping" type="select" label="Mapping to use for the grouping">
+                    <options from_data_table="humann_utility_mapping">
+                        <validator message="No utility mapping is available" type="no_options" />
+                        <filter type="regexp" column="0" value=".*map.*(?!_name)" keep="true"/>
+                    </options>
+                </param>
+                <param argument="--reversed" type="boolean" checked="false" truevalue="--reversed" falsevalue="" label="Mapping from features to groups?"/>
+            </when>
+            <when value="custom">
+                <param argument="--custom" type="data" format="tsv,txt,tabular" label="Custom mapping file for grouping" help="The format is explained in the help"/>
+                <param argument="--reversed" type="boolean" checked="false" truevalue="--reversed" falsevalue="" label="Is the groups file reversed?" help="Mapping from features to groups"/>
+            </when>
+        </conditional>
+        <param argument="--precision" type="integer" value="3" label="Decimal places to round to after applying function"/>
+        <param argument="--ungrouped" type="boolean" checked="true" truevalue="Y" falsevalue="N" label="Include an 'UNGROUPED' group to capture features that did not belong to other groups?"/>
+        <param argument="--protected" type="boolean" checked="true" truevalue="Y" falsevalue="N" label="Carry through protected features, such as 'UNMAPPED'?"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input" value="demo_genefamilies.tsv"/>
+            <param name="function" value="sum"/>
+            <conditional name="grouping">
+                <param name="type" value="standard"/>
+                <param name="groups" value="uniref90_rxn"/>
+            </conditional>
+            <param name="precision" value="3"/>
+            <param name="ungrouped" value="Y"/>
+            <param name="protected" value="Y"/>
+            <output name="output" ftype="tabular">
+                <assert_contents>
+                    <has_text text="2.5.1.19-RXN|g__Bacteroides.s__Bacteroides_vulgatus"/>
+                    <has_text text="ACETYLGLUTKIN-RXN|g__Bacteroides.s__Bacteroides_vulgatus"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input" value="demo_genefamilies.tsv"/>
+            <param name="function" value="mean"/>
+            <conditional name="grouping">
+                <param name="type" value="large"/>
+                <param name="grouping" value="utility_mapping-full-map_go_uniref90-3.0.0-29042021"/>
+            </conditional>
+            <param name="precision" value="3"/>
+            <param name="ungrouped" value="Y"/>
+            <param name="protected" value="Y"/>
+            <output name="output" ftype="tabular">
+                <assert_contents>
+                    <has_text text="GO:0000049|g__Bacteroides.s__Bacteroides_vulgatus" />
+                    <has_text text="GO:0003866|g__Bacteroides.s__Bacteroides_vulgatus" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input" value="demo_genefamilies.tsv"/>
+            <param name="function" value="sum"/>
+            <conditional name="grouping">
+                <param name="type" value="custom"/>
+                <!-- file generated using customizemapping file -->
+                <param name="custom" value="test-db/utility_mapping/map_go_uniref90.txt"/>
+            </conditional>
+            <param name="precision" value="3"/>
+            <param name="ungrouped" value="Y"/>
+            <param name="protected" value="Y"/>
+            <output name="output" ftype="tabular">
+                <assert_contents>
+                    <has_text text="GO:0000049|g__Bacteroides.s__Bacteroides_vulgatus" />
+                    <has_text text="GO:0003866|g__Bacteroides.s__Bacteroides_vulgatus" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+This tool takes a table of feature values and a mapping of groups to component features to produce 
+a new table with group values in place of feature values.
+
+HUMAnN gene family output can contain a very large number of features depending on the complexity 
+of your underlying sample. One way to explore this information in a simplified manner is via HUMAnN's 
+own pathway coverage and abundance, which summarize the values of their member genes. However, this 
+approach does not apply to gene families that are not associated with metabolic pathways.
+
+To further simplify the exploration of gene family abundance data, users can regroup gene families 
+into other functional categories using the current tool. This tool takes as arguments a gene 
+family abundance table and a mapping (groups) file that indicates which gene families belong to which 
+groups.
+
+Out of the box, HUMAnN can regroup gene families to MetaCyc reactions (a step which is also 
+used internally as part of MetaCyc pathway quantification). Users can use additional mapping files 
+for both UniRef90 and UniRef50 gene families to the following systems:
+
+- MetaCyc Reactions
+- KEGG Orthogroups (KOs)
+- Pfam domains
+- Level-4 enzyme commission (EC) categories
+- EggNOG (including COGs)
+- Gene Ontology (GO)
+- Informative GO
+
+In most cases, mappings are directly inferred from the annotation of the corresponding UniRef centroid sequence in UniProt.
+
+One exception to this are the "informative GO" (infogo1000) maps: These are informative subsets of GO computed from UniProt's 
+annotations and the structure of the GO hierarchy specifically for HUMAnN (each informative GO term has >1,000 UniRef centroids 
+annotated to it, but none of its progeny terms have >1,000 centroids so annotated).
+
+If the "UNMAPPED" gene abundance feature is included in a user's input, it will automatically be carried forward to the final output. 
+In addition, genes that do not group with a non-trivial feature are combined as an "UNGROUPED" group. By default, UNGROUPED reflects 
+the total abundance of genes that did not belong to another group (similar in spirit to the "UNINTEGRATED" value reported in the pathway 
+abundance file).
+
+
+Some groups are not associated by default with human-readable names. To attach names to a regrouped table, use the HUMAnN rename tool
+(The "GO" name map can be used for both raw GO and informative GO.)    
+
+Inputs
+======
+
+Users are free to create and use additional mapping files and pass them to this tool. The format of a mapping file is:
+
+``
+group1  uniref1  uniref2  uniref3  ...
+``
+
+``
+group2  uniref1  uniref5  ...
+``
+
+Where spaces between items above denote TABS. By default, feature abundances (such as gene families) are summed to produce group abundances.
+
+
+
+]]></help>
+    <expand macro="citations"/>
+</tool>