Mercurial > repos > iuc > humann_regroup_table
diff humann_regroup_table.xml @ 0:26ce946f4da9 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
author | iuc |
---|---|
date | Wed, 12 May 2021 08:59:30 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/humann_regroup_table.xml Wed May 12 08:59:30 2021 +0000
@@ -0,0 +1,182 @@
+<tool id="humann_regroup_table" name="Regroup" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>HUMAnN table features</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements"/>
+    <expand macro="version"/>
+    <!-- Builds the humann_regroup_table call; the "grouping" conditional decides whether the groups option or the custom option is passed. -->
+    <command detect_errors="exit_code"><![CDATA[
+humann_regroup_table
+    --input '$input'
+    --output '$output'
+    --function '$function'
+#if $grouping.type == "standard"
+    --groups '$grouping.groups'
+#else if $grouping.type == "large"
+    --custom '$grouping.grouping.fields.path'
+    $grouping.reversed
+#else
+    --custom '$grouping.custom'
+    $grouping.reversed
+#end if
+    --precision $precision
+    --ungrouped '$ungrouped'
+    --protected '$protected'
+    ]]></command>
+    <inputs>
+        <param argument="--input" type="data" format="tsv,tabular" label="Gene families table"/>
+        <param argument="--function" type="select" label="How to combine grouped features?">
+            <option value="sum" selected="true">Sum</option>
+            <option value="mean">Mean</option>
+        </param>
+        <conditional name="grouping">
+            <param name="type" type="select" label="Grouping">
+                <option value="standard">Basic grouping (to MetaCyc reactions)</option>
+                <option value="large">Grouping with larger mapping</option>
+                <option value="custom">Grouping with custom mapping</option>
+            </param>
+            <when value="standard">
+                <param argument="--groups" type="select" label="Grouping">
+                    <option value="uniref90_rxn">UniRef90 to MetaCyc reactions</option>
+                    <option value="uniref50_rxn">UniRef50 to MetaCyc reactions</option>
+                </param>
+            </when>
+            <when value="large">
+                <param name="grouping" type="select" label="Mapping to use for the grouping">
+                    <options from_data_table="humann_utility_mapping">
+                        <validator message="No utility mapping is available" type="no_options" />
+                        <!-- Keep values containing "map" but drop any containing "_name" (presumably the name maps meant for the rename tool; confirm against the data table). The lookahead must lead the pattern: placed after ".*" it can never reject a value. -->
+                        <filter type="regexp" column="0" value="^(?!.*_name).*map.*" keep="true"/>
+                    </options>
+                </param>
+                <param argument="--reversed" type="boolean" checked="false" truevalue="--reversed" falsevalue="" label="Is the groups file reversed?" help="Mapping from features to groups"/>
+            </when>
+            <when value="custom">
+                <param argument="--custom" type="data" format="tsv,txt,tabular" label="Custom mapping file for grouping" help="The format is explained in the help"/>
+                <param argument="--reversed" type="boolean" checked="false" truevalue="--reversed" falsevalue="" label="Is the groups file reversed?" help="Mapping from features to groups"/>
+            </when>
+        </conditional>
+        <param argument="--precision" type="integer" value="3" label="Decimal places to round to after applying function"/>
+        <param argument="--ungrouped" type="boolean" checked="true" truevalue="Y" falsevalue="N" label="Include an 'UNGROUPED' group to capture features that did not belong to other groups?"/>
+        <param argument="--protected" type="boolean" checked="true" truevalue="Y" falsevalue="N" label="Carry through protected features, such as 'UNMAPPED'?"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+    <tests>
+        <!-- Built-in grouping (uniref90_rxn), features combined by sum -->
+        <test expect_num_outputs="1">
+            <param name="input" value="demo_genefamilies.tsv"/>
+            <param name="function" value="sum"/>
+            <conditional name="grouping">
+                <param name="type" value="standard"/>
+                <param name="groups" value="uniref90_rxn"/>
+            </conditional>
+            <param name="precision" value="3"/>
+            <param name="ungrouped" value="Y"/>
+            <param name="protected" value="Y"/>
+            <output name="output" ftype="tabular">
+                <assert_contents>
+                    <has_text text="2.5.1.19-RXN|g__Bacteroides.s__Bacteroides_vulgatus"/>
+                    <has_text text="ACETYLGLUTKIN-RXN|g__Bacteroides.s__Bacteroides_vulgatus"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Mapping selected from the humann_utility_mapping data table, features combined by mean -->
+        <test expect_num_outputs="1">
+            <param name="input" value="demo_genefamilies.tsv"/>
+            <param name="function" value="mean"/>
+            <conditional name="grouping">
+                <param name="type" value="large"/>
+                <param name="grouping" value="utility_mapping-full-map_go_uniref90-3.0.0-29042021"/>
+            </conditional>
+            <param name="precision" value="3"/>
+            <param name="ungrouped" value="Y"/>
+            <param name="protected" value="Y"/>
+            <output name="output" ftype="tabular">
+                <assert_contents>
+                    <has_text text="GO:0000049|g__Bacteroides.s__Bacteroides_vulgatus" />
+                    <has_text text="GO:0003866|g__Bacteroides.s__Bacteroides_vulgatus" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Mapping supplied as a dataset through the custom option, features combined by sum -->
+        <test expect_num_outputs="1">
+            <param name="input" value="demo_genefamilies.tsv"/>
+            <param name="function" value="sum"/>
+            <conditional name="grouping">
+                <param name="type" value="custom"/>
+                <!-- file generated using customizemapping file -->
+                <param name="custom" value="test-db/utility_mapping/map_go_uniref90.txt"/>
+            </conditional>
+            <param name="precision" value="3"/>
+            <param name="ungrouped" value="Y"/>
+            <param name="protected" value="Y"/>
+            <output name="output" ftype="tabular">
+                <assert_contents>
+                    <has_text text="GO:0000049|g__Bacteroides.s__Bacteroides_vulgatus" />
+                    <has_text text="GO:0003866|g__Bacteroides.s__Bacteroides_vulgatus" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+This tool takes a table of feature values and a mapping of groups to component features to produce
+a new table with group values in place of feature values.
+
+HUMAnN gene family output can contain a very large number of features depending on the complexity
+of your underlying sample. One way to explore this information in a simplified manner is via HUMAnN's
+own pathway coverage and abundance, which summarize the values of their member genes. However, this
+approach does not apply to gene families that are not associated with metabolic pathways.
+
+To further simplify the exploration of gene family abundance data, users can regroup gene families
+into other functional categories using the current tool. This tool takes as arguments a gene
+family abundance table and a mapping (groups) file that indicates which gene families belong to which
+groups.
+
+Out of the box, HUMAnN can regroup gene families to MetaCyc reactions (a step which is also
+used internally as part of MetaCyc pathway quantification). Users can use additional mapping files
+for both UniRef90 and UniRef50 gene families to the following systems:
+
+- MetaCyc Reactions
+- KEGG Orthogroups (KOs)
+- Pfam domains
+- Level-4 enzyme commission (EC) categories
+- EggNOG (including COGs)
+- Gene Ontology (GO)
+- Informative GO
+
+In most cases, mappings are directly inferred from the annotation of the corresponding UniRef centroid sequence in UniProt.
+
+One exception to this is the "informative GO" (infogo1000) maps: these are informative subsets of GO computed from UniProt's
+annotations and the structure of the GO hierarchy specifically for HUMAnN (each informative GO term has >1,000 UniRef centroids
+annotated to it, but none of its progeny terms have >1,000 centroids so annotated).
+
+If the "UNMAPPED" gene abundance feature is included in a user's input, it will automatically be carried forward to the final output.
+In addition, genes that do not group with a non-trivial feature are combined as an "UNGROUPED" group. By default, UNGROUPED reflects
+the total abundance of genes that did not belong to another group (similar in spirit to the "UNINTEGRATED" value reported in the pathway
+abundance file).
+
+
+Some groups are not associated by default with human-readable names. To attach names to a regrouped table, use the HUMAnN rename tool.
+(The "GO" name map can be used for both raw GO and informative GO.)
+
+Inputs
+======
+
+Users are free to create and use additional mapping files and pass them to this tool. The format of a mapping file is::
+
+    group1 uniref1 uniref2 uniref3 ...
+    group2 uniref1 uniref5 ...
+
+Where spaces between items above denote TABS. By default, feature abundances (such as gene families) are summed to produce group abundances.
+
+
+
+]]></help>
+    <expand macro="citations"/>
+</tool>