Mercurial > repos > malex > gait_gm
diff sPLS.xml @ 1:ec9ee8edb84d draft
Initial upload of 21.6.10 release.
author | malex |
---|---|
date | Fri, 18 Jun 2021 20:23:19 +0000 |
parents | |
children | 2c218a253d56 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sPLS.xml Fri Jun 18 20:23:19 2021 +0000 @@ -0,0 +1,464 @@ +<tool id="secimtools_spls" name="Metabolite - Gene Integration" version="@WRAPPER_VERSION@"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <stdio> + <exit_code range="2" level="fatal" description="Not enough metabolites for the Analysis."/> + </stdio> + <command detect_errors="exit_code"><![CDATA[ + ## Free-text column names are single-quoted so that a value containing spaces reaches sPLS.py as one argument. + sPLS.py + -g=$metsOption.geneDataset + -gid='$metsOption.geneId' + #if $metsOption.useGeneAnnoCond.useGeneAnno == "y": + -ga=$metsOption.useGeneAnnoCond.geneAnno + -gn='$metsOption.useGeneAnnoCond.geneName' + #end if + -go=$metsOption.genesOption.allGenes + #if $metsOption.genesOption.allGenes == "geneList": + -gl=$metsOption.genesOption.relatedGeneList + #end if + #if $metsOption.genesOption.allGenes == "path": + -gkp=$metsOption.genesOption.geneKeggPath + -mkp=$metsOption.genesOption.metKeggPath + #end if + #if $metsOption.genesOption.allGenes == "pana": + -gka=$metsOption.genesOption.geneKeggAnno + -gkn='$metsOption.genesOption.geneKeggName' + -p2g=$metsOption.genesOption.path2genes + -cu=$metsOption.genesOption.cutoff + -f=$metsOption.genesOption.facSel + #if $metsOption.genesOption.PANAAnno.usePANAAnno == "yes": + -p2n=$metsOption.genesOption.PANAAnno.path2names + #end if + -o3=$panaOut + #end if + -k=$keepX + -t=$threshold + -m=$metDataset + -mid='$metId' + #if $useMetAnnoCond.useMetAnno == "y": + -ma=$useMetAnnoCond.metAnno + -mn='$useMetAnnoCond.metName' + #end if + -mo=$metsOption.allMets + -mka=$metsOption.metKeggAnno + #if $metsOption.allMets == "mmc": + -d=$metsOption.design + -c=$metsOption.corr + -sl=$metsOption.sigmaLow + -sh=$metsOption.sigmaHigh + -sn=$metsOption.sigmaNum + -f2=$figure2 + -o2=$mmcOut + #end if + #if $metsOption.allMets == "both": + -d=$metsOption.design + -c=$metsOption.corr + -sl=$metsOption.sigmaLow + -sh=$metsOption.sigmaHigh + -sn=$metsOption.sigmaNum + 
-f2=$figure2 + -o2=$mmcOut + #end if + -f1=$figure1 + -o1=$splsOut + ]]></command> + <inputs> + <param name="metDataset" type="data" format="tabular" label="Metabolite Wide Dataset for Integration" help="Select the Metabolite Wide Dataset from your history"/> + <param name="metId" type="text" size="30" value="" label="Unique Metabolite FeatureID" help="Name of the column in your Metabolite Wide Dataset that contains unique identifiers."/> + <conditional name="useMetAnnoCond"> + <param name="useMetAnno" type="select" label="Use Metabolite Annotation File?" help="You can choose to input a file containing metabolite annotation information (e.g. metabolite names, identifiers, etc.) to use for labeling the output files."> + <option value="n">No</option> + <option value="y">Yes</option> + </param> + <when value="y"> + <param name="metAnno" type="data" format="tabular" label="Metabolomic Annotation File" help="Select the Metabolomic Annotation File from your history."/> + <param name="metName" type="text" size="30" value="" label="Metabolite Names" help="Name of the column in your Metabolomic Annotation Dataset that contains metabolite annotation information."/> + </when> + </conditional> + <conditional name="metsOption"> + <param name="allMets" type="select" display="radio" label="Select which option to use for subsetting the Metabolite Wide Dataset" help="Select one of the options above."> + <option value="generic">By metabolite class -- Uses the 'Name_in_KEGG' column generated from the 'Link Name to KEGGID' tool to subset.</option> + <option value="mmc">By MMC pattern -- Runs the SECIMTools MMC tool and uses the tool-generated pattern blocks for subsetting.</option> + <option value="both">By both metabolite class AND by MMC pattern.</option> + </param> + <when value="generic"> + <param name="metKeggAnno" type="data" format="tabular" label="'Metabolite to KEGGID Link' File." help="Select the 'Metabolite to KEGGID Link' File from your history. 
This file can be generated using the 'Link Name to KEGGID' tool."/> + <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select the Gene Expression Wide Dataset from your history"/> + <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/> + <conditional name="useGeneAnnoCond"> + <param name="useGeneAnno" type="select" label="Use a Gene Annotation File?" help="You can choose to input a file containing gene annotation information (e.g. gene names, identifiers, etc.) for labeling the output files."> + <option value="n">No</option> + <option value="y">Yes</option> + </param> + <when value="y"> + <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/> + <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotation information."/> + </when> + </conditional> + <conditional name="genesOption"> + <param name="allGenes" type="select" display="radio" label="Select which option to use for subsetting the Gene Expression Wide Dataset" help="Select one of the options above."> + <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option> + <option value="geneList">Use a custom tsv file containing specific genes of interest.</option> + <option value="path">Include genes linked to each metabolite class through common KEGG pathways.</option> + <option value="pana">Use Metagenes from PANA (PAthway Network Analysis from gene expression data).</option> + </param> + <when value="geneList"> + <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="This custom list must consist of a single 
column of Gene Symbols."/> + </when> + <when value="path"> + <param name="geneKeggPath" type="data" format="tabular" label="Gene Expression KEGG Pathway File" help="Select the Gene Expression KEGG Pathway File from your history. This file can be generated using the 'Add KEGG Pathway Information' tool."/> + <param name="metKeggPath" type="data" format="tabular" label="Metabolomic KEGG Pathway File" help="Select the Metabolomic KEGG Pathway File from your history. This file can be generated using the 'Add KEGG Pathway Information' tool."/> + </when> + <when value="pana"> + <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select the Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/> + <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/> + <param name="path2genes" type="data" format="tabular" label="GeneKEGGID2PathwayID" help="Select the File from your history containing the list of ALL Gene KEGGIDs to Pathway IDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/> + <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%."> + <option value="single">single% -- Percent of variability for a given principal component.</option> + <option value="accum">%accum -- Percent of accumulated variability.</option> + <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option> + <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option> + </param> + <param name="cutoff" type="float" value="0.20" label="Variability cut-off value" help="Select the variability cut-off value. 
Default: 0.20"/> + <conditional name="PANAAnno"> + <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathwayIDs."> + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="yes"> + <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/> + </when> + </conditional> + </when> + </conditional> + </when> + <when value="mmc"> + <param name="metKeggAnno" type="data" format="tabular" label="Metabolite to KEGGID Link File to Input into MMC" help="Select the Metabolite KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/> + <param name="design" type="data" format="tabular" label="Design Dataset" help="Select the Design file to use with your Metabolite KEGGID Link File. This file can be generated using the 'Create: Design, Wide, and Annotation datasets' tool. Note that you need a column called 'sampleID' that contains the names of your samples."/> + <param name="sigmaLow" type="float" size="6" value="0.05" label="Lower sigma bound" help="Default: 0.05." /> + <param name="sigmaHigh" type="float" size="6" value="0.50" label="Upper sigma bound" help="Default: 0.50." /> + <param name="sigmaNum" type="integer" size="6" value="451" label="Number of Sigma values" help="Number of values of sigma to search. Default: 451." /> + <param name="corr" type="select" value="pearson" label="Correlation method" help="Select correlation method for preliminary correlation before clustering. Default: Pearson." 
> + <!-- Single-choice select: only the documented default (Pearson) is pre-selected. --> + <option value="pearson" selected="true">Pearson</option> + <option value="kendall">Kendall</option> + <option value="spearman">Spearman</option> + </param> + <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select the Gene Expression Wide Dataset from your history."/> + <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/> + <conditional name="useGeneAnnoCond"> + <param name="useGeneAnno" type="select" label="Use Gene Annotation File?" help="You can choose to input a file containing gene annotations (e.g. gene names, identifiers, etc.) for labeling output files."> + <option value="n">No</option> + <option value="y">Yes</option> + </param> + <when value="y"> + <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/> + <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotations."/> + </when> + </conditional> + <conditional name="genesOption"> + <param name="allGenes" type="select" display="radio" label="Select which option to use for subsetting the Gene Dataset" help="Select one of the options above."> + <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option> + <option value="geneList">Use a custom tsv file containing specific genes of interest.</option> + <option value="pana">Use Metagenes from PANA (PAthway Network Analysis from gene expression data).</option> + </param> + <when value="geneList"> + <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="This custom list must contain Gene Symbol IDs and must be a single 
column."/> + </when> + <when value="pana"> + <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/> + <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/> + <param name="path2genes" type="data" format="tabular" label="GeneKEGGID2PathwayID" help="Select the File from your history containing the list of ALL Gene KEGGIDs to Pathway IDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/> + <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%."> + <option value="single">single% -- Percent of variability for a given principal component.</option> + <option value="accum">%accum -- Percent of accumulated variability.</option> + <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option> + <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option> + </param> + <param name="cutoff" type="float" value="0.23" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.23"/> + <conditional name="PANAAnno"> + <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathway IDs."> + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="yes"> + <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. 
This file can be generated from the 'Add KEGG Pathway Information' tool."/> + </when> + </conditional> + </when> + </conditional> + </when> + <when value="both"> + <param name="metKeggAnno" type="data" format="tabular" label="Metabolite to KEGGID Link File" help="Select the Metabolite KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/> + <param name="design" type="data" format="tabular" label="Design File" help="Select the Design file to use with your Metabolite KEGGID Link File. This file can be generated using the 'Create: Design, Wide, and Annotation datasets' tool. Note that you need a 'sampleID' column."/> + <param name="sigmaLow" type="float" size="6" value="0.05" label="Lower sigma bound" help="Default: 0.05." /> + <param name="sigmaHigh" type="float" size="6" value="0.50" label="Upper sigma bound" help="Default: 0.50." /> + <param name="sigmaNum" type="integer" size="6" value="451" label="Number of Sigma values" help="Number of values of sigma to search. Default: 451." /> + <param name="corr" type="select" value="pearson" label="Correlation method" help="Select correlation method for preliminary correlation prior to clustering. Default: Pearson." > + <!-- Single-choice select: only the documented default (Pearson) is pre-selected. --> + <option value="pearson" selected="true">Pearson</option> + <option value="kendall">Kendall</option> + <option value="spearman">Spearman</option> + </param> + <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select Gene Expression Wide Dataset from your history"/> + <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/> + <conditional name="useGeneAnnoCond"> + <param name="useGeneAnno" type="select" label="Use a Gene Annotation File?" help="You can choose to input a file containing gene annotations (e.g. gene names, identifiers, etc.) 
to use for labeling output files."> + <option value="n">No</option> + <option value="y">Yes</option> + </param> + <when value="y"> + <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/> + <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotations."/> + </when> + </conditional> + <conditional name="genesOption"> + <param name="allGenes" type="select" display="radio" label="Gene Dataset Subsetting Option" help="Select one of the following."> + <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option> + <option value="geneList">Upload a custom list containing specific genes of interest.</option> + <option value="pana">Use Metagenes (PANA Approach).</option> + </param> + <when value="geneList"> + <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="The list must consist of a single column of Gene Symbols."/> + </when> + <when value="pana"> + <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/> + <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/> + <param name="path2genes" type="data" format="tabular" label="Gene Expression KEGG Pathway File" help="Select the File from your history that contains the list of ALL Gene KEGGIDs to PathwayIDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/> + <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. 
Default: single%."> + <option value="single">single% -- Percent of variability for a given principal component.</option> + <option value="accum">%accum -- Percent of accumulated variability.</option> + <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option> + <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option> + </param> + <param name="cutoff" type="float" value="0.23" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.23"/> + <conditional name="PANAAnno"> + <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathwayIDs."> + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="yes"> + <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. 
This file can be generated from the 'Add KEGG Pathway Information' tool."/> + </when> + </conditional> + </when> + </conditional> + </when> + </conditional> + <param name="keepX" type="integer" size="30" value="10" label="Number of Genes to Keep in the Model" help="Enter the number of genes to keep for each component in the sPLS analysis."/> + <param name="threshold" type="float" size="30" value="0.8" label="Threshold" help="Correlations under this threshold will NOT be included in the output file."/> + </inputs> + <outputs> + <data format="pdf" name="figure1" label="${tool.name} on ${on_string}: sPLS Figure"/> + <data format="tabular" name="splsOut" label="${tool.name} on ${on_string}: sPLS Correlation Table"/> + <data format="pdf" name="figure2" label="${tool.name} on ${on_string}: MMC Figure"> + <filter>(metsOption['allMets'] == 'mmc') or (metsOption['allMets'] == 'both')</filter> + </data> + <data format="tabular" name="mmcOut" label="${tool.name} on ${on_string}: MMC Output Table"> + <filter>(metsOption['allMets'] == 'mmc') or (metsOption['allMets'] == 'both')</filter> + </data> + <data format="tabular" name="panaOut" label="${tool.name} on ${on_string}: PANA Output Table"> + <filter>(metsOption['genesOption']['allGenes'] == 'pana')</filter> + </data> + </outputs> + <tests> + <test> + <param name="metDataset" value="metabolite_wide_dataset_01fhl.tsv"/> + <param name="metId" value="UniqueID"/> + <param name="allMets" value="generic"/> + <param name="metKeggAnno" value="metabolite_to_keggId_link_01fhl.tsv"/> + <param name="metName" value="MetName"/> + <param name="geneDataset" value="gene_wide_dataset_01fhl.tsv"/> + <param name="geneId" value="UniqueID"/> + <param name="allGenes" value="all"/> + <param name="keepX" value="10"/> + <param name="threshold" value="0.8"/> + <output name="splsOut" file="spls_correlation_file_01fhl.tsv"/> + <output name="figure1" file="spls_figure_01fhl.pdf"/> + </test> + </tests> + <help><![CDATA[ + +**Tool Description** + + NOTE: The 
parameters you select are data dependent. + + This tool carries out the integrated analysis of metabolite and gene expression data. Here, metabolite data are considered the dependent (Y) variable + and genes the explanatory variable. The tool allows for several combinations of metabolite and gene models. A note of caution: a complete metabolite + and gene expression dataset with no filtering will be challenging to interpret using this tool. + + We recommend that both gene expression and metabolite datasets be reduced to reflect a common biological hypothesis before running this tool. For example, + metabolite data can be subset by class (i.e. using the 'Name_in_KEGG' column generated from the 'Link Name to KEGGID' tool). Users who want to include + similarly behaving compounds without regard to identification or type of compound can estimate modules with the Modulated Modularity Clustering (MMC) tool + (Stone and Ayroles 2009). Each module can be examined separately. Finally, metabolite data can be reduced by using both metabolite class and the MMC tool. + Similarly, gene expression data can be reduced in scope by uploading and using a custom list of genes of interest or by using metagenes as implemented in PANA + (Ponzoni et al. 2014). + + 1) Classes of metabolites can be modeled as a function of metagenes. + 2) Classes of metabolites can be modeled as a function of a set of individual genes. + 3) Unbiased clusters of metabolites can be modeled as a function of metagenes + 4) Unbiased clusters of metabolites can be modeled as a function of a set of individual genes. + + The tool executes a partial least squares regression with variable selection (sparse PLS, sPLS) as implemented in the 'mixOmics' package (Rohart F., Gautier, B, Singh, + A and Lê Cao, K. A. mixOmics: an R package for ‘omics feature selection and multiple data integration. On bioRxiv). 
The mixOmics sPLS function is run in ‘classic mode’ + (http://mixomics.org/methods/spls/) with the number of components included in the model set to 2. In addition, the user selects the number of variables (genes) for + each component to use in model construction. + + This tool needs at least 1 subset with a minimum number of 3 metabolites to run properly. If the user selects subset metabolites by class and no metabolite groups are + identified or small metabolite groups with fewer than 3 members are found, the tool will stop and a warning message will be generated to try the MMC option instead. + Similarly, if the user selects subset metabolites using MMC clusters and there are no clusters with at least 3 metabolites, the tool will stop and a warning message + will be generated to try the 'by class' option instead. + +-------------------------------------------------------------------------------- + +**INPUT** + +**Please see the UserGuide for more details regarding tool inputs and options.** + +**Metabolite Wide Dataset** + A wide formatted dataset that contains measurements for each sample (samples are in columns): + + +-----------+---------+---------+---------+-----+ + | FeatureID | sample1 | sample2 | sample3 | ... | + +===========+=========+=========+=========+=====+ + | met_one | 10 | 20 | 10 | ... | + +-----------+---------+---------+---------+-----+ + | met_two | 5 | 22 | 30 | ... | + +-----------+---------+---------+---------+-----+ + | met_three | 30 | 27 | 2 | ... | + +-----------+---------+---------+---------+-----+ + | met_four | 32 | 17 | 8 | ... | + +-----------+---------+---------+---------+-----+ + | ... | ... | ... | ... | ... | + +-----------+---------+---------+---------+-----+ + +**Unique Metabolite FeatureID** + Name of the column in your Metabolite Wide Dataset that contains unique identifiers. + +**Optional - Metabolite Annotation File** + A wide format dataset containing metabolite descriptor information (e.g. metabolite names, m/z ratios). 
The user can choose a column in the Annotation File for labeling output files. + +**Optional - Metabolite Names** + Column name in the Metabolite Annotation File to use for labeling output files. + +**Data reduction (subsetting) of Metabolite Data** + 1) By metabolite class - uses a predefined grouping of metabolites based on the 'Name_in_KEGG' column in the Metabolite to KEGGID Link File. + 2) By MMC pattern - runs the SECIMTools MMC tool and uses the tool-generated pattern blocks for subsetting. Please see Stone and Ayroles (2009) for MMC options. + 3) By both metabolite class AND MMC pattern + +**Metabolite to KEGGID Link File** + This file MUST contain a column called 'Name_in_KEGG' and can be generated using the 'Link Name to KEGGID' tool. + +**Gene Expression Wide Dataset** + A wide formatted gene expression dataset that contains measurements for each sample: + + +------------+---------+---------+---------+-----+ + | FeatureID | sample1 | sample2 | sample3 | ... | + +============+=========+=========+=========+=====+ + | one | 10 | 20 | 10 | ... | + +------------+---------+---------+---------+-----+ + | two | 5 | 22 | 30 | ... | + +------------+---------+---------+---------+-----+ + | three | 30 | 27 | 2 | ... | + +------------+---------+---------+---------+-----+ + | four | 32 | 17 | 8 | ... | + +------------+---------+---------+---------+-----+ + | ... | ... | ... | ... | ... | + +------------+---------+---------+---------+-----+ + +**Unique Gene FeatureID** + Name of the column in your Gene Expression Wide Dataset that contains unique gene identifiers. + +**Optional - Gene Annotation File** + A wide format dataset containing gene annotation information (e.g. gene names). The user can choose a column in the Annotation File for labeling output files. + +**Optional - Gene Names** + Column name in the Gene Annotation File to use for labeling output files. 
+ +**Data reduction (subsetting) of Gene Expression Data** + 1) No subsetting - include all genes in the Gene Expression Wide Dataset + 2) Use a custom tsv file containing specific genes of interest - select a custom gene list from your history + 3) Include genes linked to each metabolite class through common KEGG pathways + 4) Use Metagenes from PANA (PAthway Network Analysis from gene expression data) + +**Gene Expression KEGG Pathway File** + Contains links between gene symbols and KEGG Pathways. Can be generated using the 'Add KEGG Pathway Information' tool + +**Metabolomic KEGG Pathway File** + Contains links between metabolites and KEGG Pathways. Can be generated using the 'Add KEGG Pathway Information' tool + +**Gene to KEGGID Link File** + Contains links between gene symbols and KEGGIDs. + +**Gene Symbol** + Name of the column in your Gene to KEGGID Link File that contains gene symbols + +**GeneKEGGID2PathwayID** + Contains KEGG links between gene KEGGIDs and KEGG PathwayIDs. Can be generated from the 'Add KEGG Pathway Information' tool + +**Number of Genes to Keep in Model** + default: 10. This is the number of genes to keep for each principal component in the sPLS analysis. + +**Threshold** + default: 0.8. Correlations less than this value will NOT be included in the output files. + +-------------------------------------------------------------------------------- + +**OUTPUT** + +**For metabolite reduction by metabolite class and all genes:** + (1) A PDF containing a sPLS figure for each metabolite class. + (2) A sPLS Correlation TSV file containing the correlations for each metabolite-gene pair and what subset (metabolite class) the pair belongs to. 
+ +**For metabolite reduction by MMC the following files will be output in addition to files (1) and (2) above** + (3) A MMC PDF Figure containing unsorted, sorted and sorted-smoothed heatmaps of the variance-covariance matrixes + (4) A MMC Output TSV file containing algorithm summaries in the following columns: + + (1) Unique metabolite featureID + (2) Module: Contains the module number for each feature calculated by the MMC tool. + (3) Entry Index: Contains the original order of the names of the rows of the input Metabolite Wide Dataset. + (4) Degree: Average of the absolute values of correlations for the given element in a block to other elements within that block. + (5) Average Degree: Average values of the degrees computed above across all elements within the given block. + +**For subsetting genes by generating metagenes using PANA the following files will be output in addition to files (1) and (2) above** + (5) A PANA Output TSV table containing associations between gene symbols and KEGG pathways. + + ]]> + </help> + <citations> + <citation type="bibtex">@article{ponzoni2014pathway, + title={Pathway network inference from gene expression data}, + author={Ponzoni, Ignacio and Nueda, Mar{\'\i}a Jos{\'e} and Tarazona, Sonia and G{\"o}tz, Stefan and Montaner, David and Dussaut, Julieta Sol and Dopazo, Joaqu{\'\i}n and Conesa, Ana}, + journal={BMC systems biology}, + volume={8}, + number={2}, + pages={S7}, + year={2014}, + publisher={BioMed Central} + }</citation> + <citation type="bibtex">@article{dejean2013mixomics, + title={mixOmics: Omics data integration project}, + author={Dejean, Sebastien and Gonzalez, Ignacio and L{\^e} Cao, Kim-Anh and Monget, Pierre and Coquery, J and Yao, F and Liquet, B and Rohart, F}, + journal={R package}, + year={2013} + }</citation> + <!-- BibTeX separates author names with "and"; a comma-separated list is not parsed as multiple authors. --> + <citation type="bibtex">@ARTICLE{Kirpich17secimtools, + author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. 
Morse and Lauren M. McIntyre}, + title = {SECIMTools: A suite of Metabolomics Data Analysis Tools}, + journal = {BMC Bioinformatics}, + year = {in press} + }</citation> + <citation type="bibtex"> + @article{garcia2010paintomics, + title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data}, + author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana}, + journal={Bioinformatics}, + volume={27}, + number={1}, + pages={137--139}, + year={2010}, + publisher={Oxford University Press} + }</citation> + </citations> +</tool>