<!-- Mercurial > repos > malex > gait_gm -->

<tool id="secimtools_spls" name="Metabolite - Gene Integration" version="@WRAPPER_VERSION@">
  <!-- Short description displayed alongside the tool name; currently empty. -->
  <description></description>
  <!-- Imports shared macro definitions from macros.xml (a separate file, not visible here). -->
  <macros>
      <import>macros.xml</import>
  </macros>
  <!-- Expands the macro named "requirements"; presumably it declares the tool's
       package dependencies. Verify in macros.xml. -->
  <expand macro="requirements" />
  <!-- Maps exit code 2 of the command to a fatal error with a readable description.
       NOTE(review): the <command> tag also sets detect_errors="exit_code"; confirm against the
       Galaxy tool schema whether that attribute takes precedence over this <stdio> block. -->
  <stdio>
    <exit_code range="2" level="fatal" description="Not enough metabolites for the Analysis."/>
  </stdio>
  <command detect_errors="exit_code"><![CDATA[
    ## Builds the sPLS.py command line from the tool form.
    ## Every substituted value is single-quoted so that dataset paths and
    ## user-typed column names reach sPLS.py as one argument each, even when
    ## they contain spaces or shell metacharacters.
    sPLS.py
      ## Gene expression inputs; these parameters are declared inside each
      ## branch of the metsOption conditional.
      -g='$metsOption.geneDataset'
      -gid='$metsOption.geneId'
      #if $metsOption.useGeneAnnoCond.useGeneAnno == "y":
        -ga='$metsOption.useGeneAnnoCond.geneAnno'
        -gn='$metsOption.useGeneAnnoCond.geneName'
      #end if
      ## Gene subsetting mode, followed by the extra inputs that mode needs.
      -go='$metsOption.genesOption.allGenes'
      #if $metsOption.genesOption.allGenes == "geneList":
        -gl='$metsOption.genesOption.relatedGeneList'
      #end if
      #if $metsOption.genesOption.allGenes == "path":
        -gkp='$metsOption.genesOption.geneKeggPath'
        -mkp='$metsOption.genesOption.metKeggPath'
      #end if
      #if $metsOption.genesOption.allGenes == "pana":
        -gka='$metsOption.genesOption.geneKeggAnno'
        -gkn='$metsOption.genesOption.geneKeggName'
        -p2g='$metsOption.genesOption.path2genes'
        -cu='$metsOption.genesOption.cutoff'
        -f='$metsOption.genesOption.facSel'
        #if $metsOption.genesOption.PANAAnno.usePANAAnno == "yes":
          -p2n='$metsOption.genesOption.PANAAnno.path2names'
        #end if
        -o3='$panaOut'
      #end if
      ## Model settings.
      -k='$keepX'
      -t='$threshold'
      ## Metabolite inputs.
      -m='$metDataset'
      -mid='$metId'
      #if $useMetAnnoCond.useMetAnno == "y":
        -ma='$useMetAnnoCond.metAnno'
        -mn='$useMetAnnoCond.metName'
      #end if
      ## Metabolite subsetting mode; metKeggAnno is declared in every branch.
      -mo='$metsOption.allMets'
      -mka='$metsOption.metKeggAnno'
      ## The mmc and both modes take the same MMC arguments and both write
      ## the MMC figure and table, so they share one branch.
      #if str($metsOption.allMets) in ("mmc", "both"):
        -d='$metsOption.design'
        -c='$metsOption.corr'
        -sl='$metsOption.sigmaLow'
        -sh='$metsOption.sigmaHigh'
        -sn='$metsOption.sigmaNum'
        -f2='$figure2'
        -o2='$mmcOut'
      #end if
      ## Outputs that are always written.
      -f1='$figure1'
      -o1='$splsOut'
  ]]></command>
  <inputs>
    <!-- Metabolite wide dataset and the name of its unique-identifier column. -->
    <param name="metDataset"
           type="data"
           format="tabular"
           label="Metabolite Wide Dataset for Integration"
           help="Select the Metabolite Wide Dataset from your history"/>
    <param name="metId"
           type="text"
           size="30"
           value=""
           label="Unique Metabolite FeatureID"
           help="Name of the column in your Metabolite Wide Dataset that contains unique identifiers."/>
    <!-- Optional metabolite annotation file; the file and column inputs appear only when "Yes" is chosen. -->
    <conditional name="useMetAnnoCond">
      <param name="useMetAnno"
             type="select"
             label="Use Metabolite Annotation File?"
             help="You can choose to input a file containing metabolite annotation information (e.g. metabolite names, identifiers, etc.) to use for labeling the output files.">
        <option value="n">No</option>
        <option value="y">Yes</option>
      </param>
      <when value="n"/>
      <when value="y">
        <param name="metAnno"
               type="data"
               format="tabular"
               label="Metabolomic Annotation File"
               help="Select the Metabolomic Annotation File from your history."/>
        <param name="metName"
               type="text"
               size="30"
               value=""
               label="Metabolite Names"
               help="Name of the column in your Metabolomic Annotation Dataset that contains metabolite annotation information."/>
      </when>
    </conditional>
    <conditional name="metsOption">
      <!-- Selector for the metabolite subsetting mode; its value picks the matching <when> branch below. -->
      <param name="allMets"
             type="select"
             display="radio"
             label="Select which option to use for subsetting the Metabolite Wide Dataset"
             help="Select one of the options above.">
        <option value="generic">By metabolite class -- Uses the 'Name_in_KEGG' column generated from the 'Link Name to KEGGID' tool to subset.</option>
        <option value="mmc">By MMC pattern -- Runs the SECIMTools MMC tool and uses the tool-generated pattern blocks for subsetting.</option>
        <option value="both">By both metabolite class AND by MMC pattern.</option>
      </param>
      <!-- Branch "generic": metabolites are subset by class. The gene-side inputs are declared here as well. -->
      <when value="generic">
        <param name="metKeggAnno" type="data" format="tabular" label="'Metabolite to KEGGID Link' File." help="Select the 'Metabolite to KEGGID Link' File from your history. This file can be generated using the 'Link Name to KEGGID' tool."/>
        <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select the Gene Expression Wide Dataset from your history"/>
        <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/>
        <!-- Optional gene annotation file used for labeling outputs. -->
        <conditional name="useGeneAnnoCond">
          <param name="useGeneAnno" type="select" label="Use a Gene Annotation File?" help="You can choose to input a file containing gene annotation information (e.g. gene names, identifiers, etc.) for labeling the output files.">
            <option value="n">No</option>
            <option value="y">Yes</option>
          </param>
          <when value="y">
            <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/>
            <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotation information."/>
          </when>
          <when value="n" />
        </conditional>
        <!-- Gene subsetting mode. Only this branch offers the "path" option (genes linked through common KEGG pathways). -->
        <conditional name="genesOption">
          <param name="allGenes" type="select" display="radio" label="Select which option to use for subsetting the Gene Expression Wide Dataset" help="Select one of the options above.">
            <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option>
            <option value="geneList">Use a custom tsv file containing specific genes of interest.</option>
            <option value="path">Include genes linked to each metabolite class through common KEGG pathways.</option>
            <option value="pana">Use Metagenes from PANA (PAthway Network Analysis from gene expression data).</option>
          </param>
          <when value="all" />
          <when value="geneList">
            <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="This custom list must consist of a single column of Gene Symbols."/>
          </when>
          <when value="path">
            <param name="geneKeggPath" type="data" format="tabular" label="Gene Expression KEGG Pathway File" help="Select the Gene Expression KEGG Pathway File from your history. This file can be generated using the 'Add KEGG Pathway Information' tool."/>
            <param name="metKeggPath" type="data" format="tabular" label="Metabolomic KEGG Pathway File" help="Select the Metabolomic KEGG Pathway File from your history. This file can be generated using the 'Add KEGG Pathway Information' tool."/>
          </when>
          <when value="pana">
            <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select the Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
            <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/>
            <param name="path2genes" type="data" format="tabular" label="GeneKEGGID2PathwayID" help="Select the File from your history containing the list of ALL Gene KEGGIDs to Pathway IDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
            <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%.">
              <option value="single">single% -- Percent of variability for a given principal component.</option>
              <option value="accum">%accum -- Percent of accumulated variability.</option>
              <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option>
              <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option>
            </param>
            <!-- NOTE(review): the default here is 0.20, while the "mmc" and "both" branches default to 0.23; confirm which value is intended. -->
            <param name="cutoff" type="float" value="0.20" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.20"/>
            <!-- Optional pathway-name annotations for PANA results. -->
            <conditional name="PANAAnno">
              <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathwayIDs.">
                <option value="no">No</option>
                <option value="yes">Yes</option>
              </param>
              <when value="yes">
                <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
              </when>
              <when value="no" />
            </conditional>
          </when>
        </conditional>
      </when>
      <!-- Branch "mmc": metabolites are subset by MMC pattern. Adds the MMC inputs (design file, sigma search range, correlation method) and the gene-side inputs. -->
      <when value="mmc">
        <param name="metKeggAnno" type="data" format="tabular" label="Metabolite to KEGGID Link File to Input into MMC" help="Select the Metabolite KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
        <param name="design" type="data" format="tabular" label="Design Dataset" help="Select the Design file to use with your Metabolite KEGGID Link File. This file can be generated using the 'Create: Design, Wide, and Annotation datasets' tool. Note that you need a column called 'sampleID' that contains the names of your samples."/>
        <param name="sigmaLow" type="float" size="6" value="0.05" label="Lower sigma bound" help="Default: 0.05." />
        <param name="sigmaHigh" type="float" size="6" value="0.50" label="Upper sigma bound" help="Default: 0.50." />
        <param name="sigmaNum" type="integer" size="6" value="451" label="Number of Sigma values" help="Number of values of sigma to search. Default: 451." />
        <!-- Only Pearson is marked selected so the form default matches the documented default. -->
        <param name="corr" type="select" value="pearson" label="Correlation method" help="Select correlation method for preliminary correlation before clustering. Default: Pearson." >
          <option value="pearson" selected="true">Pearson</option>
          <option value="kendall">Kendall</option>
          <option value="spearman">Spearman</option>
        </param>
        <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select the Gene Expression Wide Dataset from your history."/>
        <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/>
        <!-- Optional gene annotation file used for labeling outputs. -->
        <conditional name="useGeneAnnoCond">
          <param name="useGeneAnno" type="select" label="Use Gene Annotation File?" help="You can choose to input a file containing gene annotations (e.g. gene names, identifiers, etc.) for labeling output files.">
            <option value="n">No</option>
            <option value="y">Yes</option>
          </param>
          <when value="y">
            <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/>
            <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotations."/>
          </when>
          <when value="n"/>
        </conditional>
        <!-- Gene subsetting mode; this branch offers no "path" option. -->
        <conditional name="genesOption">
          <param name="allGenes" type="select" display="radio" label="Select which option to use for subsetting the Gene Dataset" help="Select one of the options above.">
            <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option>
            <option value="geneList">Use a custom tsv file containing specific genes of interest.</option>
            <option value="pana">Use Metagenes from PANA (PAthway Network Analysis from gene expression data).</option>
          </param>
          <when value="all" />
          <when value="geneList">
            <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="This custom list must contain Gene Symbol IDs and must be a single column."/>
          </when>
          <when value="pana">
            <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
            <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/>
            <param name="path2genes" type="data" format="tabular" label="GeneKEGGID2PathwayID" help="Select the File from your history containing the list of ALL Gene KEGGIDs to Pathway IDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
            <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%.">
              <option value="single">single% -- Percent of variability for a given principal component.</option>
              <option value="accum">%accum -- Percent of accumulated variability.</option>
              <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option>
              <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option>
            </param>
            <param name="cutoff" type="float" value="0.23" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.23"/>
            <!-- Optional pathway-name annotations for PANA results. -->
            <conditional name="PANAAnno">
              <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathway IDs.">
                <option value="no">No</option>
                <option value="yes">Yes</option>
              </param>
              <when value="yes">
                <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
              </when>
              <when value="no" />
            </conditional>
          </when>
        </conditional>
      </when>
      <!-- Branch "both": metabolites are subset by class AND by MMC pattern. Declares the same MMC and gene-side inputs as the "mmc" branch. -->
      <when value="both">
        <param name="metKeggAnno" type="data" format="tabular" label="Metabolite to KEGGID Link File" help="Select the Metabolite KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
        <param name="design" type="data" format="tabular" label="Design File" help="Select the Design file to use with your Metabolite KEGGID Link File. This file can be generated using the 'Create: Design, Wide, and Annotation datasets' tool. Note that you need a 'sampleID' column."/>
        <param name="sigmaLow" type="float" size="6" value="0.05" label="Lower sigma bound" help="Default: 0.05." />
        <param name="sigmaHigh" type="float" size="6" value="0.50" label="Upper sigma bound" help="Default: 0.50." />
        <param name="sigmaNum" type="integer" size="6" value="451" label="Number of Sigma values" help="Number of values of sigma to search. Default: 451." />
        <!-- Only Pearson is marked selected so the form default matches the documented default. -->
        <param name="corr" type="select" value="pearson" label="Correlation method" help="Select correlation method for preliminary correlation prior to clustering. Default: Pearson." >
          <option value="pearson" selected="true">Pearson</option>
          <option value="kendall">Kendall</option>
          <option value="spearman">Spearman</option>
        </param>
        <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select Gene Expression Wide Dataset from your history"/>
        <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/>
        <!-- Optional gene annotation file used for labeling outputs. -->
        <conditional name="useGeneAnnoCond">
          <param name="useGeneAnno" type="select" label="Use a Gene Annotation File?" help="You can choose to input a file containing gene annotations (e.g. gene names, identifiers, etc.) to use for labeling output files.">
            <option value="n">No</option>
            <option value="y">Yes</option>
          </param>
          <when value="y">
            <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/>
            <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotations."/>
          </when>
          <when value="n" />
        </conditional>
        <!-- Gene subsetting mode; this branch offers no "path" option. -->
        <conditional name="genesOption">
          <param name="allGenes" type="select" display="radio" label="Gene Dataset Subsetting Option" help="Select one of the following.">
            <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option>
            <option value="geneList">Upload a custom list containing specific genes of interest.</option>
            <option value="pana">Use Metagenes (PANA Approach).</option>
          </param>
          <when value="all" />
          <when value="geneList">
            <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="The list must consist of a single column of Gene Symbols."/>
          </when>
          <when value="pana">
            <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
            <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/>
            <param name="path2genes" type="data" format="tabular" label="Gene Expression KEGG Pathway File" help="Select the File from your history that contains the list of ALL Gene KEGGIDs to PathwayIDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
            <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%.">
              <option value="single">single% -- Percent of variability for a given principal component.</option>
              <option value="accum">%accum -- Percent of accumulated variability.</option>
              <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option>
              <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option>
            </param>
            <param name="cutoff" type="float" value="0.23" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.23"/>
            <!-- Optional pathway-name annotations for PANA results. -->
            <conditional name="PANAAnno">
              <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathwayIDs.">
                <option value="no">No</option>
                <option value="yes">Yes</option>
              </param>
              <when value="yes">
                <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
              </when>
              <when value="no" />
            </conditional>
          </when>
        </conditional>
      </when>
    </conditional>
    <!-- sPLS model settings, passed to the command as -k and -t. -->
    <param name="keepX"
           type="integer"
           size="30"
           value="10"
           label="Number of Genes to Keep in the Model"
           help="Enter the number of genes to keep for each component in the sPLS analysis."/>
    <param name="threshold"
           type="float"
           size="30"
           value="0.8"
           label="Threshold"
           help="Correlations under this threshold will NOT be included in the output file."/>
  </inputs>
  <outputs>
    <!-- Always produced: the sPLS figure and the correlation table. -->
    <data name="figure1" format="pdf" label="${tool.name} on ${on_string}: sPLS Figure"/>
    <data name="splsOut" format="tabular" label="${tool.name} on ${on_string}: sPLS Correlation Table"/>
    <!-- Produced only when the metabolite subsetting option is 'mmc' or 'both'. -->
    <data name="figure2" format="pdf" label="${tool.name} on ${on_string}: MMC Figure">
      <filter>metsOption['allMets'] in ('mmc', 'both')</filter>
    </data>
    <data name="mmcOut" format="tabular" label="${tool.name} on ${on_string}: MMC Output Table">
      <filter>metsOption['allMets'] in ('mmc', 'both')</filter>
    </data>
    <!-- Produced only when genes are reduced to PANA metagenes. -->
    <data name="panaOut" format="tabular" label="${tool.name} on ${on_string}: PANA Output Table">
      <filter>metsOption['genesOption']['allGenes'] == 'pana'</filter>
    </data>
  </outputs>
  <tests>
      <!-- sPLS_lzh: metabolites subset by class ("generic"), genes reduced with PANA. -->
      <!-- NOTE(review): metName is declared under useMetAnno="y" and path2names under
           usePANAAnno="yes", but neither selector is set in this test; confirm these
           two values are actually applied. -->
    <test>
      <param name="metDataset" value="metabolite_wide_dataset.tsv"/>
      <param name="metId" value="UniqueID"/>
      <param name="allMets" value="generic"/>
      <param name="metKeggAnno" value="metabolite_to_keggId_link.tsv"/>
      <param name="metName" value="MetName"/>
      <param name="geneDataset" value="gene_wide_dataset.tsv"/>
      <param name="geneId" value="UniqueID"/>
      <param name="allGenes" value="pana"/>
      <param name="geneKeggAnno" value="gene_to_keggId_link.tsv"/>
      <param name="geneKeggName" value="GeneSymbol"/>
      <param name="path2names" value="pathwayId2pathwayNames.tsv"/>
      <param name="path2genes" value="geneKeggId2pathwayId.tsv"/>
      <param name="keepX" value="10"/>
      <param name="threshold" value="0.8"/>
      <param name="cutoff" value="0.23" />
      <param name="facSel" value="single" />
      <output name="splsOut" file="spls_correlation.tsv" compare="diff" lines_diff="1000"/>
      <output name="figure1" file="spls_figure.pdf" compare="sim_size" delta="1000000"/>
      <output name="panaOut" file="spls_pana_outputTable.tsv" compare="diff" lines_diff="30000"/>
    </test>
      <!-- sPLS_met_generic_gene_all: metabolites subset by class, all genes kept. -->
      <!-- NOTE(review): cutoff and facSel are declared only under allGenes="pana", yet this
           test selects allGenes="all"; metName is again set without useMetAnno="y".
           Confirm these parameters are intended here. -->
    <test>
      <param name="metDataset" value="metabolite_wide_dataset.tsv"/>
      <param name="metId" value="UniqueID"/>
      <param name="allMets" value="generic"/>
      <param name="metKeggAnno" value="metabolite_to_keggId_link.tsv"/>
      <param name="metName" value="MetName"/>
      <param name="geneDataset" value="gene_wide_dataset.tsv"/>
      <param name="geneId" value="UniqueID"/>
      <param name="allGenes" value="all"/>
      <param name="keepX" value="10"/>
      <param name="threshold" value="0.8"/>
      <param name="cutoff" value="0.20" />
      <param name="facSel" value="single" />
      <output name="splsOut" file="spls_correlation_generic_all.tsv" compare="diff" lines_diff="1000"/>
      <output name="figure1" file="spls_figure_generic_all.pdf" compare="sim_size" delta="1000000"/>
    </test>
      <!-- sPLS_met_mmc_gene_pana: metabolites subset by MMC pattern, genes reduced with PANA;
           also checks the MMC figure/table and the PANA table. -->
      <!-- NOTE(review): metAnno/metName and path2names are set without useMetAnno="y" or
           usePANAAnno="yes"; path2names points to gene_kegg_pathway.tsv here but to
           pathwayId2pathwayNames.tsv in the first test. Confirm which file is intended. -->
    <test>
      <param name="metDataset" value="metabolite_wide_dataset.tsv"/>
      <param name="metId" value="UniqueID"/>
      <param name="allMets" value="mmc"/>
      <param name="metAnno" value="metabolite_annotation.tsv"/>
      <param name="metKeggAnno" value="metabolite_to_keggId_link.tsv"/>
      <param name="metName" value="MetName"/>
      <param name="design" value="gene_design.tsv"/>
      <param name="corr" value="pearson"/>
      <param name="sigmaLow" value="0.05"/>
      <param name="sigmaHigh" value="0.50"/>
      <param name="sigmaNum" value="451"/>
      <param name="geneDataset" value="gene_wide_dataset.tsv"/>
      <param name="geneId" value="UniqueID"/>
      <param name="allGenes" value="pana"/>
      <param name="geneKeggAnno" value="gene_to_keggId_link.tsv"/>
      <param name="geneKeggName" value="GeneSymbol"/>
      <param name="path2genes" value="geneKeggId2pathwayId.tsv"/>
      <param name="path2names" value="gene_kegg_pathway.tsv"/>
      <param name="keepX" value="5"/>
      <param name="threshold" value="0.8"/>
      <param name="cutoff" value="0.20" />
      <output name="splsOut" file="integration_mmc_pana_genes_sPLS_output.tsv" compare="diff" lines_diff="100000"/>
      <output name="figure1" file="integration_mmc_pana_genes_sPLS_fig.pdf" compare="sim_size" delta="10000000"/>
      <output name="figure2" file="integration_mmc_pana_genes_MMC_heatmap.pdf" compare="sim_size" delta="10000000"/>
      <output name="mmcOut" file="integration_mmc_pana_genes_MMC_output.tsv" compare="diff" lines_diff="100000"/>
      <output name="panaOut" file="integration_mmc_pana_genes_PANA_output.tsv" compare="diff" lines_diff="100000"/>
    </test>

  </tests>
  <help><![CDATA[

**Tool Description**

  NOTE: The parameters you select are data dependent.

  This tool carries out the integrated analysis of metabolite and gene expression data. Here, metabolite data are considered the dependent variable
  and genes the explanatory variable. The tool allows for several combinations of metabolite and gene models. A note of caution: a complete metabolite
  and gene expression dataset with no filtering will be challenging to interpret using this tool.

  We recommend that both gene expression and metabolite datasets be reduced to reflect a common biological hypothesis before running this tool. For example,
  metabolite data can be subset by class (i.e. using the 'Name_in_KEGG' column generated from the 'Link Name to KEGGID' tool). Users who want to include
  similarly behaving compounds without regard to identification or type of compound can estimate modules with the Modulated Modularity Clustering (MMC) tool
  (Stone and Ayroles 2009). Each module can be examined separately. Finally, metabolite data can be reduced by using both metabolite class and the MMC tool.
  Similarly, gene expression data can be reduced in scope by uploading and using a custom list of genes of interest or by using metagenes as implemented in PANA
  (Ponzoni et al. 2014).

  1) Classes of metabolites can be modeled as a function of metagenes.
  2) Classes of metabolites can be modeled as a function of a set of individual genes.
  3) Unbiased clusters of metabolites can be modeled as a function of metagenes.
  4) Unbiased clusters of metabolites can be modeled as a function of a set of individual genes.

  The tool executes a partial least squares regression with variable selection (sparse PLS, sPLS) as implemented in the 'mixOmics' package (Rohart F., Gautier, B, Singh,
  A and Lê Cao, K. A. mixOmics: an R package for ‘omics feature selection and multiple data integration. On bioRxiv). The mixOmics sPLS function is run in ‘classic mode’
  (http://mixomics.org/methods/spls/) with the number of components included in the model set to 2. In addition, the user selects the number of variables (genes) for
  each component to use in model construction.

  This tool needs at least 1 subset containing a minimum of 3 metabolites to run properly. If the user chooses to subset metabolites by class and no metabolite groups are
  identified, or only small metabolite groups with fewer than 3 members are found, the tool will stop and generate a warning message suggesting the MMC option instead.
  Similarly, if the user chooses to subset metabolites using MMC clusters and there are no clusters with at least 3 metabolites, the tool will stop and generate a warning message
  suggesting the 'by class' option instead.

--------------------------------------------------------------------------------

**INPUT**

**Please see the UserGuide for more details regarding tool inputs and options.**

**Metabolite Wide Dataset**
  A wide formatted dataset that contains measurements for each sample (samples are in columns):

    +-----------+---------+---------+---------+-----+
    | FeatureID | sample1 | sample2 | sample3 | ... |
    +===========+=========+=========+=========+=====+
    | met_one   | 10      | 20      | 10      | ... |
    +-----------+---------+---------+---------+-----+
    | met_two   | 5       | 22      | 30      | ... |
    +-----------+---------+---------+---------+-----+
    | met_three | 30      | 27      | 2       | ... |
    +-----------+---------+---------+---------+-----+
    | met_four  | 32      | 17      | 8       | ... |
    +-----------+---------+---------+---------+-----+
    | ...       | ...     | ...     | ...     | ... |
    +-----------+---------+---------+---------+-----+

**Unique Metabolite FeatureID**
  Name of the column in your Metabolite Wide Dataset that contains unique identifiers.

**Optional - Metabolite Annotation File**
  A wide format dataset containing metabolite descriptor information (e.g. metabolite names, m/z ratios). The user can choose a column in the Annotation File for labeling output files.

**Optional - Metabolite Names**
  Column name in the Metabolite Annotation File to use for labeling output files.

**Data reduction (subsetting) of Metabolite Data**
  1) By metabolite class - uses a pre-defined grouping of metabolites based on the 'Name_in_KEGG' column in the Metabolite to KEGGID Link File.
  2) By MMC pattern - runs the SECIMTools MMC tool and uses the tool-generated pattern blocks for subsetting.  Please see Stone and Ayroles (2009) for MMC options.
  3) By both metabolite class AND MMC pattern

**Metabolite to KEGGID Link File**
  This file MUST contain a column called 'Name_in_KEGG' and can be generated using the 'Link Name to KEGGID' tool.

**Gene Expression Wide Dataset**
  A wide formatted gene expression dataset that contains measurements for each sample:

    +------------+---------+---------+---------+-----+
    | FeatureID  | sample1 | sample2 | sample3 | ... |
    +============+=========+=========+=========+=====+
    | one        | 10      | 20      | 10      | ... |
    +------------+---------+---------+---------+-----+
    | two        | 5       | 22      | 30      | ... |
    +------------+---------+---------+---------+-----+
    | three      | 30      | 27      | 2       | ... |
    +------------+---------+---------+---------+-----+
    | four       | 32      | 17      | 8       | ... |
    +------------+---------+---------+---------+-----+
    | ...        | ...     | ...     | ...     | ... |
    +------------+---------+---------+---------+-----+

**Unique Gene FeatureID**
  Name of the column in your Gene Expression Wide Dataset that contains unique gene identifiers.

**Optional - Gene Annotation File**
  A wide format dataset containing gene annotation information (e.g. gene names). The user can choose a column in the Annotation File for labeling output files.

**Optional - Gene Names**
  Column name in the Gene Annotation File to use for labeling output files.

**Data reduction (subsetting) of Gene Expression Data**
  1) No subsetting - include all genes in the Gene Expression Wide Dataset
  2) Use a custom tsv file containing specific genes of interest - select a custom gene list from your history
  3) Include genes linked to each metabolite class through common KEGG pathways
  4) Use Metagenes from PANA (PAthway Network Analysis from gene expression data)

**Gene Expression KEGG Pathway File**
  Contains links between gene symbols and KEGG Pathways. Can be generated using the 'Add KEGG Pathway Information' tool.

**Metabolomic KEGG Pathway File**
  Contains links between metabolites and KEGG Pathways. Can be generated using the 'Add KEGG Pathway Information' tool.

**Gene to KEGGID Link File**
  Contains links between gene symbols and KEGGIDs.

**Gene Symbol**
  Name of the column in your Gene to KEGGID Link File that contains gene symbols.

**GeneKEGGID2PathwayID**
  Contains KEGG links between gene KEGGIDs and KEGG PathwayIDs. Can be generated using the 'Add KEGG Pathway Information' tool.

**Number of Genes to Keep in Model**
  Default: 10.  This is the number of genes to keep for each principal component in the sPLS analysis.

**Threshold**
  Default: 0.8.  Correlations less than this value will NOT be included in the output files.

--------------------------------------------------------------------------------

**OUTPUT**

**For metabolite reduction by metabolite class and all genes:**
  (1) A PDF containing a sPLS figure for each metabolite class.
  (2) A sPLS Correlation TSV file containing the correlations for each metabolite-gene pair and the subset (metabolite class) to which the pair belongs.

**For metabolite reduction by MMC, the following files will be output in addition to files (1) and (2) above:**
  (3) A MMC PDF Figure containing unsorted, sorted and sorted-smoothed heatmaps of the variance-covariance matrices
  (4) A MMC Output TSV file containing algorithm summaries in the following columns:

       - Unique metabolite featureID
       - Module:  Contains the module number for each feature calculated by the MMC tool.
       - Entry Index:  Contains the original order of the names of the rows of the input Metabolite Wide Dataset.
       - Degree:  Average of the absolute values of correlations for the given element in a block to other elements within that block.
       - Average Degree:  Average values of the degrees computed above across all elements within the given block.

**For subsetting genes by generating metagenes using PANA, the following files will be output in addition to files (1) and (2) above:**
  (5) A PANA Output TSV table containing associations between gene symbols and KEGG pathways.

  ]]>
  </help>
  <!-- BibTeX references for the methods and software this tool relies on.
       Multiple authors in a BibTeX "author" field must be separated by " and ";
       a comma-separated list is parsed as a single (malformed) name. -->
  <citations>
    <citation type="bibtex">@article{stone2009mmc,
    title={Modulated Modularity Clustering as an Exploratory Tool for Functional Genomic Inference},
    author={Stone, EA and Ayroles, JF},
    journal={PLOS Genetics},
    year={2009},
    }</citation>
    <citation type="bibtex">@article{ponzoni2014pathway,
    title={Pathway network inference from gene expression data},
    author={Ponzoni, Ignacio and Nueda, Mar{\'\i}a Jos{\'e} and Tarazona, Sonia and G{\"o}tz, Stefan and Montaner, David and Dussaut, Julieta Sol and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
    journal={BMC Systems Biology},
    volume={8},
    number={2},
    pages={S7},
    year={2014},
    publisher={BioMed Central}
    }</citation>
    <citation type="bibtex">@article{rohart2017mixomics,
    title={mixOmics: an R package for Omics feature selection and multiple data integration},
    author={Rohart, F and Gautier, B and Singh, A and L{\^e} Cao, K-A.},
    journal={R package},
    year={2017}
    }</citation>
    <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
    author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
    title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
    journal = {BMC Bioinformatics},
    year = {2018}
    }</citation>
    <citation type="bibtex">@article{Mor2021GaitGM,
    title={GAIT-GM integrative cross-omics analyses reveal cholinergic defects in a C. elegans model of Parkinson's disease},
    author={Mor, DE and Huertas, F and Morse, AM and Kaletsky, R and Murphy, CT and Kalia, V and Miller, GW and Moskalenko, O and Conesa, A and McIntyre, LM},
    journal={BMC Genomics},
    year={submitted},
    }</citation>
  </citations>
</tool>
author	malex
date	Thu, 29 Jul 2021 20:48:10 +0000
parents	ec9ee8edb84d
children