comparison sPLS.xml @ 1:ec9ee8edb84d draft

Initial upload of 21.6.10 release.
author malex
date Fri, 18 Jun 2021 20:23:19 +0000
parents
children 2c218a253d56
comparison
equal deleted inserted replaced
0:864fc6430432 1:ec9ee8edb84d
1 <tool id="secimtools_spls" name="Metabolite - Gene Integration" version="@WRAPPER_VERSION@">
2 <description></description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <stdio> <!-- Exit status 2 is reported as a fatal error with the description below. -->
8 <exit_code level="fatal" range="2" description="Not enough metabolites for the Analysis."/>
9 </stdio>
10 <!-- Builds the sPLS.py command line. Dataset paths, output paths and free-text column names are single-quoted so values containing whitespace stay one shell argument. --><command detect_errors="exit_code"><![CDATA[
11 sPLS.py
12 -g='$metsOption.geneDataset'
13 -gid='$metsOption.geneId'
14 #if $metsOption.useGeneAnnoCond.useGeneAnno == "y":
15 -ga='$metsOption.useGeneAnnoCond.geneAnno'
16 -gn='$metsOption.useGeneAnnoCond.geneName'
17 #end if
18 -go=$metsOption.genesOption.allGenes
19 #if $metsOption.genesOption.allGenes == "geneList":
20 -gl='$metsOption.genesOption.relatedGeneList'
21 #end if
22 #if $metsOption.genesOption.allGenes == "path":
23 -gkp='$metsOption.genesOption.geneKeggPath'
24 -mkp='$metsOption.genesOption.metKeggPath'
25 #end if
26 #if $metsOption.genesOption.allGenes == "pana":
27 -gka='$metsOption.genesOption.geneKeggAnno'
28 -gkn='$metsOption.genesOption.geneKeggName'
29 -p2g='$metsOption.genesOption.path2genes'
30 -cu=$metsOption.genesOption.cutoff
31 -f=$metsOption.genesOption.facSel
32 #if $metsOption.genesOption.PANAAnno.usePANAAnno == "yes":
33 -p2n='$metsOption.genesOption.PANAAnno.path2names'
34 #end if
35 -o3='$panaOut'
36 #end if
37 -k=$keepX
38 -t=$threshold
39 -m='$metDataset'
40 -mid='$metId'
41 #if $useMetAnnoCond.useMetAnno == "y":
42 -ma='$useMetAnnoCond.metAnno'
43 -mn='$useMetAnnoCond.metName'
44 #end if
45 -mo=$metsOption.allMets
46 -mka='$metsOption.metKeggAnno'
47 #if $metsOption.allMets == "mmc":
48 -d='$metsOption.design'
49 -c=$metsOption.corr
50 -sl=$metsOption.sigmaLow
51 -sh=$metsOption.sigmaHigh
52 -sn=$metsOption.sigmaNum
53 -f2='$figure2'
54 -o2='$mmcOut'
55 #end if
56 #if $metsOption.allMets == "both":
57 -d='$metsOption.design'
58 -c=$metsOption.corr
59 -sl=$metsOption.sigmaLow
60 -sh=$metsOption.sigmaHigh
61 -sn=$metsOption.sigmaNum
62 -f2='$figure2'
63 -o2='$mmcOut'
64 #end if
65 -f1='$figure1'
66 -o1='$splsOut'
67 ]]></command>
68 <inputs>
69 <param name="metDataset" type="data" format="tabular" label="Metabolite Wide Dataset for Integration" help="Select the Metabolite Wide Dataset from your history"/>
70 <param name="metId" type="text" size="30" value="" label="Unique Metabolite FeatureID" help="Name of the column in your Metabolite Wide Dataset that contains unique identifiers."/>
71 <conditional name="useMetAnnoCond"> <!-- Optional metabolite annotation inputs; metAnno/metName exist only when useMetAnno is "y". -->
72 <param name="useMetAnno" type="select" label="Use Metabolite Annotation File?" help="You can choose to input a file containing metabolite annotation information (e.g. metabolite names, identifiers, etc.) to use for labeling the output files.">
73 <option value="n">No</option>
74 <option value="y">Yes</option>
75 </param>
76 <when value="y">
77 <param name="metAnno" type="data" format="tabular" label="Metabolomic Annotation File" help="Select the Metabolomic Annotation File from your history."/>
78 <param name="metName" type="text" size="30" value="" label="Metabolite Names" help="Name of the column in your Metabolomic Annotation Dataset that contains metabolite annotation information."/>
79 </when>
80 </conditional>
81 <conditional name="metsOption">
82 <param name="allMets" type="select" display="radio" label="Select which option to use for subsetting the Metabolite Wide Dataset" help="Select one of the options above."> <!-- Selector for the metsOption conditional; each value has a matching when block. -->
83 <option value="generic">By metabolite class -- Uses the 'Name_in_KEGG' column generated from the 'Link Name to KEGGID' tool to subset.</option>
84 <option value="mmc">By MMC pattern -- Runs the SECIMTools MMC tool and uses the tool-generated pattern blocks for subsetting.</option>
85 <option value="both">By both metabolite class AND by MMC pattern.</option>
86 </param>
87 <when value="generic"> <!-- Subset metabolites by class: KEGG link file plus the gene inputs; this is the only branch that offers the "path" gene option. -->
88 <param name="metKeggAnno" type="data" format="tabular" label="'Metabolite to KEGGID Link' File." help="Select the 'Metabolite to KEGGID Link' File from your history. This file can be generated using the 'Link Name to KEGGID' tool."/>
89 <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select the Gene Expression Wide Dataset from your history"/>
90 <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/>
91 <conditional name="useGeneAnnoCond">
92 <param name="useGeneAnno" type="select" label="Use a Gene Annotation File?" help="You can choose to input a file containing gene annotation information (e.g. gene names, identifiers, etc.) for labeling the output files.">
93 <option value="n">No</option>
94 <option value="y">Yes</option>
95 </param>
96 <when value="y">
97 <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/>
98 <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotation information."/>
99 </when>
100 </conditional>
101 <conditional name="genesOption">
102 <param name="allGenes" type="select" display="radio" label="Select which option to use for subsetting the Gene Expression Wide Dataset" help="Select one of the options above.">
103 <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option>
104 <option value="geneList">Use a custom tsv file containing specific genes of interest.</option>
105 <option value="path">Include genes linked to each metabolite class through common KEGG pathways.</option>
106 <option value="pana">Use Metagenes from PANA (PAthway Network Analysis from gene expression data).</option>
107 </param>
108 <when value="geneList">
109 <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="This custom list must consist of a single column of Gene Symbols."/>
110 </when>
111 <when value="path">
112 <param name="geneKeggPath" type="data" format="tabular" label="Gene Expression KEGG Pathway File" help="Select the Gene Expression KEGG Pathway File from your history. This file can be generated using the 'Add KEGG Pathway Information' tool."/>
113 <param name="metKeggPath" type="data" format="tabular" label="Metabolomic KEGG Pathway File" help="Select the Metabolomic KEGG Pathway File from your history. This file can be generated using the 'Add KEGG Pathway Information' tool."/>
114 </when>
115 <when value="pana">
116 <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select the Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
117 <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/>
118 <param name="path2genes" type="data" format="tabular" label="GeneKEGGID2PathwayID" help="Select the File from your history containing the list of ALL Gene KEGGIDs to Pathway IDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
119 <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%.">
120 <option value="single">single% -- Percent of variability for a given principal component.</option>
121 <option value="accum">%accum -- Percent of accumulated variability.</option>
122 <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option>
123 <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option>
124 </param>
125 <param name="cutoff" type="float" value="0.20" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.20"/>
126 <conditional name="PANAAnno">
127 <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathwayIDs.">
128 <option value="no">No</option>
129 <option value="yes">Yes</option>
130 </param>
131 <when value="yes">
132 <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
133 </when>
134 </conditional>
135 </when>
136 </conditional>
137 </when>
138 <when value="mmc"> <!-- Subset metabolites by MMC pattern: design file, sigma search settings and correlation method, plus the gene inputs. -->
139 <param name="metKeggAnno" type="data" format="tabular" label="Metabolite to KEGGID Link File to Input into MMC" help="Select the Metabolite KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
140 <param name="design" type="data" format="tabular" label="Design Dataset" help="Select the Design file to use with your Metabolite KEGGID Link File. This file can be generated using the 'Create: Design, Wide, and Annotation datasets' tool. Note that you need a column called 'sampleID' that contains the names of your samples."/>
141 <param name="sigmaLow" type="float" size="6" value="0.05" label="Lower sigma bound" help="Default: 0.05." />
142 <param name="sigmaHigh" type="float" size="6" value="0.50" label="Upper sigma bound" help="Default: 0.50." />
143 <param name="sigmaNum" type="integer" size="6" value="451" label="Number of Sigma values" help="Number of values of sigma to search. Default: 451." />
144 <param name="corr" type="select" value="pearson" label="Correlation method" help="Select correlation method for preliminary correlation before clustering. Default: Pearson." >
145 <option value="pearson" selected="true">Pearson</option>
146 <option value="kendall">Kendall</option>
147 <option value="spearman">Spearman</option>
148 </param>
149 <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select the Gene Expression Wide Dataset from your history."/>
150 <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/>
151 <conditional name="useGeneAnnoCond">
152 <param name="useGeneAnno" type="select" label="Use Gene Annotation File?" help="You can choose to input a file containing gene annotations (e.g. gene names, identifiers, etc.) for labeling output files.">
153 <option value="n">No</option>
154 <option value="y">Yes</option>
155 </param>
156 <when value="y">
157 <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/>
158 <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotations."/>
159 </when>
160 </conditional>
161 <conditional name="genesOption">
162 <param name="allGenes" type="select" display="radio" label="Select which option to use for subsetting the Gene Dataset" help="Select one of the options above.">
163 <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option>
164 <option value="geneList">Use a custom tsv file containing specific genes of interest.</option>
165 <option value="pana">Use Metagenes from PANA (PAthway Network Analysis from gene expression data).</option>
166 </param>
167 <when value="geneList">
168 <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="This custom list must contain Gene Symbol IDs and must be a single column."/>
169 </when>
170 <when value="pana">
171 <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
172 <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/>
173 <param name="path2genes" type="data" format="tabular" label="GeneKEGGID2PathwayID" help="Select the File from your history containing the list of ALL Gene KEGGIDs to Pathway IDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
174 <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%.">
175 <option value="single">single% -- Percent of variability for a given principal component.</option>
176 <option value="accum">%accum -- Percent of accumulated variability.</option>
177 <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option>
178 <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option>
179 </param>
180 <param name="cutoff" type="float" value="0.23" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.23"/>
181 <conditional name="PANAAnno">
182 <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathway IDs.">
183 <option value="no">No</option>
184 <option value="yes">Yes</option>
185 </param>
186 <when value="yes">
187 <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
188 </when>
189 </conditional>
190 </when>
191 </conditional>
192 </when>
193 <when value="both"> <!-- Subset metabolites by class AND MMC pattern: same MMC settings as the "mmc" branch, plus the gene inputs. -->
194 <param name="metKeggAnno" type="data" format="tabular" label="Metabolite to KEGGID Link File" help="Select the Metabolite KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
195 <param name="design" type="data" format="tabular" label="Design File" help="Select the Design file to use with your Metabolite KEGGID Link File. This file can be generated using the 'Create: Design, Wide, and Annotation datasets' tool. Note that you need a 'sampleID' column."/>
196 <param name="sigmaLow" type="float" size="6" value="0.05" label="Lower sigma bound" help="Default: 0.05." />
197 <param name="sigmaHigh" type="float" size="6" value="0.50" label="Upper sigma bound" help="Default: 0.50." />
198 <param name="sigmaNum" type="integer" size="6" value="451" label="Number of Sigma values" help="Number of values of sigma to search. Default: 451." />
199 <param name="corr" type="select" value="pearson" label="Correlation method" help="Select correlation method for preliminary correlation prior to clustering. Default: Pearson." >
200 <option value="pearson" selected="true">Pearson</option>
201 <option value="kendall">Kendall</option>
202 <option value="spearman">Spearman</option>
203 </param>
204 <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select Gene Expression Wide Dataset from your history"/>
205 <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/>
206 <conditional name="useGeneAnnoCond">
207 <param name="useGeneAnno" type="select" label="Use a Gene Annotation File?" help="You can choose to input a file containing gene annotations (e.g. gene names, identifiers, etc.) to use for labeling output files.">
208 <option value="n">No</option>
209 <option value="y">Yes</option>
210 </param>
211 <when value="y">
212 <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/>
213 <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotations."/>
214 </when>
215 </conditional>
216 <conditional name="genesOption">
217 <param name="allGenes" type="select" display="radio" label="Gene Dataset Subsetting Option" help="Select one of the following.">
218 <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option>
219 <option value="geneList">Upload a custom list containing specific genes of interest.</option>
220 <option value="pana">Use Metagenes (PANA Approach).</option>
221 </param>
222 <when value="geneList">
223 <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="The list must consist of a single column of Gene Symbols."/>
224 </when>
225 <when value="pana">
226 <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
227 <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/>
228 <param name="path2genes" type="data" format="tabular" label="Gene Expression KEGG Pathway File" help="Select the File from your history that contains the list of ALL Gene KEGGIDs to PathwayIDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
229 <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%.">
230 <option value="single">single% -- Percent of variability for a given principal component.</option>
231 <option value="accum">%accum -- Percent of accumulated variability.</option>
232 <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option>
233 <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option>
234 </param>
235 <param name="cutoff" type="float" value="0.23" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.23"/>
236 <conditional name="PANAAnno">
237 <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathwayIDs.">
238 <option value="no">No</option>
239 <option value="yes">Yes</option>
240 </param>
241 <when value="yes">
242 <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
243 </when>
244 </conditional>
245 </when>
246 </conditional>
247 </when>
248 </conditional>
249 <param name="keepX" type="integer" size="30" value="10" label="Number of Genes to Keep in the Model" help="Enter the number of genes to keep for each component in the sPLS analysis."/> <!-- passed to sPLS.py as -k -->
250 <param name="threshold" type="float" size="30" value="0.8" label="Threshold" help="Correlations under this threshold will NOT be included in the output file."/> <!-- passed to sPLS.py as -t -->
251 </inputs>
252 <outputs> <!-- figure1 and splsOut are unconditional; the MMC and PANA outputs are filtered on the chosen subsetting options. -->
253 <data name="figure1" format="pdf" label="${tool.name} on ${on_string}: sPLS Figure"/>
254 <data name="splsOut" format="tabular" label="${tool.name} on ${on_string}: sPLS Correlation Table"/>
255 <data name="figure2" format="pdf" label="${tool.name} on ${on_string}: MMC Figure">
256 <filter>metsOption['allMets'] in ('mmc', 'both')</filter>
257 </data>
258 <data name="mmcOut" format="tabular" label="${tool.name} on ${on_string}: MMC Output Table">
259 <filter>metsOption['allMets'] in ('mmc', 'both')</filter>
260 </data>
261 <data name="panaOut" format="tabular" label="${tool.name} on ${on_string}: PANA Output Table">
262 <filter>metsOption['genesOption']['allGenes'] == 'pana'</filter>
263 </data>
264 </outputs>
265 <tests>
266 <test> <!-- Metabolites subset by class, all genes, no annotation files. -->
267 <param name="metDataset" value="metabolite_wide_dataset_01fhl.tsv"/>
268 <param name="metId" value="UniqueID"/>
269 <param name="allMets" value="generic"/>
270 <param name="metKeggAnno" value="metabolite_to_keggId_link_01fhl.tsv"/>
271 <param name="useMetAnno" value="n"/> <!-- no metAnno file is supplied, so the annotation branch stays off -->
272 <param name="geneDataset" value="gene_wide_dataset_01fhl.tsv"/>
273 <param name="geneId" value="UniqueID"/>
274 <param name="allGenes" value="all"/>
275 <param name="keepX" value="10"/>
276 <param name="threshold" value="0.8"/>
277 <output name="splsOut" file="spls_correlation_file_01fhl.tsv"/>
278 <output name="figure1" file="spls_figure_01fhl.pdf"/>
279 </test>
280 </tests>
281 <help><![CDATA[
282
283 **Tool Description**
284
285 NOTE: The parameters you select are data dependent.
286
287 This tool carries out the integrated analysis of metabolite and gene expression data. Here, metabolite data are considered the dependent (Y) variable
288 and genes the explanatory variable. The tool allows for several combinations of metabolite and gene models. A note of caution: a complete metabolite
289 and gene expression dataset with no filtering will be challenging to interpret using this tool.
290
291 We recommend that both gene expression and metabolite datasets be reduced to reflect a common biological hypothesis before running this tool. For example,
292 metabolite data can be subset by class (i.e. using the 'Name_in_KEGG' column generated from the 'Link Name to KEGGID' tool). Users who want to include
293 similarly behaving compounds without regard to identification or type of compound can estimate modules with the Modulated Modularity Clustering (MMC) tool
294 (Stone and Ayroles 2009). Each module can be examined separately. Finally, metabolite data can be reduced by using both metabolite class and the MMC tool.
295 Similarly, gene expression data can be reduced in scope by uploading and using a custom list of genes of interest or by using metagenes as implemented in PANA
296 (Ponzoni et al. 2014).
297
298 1) Classes of metabolites can be modeled as a function of metagenes.
299 2) Classes of metabolites can be modeled as a function of a set of individual genes.
300 3) Unbiased clusters of metabolites can be modeled as a function of metagenes
301 4) Unbiased clusters of metabolites can be modeled as a function of a set of individual genes.
302
303 The tool executes a partial least squares regression with variable selection (sparse PLS, sPLS) as implemented in the 'mixOmics' package (Rohart F., Gautier, B, Singh,
304 A and Lê Cao, K. A. mixOmics: an R package for ‘omics feature selection and multiple data integration. On bioRxiv). The mixomics sPLS function is run in ‘classic mode’
305 (http://mixomics.org/methods/spls/) with the number of components included in the model set to 2. In addition, the user selects the number of variables (genes) for
306 each component to use in model construction.
307
308 This tool needs at least 1 subset with a minimum number of 3 metabolites to run properly. If the user selects subset metabolites by class and no metabolite groups are
309 identified or small metabolite groups with less than 3 members are found, the tool will stop and a warning message will be generated to try the MMC option instead.
310 Similarly, if the user selects subset metabolites using MMC clusters and there are no clusters with at least 3 metabolites, the tool will stop and a warning message
311 will be generated to try the 'by class' option instead.
312
313 --------------------------------------------------------------------------------
314
315 **INPUT**
316
317 **Please see the UserGuide for more details regarding tool inputs and options.**
318
319 **Metabolite Wide Dataset**
320 A wide formatted dataset that contains measurements for each sample (samples are in columns):
321
322 +-----------+---------+---------+---------+-----+
323 | FeatureID | sample1 | sample2 | sample3 | ... |
324 +===========+=========+=========+=========+=====+
325 | met_one | 10 | 20 | 10 | ... |
326 +-----------+---------+---------+---------+-----+
327 | met_two | 5 | 22 | 30 | ... |
328 +-----------+---------+---------+---------+-----+
329 | met_three | 30 | 27 | 2 | ... |
330 +-----------+---------+---------+---------+-----+
331 | met_four | 32 | 17 | 8 | ... |
332 +-----------+---------+---------+---------+-----+
333 | ... | ... | ... | ... | ... |
334 +-----------+---------+---------+---------+-----+
335
336 **Unique Metabolite FeatureID**
337 Name of the column in your Metabolite Wide Dataset that contains unique identifiers.
338
339 **Optional - Metabolite Annotation File**
340 A wide format dataset containing metabolite descriptor information (e.g. metabolite names, m/z ratios). The user can choose a column in the Annotation File for labeling output files.
341
342 **Optional - Metabolite Names**
343 Column name in the Metabolite Annotation File to use for labeling output files.
344
345 **Data reduction (subsetting) of Metabolite Data**
346 1) By metabolite class - uses a predefined grouping of metabolites based on the 'Name_in_KEGG' column in the Metabolite to KEGGID Link File.
347 2) By MMC pattern - runs the SECIMTools MMC tool and uses the tool-generated pattern blocks for subsetting. Please see Stone and Ayroles (2009) for MMC options.
348 3) By both metabolite class AND MMC pattern
349
350 **Metabolite to KEGGID Link File**
351 This file MUST contain a column called 'Name_in_KEGG' and can be generated using the 'Link Name to KEGGID' tool.
352
353 **Gene Expression Wide Dataset**
354 A wide formatted gene expression dataset that contains measurements for each sample:
355
356 +------------+---------+---------+---------+-----+
357 | FeatureID | sample1 | sample2 | sample3 | ... |
358 +============+=========+=========+=========+=====+
359 | one | 10 | 20 | 10 | ... |
360 +------------+---------+---------+---------+-----+
361 | two | 5 | 22 | 30 | ... |
362 +------------+---------+---------+---------+-----+
363 | three | 30 | 27 | 2 | ... |
364 +------------+---------+---------+---------+-----+
365 | four | 32 | 17 | 8 | ... |
366 +------------+---------+---------+---------+-----+
367 | ... | ... | ... | ... | ... |
368 +------------+---------+---------+---------+-----+
369
370 **Unique Gene FeatureID**
371 Name of the column in your Gene Expression Wide Dataset that contains unique gene identifiers.
372
373 **Optional - Gene Annotation File**
374 A wide format dataset containing gene annotation information (e.g. gene names). The user can choose a column in the Annotation File for labeling output files.
375
376 **Optional - Gene Names**
377 Column name in the Gene Annotation File to use for labeling output files.
378
379 **Data reduction (subsetting) of Gene Expression Data**
380 1) No subsetting - include all genes in the Gene Expression Wide Dataset
381 2) Use a custom tsv file containing specific genes of interest - select a custom gene list from your history
382 3) Include genes linked to each metabolite class through common KEGG pathways
383 4) Use Metagenes from PANA (PAthway Network Analysis from gene expression data)
384
385 **Gene Expression KEGG Pathway File**
386 Contains links between gene symbols and KEGG Pathways. Can be generated using the 'Add KEGG Pathway Information' tool
387
388 **Metabolomic KEGG Pathway File**
389 Contains links between metabolites and KEGG Pathways. Can be generated using the 'Add KEGG Pathway Information' tool
390
391 **Gene to KEGGID Link File**
392 Contains links between gene symbols and KEGGIDs.
393
394 **Gene Symbol**
395 Name of the column in your Gene to KEGGID Link File that contains gene symbols
396
397 **GeneKEGGID2PathwayID**
398 Contains KEGG links between gene KEGGIDs and KEGG PathwayIDs. Can be generated from the 'Add KEGG Pathway Information' tool
399
400 **Number of Genes to Keep in Model**
401 default: 10. This is the number of genes to keep for each principal component in the sPLS analysis.
402
403 **Threshold**
404 default: 0.8. Correlations less than this value will NOT be included in the output files.
405
406 --------------------------------------------------------------------------------
407
408 **OUTPUT**
409
410 **For metabolite reduction by metabolite class and all genes:**
411 (1) A PDF containing a sPLS figure for each metabolite class.
412 (2) A sPLS Correlation TSV file containing the correlations for each metabolite-gene pair and the subset (metabolite class) to which the pair belongs.
413
414 **For metabolite reduction by MMC the following files will be output in addition to files (1) and (2) above**
415 (3) A MMC PDF Figure containing unsorted, sorted and sorted-smoothed heatmaps of the variance-covariance matrices.
416 (4) A MMC Output TSV file containing algorithm summaries in the following columns:
417
418 (1) Unique metabolite featureID
419 (2) Module: Contains the module number for each feature calculated by the MMC tool.
420 (3) Entry Index: Contains the original order of the names of the rows of the input Metabolite Wide Dataset.
421 (4) Degree: Average of the absolute values of correlations for the given element in a block to other elements within that block.
422 (5) Average Degree: Average values of the degrees computed above across all elements within the given block.
423
424 **For subsetting genes by generating metagenes using PANA the following files will be output in addition to files (1) and (2) above**
425 (5) A PANA Output TSV table containing associations between gene symbols and KEGG pathways.
426
427 ]]>
428 </help>
429 <citations> <!-- BibTeX entries; author names inside a single author field must be separated by " and ". -->
430 <citation type="bibtex">@article{ponzoni2014pathway,
431 title={Pathway network inference from gene expression data},
432 author={Ponzoni, Ignacio and Nueda, Mar{\'\i}a Jos{\'e} and Tarazona, Sonia and G{\"o}tz, Stefan and Montaner, David and Dussaut, Julieta Sol and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
433 journal={BMC systems biology},
434 volume={8},
435 number={2},
436 pages={S7},
437 year={2014},
438 publisher={BioMed Central}
439 }</citation>
440 <citation type="bibtex">@article{dejean2013mixomics,
441 title={mixOmics: Omics data integration project},
442 author={Dejean, Sebastien and Gonzalez, Ignacio and L{\^e} Cao, Kim-Anh and Monget, Pierre and Coquery, J and Yao, F and Liquet, B and Rohart, F},
443 journal={R package},
444 year={2013}
445 }</citation>
446 <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
447 author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
448 title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
449 journal = {BMC Bioinformatics},
450 year = {in press}
451 }</citation>
452 <citation type="bibtex">
453 @article{garcia2010paintomics,
454 title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
455 author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
456 journal={Bioinformatics},
457 volume={27},
458 number={1},
459 pages={137--139},
460 year={2010},
461 publisher={Oxford University Press}
462 }</citation>
463 </citations>
464 </tool>