1
|
1 <tool id="secimtools_spls" name="Metabolite - Gene Integration" version="@WRAPPER_VERSION@">
|
|
<!-- No short description is set; only the tool name is displayed. -->
<description/>
<!-- Shared definitions come from macros.xml. The "requirements" macro expanded
     below is presumably defined in that file (not visible here). -->
<macros>
    <import>macros.xml</import>
</macros>
<expand macro="requirements"/>
|
|
<!-- Maps exit status 2 of the script to a fatal job error with a readable message.
     NOTE(review): the command element also sets detect_errors="exit_code"; confirm
     that Galaxy still consults this block when that attribute is present. -->
<stdio>
    <exit_code
        range="2"
        level="fatal"
        description="Not enough metabolites for the Analysis."/>
</stdio>
|
|
<command detect_errors="exit_code"><![CDATA[
## Builds the sPLS.py command line from the tool form.
## Every substituted value is single-quoted so that dataset paths and free-text
## column names (which may contain spaces) each reach the script as one argument.
sPLS.py

## Gene expression inputs; these parameters live inside the selected metsOption branch.
-g='$metsOption.geneDataset'
-gid='$metsOption.geneId'
#if $metsOption.useGeneAnnoCond.useGeneAnno == "y":
    -ga='$metsOption.useGeneAnnoCond.geneAnno'
    -gn='$metsOption.useGeneAnnoCond.geneName'
#end if

## Gene subsetting mode, followed by the extra inputs that each mode requires.
-go='$metsOption.genesOption.allGenes'
#if $metsOption.genesOption.allGenes == "geneList":
    -gl='$metsOption.genesOption.relatedGeneList'
#end if
#if $metsOption.genesOption.allGenes == "path":
    -gkp='$metsOption.genesOption.geneKeggPath'
    -mkp='$metsOption.genesOption.metKeggPath'
#end if
#if $metsOption.genesOption.allGenes == "pana":
    -gka='$metsOption.genesOption.geneKeggAnno'
    -gkn='$metsOption.genesOption.geneKeggName'
    -p2g='$metsOption.genesOption.path2genes'
    -cu='$metsOption.genesOption.cutoff'
    -f='$metsOption.genesOption.facSel'
    #if $metsOption.genesOption.PANAAnno.usePANAAnno == "yes":
        -p2n='$metsOption.genesOption.PANAAnno.path2names'
    #end if
    -o3='$panaOut'
#end if

## Model settings shared by every mode.
-k='$keepX'
-t='$threshold'

## Metabolite inputs.
-m='$metDataset'
-mid='$metId'
#if $useMetAnnoCond.useMetAnno == "y":
    -ma='$useMetAnnoCond.metAnno'
    -mn='$useMetAnnoCond.metName'
#end if

## Metabolite subsetting mode; the KEGG link file is passed in every mode.
-mo='$metsOption.allMets'
-mka='$metsOption.metKeggAnno'
## The "mmc" and "both" modes take exactly the same MMC arguments and outputs,
## so they share a single branch.
#if str($metsOption.allMets) in ("mmc", "both"):
    -d='$metsOption.design'
    -c='$metsOption.corr'
    -sl='$metsOption.sigmaLow'
    -sh='$metsOption.sigmaHigh'
    -sn='$metsOption.sigmaNum'
    -f2='$figure2'
    -o2='$mmcOut'
#end if

## Outputs that are always produced.
-f1='$figure1'
-o1='$splsOut'
]]></command>
|
|
68 <inputs>
|
|
<!-- Metabolite wide dataset and the name of its unique-identifier column. -->
<param
    name="metDataset"
    type="data"
    format="tabular"
    label="Metabolite Wide Dataset for Integration"
    help="Select the Metabolite Wide Dataset from your history"/>
<param
    name="metId"
    type="text"
    size="30"
    value=""
    label="Unique Metabolite FeatureID"
    help="Name of the column in your Metabolite Wide Dataset that contains unique identifiers."/>
|
|
<!-- Optional metabolite annotation file. The command template only passes
     the annotation arguments when "y" is selected. -->
<conditional name="useMetAnnoCond">
    <param name="useMetAnno" type="select" label="Use Metabolite Annotation File?" help="You can choose to input a file containing metabolite annotation information (e.g. metabolite names, identifiers, etc.) to use for labeling the output files.">
        <option value="n">No</option>
        <option value="y">Yes</option>
    </param>
    <!-- Explicit empty branch so that every select option has a matching when. -->
    <when value="n"/>
    <when value="y">
        <param name="metAnno" type="data" format="tabular" label="Metabolomic Annotation File" help="Select the Metabolomic Annotation File from your history."/>
        <param name="metName" type="text" size="30" value="" label="Metabolite Names" help="Name of the column in your Metabolomic Annotation Dataset that contains metabolite annotation information."/>
    </when>
</conditional>
|
|
81 <conditional name="metsOption">
|
|
<!-- Selector for the metabolite subsetting strategy; its value picks the active
     branch of the enclosing conditional and is also passed to the script. -->
<param name="allMets" type="select" display="radio" label="Select which option to use for subsetting the Metabolite Wide Dataset" help="Select one of the options above.">
    <option value="generic">By metabolite class -- Uses the 'Name_in_KEGG' column generated from the 'Link Name to KEGGID' tool to subset.</option>
    <option value="mmc">By MMC pattern -- Runs the SECIMTools MMC tool and uses the tool-generated pattern blocks for subsetting.</option>
    <option value="both">By both metabolite class AND by MMC pattern.</option>
</param>
|
|
<!-- Branch "generic": metabolites are subset by class. -->
<when value="generic">
    <param name="metKeggAnno" type="data" format="tabular" label="'Metabolite to KEGGID Link' File." help="Select the 'Metabolite to KEGGID Link' File from your history. This file can be generated using the 'Link Name to KEGGID' tool."/>
    <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select the Gene Expression Wide Dataset from your history"/>
    <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/>
    <!-- Optional gene annotation file. -->
    <conditional name="useGeneAnnoCond">
        <param name="useGeneAnno" type="select" label="Use a Gene Annotation File?" help="You can choose to input a file containing gene annotation information (e.g. gene names, identifiers, etc.) for labeling the output files.">
            <option value="n">No</option>
            <option value="y">Yes</option>
        </param>
        <!-- Explicit empty branch so that every select option has a matching when. -->
        <when value="n"/>
        <when value="y">
            <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/>
            <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotation information."/>
        </when>
    </conditional>
    <!-- Gene subsetting strategy. This is the only branch that offers the "path" option. -->
    <conditional name="genesOption">
        <param name="allGenes" type="select" display="radio" label="Select which option to use for subsetting the Gene Expression Wide Dataset" help="Select one of the options above.">
            <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option>
            <option value="geneList">Use a custom tsv file containing specific genes of interest.</option>
            <option value="path">Include genes linked to each metabolite class through common KEGG pathways.</option>
            <option value="pana">Use Metagenes from PANA (PAthway Network Analysis from gene expression data).</option>
        </param>
        <!-- "all" needs no further inputs. -->
        <when value="all"/>
        <when value="geneList">
            <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="This custom list must consist of a single column of Gene Symbols."/>
        </when>
        <when value="path">
            <param name="geneKeggPath" type="data" format="tabular" label="Gene Expression KEGG Pathway File" help="Select the Gene Expression KEGG Pathway File from your history. This file can be generated using the 'Add KEGG Pathway Information' tool."/>
            <param name="metKeggPath" type="data" format="tabular" label="Metabolomic KEGG Pathway File" help="Select the Metabolomic KEGG Pathway File from your history. This file can be generated using the 'Add KEGG Pathway Information' tool."/>
        </when>
        <when value="pana">
            <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select the Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
            <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/>
            <param name="path2genes" type="data" format="tabular" label="GeneKEGGID2PathwayID" help="Select the File from your history containing the list of ALL Gene KEGGIDs to Pathway IDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
            <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%.">
                <option value="single">single% -- Percent of variability for a given principal component.</option>
                <option value="accum">%accum -- Percent of accumulated variability.</option>
                <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option>
                <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option>
            </param>
            <!-- NOTE(review): the "mmc" and "both" branches default this cut-off to 0.23; confirm which default is intended. -->
            <param name="cutoff" type="float" value="0.20" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.20"/>
            <conditional name="PANAAnno">
                <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathwayIDs.">
                    <option value="no">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
                </when>
            </conditional>
        </when>
    </conditional>
</when>
|
|
<!-- Branch "mmc": metabolites are subset by MMC pattern blocks, so the MMC
     settings (design file, sigma grid, correlation method) are collected here. -->
<when value="mmc">
    <param name="metKeggAnno" type="data" format="tabular" label="Metabolite to KEGGID Link File to Input into MMC" help="Select the Metabolite KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
    <param name="design" type="data" format="tabular" label="Design Dataset" help="Select the Design file to use with your Metabolite KEGGID Link File. This file can be generated using the 'Create: Design, Wide, and Annotation datasets' tool. Note that you need a column called 'sampleID' that contains the names of your samples."/>
    <param name="sigmaLow" type="float" size="6" value="0.05" label="Lower sigma bound" help="Default: 0.05."/>
    <param name="sigmaHigh" type="float" size="6" value="0.50" label="Upper sigma bound" help="Default: 0.50."/>
    <param name="sigmaNum" type="integer" size="6" value="451" label="Number of Sigma values" help="Number of values of sigma to search. Default: 451."/>
    <!-- Only Pearson is flagged as selected, matching the default stated in the help text. -->
    <param name="corr" type="select" label="Correlation method" help="Select correlation method for preliminary correlation before clustering. Default: Pearson.">
        <option value="pearson" selected="true">Pearson</option>
        <option value="kendall">Kendall</option>
        <option value="spearman">Spearman</option>
    </param>
    <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select the Gene Expression Wide Dataset from your history."/>
    <!-- The identifier column is required even when no annotation file is supplied,
         so the help refers to the wide dataset. -->
    <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/>
    <!-- Optional gene annotation file. -->
    <conditional name="useGeneAnnoCond">
        <param name="useGeneAnno" type="select" label="Use Gene Annotation File?" help="You can choose to input a file containing gene annotations (e.g. gene names, identifiers, etc.) for labeling output files.">
            <option value="n">No</option>
            <option value="y">Yes</option>
        </param>
        <!-- Explicit empty branch so that every select option has a matching when. -->
        <when value="n"/>
        <when value="y">
            <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/>
            <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotations."/>
        </when>
    </conditional>
    <!-- Gene subsetting strategy. -->
    <conditional name="genesOption">
        <param name="allGenes" type="select" display="radio" label="Select which option to use for subsetting the Gene Dataset" help="Select one of the options above.">
            <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option>
            <option value="geneList">Use a custom tsv file containing specific genes of interest.</option>
            <option value="pana">Use Metagenes from PANA (PAthway Network Analysis from gene expression data).</option>
        </param>
        <!-- "all" needs no further inputs. -->
        <when value="all"/>
        <when value="geneList">
            <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="This custom list must contain Gene Symbol IDs and must be a single column."/>
        </when>
        <when value="pana">
            <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
            <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/>
            <param name="path2genes" type="data" format="tabular" label="GeneKEGGID2PathwayID" help="Select the File from your history containing the list of ALL Gene KEGGIDs to Pathway IDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
            <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%.">
                <option value="single">single% -- Percent of variability for a given principal component.</option>
                <option value="accum">%accum -- Percent of accumulated variability.</option>
                <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option>
                <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option>
            </param>
            <!-- NOTE(review): the "generic" branch defaults this cut-off to 0.20; confirm which default is intended. -->
            <param name="cutoff" type="float" value="0.23" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.23"/>
            <conditional name="PANAAnno">
                <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathway IDs.">
                    <option value="no">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
                </when>
            </conditional>
        </when>
    </conditional>
</when>
|
|
<!-- Branch "both": metabolites are subset by class AND by MMC pattern, so the
     MMC settings are collected here as well. -->
<when value="both">
    <param name="metKeggAnno" type="data" format="tabular" label="Metabolite to KEGGID Link File" help="Select the Metabolite KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
    <param name="design" type="data" format="tabular" label="Design File" help="Select the Design file to use with your Metabolite KEGGID Link File. This file can be generated using the 'Create: Design, Wide, and Annotation datasets' tool. Note that you need a 'sampleID' column."/>
    <param name="sigmaLow" type="float" size="6" value="0.05" label="Lower sigma bound" help="Default: 0.05."/>
    <param name="sigmaHigh" type="float" size="6" value="0.50" label="Upper sigma bound" help="Default: 0.50."/>
    <param name="sigmaNum" type="integer" size="6" value="451" label="Number of Sigma values" help="Number of values of sigma to search. Default: 451."/>
    <!-- Only Pearson is flagged as selected, matching the default stated in the help text. -->
    <param name="corr" type="select" label="Correlation method" help="Select correlation method for preliminary correlation prior to clustering. Default: Pearson.">
        <option value="pearson" selected="true">Pearson</option>
        <option value="kendall">Kendall</option>
        <option value="spearman">Spearman</option>
    </param>
    <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select Gene Expression Wide Dataset from your history"/>
    <!-- The identifier column is required even when no annotation file is supplied,
         so the help refers to the wide dataset. -->
    <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/>
    <!-- Optional gene annotation file. -->
    <conditional name="useGeneAnnoCond">
        <param name="useGeneAnno" type="select" label="Use a Gene Annotation File?" help="You can choose to input a file containing gene annotations (e.g. gene names, identifiers, etc.) to use for labeling output files.">
            <option value="n">No</option>
            <option value="y">Yes</option>
        </param>
        <!-- Explicit empty branch so that every select option has a matching when. -->
        <when value="n"/>
        <when value="y">
            <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/>
            <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotations."/>
        </when>
    </conditional>
    <!-- Gene subsetting strategy. -->
    <conditional name="genesOption">
        <param name="allGenes" type="select" display="radio" label="Gene Dataset Subsetting Option" help="Select one of the following.">
            <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option>
            <option value="geneList">Upload a custom list containing specific genes of interest.</option>
            <option value="pana">Use Metagenes (PANA Approach).</option>
        </param>
        <!-- "all" needs no further inputs. -->
        <when value="all"/>
        <when value="geneList">
            <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="The list must consist of a single column of Gene Symbols."/>
        </when>
        <when value="pana">
            <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/>
            <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/>
            <param name="path2genes" type="data" format="tabular" label="Gene Expression KEGG Pathway File" help="Select the File from your history that contains the list of ALL Gene KEGGIDs to PathwayIDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
            <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%.">
                <option value="single">single% -- Percent of variability for a given principal component.</option>
                <option value="accum">%accum -- Percent of accumulated variability.</option>
                <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option>
                <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option>
            </param>
            <!-- NOTE(review): the "generic" branch defaults this cut-off to 0.20; confirm which default is intended. -->
            <param name="cutoff" type="float" value="0.23" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.23"/>
            <conditional name="PANAAnno">
                <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathwayIDs.">
                    <option value="no">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/>
                </when>
            </conditional>
        </when>
    </conditional>
</when>
|
|
248 </conditional>
|
|
<!-- sPLS model settings, passed to the script in every mode. -->
<param name="keepX" type="integer" size="30" value="10" label="Number of Genes to Keep in the Model" help="Enter the number of genes to keep for each component in the sPLS analysis."/>
<param name="threshold" type="float" size="30" value="0.8" label="Threshold" help="Correlations under this threshold will NOT be included in the output file."/>
|
|
251 </inputs>
|
|
<outputs>
    <!-- Unfiltered outputs: created for every run. -->
    <data name="figure1"
          format="pdf"
          label="${tool.name} on ${on_string}: sPLS Figure"/>
    <data name="splsOut"
          format="tabular"
          label="${tool.name} on ${on_string}: sPLS Correlation Table"/>

    <!-- MMC outputs: kept only when the metabolite subsetting mode is "mmc" or "both". -->
    <data name="figure2"
          format="pdf"
          label="${tool.name} on ${on_string}: MMC Figure">
        <filter>(metsOption['allMets'] == 'mmc') or (metsOption['allMets'] == 'both')</filter>
    </data>
    <data name="mmcOut"
          format="tabular"
          label="${tool.name} on ${on_string}: MMC Output Table">
        <filter>(metsOption['allMets'] == 'mmc') or (metsOption['allMets'] == 'both')</filter>
    </data>

    <!-- PANA output: kept only when the gene subsetting mode is "pana". -->
    <data name="panaOut"
          format="tabular"
          label="${tool.name} on ${on_string}: PANA Output Table">
        <filter>(metsOption['genesOption']['allGenes'] == 'pana')</filter>
    </data>
</outputs>
|
|
<tests>
    <!-- Subset metabolites by class ("generic") and use all genes.
         Parameters that live inside conditionals are nested explicitly, because
         several of them (geneDataset, geneId, metKeggAnno, allGenes) are declared
         once per metsOption branch.
         The former metName entry was dropped: it belongs to the "y" branch of
         useMetAnnoCond, which this test never activates. -->
    <test>
        <param name="metDataset" value="metabolite_wide_dataset_01fhl.tsv"/>
        <param name="metId" value="UniqueID"/>
        <conditional name="metsOption">
            <param name="allMets" value="generic"/>
            <param name="metKeggAnno" value="metabolite_to_keggId_link_01fhl.tsv"/>
            <param name="geneDataset" value="gene_wide_dataset_01fhl.tsv"/>
            <param name="geneId" value="UniqueID"/>
            <conditional name="genesOption">
                <param name="allGenes" value="all"/>
            </conditional>
        </conditional>
        <param name="keepX" value="10"/>
        <param name="threshold" value="0.8"/>
        <output name="splsOut" file="spls_correlation_file_01fhl.tsv"/>
        <output name="figure1" file="spls_figure_01fhl.pdf"/>
    </test>
</tests>
|
|
281 <help><![CDATA[
|
|
282
|
|
283 **Tool Description**
|
|
284
|
|
285 NOTE: The parameters you select are data dependent.
|
|
286
|
|
287 This tool carries out the integrated analysis of metabolite and gene expression data. Here, metabolite data are considered the dependent (Y) variable
|
|
288 and genes the explanatory variable. The tool allows for several combinations of metabolite and gene models. A note of caution: a complete metabolite
|
|
289 and gene expression dataset with no filtering will be challenging to interpret using this tool.
|
|
290
|
|
291 We recommend that both gene expression and metabolite datasets be reduced to reflect a common biological hypothesis before running this tool. For example,
|
|
292 metabolite data can be subset by class (i.e. using the 'Name_in_KEGG' column generated from the 'Link Name to KEGGID' tool). Users who want to include
|
|
293 similarly behaving compounds without regard to identification or type of compound can estimate modules with the Modulated Modularity Clustering (MMC) tool
|
|
294 (Stone and Ayroles 2009). Each module can be examined separately. Finally, metabolite data can be reduced by using both metabolite class and the MMC tool.
|
|
295 Similarly, gene expression data can be reduced in scope by uploading and using a custom list of genes of interest or by using metagenes as implemented in PANA
|
|
296 (Ponzoni et al. 2014).
|
|
297
|
|
298 1) Classes of metabolites can be modeled as a function of metagenes.
|
|
299 2) Classes of metabolites can be modeled as a function of a set of individual genes.
|
|
300 3) Unbiased clusters of metabolites can be modeled as a function of metagenes.
|
|
301 4) Unbiased clusters of metabolites can be modeled as a function of a set of individual genes.
|
|
302
|
|
303 The tool executes a partial least squares regression with variable selection (sparse PLS, sPLS) as implemented in the 'mixOmics' package (Rohart F., Gautier, B, Singh,
|
|
304 A and Lê Cao, K. A. mixOmics: an R package for ‘omics feature selection and multiple data integration. On bioRxiv). The mixomics sPLS function is run in ‘classic mode’
|
|
305 (http://mixomics.org/methods/spls/) with the number of components included in the model set to 2. In addition, the user selects the number of variables (genes) for
|
|
306 each component to use in model construction.
|
|
307
|
|
308 This tool needs at least 1 subset with a minimum number of 3 metabolites to run properly. If the user selects subset metabolites by class and no metabolite groups are
|
|
309 identified or small metabolite groups with less than 3 members are found, the tool will stop and a warning message will be generated to try the MMC option instead.
|
|
310 Similarly, if the user selects subset metabolites using MMC clusters and there are no clusters with at least 3 metabolites, the tool will stop and a warning message
|
|
311 will be generated to try the 'by class' option instead.
|
|
312
|
|
313 --------------------------------------------------------------------------------
|
|
314
|
|
315 **INPUT**
|
|
316
|
|
317 **Please see the UserGuide for more details regarding tool inputs and options.**
|
|
318
|
|
319 **Metabolite Wide Dataset**
|
|
320 A wide formatted dataset that contains measurements for each sample (samples are in columns):
|
|
321
|
|
322 +-----------+---------+---------+---------+-----+
|
|
323 | FeatureID | sample1 | sample2 | sample3 | ... |
|
|
324 +===========+=========+=========+=========+=====+
|
|
325 | met_one | 10 | 20 | 10 | ... |
|
|
326 +-----------+---------+---------+---------+-----+
|
|
327 | met_two | 5 | 22 | 30 | ... |
|
|
328 +-----------+---------+---------+---------+-----+
|
|
329 | met_three | 30 | 27 | 2 | ... |
|
|
330 +-----------+---------+---------+---------+-----+
|
|
331 | met_four | 32 | 17 | 8 | ... |
|
|
332 +-----------+---------+---------+---------+-----+
|
|
333 | ... | ... | ... | ... | ... |
|
|
334 +-----------+---------+---------+---------+-----+
|
|
335
|
|
336 **Unique Metabolite FeatureID**
|
|
337 Name of the column in your Metabolite Wide Dataset that contains unique identifiers.
|
|
338
|
|
339 **Optional - Metabolite Annotation File**
|
|
340 A wide format dataset containing metabolite descriptor information (e.g. metabolite names, m/z ratios). The user can choose a column in the Annotation File for labeling output files.
|
|
341
|
|
342 **Optional - Metabolite Names**
|
|
343 Column name in the Metabolite Annotation File to use for labeling output files.
|
|
344
|
|
345 **Data reduction (subsetting) of Metabolite Data**
|
|
346 1) By metabolite class - uses a predefined grouping of metabolites based on the 'Name_in_KEGG' column in the Metabolite to KEGGID Link File.
|
|
347 2) By MMC pattern - runs the SECIMTools MMC tool and uses the tool-generated pattern blocks for subsetting. Please see Stone and Ayroles (2009) for MMC options.
|
|
348 3) By both metabolite class AND MMC pattern
|
|
349
|
|
350 **Metabolite to KEGGID Link File**
|
|
351 This file MUST contain a column called 'Name_in_KEGG' and can be generated using the 'Link Name to KEGGID' tool.
|
|
352
|
|
353 **Gene Expression Wide Dataset**
|
|
354 A wide formatted gene expression dataset that contains measurements for each sample:
|
|
355
|
|
356 +------------+---------+---------+---------+-----+
|
|
357 | FeatureID | sample1 | sample2 | sample3 | ... |
|
|
358 +============+=========+=========+=========+=====+
|
|
359 | one | 10 | 20 | 10 | ... |
|
|
360 +------------+---------+---------+---------+-----+
|
|
361 | two | 5 | 22 | 30 | ... |
|
|
362 +------------+---------+---------+---------+-----+
|
|
363 | three | 30 | 27 | 2 | ... |
|
|
364 +------------+---------+---------+---------+-----+
|
|
365 | four | 32 | 17 | 8 | ... |
|
|
366 +------------+---------+---------+---------+-----+
|
|
367 | ... | ... | ... | ... | ... |
|
|
368 +------------+---------+---------+---------+-----+
|
|
369
|
|
370 **Unique Gene FeatureID**
|
|
371 Name of the column in your Gene Expression Wide Dataset that contains unique gene identifiers.
|
|
372
|
|
373 **Optional - Gene Annotation File**
|
|
374 A wide format dataset containing gene annotation information (e.g. gene names). The user can choose a column in the Annotation File for labeling output files.
|
|
375
|
|
376 **Optional - Gene Names**
|
|
377 Column name in the Gene Annotation File to use for labeling output files.
|
|
378
|
|
379 **Data reduction (subsetting) of Gene Expression Data**
|
|
380 1) No subsetting - include all genes in the Gene Expression Wide Dataset
|
|
381 2) Use a custom tsv file containing specific genes of interest - select a custom gene list from your history
|
|
382 3) Include genes linked to each metabolite class through common KEGG pathways
|
|
383 4) Use Metagenes from PANA (PAthway Network Analysis from gene expression data)
|
|
384
|
|
385 **Gene Expression KEGG Pathway File**
|
|
386 Contains links between gene symbols and KEGG Pathways. Can be generated using the 'Add KEGG Pathway Information' tool
|
|
387
|
|
388 **Metabolomic KEGG Pathway File**
|
|
389 Contains links between metabolites and KEGG Pathways. Can be generated using the 'Add KEGG Pathway Information' tool
|
|
390
|
|
391 **Gene to KEGGID Link File**
|
|
392 Contains links between gene symbols and KEGGIDs.
|
|
393
|
|
394 **Gene Symbol**
|
|
395 Name of the column in your Gene to KEGGID Link File that contains gene symbols
|
|
396
|
|
397 **GeneKEGGID2PathwayID**
|
|
398 Contains KEGG links between gene KEGGIDs and KEGG PathwayIDs. Can be generated from the 'Add KEGG Pathway Information' tool
|
|
399
|
|
400 **Number of Genes to Keep in Model**
|
|
401 default: 10. This is the number of genes to keep for each principal component in the sPLS analysis.
|
|
402
|
|
403 **Threshold**
|
|
404 default: 0.8. Correlations less than this value will NOT be included in the output files.
|
|
405
|
|
406 --------------------------------------------------------------------------------
|
|
407
|
|
408 **OUTPUT**
|
|
409
|
|
410 **For metabolite reduction by metabolite class and all genes:**
|
|
411 (1) A PDF containing a sPLS figure for each metabolite class.
|
|
412 (2) A sPLS Correlation TSV file containing the correlations for each metabolite-gene pair and the subset (metabolite class) to which the pair belongs.
|
|
413
|
|
414 **For metabolite reduction by MMC the following files will be output in addition to files (1) and (2) above**
|
|
415 (3) A MMC PDF Figure containing unsorted, sorted and sorted-smoothed heatmaps of the variance-covariance matrices
|
|
416 (4) A MMC Output TSV file containing algorithm summaries in the following columns:
|
|
417
|
|
418 (1) Unique metabolite featureID
|
|
419 (2) Module: Contains the module number for each feature calculated by the MMC tool.
|
|
420 (3) Entry Index: Contains the original order of the names of the rows of the input Metabolite Wide Dataset.
|
|
421 (4) Degree: Average of the absolute values of correlations for the given element in a block to other elements within that block.
|
|
422 (5) Average Degree: Average values of the degrees computed above across all elements within the given block.
|
|
423
|
|
424 **For subsetting genes by generating metagenes using PANA the following files will be output in addition to files (1) and (2) above**
|
|
425 (5) A PANA Output TSV table containing associations between gene symbols and KEGG pathways.
|
|
426
|
|
427 ]]>
|
|
428 </help>
|
|
<citations>
    <!-- PANA (metagene approach). -->
    <citation type="bibtex">@article{ponzoni2014pathway,
    title={Pathway network inference from gene expression data},
    author={Ponzoni, Ignacio and Nueda, Mar{\'\i}a Jos{\'e} and Tarazona, Sonia and G{\"o}tz, Stefan and Montaner, David and Dussaut, Julieta Sol and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
    journal={BMC systems biology},
    volume={8},
    number={2},
    pages={S7},
    year={2014},
    publisher={BioMed Central}
    }</citation>
    <!-- mixOmics (sPLS implementation). -->
    <citation type="bibtex">@article{dejean2013mixomics,
    title={mixOmics: Omics data integration project},
    author={Dejean, Sebastien and Gonzalez, Ignacio and L{\^e} Cao, Kim-Anh and Monget, Pierre and Coquery, J and Yao, F and Liquet, B and Rohart, F},
    journal={R package},
    year={2013}
    }</citation>
    <!-- SECIMTools. Authors are joined with "and", as BibTeX requires for name lists. -->
    <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
    author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
    title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
    journal = {BMC Bioinformatics},
    year = {in press}
    }</citation>
    <!-- Paintomics. -->
    <citation type="bibtex">
    @article{garcia2010paintomics,
    title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
    author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
    journal={Bioinformatics},
    volume={27},
    number={1},
    pages={137--139},
    year={2010},
    publisher={Oxford University Press}
    }</citation>
</citations>
|
|
464 </tool>
|