Mercurial > repos > malex > gait_gm
comparison sPLS.xml @ 1:ec9ee8edb84d draft
Initial upload of 21.6.10 release.
author | malex |
---|---|
date | Fri, 18 Jun 2021 20:23:19 +0000 |
parents | |
children | 2c218a253d56 |
comparison
equal
deleted
inserted
replaced
0:864fc6430432 | 1:ec9ee8edb84d |
---|---|
1 <tool id="secimtools_spls" name="Metabolite - Gene Integration" version="@WRAPPER_VERSION@"> | |
2 <description></description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <stdio> | |
8 <exit_code range="2" level="fatal" description="Not enough metabolites for the Analysis."/> | |
9 </stdio> | |
10 <!-- Dataset paths, free-text column names and output paths are single-quoted so that values containing spaces reach sPLS.py as one argument. --> <command detect_errors="exit_code"><![CDATA[ | |
11 sPLS.py | |
12 -g='$metsOption.geneDataset' | |
13 -gid='$metsOption.geneId' | |
14 #if $metsOption.useGeneAnnoCond.useGeneAnno == "y": | |
15 -ga='$metsOption.useGeneAnnoCond.geneAnno' | |
16 -gn='$metsOption.useGeneAnnoCond.geneName' | |
17 #end if | |
18 -go=$metsOption.genesOption.allGenes | |
19 #if $metsOption.genesOption.allGenes == "geneList": | |
20 -gl='$metsOption.genesOption.relatedGeneList' | |
21 #end if | |
22 #if $metsOption.genesOption.allGenes == "path": | |
23 -gkp='$metsOption.genesOption.geneKeggPath' | |
24 -mkp='$metsOption.genesOption.metKeggPath' | |
25 #end if | |
26 #if $metsOption.genesOption.allGenes == "pana": | |
27 -gka='$metsOption.genesOption.geneKeggAnno' | |
28 -gkn='$metsOption.genesOption.geneKeggName' | |
29 -p2g='$metsOption.genesOption.path2genes' | |
30 -cu=$metsOption.genesOption.cutoff | |
31 -f=$metsOption.genesOption.facSel | |
32 #if $metsOption.genesOption.PANAAnno.usePANAAnno == "yes": | |
33 -p2n='$metsOption.genesOption.PANAAnno.path2names' | |
34 #end if | |
35 -o3='$panaOut' | |
36 #end if | |
37 -k=$keepX | |
38 -t=$threshold | |
39 -m='$metDataset' | |
40 -mid='$metId' | |
41 #if $useMetAnnoCond.useMetAnno == "y": | |
42 -ma='$useMetAnnoCond.metAnno' | |
43 -mn='$useMetAnnoCond.metName' | |
44 #end if | |
45 -mo=$metsOption.allMets | |
46 -mka='$metsOption.metKeggAnno' | |
47 #if $metsOption.allMets == "mmc": | |
48 -d='$metsOption.design' | |
49 -c=$metsOption.corr | |
50 -sl=$metsOption.sigmaLow | |
51 -sh=$metsOption.sigmaHigh | |
52 -sn=$metsOption.sigmaNum | |
53 -f2='$figure2' | |
54 -o2='$mmcOut' | |
55 #end if | |
56 #if $metsOption.allMets == "both": | |
57 -d='$metsOption.design' | |
58 -c=$metsOption.corr | |
59 -sl=$metsOption.sigmaLow | |
60 -sh=$metsOption.sigmaHigh | |
61 -sn=$metsOption.sigmaNum | |
62 -f2='$figure2' | |
63 -o2='$mmcOut' | |
64 #end if | |
65 -f1='$figure1' | |
66 -o1='$splsOut' | |
67 ]]></command> | |
68 <inputs> | |
69 <param name="metDataset" type="data" format="tabular" label="Metabolite Wide Dataset for Integration" help="Select the Metabolite Wide Dataset from your history"/> | |
70 <param name="metId" type="text" size="30" value="" label="Unique Metabolite FeatureID" help="Name of the column in your Metabolite Wide Dataset that contains unique identifiers."/> | |
71 <conditional name="useMetAnnoCond"> | |
72 <param name="useMetAnno" type="select" label="Use Metabolite Annotation File?" help="You can choose to input a file containing metabolite annotation information (e.g. metabolite names, identifiers, etc.) to use for labeling the output files."> | |
73 <option value="n">No</option> | |
74 <option value="y">Yes</option> | |
75 </param> | |
76 <when value="y"> | |
77 <param name="metAnno" type="data" format="tabular" label="Metabolomic Annotation File" help="Select the Metabolomic Annotation File from your history."/> | |
78 <param name="metName" type="text" size="30" value="" label="Metabolite Names" help="Name of the column in your Metabolomic Annotation Dataset that contains metabolite annotation information."/> | |
79 </when> | |
80 </conditional> | |
81 <conditional name="metsOption"> | |
82 <param name="allMets" type="select" display="radio" label="Select which option to use for subsetting the Metabolite Wide Dataset" help="Select one of the options above."> | |
83 <option value="generic">By metabolite class -- Uses the 'Name_in_KEGG' column generated from the 'Link Name to KEGGID' tool to subset.</option> | |
84 <option value="mmc">By MMC pattern -- Runs the SECIMTools MMC tool and uses the tool-generated pattern blocks for subsetting.</option> | |
85 <option value="both">By both metabolite class AND by MMC pattern.</option> | |
86 </param> | |
87 <when value="generic"> | |
88 <param name="metKeggAnno" type="data" format="tabular" label="'Metabolite to KEGGID Link' File." help="Select the 'Metabolite to KEGGID Link' File from your history. This file can be generated using the 'Link Name to KEGGID' tool."/> | |
89 <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select the Gene Expression Wide Dataset from your history"/> | |
90 <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Wide Dataset that contains unique identifiers."/> | |
91 <conditional name="useGeneAnnoCond"> | |
92 <param name="useGeneAnno" type="select" label="Use a Gene Annotation File?" help="You can choose to input a file containing gene annotation information (e.g. gene names, identifiers, etc.) for labeling the output files."> | |
93 <option value="n">No</option> | |
94 <option value="y">Yes</option> | |
95 </param> | |
96 <when value="y"> | |
97 <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/> | |
98 <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotation information."/> | |
99 </when> | |
100 </conditional> | |
101 <conditional name="genesOption"> | |
102 <param name="allGenes" type="select" display="radio" label="Select which option to use for subsetting the Gene Expression Wide Dataset" help="Select one of the options above."> | |
103 <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option> | |
104 <option value="geneList">Use a custom tsv file containing specific genes of interest.</option> | |
105 <option value="path">Include genes linked to each metabolite class through common KEGG pathways.</option> | |
106 <option value="pana">Use Metagenes from PANA (PAthway Network Analysis from gene expression data).</option> | |
107 </param> | |
108 <when value="geneList"> | |
109 <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="This custom list must consist of a single column of Gene Symbols."/> | |
110 </when> | |
111 <when value="path"> | |
112 <param name="geneKeggPath" type="data" format="tabular" label="Gene Expression KEGG Pathway File" help="Select the Gene Expression KEGG Pathway File from your history. This file can be generated using the 'Add KEGG Pathway Information' tool."/> | |
113 <param name="metKeggPath" type="data" format="tabular" label="Metabolomic KEGG Pathway File" help="Select the Metabolomic KEGG Pathway File from your history. This file can be generated using the 'Add KEGG Pathway Information' tool."/> | |
114 </when> | |
115 <when value="pana"> | |
116 <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select the Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/> | |
117 <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/> | |
118 <param name="path2genes" type="data" format="tabular" label="GeneKEGGID2PathwayID" help="Select the File from your history containing the list of ALL Gene KEGGIDs to Pathway IDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/> | |
119 <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%."> | |
120 <option value="single">single% -- Percent of variability for a given principal component.</option> | |
121 <option value="accum">%accum -- Percent of accumulated variability.</option> | |
122 <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option> | |
123 <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option> | |
124 </param> | |
125 <param name="cutoff" type="float" value="0.20" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.20"/> | |
126 <conditional name="PANAAnno"> | |
127 <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathwayIDs."> | |
128 <option value="no">No</option> | |
129 <option value="yes">Yes</option> | |
130 </param> | |
131 <when value="yes"> | |
132 <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/> | |
133 </when> | |
134 </conditional> | |
135 </when> | |
136 </conditional> | |
137 </when> | |
138 <when value="mmc"> | |
139 <param name="metKeggAnno" type="data" format="tabular" label="Metabolite to KEGGID Link File to Input into MMC" help="Select the Metabolite KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/> | |
140 <param name="design" type="data" format="tabular" label="Design Dataset" help="Select the Design file to use with your Metabolite KEGGID Link File. This file can be generated using the 'Create: Design, Wide, and Annotation datasets' tool. Note that you need a column called 'sampleID' that contains the names of your samples."/> | |
141 <param name="sigmaLow" type="float" size="6" value="0.05" label="Lower sigma bound" help="Default: 0.05." /> | |
142 <param name="sigmaHigh" type="float" size="6" value="0.50" label="Upper sigma bound" help="Default: 0.50." /> | |
143 <param name="sigmaNum" type="integer" size="6" value="451" label="Number of Sigma values" help="Number of values of sigma to search. Default: 451." /> | |
144 <param name="corr" type="select" value="pearson" label="Correlation method" help="Select correlation method for preliminary correlation before clustering. Default: Pearson." > | |
145 <option value="pearson" selected="true">Pearson</option> | |
146 <option value="kendall">Kendall</option> | |
147 <option value="spearman">Spearman</option> | |
148 </param> | |
149 <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select the Gene Expression Wide Dataset from your history."/> | |
150 <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Annotation File that contains unique identifiers."/> | |
151 <conditional name="useGeneAnnoCond"> | |
152 <param name="useGeneAnno" type="select" label="Use Gene Annotation File?" help="You can choose to input a file containing gene annotations (e.g. gene names, identifiers, etc.) for labeling output files."> | |
153 <option value="n">No</option> | |
154 <option value="y">Yes</option> | |
155 </param> | |
156 <when value="y"> | |
157 <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/> | |
158 <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotations."/> | |
159 </when> | |
160 </conditional> | |
161 <conditional name="genesOption"> | |
162 <param name="allGenes" type="select" display="radio" label="Select which option to use for subsetting the Gene Dataset" help="Select one of the options above."> | |
163 <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option> | |
164 <option value="geneList">Use a custom tsv file containing specific genes of interest.</option> | |
165 <option value="pana">Use Metagenes from PANA (PAthway Network Analysis from gene expression data).</option> | |
166 </param> | |
167 <when value="geneList"> | |
168 <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="This custom list must contain Gene Symbol IDs and must be a single column."/> | |
169 </when> | |
170 <when value="pana"> | |
171 <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/> | |
172 <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/> | |
173 <param name="path2genes" type="data" format="tabular" label="GeneKEGGID2PathwayID" help="Select the File from your history containing the list of ALL Gene KEGGIDs to Pathway IDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/> | |
174 <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%."> | |
175 <option value="single">single% -- Percent of variability for a given principal component.</option> | |
176 <option value="accum">%accum -- Percent of accumulated variability.</option> | |
177 <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option> | |
178 <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option> | |
179 </param> | |
180 <param name="cutoff" type="float" value="0.23" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.23"/> | |
181 <conditional name="PANAAnno"> | |
182 <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathway IDs."> | |
183 <option value="no">No</option> | |
184 <option value="yes">Yes</option> | |
185 </param> | |
186 <when value="yes"> | |
187 <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/> | |
188 </when> | |
189 </conditional> | |
190 </when> | |
191 </conditional> | |
192 </when> | |
193 <when value="both"> | |
194 <param name="metKeggAnno" type="data" format="tabular" label="Metabolite to KEGGID Link File" help="Select the Metabolite KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/> | |
195 <param name="design" type="data" format="tabular" label="Design File" help="Select the Design file to use with your Metabolite KEGGID Link File. This file can be generated using the 'Create: Design, Wide, and Annotation datasets' tool. Note that you need a 'sampleID' column."/> | |
196 <param name="sigmaLow" type="float" size="6" value="0.05" label="Lower sigma bound" help="Default: 0.05." /> | |
197 <param name="sigmaHigh" type="float" size="6" value="0.50" label="Upper sigma bound" help="Default: 0.50." /> | |
198 <param name="sigmaNum" type="integer" size="6" value="451" label="Number of Sigma values" help="Number of values of sigma to search. Default: 451." /> | |
199 <param name="corr" type="select" value="pearson" label="Correlation method" help="Select correlation method for preliminary correlation prior to clustering. Default: Pearson." > | |
200 <option value="pearson" selected="true">Pearson</option> | |
201 <option value="kendall">Kendall</option> | |
202 <option value="spearman">Spearman</option> | |
203 </param> | |
204 <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset for Integration" help="Select Gene Expression Wide Dataset from your history"/> | |
205 <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your Gene Expression Annotation File that contains unique identifiers."/> | |
206 <conditional name="useGeneAnnoCond"> | |
207 <param name="useGeneAnno" type="select" label="Use a Gene Annotation File?" help="You can choose to input a file containing gene annotations (e.g. gene names, identifiers, etc.) to use for labeling output files."> | |
208 <option value="n">No</option> | |
209 <option value="y">Yes</option> | |
210 </param> | |
211 <when value="y"> | |
212 <param name="geneAnno" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history."/> | |
213 <param name="geneName" type="text" size="30" value="" label="Gene Names" help="Name of the column in your Gene Expression Annotation Dataset that contains gene annotations."/> | |
214 </when> | |
215 </conditional> | |
216 <conditional name="genesOption"> | |
217 <param name="allGenes" type="select" display="radio" label="Gene Dataset Subsetting Option" help="Select one of the following."> | |
218 <option value="all">Include all genes in the Gene Expression Wide Dataset -- no subsetting.</option> | |
219 <option value="geneList">Upload a custom list containing specific genes of interest.</option> | |
220 <option value="pana">Use Metagenes (PANA Approach).</option> | |
221 </param> | |
222 <when value="geneList"> | |
223 <param name="relatedGeneList" type="data" format="tabular" label="Select a Custom Gene List from your history" help="The list must consist of a single column of Gene Symbols."/> | |
224 </when> | |
225 <when value="pana"> | |
226 <param name="geneKeggAnno" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select Gene to KEGGID Link File from your history. This file can be generated from the 'Link Name to KEGGID' tool."/> | |
227 <param name="geneKeggName" type="text" label="Gene Symbol" help="Name of the column in your 'Gene to KEGGID Link File' that contains Gene Symbols."/> | |
228 <param name="path2genes" type="data" format="tabular" label="Gene Expression KEGG Pathway File" help="Select the File from your history that contains the list of ALL Gene KEGGIDs to PathwayIDs. This file can be generated from the 'Add KEGG Pathway Information' tool."/> | |
229 <param name="facSel" type="select" display="radio" label="Criterion to select components" help="Choose the criterion to select components. Default: single%."> | |
230 <option value="single">single% -- Percent of variability for a given principal component.</option> | |
231 <option value="accum">%accum -- Percent of accumulated variability.</option> | |
232 <option value="abs.val">abs.val -- Absolute value of the variability for a given principal component.</option> | |
233 <option value="rel.abs">rel.abs -- Fold variability of tot.var/rank(X).</option> | |
234 </param> | |
235 <param name="cutoff" type="float" value="0.23" label="Variability cut-off value" help="Select the variability cut-off value. Default: 0.23"/> | |
236 <conditional name="PANAAnno"> | |
237 <param name="usePANAAnno" type="select" label="Include Pathway Names in results files and figures?" help="You can choose to input a file containing annotations for the KEGG pathwayIDs."> | |
238 <option value="no">No</option> | |
239 <option value="yes">Yes</option> | |
240 </param> | |
241 <when value="yes"> | |
242 <param name="path2names" type="data" format="tabular" label="Pathway Names File" help="Select the Pathway Names File from your history. This file can be generated from the 'Add KEGG Pathway Information' tool."/> | |
243 </when> | |
244 </conditional> | |
245 </when> | |
246 </conditional> | |
247 </when> | |
248 </conditional> | |
249 <param name="keepX" type="integer" size="30" value="10" label="Number of Genes to Keep in the Model" help="Enter the number of genes to keep for each component in the sPLS analysis."/> | |
250 <param name="threshold" type="float" size="30" value="0.8" label="Threshold" help="Correlations under this threshold will NOT be included in the output file."/> | |
251 </inputs> | |
252 <outputs> | |
253 <data format="pdf" name="figure1" label="${tool.name} on ${on_string}: sPLS Figure"/> | |
254 <data format="tabular" name="splsOut" label="${tool.name} on ${on_string}: sPLS Correlation Table"/> | |
255 <data format="pdf" name="figure2" label="${tool.name} on ${on_string}: MMC Figure"> | |
256 <filter>(metsOption['allMets'] == 'mmc') or (metsOption['allMets'] == 'both')</filter> | |
257 </data> | |
258 <data format="tabular" name="mmcOut" label="${tool.name} on ${on_string}: MMC Output Table"> | |
259 <filter>(metsOption['allMets'] == 'mmc') or (metsOption['allMets'] == 'both')</filter> | |
260 </data> | |
261 <data format="tabular" name="panaOut" label="${tool.name} on ${on_string}: PANA Output Table"> | |
262 <filter>(metsOption['genesOption']['allGenes'] == 'pana')</filter> | |
263 </data> | |
264 </outputs> | |
265 <tests> | |
266 <test> | |
267 <param name="metDataset" value="metabolite_wide_dataset_01fhl.tsv"/> | |
268 <param name="metId" value="UniqueID"/> | |
269 <param name="allMets" value="generic"/> | |
270 <param name="metKeggAnno" value="metabolite_to_keggId_link_01fhl.tsv"/> | |
271 <param name="metName" value="MetName"/> | |
272 <param name="geneDataset" value="gene_wide_dataset_01fhl.tsv"/> | |
273 <param name="geneId" value="UniqueID"/> | |
274 <param name="allGenes" value="all"/> | |
275 <param name="keepX" value="10"/> | |
276 <param name="threshold" value="0.8"/> | |
277 <output name="splsOut" file="spls_correlation_file_01fhl.tsv"/> | |
278 <output name="figure1" file="spls_figure_01fhl.pdf"/> | |
279 </test> | |
280 </tests> | |
281 <help><![CDATA[ | |
282 | |
283 **Tool Description** | |
284 | |
285 NOTE: The parameters you select are data dependent. | |
286 | |
287 This tool carries out the integrated analysis of metabolite and gene expression data. Here, metabolite data are considered the dependent (Y) variable | |
288 and genes the explanatory variable. The tool allows for several combinations of metabolite and gene models. A note of caution: a complete metabolite | |
289 and gene expression dataset with no filtering will be challenging to interpret using this tool. | |
290 | |
291 We recommend that both gene expression and metabolite datasets be reduced to reflect a common biological hypothesis before running this tool. For example, | |
292 metabolite data can be subset by class (i.e. using the 'Name_in_KEGG' column generated from the 'Link Name to KEGGID' tool). Users who want to include | |
293 similarly behaving compounds without regard to identification or type of compound can estimate modules with the Modulated Modularity Clustering (MMC) tool | |
294 (Stone and Ayroles 2009). Each module can be examined separately. Finally, metabolite data can be reduced by using both metabolite class and the MMC tool. | |
295 Similarly, gene expression data can be reduced in scope by uploading and using a custom list of genes of interest or by using metagenes as implemented in PANA | |
296 (Ponzoni et al. 2014). | |
297 | |
298 1) Classes of metabolites can be modeled as a function of metagenes. | |
299 2) Classes of metabolites can be modeled as a function of a set of individual genes. | |
300 3) Unbiased clusters of metabolites can be modeled as a function of metagenes. | |
301 4) Unbiased clusters of metabolites can be modeled as a function of a set of individual genes. | |
302 | |
303 The tool executes a partial least squares regression with variable selection (sparse PLS, sPLS) as implemented in the 'mixOmics' package (Rohart F., Gautier, B, Singh, | |
304 A and Lê Cao, K. A. mixOmics: an R package for ‘omics feature selection and multiple data integration. On bioRxiv). The mixOmics sPLS function is run in ‘classic mode’ | |
305 (http://mixomics.org/methods/spls/) with the number of components included in the model set to 2. In addition, the user selects the number of variables (genes) for | |
306 each component to use in model construction. | |
307 | |
308 This tool needs at least 1 subset with a minimum number of 3 metabolites to run properly. If the user selects subset metabolites by class and no metabolite groups are | |
309 identified or small metabolite groups with fewer than 3 members are found, the tool will stop and a warning message will be generated to try the MMC option instead. | |
310 Similarly, if the user selects subset metabolites using MMC clusters and there are no clusters with at least 3 metabolites, the tool will stop and a warning message | |
311 will be generated to try the 'by class' option instead. | |
312 | |
313 -------------------------------------------------------------------------------- | |
314 | |
315 **INPUT** | |
316 | |
317 **Please see the UserGuide for more details regarding tool inputs and options.** | |
318 | |
319 **Metabolite Wide Dataset** | |
320 A wide formatted dataset that contains measurements for each sample (samples are in columns): | |
321 | |
322 +-----------+---------+---------+---------+-----+ | |
323 | FeatureID | sample1 | sample2 | sample3 | ... | | |
324 +===========+=========+=========+=========+=====+ | |
325 | met_one | 10 | 20 | 10 | ... | | |
326 +-----------+---------+---------+---------+-----+ | |
327 | met_two | 5 | 22 | 30 | ... | | |
328 +-----------+---------+---------+---------+-----+ | |
329 | met_three | 30 | 27 | 2 | ... | | |
330 +-----------+---------+---------+---------+-----+ | |
331 | met_four | 32 | 17 | 8 | ... | | |
332 +-----------+---------+---------+---------+-----+ | |
333 | ... | ... | ... | ... | ... | | |
334 +-----------+---------+---------+---------+-----+ | |
335 | |
336 **Unique Metabolite FeatureID** | |
337 Name of the column in your Metabolite Wide Dataset that contains unique identifiers. | |
338 | |
339 **Optional - Metabolite Annotation File** | |
340 A wide format dataset containing metabolite descriptor information (e.g. metabolite names, m/z ratios). The user can choose a column in the Annotation File for labeling output files. | |
341 | |
342 **Optional - Metabolite Names** | |
343 Column name in the Metabolite Annotation File to use for labeling output files. | |
344 | |
345 **Data reduction (subsetting) of Metabolite Data** | |
346 1) By metabolite class - uses a predefined grouping of metabolites based on the 'Name_in_KEGG' column in the Metabolite to KEGGID Link File. | |
347 2) By MMC pattern - runs the SECIMTools MMC tool and uses the tool-generated pattern blocks for subsetting. Please see Stone and Ayroles (2009) for MMC options. | |
348 3) By both metabolite class AND MMC pattern | |
349 | |
350 **Metabolite to KEGGID Link File** | |
351 This file MUST contain a column called 'Name_in_KEGG' and can be generated using the 'Link Name to KEGGID' tool. | |
352 | |
353 **Gene Expression Wide Dataset** | |
354 A wide formatted gene expression dataset that contains measurements for each sample: | |
355 | |
356 +------------+---------+---------+---------+-----+ | |
357 | FeatureID | sample1 | sample2 | sample3 | ... | | |
358 +============+=========+=========+=========+=====+ | |
359 | one | 10 | 20 | 10 | ... | | |
360 +------------+---------+---------+---------+-----+ | |
361 | two | 5 | 22 | 30 | ... | | |
362 +------------+---------+---------+---------+-----+ | |
363 | three | 30 | 27 | 2 | ... | | |
364 +------------+---------+---------+---------+-----+ | |
365 | four | 32 | 17 | 8 | ... | | |
366 +------------+---------+---------+---------+-----+ | |
367 | ... | ... | ... | ... | ... | | |
368 +------------+---------+---------+---------+-----+ | |
369 | |
370 **Unique Gene FeatureID** | |
371 Name of the column in your Gene Expression Wide Dataset that contains unique gene identifiers. | |
372 | |
373 **Optional - Gene Annotation File** | |
374 A wide format dataset containing gene annotation information (e.g. gene names). The user can choose a column in the Annotation File for labeling output files. | |
375 | |
376 **Optional - Gene Names** | |
377 Column name in the Gene Annotation File to use for labeling output files. | |
378 | |
379 **Data reduction (subsetting) of Gene Expression Data** | |
380 1) No subsetting - include all genes in the Gene Expression Wide Dataset | |
381 2) Use a custom tsv file containing specific genes of interest - select a custom gene list from your history | |
382 3) Include genes linked to each metabolite class through common KEGG pathways | |
383 4) Use Metagenes from PANA (PAthway Network Analysis from gene expression data) | |
384 | |
385 **Gene Expression KEGG Pathway File** | |
386 Contains links between gene symbols and KEGG Pathways. Can be generated using the 'Add KEGG Pathway Information' tool | |
387 | |
388 **Metabolomic KEGG Pathway File** | |
389 Contains links between metabolites and KEGG Pathways. Can be generated using the 'Add KEGG Pathway Information' tool | |
390 | |
391 **Gene to KEGGID Link File** | |
392 Contains links between gene symbols and KEGGIDs. | |
393 | |
394 **Gene Symbol** | |
395 Name of the column in your Gene to KEGGID Link File that contains gene symbols | |
396 | |
397 **GeneKEGGID2PathwayID** | |
398 Contains KEGG links between gene KEGGIDs and KEGG PathwayIDs. Can be generated from the 'Add KEGG Pathway Information' tool | |
399 | |
400 **Number of Genes to Keep in Model** | |
401 default: 10. This is the number of genes to keep for each principal component in the sPLS analysis. | |
402 | |
403 **Threshold** | |
404 default: 0.8. Correlations less than this value will NOT be included in the output files. | |
405 | |
406 -------------------------------------------------------------------------------- | |
407 | |
408 **OUTPUT** | |
409 | |
410 **For metabolite reduction by metabolite class and all genes:** | |
411 (1) A PDF containing a sPLS figure for each metabolite class. | |
412 (2) A sPLS Correlation TSV file containing the correlations for each metabolite-gene pair and the subset (metabolite class) to which each pair belongs. | |
413 | |
414 **For metabolite reduction by MMC the following files will be output in addition to files (1) and (2) above** | |
415 (3) A MMC PDF Figure containing unsorted, sorted and sorted-smoothed heatmaps of the variance-covariance matrices | |
416 (4) A MMC Output TSV file containing algorithm summaries in the following columns: | |
417 | |
418 (1) Unique metabolite featureID | |
419 (2) Module: Contains the module number for each feature calculated by the MMC tool. | |
420 (3) Entry Index: Contains the original order of the names of the rows of the input Metabolite Wide Dataset. | |
421 (4) Degree: Average of the absolute values of correlations for the given element in a block to other elements within that block. | |
422 (5) Average Degree: Average values of the degrees computed above across all elements within the given block. | |
423 | |
424 **For subsetting genes by generating metagenes using PANA the following files will be output in addition to files (1) and (2) above** | |
425 (5) A PANA Output TSV table containing associations between gene symbols and KEGG pathways. | |
426 | |
427 ]]> | |
428 </help> | |
429 <citations> | |
430 <citation type="bibtex">@article{ponzoni2014pathway, | |
431 title={Pathway network inference from gene expression data}, | |
432 author={Ponzoni, Ignacio and Nueda, Mar{\'\i}a Jos{\'e} and Tarazona, Sonia and G{\"o}tz, Stefan and Montaner, David and Dussaut, Julieta Sol and Dopazo, Joaqu{\'\i}n and Conesa, Ana}, | |
433 journal={BMC systems biology}, | |
434 volume={8}, | |
435 number={2}, | |
436 pages={S7}, | |
437 year={2014}, | |
438 publisher={BioMed Central} | |
439 }</citation> | |
440 <citation type="bibtex">@article{dejean2013mixomics, | |
441 title={mixOmics: Omics data integration project}, | |
442 author={Dejean, Sebastien and Gonzalez, Ignacio and L{\^e} Cao, Kim-Anh and Monget, Pierre and Coquery, J and Yao, F and Liquet, B and Rohart, F}, | |
443 journal={R package}, | |
444 year={2013} | |
445 }</citation> | |
446 <citation type="bibtex">@ARTICLE{Kirpich17secimtools, | |
447 author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre}, | |
448 title = {SECIMTools: A suite of Metabolomics Data Analysis Tools}, | |
449 journal = {BMC Bioinformatics}, | |
450 year = {in press} | |
451 }</citation> | |
452 <citation type="bibtex"> | |
453 @article{garcia2010paintomics, | |
454 title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data}, | |
455 author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana}, | |
456 journal={Bioinformatics}, | |
457 volume={27}, | |
458 number={1}, | |
459 pages={137--139}, | |
460 year={2010}, | |
461 publisher={Oxford University Press} | |
462 }</citation> | |
463 </citations> | |
464 </tool> |