1
|
1 <tool id="allByAllCorr" name="Metabolite - Gene Correlation" version="@WRAPPER_VERSION@">
|
|
<description/>
<!-- Import macros.xml so that macros can be expanded in this tool; the
     "requirements" macro is expanded right after the import. -->
<macros>
    <import>macros.xml</import>
</macros>
<expand macro="requirements"/>
|
|
<command detect_errors="exit_code"><![CDATA[
    ## Every substitution is single-quoted so that dataset paths and
    ## user-typed column names containing spaces or shell metacharacters
    ## reach the script as exactly one argument.
    all_by_all_correlation.py
        -g='$geneDataset'
        -gid='$geneId'
        ## Gene annotation arguments are passed only when the user opted in.
        #if $geneAnnotation.useGeneAnnot == "Yes":
            -ga='$geneAnnotation.geneAnnot'
            -gn='$geneAnnotation.geneAnnotName'
        #end if
        -m='$metDataset'
        -mid='$metId'
        ## Metabolite annotation arguments are passed only when the user opted in.
        #if $metAnnotation.useMetAnnot == "Yes":
            -ma='$metAnnotation.metAnnot'
            -mn='$metAnnotation.metAnnotName'
        #end if
        -me='$method'
        -t='$threshold'
        -o='$output'
        -c='$corMat'
        -f='$figure'
]]></command>
|
|
<inputs>
    <!-- Gene expression side: wide dataset plus the name of its ID column. -->
    <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset" help="Select the Gene Expression Wide Dataset from your history"/>
    <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the Column in your Gene Expression Wide Dataset that contains unique identifiers.">
        <!-- The column name is always passed to the script, so reject an empty value up front. -->
        <validator type="empty_field"/>
    </param>
    <conditional name="geneAnnotation">
        <param name="useGeneAnnot" type="select" label="Use Annotation File?" help="You can choose to input a file containing gene annotation information (e.g. gene names, identifiers, etc.) for labeling output files.">
            <option value="No">No</option>
            <option value="Yes">Yes</option>
        </param>
        <!-- Explicit empty branch: every select option gets a matching <when>. -->
        <when value="No"/>
        <when value="Yes">
            <param name="geneAnnot" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history"/>
            <param name="geneAnnotName" type="text" value="" label="Gene Labels" help="Name of the column in the Gene Expression Annotation File to use for labeling output files."/>
        </when>
    </conditional>
    <!-- Metabolite side: wide dataset plus the name of its ID column. -->
    <param name="metDataset" type="data" format="tabular" label="Metabolite Wide Dataset" help="Select the Metabolite Wide Dataset from your history"/>
    <param name="metId" type="text" size="30" value="" label="Unique Metabolite FeatureID" help="Name of the column in your Metabolite Wide Dataset that contains unique identifiers.">
        <validator type="empty_field"/>
    </param>
    <conditional name="metAnnotation">
        <param name="useMetAnnot" type="select" label="Use Annotation File?" help="You can choose to input a file containing metabolite annotation information (e.g. metabolite names, identifiers, etc.) for labeling output files.">
            <option value="No">No</option>
            <option value="Yes">Yes</option>
        </param>
        <when value="No"/>
        <when value="Yes">
            <param name="metAnnot" type="data" format="tabular" label="Metabolite Annotation File" help="Select the Metabolite Annotation File from your history"/>
            <param name="metAnnotName" type="text" value="" label="Metabolite Labels" help="Name of the column in the Metabolite Annotation File to use for labeling output files"/>
        </when>
    </conditional>
    <param name="method" type="select" label="Correlation Method" help="Select a correlation method.">
        <option value="pearson">Pearson</option>
        <option value="spearman">Spearman</option>
        <option value="kendall">Kendall</option>
    </param>
    <!-- A P-value cut-off is a number in [0, 1]; let the form validate it
         instead of accepting arbitrary text. -->
    <param name="threshold" type="float" value="0.05" min="0" max="1" label="P-Value threshold." help="Default: 0.05"/>
</inputs>
|
|
<outputs>
    <!-- Three datasets are declared: two tabular files and one PDF. -->
    <data name="output" format="tabular"
          label="${tool.name} on ${on_string}: Correlation File"/>
    <data name="corMat" format="tabular"
          label="${tool.name} on ${on_string}: Correlation Matrix"/>
    <data name="figure" format="pdf"
          label="${tool.name} on ${on_string}: Correlation Figure"/>
</outputs>
|
|
<tests>
    <!-- Runs the tool with both annotation files enabled. The selectors must
         be set to "Yes" explicitly, otherwise the annotation parameters
         belong to an inactive branch, and the label parameters must use the
         names declared in <inputs> (geneAnnotName / metAnnotName). -->
    <test>
        <param name="geneDataset" value="gene_wide_dataset_01fhl.tsv"/>
        <param name="geneId" value="UniqueID"/>
        <conditional name="geneAnnotation">
            <param name="useGeneAnnot" value="Yes"/>
            <param name="geneAnnot" value="gene_annotation_file_01fhl.tsv"/>
            <param name="geneAnnotName" value="GeneName"/>
        </conditional>
        <param name="metDataset" value="metabolite_wide_dataset_01fhl.tsv"/>
        <param name="metId" value="UniqueID"/>
        <conditional name="metAnnotation">
            <param name="useMetAnnot" value="Yes"/>
            <param name="metAnnot" value="metabolite_annotation_file_01fhl.tsv"/>
            <param name="metAnnotName" value="MetName"/>
        </conditional>
        <output name="output" file="correlation_file_01fhl.tsv"/>
        <output name="corMat" file="correlation_matrix_01fhl.tsv"/>
        <!-- PDF files typically embed creation metadata, so compare by size
             rather than line by line. NOTE(review): confirm the delta suits
             the reference figure. -->
        <output name="figure" file="correlation_figure_01fhl.pdf" compare="sim_size" delta="10000"/>
    </test>
</tests>
|
|
79 <help><![CDATA[
|
|
80
|
|
81 **Tool Description**
|
|
82
|
|
83 The tool performs a correlation analysis between genes (in a gene expression wide dataset) and metabolites (in a metabolite wide dataset)
|
|
84 to generate a table of correlation coefficients. P-values for the correlation coefficients are calculated by simulating individual gene and
|
|
85 metabolite datasets 1000 times using a normal distribution with means and standard deviations generated from the data. Sample size reflects
|
|
86 the input datasets. Correlations are calculated on the simulated data. Correlations must be higher/lower than 95% of the randomly simulated
|
|
87 values to be considered significant.
|
|
88
|
|
89 --------------------------------------------------------------------------------
|
|
90
|
|
91 **Input**
|
|
92
|
|
93 (1) **Gene Expression Dataset** A wide formatted dataset containing measurements for each sample (where samples are in columns):
|
|
94
|
|
+-----------+---------+---------+---------+-----+
| UniqueID  | sample1 | sample2 | sample3 | ... |
+===========+=========+=========+=========+=====+
| gene_1    | 1.2     | 3.5     | 2.9     | ... |
+-----------+---------+---------+---------+-----+
| gene_2    | 1.6     | 3.2     | 3.2     | ... |
+-----------+---------+---------+---------+-----+
| gene_3    | 1.4     | 3.0     | 3.1     | ... |
+-----------+---------+---------+---------+-----+
| gene_4    | 1.6     | 2.9     | 3.1     | ... |
+-----------+---------+---------+---------+-----+
| ...       | ...     | ...     | ...     | ... |
+-----------+---------+---------+---------+-----+
|
|
108
|
|
109 (2) **Unique Gene FeatureID** Name of the column in your Gene Expression Wide Dataset that contains unique gene identifiers. **NOTE:** This identifier must be the gene symbol.
|
|
110
|
|
111 (3) **Metabolomic Wide Dataset** A wide formatted metabolomic dataset that contains measurements for each sample (where samples are in columns):
|
|
112
|
|
+------------+---------+---------+---------+-----+
| UniqueID   | sample1 | sample2 | sample3 | ... |
+============+=========+=========+=========+=====+
| met_1      | 10      | 20      | 10      | ... |
+------------+---------+---------+---------+-----+
| met_2      | 5       | 22      | 30      | ... |
+------------+---------+---------+---------+-----+
| met_3      | 30      | 27      | 2       | ... |
+------------+---------+---------+---------+-----+
| met_4      | 32      | 17      | 8       | ... |
+------------+---------+---------+---------+-----+
| ...        | ...     | ...     | ...     | ... |
+------------+---------+---------+---------+-----+
|
|
126
|
|
127 (4) **Unique Metabolite FeatureID** Name of the column in your Metabolite Wide Dataset that contains unique metabolite identifiers.
|
|
128
|
|
129 (5) **Annotation Files** The user can provide (optional) Annotation Files for the Gene Expression and/or Metabolite Datasets to label the results for easier readability. The user must provide the name of the column with the desired feature name (e.g. Gene Symbol).
|
|
130
|
|
+-----------+------------+-------------+-----+
| UniqueID  | ENSEMBL_ID | Gene_Symbol | ... |
+===========+============+=============+=====+
| gene_1    | ENS...     | one         | ... |
+-----------+------------+-------------+-----+
| gene_2    | ENS...     | two         | ... |
+-----------+------------+-------------+-----+
| gene_3    | ENS...     | three       | ... |
+-----------+------------+-------------+-----+
| gene_4    | ENS...     | four        | ... |
+-----------+------------+-------------+-----+
| ...       | ...        | ...         | ... |
+-----------+------------+-------------+-----+
|
|
144
|
|
(6) **Correlation Method** Select the correlation coefficient to be computed from the list. Pearson, Spearman, and Kendall are available.
|
|
146
|
|
147 (7) **P-Value threshold** User specified value that limits the data in the resulting 'Correlation File' to only those correlations with P-values less than this value.
|
|
148
|
|
149 --------------------------------------------------------------------------------
|
|
150
|
|
151 **Output**
|
|
152
|
|
The user will obtain three outputs from the Metabolite - Gene Correlation tool:
|
|
154
|
|
(1) **Correlation File.** A file sorted by the absolute value of the correlation coefficient and including the P-value. The file contains only the correlations whose associated P-value is less than a user-specified threshold (default = 0.05).
|
|
156
|
|
+--------+------------+-------------+-----------+
| Gene   | Metabolite | Correlation | (p-value) |
+========+============+=============+===========+
| gene_1 | met_1      | 0.99        | 0.000     |
+--------+------------+-------------+-----------+
| gene_2 | met_4      | -0.98       | 0.000     |
+--------+------------+-------------+-----------+
| gene_3 | met_5      | 0.96        | 0.001     |
+--------+------------+-------------+-----------+
| gene_4 | met_1      | 0.95        | 0.002     |
+--------+------------+-------------+-----------+
| ...    | ...        | ...         | ...       |
+--------+------------+-------------+-----------+
|
|
170
|
|
171 (2) **Correlation Matrix.** Output correlation matrix.
|
|
172
|
|
+--------+-------+-------+-------+-------+-----+
| Gene   | met_1 | met_2 | met_3 | met_4 | ... |
+========+=======+=======+=======+=======+=====+
| gene_1 | 0.99  | 0.56  | 0.25  | 0.33  | ... |
+--------+-------+-------+-------+-------+-----+
| gene_2 | -0.57 | 0.63  | -0.14 | 0.01  | ... |
+--------+-------+-------+-------+-------+-----+
| gene_3 | 0.62  | 0.96  | 0.20  | 0.32  | ... |
+--------+-------+-------+-------+-------+-----+
| gene_4 | 0.95  | 0.25  | 0.16  | 0.44  | ... |
+--------+-------+-------+-------+-------+-----+
| ...    | ...   | ...   | ...   | ...   | ... |
+--------+-------+-------+-------+-------+-----+
|
|
186
|
|
(3) **Correlation Figure.** Network representation of the top gene-metabolite correlations, ranked by the absolute value of the correlation coefficient. The network contains at most 500 correlations.
|
|
188
|
|
189 ]]>
|
|
190 </help>
|
|
<citations>
    <citation type="bibtex">@article{dejean2013mixomics,
    title={mixOmics: Omics data integration project},
    author={Dejean, Sebastien and Gonzalez, Ignacio and L{\^e} Cao, Kim-Anh and Monget, Pierre and Coquery, J and Yao, F and Liquet, B and Rohart, F},
    journal={R package},
    year={2013}
    }</citation>
    <!-- BibTeX separates authors with " and "; a comma-separated list is
         parsed as a single malformed name. -->
    <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
    author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
    title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
    journal = {BMC Bioinformatics},
    year = {in press}
    }</citation>
    <citation type="bibtex">
    @article{garcia2010paintomics,
    title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
    author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
    journal={Bioinformatics},
    volume={27},
    number={1},
    pages={137--139},
    year={2010},
    publisher={Oxford University Press}
    }</citation>
</citations>
|
|
216 </tool>
|