comparison CorrTable/Corr.xml @ 1:29ec7e3afdd4 draft

Uploaded
author melpetera
date Thu, 01 Aug 2019 11:30:58 -0400
parents b22c453e4cf4
children
comparison
equal deleted inserted replaced
0:b22c453e4cf4 1:29ec7e3afdd4
1 <tool id="corrtable" name="Between-table Correlation" version="0.0.0"> 1 <tool id="corrtable" name="Between-table Correlation" version="1.0.0">
2 <description>Correlation table between two tables and graphic representation </description> 2 <description>Correlation table between two tables and graphic representation </description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.1_4">r-batch</requirement> 4 <requirement type="package" version="1.1_4">r-batch</requirement>
5 <requirement type="package" version="3.0.0">r-ggplot2</requirement> 5 <requirement type="package" version="3.0.0">r-ggplot2</requirement>
6 <requirement type="package" version="1.4.3">r-reshape2</requirement> 6 <requirement type="package" version="1.4.3">r-reshape2</requirement>
31 #end if 31 #end if
32 #end if 32 #end if
33 33
34 reorder_var "$out_section.reorder_var" 34 reorder_var "$out_section.reorder_var"
35 35
36 color_heatmap "${out_section.heatmap_cond.color_heatmap}" 36 plot_choice "$out_section.plot_cond.plot_choice"
37 #if str($out_section.heatmap_cond.color_heatmap) == 'yes' : 37 #if str($out_section.plot_cond.plot_choice) == 'none' :
38 type_classes "${out_section.heatmap_cond.typeclass_cond.type_classes}" 38 tabcorr_out "$tabcorr_out"
39 #if str($out_section.heatmap_cond.typeclass_cond.type_classes) == 'regular' : 39 #else:
40 reg_class_value "${out_section.heatmap_cond.typeclass_cond.reg_class_value}" 40 color_heatmap "${out_section.plot_cond.heatmap_cond.color_heatmap}"
41 #elif str($out_section.heatmap_cond.typeclass_cond.type_classes) == 'irregular' : 41 #if str($out_section.plot_cond.heatmap_cond.color_heatmap) == 'yes' :
42 irreg_class_vect "${out_section.heatmap_cond.typeclass_cond.irreg_class_vect}" 42 type_classes "${out_section.plot_cond.heatmap_cond.typeclass_cond.type_classes}"
43 #end if 43 #if str($out_section.plot_cond.heatmap_cond.typeclass_cond.type_classes) == 'regular' :
44 reg_class_value "${out_section.plot_cond.heatmap_cond.typeclass_cond.reg_class_value}"
45 #elif str($out_section.plot_cond.heatmap_cond.typeclass_cond.type_classes) == 'irregular' :
46 irreg_class_vect "${out_section.plot_cond.heatmap_cond.typeclass_cond.irreg_class_vect}"
47 #end if
48 #end if
49 tabcorr_out "$tabcorr_out"
50 heatmap_out "$heatmap_out"
44 #end if 51 #end if
45 52
46 tabcorr_out "$tabcorr_out"
47 heatmap_out "$heatmap_out"
48 53
49 </command> 54 </command>
50 55
51 <inputs> 56 <inputs>
52 57
116 <when value="no"> 121 <when value="no">
117 </when> 122 </when>
118 </conditional> 123 </conditional>
119 </section> 124 </section>
120 125
121 <section name="out_section" title="Graphical outputs" expanded="False"> 126 <section name="out_section" title="Output options" expanded="False">
122 <param name="reorder_var" label="Reorder variables (using Hierarchical Cluster Analysis)" type="select" display="radio" help=""> 127 <param name="reorder_var" label="Reorder variables (using Hierarchical Cluster Analysis)" type="select" display="radio" help="">
123 <option value="no">No</option> 128 <option value="no">No</option>
124 <option value="yes">Yes</option> 129 <option value="yes">Yes</option>
125 </param> 130 </param>
126 131
127 <conditional name="heatmap_cond"> 132 <conditional name="plot_cond">
128 <param name="color_heatmap" label="Colored correlation table strategy" type="select" display="radio" help="Standard corresponds to a scale with a smooth gradient between three colors: red, white and green (continuous case). Customized creates classes for the correlation coefficients - the scale has discrete values."> 133 <param name="plot_choice" label="PDF output" type="select" help="To determine whether a colored correlation table is plotted.">
129 <option value="no">Standard</option> 134 <option value="auto">Default</option>
130 <option value="yes">Customized</option> 135 <option value="forced">Always plot a colored table</option>
136 <option value="none">No colored table</option>
131 </param> 137 </param>
132 138
133 <when value="yes"> 139 <when value="auto">
134 <conditional name="typeclass_cond"> 140 <conditional name="heatmap_cond">
135 <param name="type_classes" label="Choose the type of classes" type="select" display="radio" help="Regular means the classes have the same size. Irregular means it is possible to choose any intervals." > 141 <param name="color_heatmap" label="Colored correlation table strategy" type="select" display="radio" help="Standard corresponds to a scale with a smooth gradient between three colors: red, white and green (continuous case). Customized creates classes for the correlation coefficients - the scale has discrete values.">
136 <option value="regular">Regular classes</option> 142 <option value="no">Standard</option>
137 <option value="irregular">Irregular classes</option> 143 <option value="yes">Customized</option>
138 </param> 144 </param>
145
146 <when value="yes">
147 <conditional name="typeclass_cond">
148 <param name="type_classes" label="Choose the type of classes" type="select" display="radio" help="Regular means the classes have the same size. Irregular means it is possible to choose any intervals." >
149 <option value="regular">Regular classes</option>
150 <option value="irregular">Irregular classes</option>
151 </param>
152
153 <when value="regular">
154 <param name="reg_class_value" label="Class size" type="float" value="" help="Must be between 0 and 1" />
155 </when>
139 156
140 <when value="regular"> 157 <when value="irregular">
141 <param name="reg_class_value" label="Class size" type="float" value="" help="Must be between 0 and 1" /> 158 <param name="irreg_class_vect" label="Vector with values for classes" type="text" value="" help="The vector must be of the following form: (value1,value2,value3,..). The values must be between -1 and 1 not included. For example: (-0.8,-0.5,-0.4,0,0.4,0.5,0.8)." />
142 </when> 159 </when>
143 160 </conditional>
144 <when value="irregular"> 161 </when>
145 <param name="irreg_class_vect" label="Vector with values for classes" type="text" value="" help="The vector must be of the following form: (value1,value2,value3,..). The values must be between -1 and 1 not included. For example: (-0.8,-0.5,-0.4,0,0.4,0.5,0.8)." /> 162
146 </when> 163 <when value ="no">
147 </conditional> 164 </when>
165
166 </conditional>
148 </when> 167 </when>
149 168
150 <when value ="no"> 169 <when value ="forced">
151 </when> 170 <conditional name="heatmap_cond">
171 <param name="color_heatmap" label="Colored correlation table strategy" type="select" display="radio" help="Standard corresponds to a scale with a smooth gradient between three colors: red, white and green (continuous case). Customized creates classes for the correlation coefficients - the scale has discrete values.">
172 <option value="no">Standard</option>
173 <option value="yes">Customized</option>
174 </param>
175
176 <when value="yes">
177 <conditional name="typeclass_cond">
178 <param name="type_classes" label="Choose the type of classes" type="select" display="radio" help="Regular means the classes have the same size. Irregular means it is possible to choose any intervals." >
179 <option value="regular">Regular classes</option>
180 <option value="irregular">Irregular classes</option>
181 </param>
182
183 <when value="regular">
184 <param name="reg_class_value" label="Class size" type="float" value="" help="Must be between 0 and 1" />
185 </when>
186
187 <when value="irregular">
188 <param name="irreg_class_vect" label="Vector with values for classes" type="text" value="" help="The vector must be of the following form: (value1,value2,value3,..). The values must be between -1 and 1 not included. For example: (-0.8,-0.5,-0.4,0,0.4,0.5,0.8)." />
189 </when>
190 </conditional>
191 </when>
192
193 <when value ="no">
194 </when>
195
196 </conditional>
197 </when>
198
199 <when value ="none">
200 </when>
201
152 202
153 </conditional> 203 </conditional>
204
205
154 </section> 206 </section>
155 207
156 </inputs> 208 </inputs>
157 209
158 <outputs> 210 <outputs>
159 <data name="tabcorr_out" label="CorrTable" format="tabular" /> 211 <data name="tabcorr_out" label="CorrTable" format="tabular" />
160 <data name="heatmap_out" label="CT_plot" format="pdf" /> 212 <data name="heatmap_out" label="CT_plot" format="pdf" >
213 <filter>out_section['plot_cond']['plot_choice'] == 'auto' or out_section['plot_cond']['plot_choice'] == 'forced'</filter>
214 </data>
161 </outputs> 215 </outputs>
216
217
218 <tests>
219 <test>
220 <param name="tab1_in" value="input1_tab1.tabular"/>
221 <param name="tab1_samples" value="column"/>
222 <param name="tab2_in" value="input1_tab2.txt"/>
223 <param name="tab2_samples" value="row"/>
224 <param name="corr_method" value="pearson"/>
225 <param name="test_corr" value="no"/>
226 <param name="filter" value="yes"/>
227 <param name="filters_choice" value="filters_0_thr"/>
228 <param name="threshold" value="0.3"/>
229 <param name="reorder_var" value="no"/>
230 <param name="plot_choice" value="auto"/>
231 <param name="color_heatmap" value="yes"/>
232 <param name="type_classes" value="irregular"/>
233 <param name="irreg_class_vect" value="(-0.8,-0.7,-0.5,-0.4,-0.3,-0.2,0,0.2,0.3,0.4,0.5,0.7,0.8)"/>
234 <output name="tabcorr_out" file="output1_CorrTable.tabular"/>
235 <output name="heatmap_out" file="output1_CT_plot.pdf"/>
236 </test>
237 <test>
238 <param name="tab1_in" value="input2_dataMatrix_500.txt"/>
239 <param name="tab1_samples" value="column"/>
240 <param name="tab2_in" value="input2_dataMatrix_500.txt"/>
241 <param name="tab2_samples" value="column"/>
242 <param name="corr_method" value="pearson"/>
243 <param name="test_corr" value="no"/>
244 <param name="filter" value="yes"/>
245 <param name="filters_choice" value="filters_0_thr"/>
246 <param name="threshold" value="0.7"/>
247 <param name="reorder_var" value="no"/>
248 <param name="plot_choice" value="auto"/>
249 <param name="color_heatmap" value="no"/>
250 <output name="tabcorr_out" file="output2_CorrTable.tabular"/>
251 </test>
252 </tests>
253
162 254
163 <help> 255 <help>
164 256
165 .. class:: infomark 257 .. class:: infomark
166 258
167 **Author:** 259 **Author:**
168 Ophelie Barbet for original code (PFEM - INRA) 260 Ophelie Barbet for original code (PFEM - INRA)
169 Maintainer: Melanie Petera (PFEM - INRA - MetaboHUB) 261 **Maintainer:** Melanie Petera (PFEM - INRA - MetaboHUB)
170 262
171 --------------------------------------------------- 263 ---------------------------------------------------
172 264
173 ========================= 265 =========================
174 Between-table Correlation 266 Between-table Correlation
202 ---------- 294 ----------
203 295
204 Positions of samples in table 1 and table 2 296 Positions of samples in table 1 and table 2
205 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 297 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
206 | Essential to correctly calculate the correlations. 298 | Essential to correctly calculate the correlations.
207 |
208 299
209 Method for calculating the correlation coefficients 300 Method for calculating the correlation coefficients
210 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 301 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
211 | - 'Pearson': Measures the intensity of the linear association between two continuous variables. 302 | - 'Pearson': Measures the intensity of the linear association between two continuous variables.
212 | - The 'Spearman' and 'Kendall' methods are explained in the R documentation of the 'cor' function as follows: " Kendall's tau or Spearman's rho statistic is used to estimate a rank-based measure of association. These are more robust and have been recommended if the data do not necessarily come from a bivariate normal distribution.". 303 | - The 'Spearman' and 'Kendall' methods are explained in the R documentation of the 'cor' function as follows: " Kendall's tau or Spearman's rho statistic is used to estimate a rank-based measure of association. These are more robust and have been recommended if the data do not necessarily come from a bivariate normal distribution.".
213 | 304
214
215 Significance test for the correlation coefficients 305 Significance test for the correlation coefficients
216 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 306 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
217 | This test is performed on each correlation coefficient, with the following hypotheses: 307 | This test is performed on each correlation coefficient, with the following hypotheses:
218 | H0: The correlation coefficient is not significantly different from zero. 308 | H0: The correlation coefficient is not significantly different from zero.
219 | H1: The correlation coefficient is significantly different from zero. 309 | H1: The correlation coefficient is significantly different from zero.
220 | 310 |
221 | Coefficients whose null hypothesis (H0) is not rejected are replaced by zeros in the correlation table. 311 | Coefficients whose null hypothesis (H0) is not rejected are replaced by zeros in the correlation table.
222 |
223 312
224 | **Method for multiple testing correction (only if significance test is 'Yes'):** 313 | **Method for multiple testing correction (only if significance test is 'Yes'):**
225 | The 7 methods implemented in the 'p.adjust' R function are available and documented as follows: 314 | The 7 methods implemented in the 'p.adjust' R function are available and documented as follows:
226 | "The adjustment methods include the Bonferroni correction ("bonferroni") in which the p-values are multiplied by the number of comparisons. Less conservative corrections are also included by Holm (1979) ("holm"), Hochberg (1988) ("hochberg"), Hommel (1988) ("hommel"), Benjamini and Hochberg (1995) ("BH" or its alias "fdr"), and Benjamini and Yekutieli (2001) ("BY"), respectively. A pass-through option ("none") is also included. The set of methods are contained in the p.adjust.methods vector for the benefit of methods that need to have the method as an option and pass it on to p.adjust. The first four methods are designed to give strong control of the family-wise error rate. There seems no reason to use the unmodified Bonferroni correction because it is dominated by Holm's method, which is also valid under arbitrary assumptions. Hochberg's and Hommel's methods are valid when the hypothesis tests are independent or when they are non-negatively associated (Sarkar, 1998; Sarkar and Chang, 1997). Hommel's method is more powerful than Hochberg's, but the difference is usually small and the Hochberg p-values are faster to compute. The "BH" (aka "fdr") and "BY" method of Benjamini, Hochberg, and Yekutieli control the false discovery rate, the expected proportion of false discoveries amongst the rejected hypotheses. The false discovery rate is a less stringent condition than the family-wise error rate, so these methods are more powerfil than the others." 315 | "The adjustment methods include the Bonferroni correction ("bonferroni") in which the p-values are multiplied by the number of comparisons. Less conservative corrections are also included by Holm (1979) ("holm"), Hochberg (1988) ("hochberg"), Hommel (1988) ("hommel"), Benjamini and Hochberg (1995) ("BH" or its alias "fdr"), and Benjamini and Yekutieli (2001) ("BY"), respectively. A pass-through option ("none") is also included. 
The set of methods are contained in the p.adjust.methods vector for the benefit of methods that need to have the method as an option and pass it on to p.adjust. The first four methods are designed to give strong control of the family-wise error rate. There seems no reason to use the unmodified Bonferroni correction because it is dominated by Holm's method, which is also valid under arbitrary assumptions. Hochberg's and Hommel's methods are valid when the hypothesis tests are independent or when they are non-negatively associated (Sarkar, 1998; Sarkar and Chang, 1997). Hommel's method is more powerful than Hochberg's, but the difference is usually small and the Hochberg p-values are faster to compute. The "BH" (aka "fdr") and "BY" method of Benjamini, Hochberg, and Yekutieli control the false discovery rate, the expected proportion of false discoveries amongst the rejected hypotheses. The false discovery rate is a less stringent condition than the family-wise error rate, so these methods are more powerful than the others."
227 | 316 |
231 | 320 |
232 321
233 Filter the correlation table 322 Filter the correlation table
234 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 323 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
235 | Reduces the correlation table size by keeping only variables considered relevant. 324 | Reduces the correlation table size by keeping only variables considered relevant.
236 |
237 325
238 | **Choose the filters to apply (only if filter is 'Yes'):** 326 | **Choose the filters to apply (only if filter is 'Yes'):**
239 | - 'Only zero filter': Remove variables with all their correlation coefficients equal to zero. 327 | - 'Only zero filter': Remove variables with all their correlation coefficients equal to zero.
240 | - 'Threshold filter': Remove variables with all their correlation coefficients (in absolute value) strictly below a threshold. 328 | - 'Threshold filter': Remove variables with all their correlation coefficients (in absolute value) strictly below a threshold.
241 329
242 | *Choose a threshold (only if threshold filter is used):* A value between 0 and 1. 330 | *Choose a threshold (only if threshold filter is used):* A value between 0 and 1.
243 | 331 |
244 332
245 Reorder variables using Hierarchical Cluster Analysis (HCA) 333 Output options
246 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 334 ^^^^^^^^^^^^^^
247 | Allows the most linked variables to be close in the correlation table. 335 | Sets some parameters for the correlation table output and the pdf file.
248 | An HCA is performed on each input table, with: 336
249 | - 1 - correlation coefficient, as distance 337 | **Reorder variables using Hierarchical Cluster Analysis (HCA):**
250 | - Ward as aggregation method. 338 | Allows the most linked variables to be close in the correlation table.
251 | 339 | An HCA is performed on each input table, with:
252 340 | - 1 - correlation coefficient, as distance
253 341 | - Ward as aggregation method.
342 |
343
344 | **PDF output:** To determine whether a colored correlation table is plotted.
345 | - 'Default': generates a pdf file with a colored correlation table if the filtered table has no dimension above 1000 (number of lines or columns).
346 | - 'Always plot a colored table': used when you are not afraid of a huge colored correlation table; to be used wisely.
347 | - 'No colored table': the module will generate the correlation table in tabular format only (no pdf file).
348 |
349
254 Colored correlation table strategy 350 Colored correlation table strategy
255 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 351 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
256 | Creates a colored correlation table. Variables of table 1 and variables of table 2 are related using colored rectangles. 352 | *Only available when* **PDF output** *is set to 'Default' or 'Always plot a colored table'.*
257 | About the colors, the negative correlations are in red, more or less intense according to their position between -1 and 0, and the positive correlations in green, more or less intense according to their position between 0 and 1. The coefficients equal to 0 are in white. 353 | Creates a colored correlation table. Variables of table 1 and variables of table 2 are related using colored rectangles.
258 | - 'Standard': the graphical representation has a scale with a smooth gradient between three colors: red, white and green. 354 | About the colors, the negative correlations are in red, more or less intense according to their position between -1 and 0, and the positive correlations in green, more or less intense according to their position between 0 and 1. The coefficients equal to 0 are in white.
259 | - 'Customized': the colored correlation table has coefficient classes. It is possible to create regular or irregular classes. The scale is discrete. 355 | - 'Standard': the graphical representation has a scale with a smooth gradient between three colors: red, white and green.
260 | 356 | - 'Customized': the colored correlation table has coefficient classes. It is possible to create regular or irregular classes. The scale is discrete.
261 357
262 | **Choose the type of classes (only if colored correlation table strategy is 'Customized'):** 358 | **Choose the type of classes (only if colored correlation table strategy is 'Customized'):**
359 |
263 360
264 | - 'Regular': classes are all (or almost) the same size. 361 | - 'Regular': classes are all (or almost) the same size.
265 | To realize these intervals, we start from 1 to go to 0 by taking a step of the size chosen by the user, and we make the symmetry for -1 towards 0. If the last step does not fall on the 0 value, we create a class between this last value and 0, smaller in size than the others. It is important to specify that 0 represents a class on its own, which is assigned the color white for the heatmap. 362 | To realize these intervals, we start from 1 to go to 0 by taking a step of the size chosen by the user, and we make the symmetry for -1 towards 0. If the last step does not fall on the 0 value, we create a class between this last value and 0, smaller in size than the others. It is important to specify that 0 represents a class on its own, which is assigned the color white for the heatmap.
266 363
267 | *Size of classes (if regular classes):* A value between 0 and 1. 364 | *Size of classes (if regular classes):* A value between 0 and 1.
268 365
269 | Example: if the size is 0.4, classes are [-1;-0.6], ]-0.6;-0.2], ]-0.2;0[, 0, ]0;0.2], ]0.2;0.6] and ]0.6;1]. 366 | Example: if the size is 0.4, classes are [-1;-0.6], ]-0.6;-0.2], ]-0.2;0[, 0, ]0;0.2], ]0.2;0.6] and ]0.6;1].
270 |
271 367
272 | - 'Irregular': classes have variable lengths. 368 | - 'Irregular': classes have variable lengths.
273 | It is possible to do as many classes as you want, and of any size. There is not necessarily symmetry between -1 and 0, and 0 and 1. You can choose to have a white class with only 0, or an interval which contains the value 0. 369 | It is possible to do as many classes as you want, and of any size. There is not necessarily symmetry between -1 and 0, and 0 and 1. You can choose to have a white class with only 0, or an interval which contains the value 0.
274 370
275 | *Vector with values for classes (if irregular classes):* The values in the vector must be between -1 and 1 excluded, and in ascending order. It must have this form (value1,value2,...). If the vector contains 0, then this value becomes a class on its own, otherwise the white class is the one which contains 0. 371 | *Vector with values for classes (if irregular classes):* The values in the vector must be between -1 and 1 excluded, and in ascending order. It must have this form (value1,value2,...). If the vector contains 0, then this value becomes a class on its own, otherwise the white class is the one which contains 0.
276 372
277 | Example: if the vector is (-0.8,-0.5,-0.4,0,0.4,0.5,0.8), the classes are [-1;-0.8], ]-0.8;-0.5], ]-0.5;-0.4], ]-0.4;0[, 0, ]0;0.4], ]0.4;0.5], ]0.5;0.8] and ]0.8;1]. 373 | Example: if the vector is (-0.8,-0.5,-0.4,0,0.4,0.5,0.8), the classes are [-1;-0.8], ]-0.8;-0.5], ]-0.5;-0.4], ]-0.4;0[, 0, ]0;0.4], ]0.4;0.5], ]0.5;0.8] and ]0.8;1].
278 | 374 |
279 375
280 376
281 ------------ 377 ------------
282 Output files 378 Output files
283 ------------ 379 ------------
284 380
289 | 385 |
290 386
291 Heatmap (colored correlation table) 387 Heatmap (colored correlation table)
292 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 388 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
293 | Pdf output 389 | Pdf output
294 | Colored representation of the correlation table. The coefficients are replaced by colors. A coefficient close to -1 is red, close to 0 white, and close to 1 in green. 390 | Colored representation of the correlation table. The coefficients are replaced by colors. A coefficient close to -1 is red, close to 0 white, and close to 1 green.
295 | 391 |
296 392
297 393
298 </help> 394 </help>
299 395