comparison CorrTable/Corr_Script_samples_row.R @ 1:29ec7e3afdd4 draft

Uploaded
author melpetera
date Thu, 01 Aug 2019 11:30:58 -0400
parents b22c453e4cf4
children 2173ad5e7750
comparison
equal deleted inserted replaced
0:b22c453e4cf4 1:29ec7e3afdd4
1 ################################################################################################# 1 #################################################################################################
2 # CORRELATION TABLE # 2 # CORRELATION TABLE #
3 # # 3 # #
4 # # 4 # #
5 # Input : 2 tables with common samples # 5 # Input : 2 tables with shared samples #
6 # Output : Correlation table ; Heatmap (pdf) # 6 # Output : Correlation table ; Heatmap (pdf) #
7 # # 7 # #
8 # Dependencies : Libraries "ggplot2" and "reshape2" # 8 # Dependencies : Libraries "ggplot2" and "reshape2" #
9 # # 9 # #
10 ################################################################################################# 10 #################################################################################################
11 11
12 12
13 # Parameters (for dev) 13 # Parameters (for dev)
14 if(FALSE){ 14 if(FALSE){
15 15
16 rm(list = ls()) 16 tab1.name <- "test/ressources/inputs/CT/CT2_DM.tabular"
17 setwd(dir = "Y:/Developpement") 17 tab2.name <- "test/ressources/inputs/CT/CT2_base_Diapason_14ClinCES_PRIN.txt"
18
19 tab1.name <- "Test/Ressources/Inputs/CT2_DM.tabular"
20 tab2.name <- "Test/Ressources/Inputs/CT2_base_Diapason_14ClinCES_PRIN.txt"
21 param1.samples <- "column" 18 param1.samples <- "column"
22 param2.samples <- "row" 19 param2.samples <- "row"
23 corr.method <- "pearson" 20 corr.method <- "pearson"
24 test.corr <- "yes" 21 test.corr <- "yes"
25 alpha <- 0.05 22 alpha <- 0.05
26 multi.name <- "none" 23 multi.name <- "none"
27 filter <- "yes" 24 filter <- "yes"
28 filters.choice <- "filters_0_thr" 25 filters.choice <- "filters_0_thr"
29 threshold <- 0.2 26 threshold <- 0.2
30 reorder.var <- "yes" 27 reorder.var <- "yes"
28 plot.choice <- "auto"
31 color.heatmap <- "yes" 29 color.heatmap <- "yes"
32 type.classes <-"irregular" 30 type.classes <-"irregular"
33 reg.value <- 1/3 31 reg.value <- 1/3
34 irreg.vect <- c(-0.3, -0.2, -0.1, 0, 0.3, 0.4) 32 irreg.vect <- c(-0.3, -0.2, -0.1, 0, 0.3, 0.4)
35 output1 <- "Correlation_table.txt" 33 output1 <- "Correlation_table.txt"
38 } 36 }
39 37
40 38
41 39
42 correlation.tab <- function(tab1.name, tab2.name, param1.samples, param2.samples, corr.method, test.corr, alpha, 40 correlation.tab <- function(tab1.name, tab2.name, param1.samples, param2.samples, corr.method, test.corr, alpha,
43 multi.name, filter, filters.choice, threshold, reorder.var, color.heatmap, type.classes, 41 multi.name, filter, filters.choice, threshold, reorder.var, plot.choice, color.heatmap,
44 reg.value, irreg.vect, output1, output2){ 42 type.classes, reg.value, irreg.vect, output1, output2){
45 43
46 # This function allows to visualize the correlation between two tables 44 # This function allows to visualize the correlation between two tables
47 # 45 #
48 # Parameters: 46 # Parameters:
49 # - tab1.name: table 1 file's access 47 # - tab1.name: table 1 file's access
57 # - filter ("yes", "no"): use filter.0 or/and filter.threshold 55 # - filter ("yes", "no"): use filter.0 or/and filter.threshold
58 # - filters.choice ("filter_0" or "filters_0_thr"): zero filter removes variables with all their correlation coefficients = 0 56 # - filters.choice ("filter_0" or "filters_0_thr"): zero filter removes variables with all their correlation coefficients = 0
59 # and threshold filter remove variables with all their correlation coefficients in abs < threshold 57 # and threshold filter remove variables with all their correlation coefficients in abs < threshold
60 # - threshold (value between 0 and 1): threshold for filter threshold 58 # - threshold (value between 0 and 1): threshold for filter threshold
61 # - reorder.var ("yes" or "no"): reorder variables in the correlation table thanks to the HCA 59 # - reorder.var ("yes" or "no"): reorder variables in the correlation table thanks to the HCA
60 # - plot.choice ("auto", "forced" or "none"): determine whether a heatmap is plotted
62 # - color.heatmap ("yes" or "no"): color the heatmap with classes defined by the user 61 # - color.heatmap ("yes" or "no"): color the heatmap with classes defined by the user
63 # - type.classes ("regular" or "irregular"): choose to color the heatmap with regular or irregular classes 62 # - type.classes ("regular" or "irregular"): choose to color the heatmap with regular or irregular classes
64 # - reg.value (value between 0 and 1): value for regular classes 63 # - reg.value (value between 0 and 1): value for regular classes
65 # - irreg.vect (vector with values between -1 and 1): vector which indicates values for intervals (irregular classes) 64 # - irreg.vect (vector with values between -1 and 1): vector which indicates values for intervals (irregular classes)
66 # - output1: correlation table file's access 65 # - output1: correlation table file's access
83 82
84 # Sorting tables in alphabetical order of the samples 83 # Sorting tables in alphabetical order of the samples
85 tab1 <- tab1[order(rownames(tab1)),] 84 tab1 <- tab1[order(rownames(tab1)),]
86 tab2 <- tab2[order(rownames(tab2)),] 85 tab2 <- tab2[order(rownames(tab2)),]
87 86
88 87
88 # Checks ---------------------------------------------------------------------------------------------
89
89 # Check if the 2 datasets match regarding samples identifiers 90 # Check if the 2 datasets match regarding samples identifiers
90 # Adapt from functions "check.err" and "match2", RcheckLibrary.R 91 # Adapt from functions "check.err" and "match2", RcheckLibrary.R
91 92
92 err.stock <- NULL 93 err.stock <- NULL
93 94
126 if(length(err.stock)!=0){ 127 if(length(err.stock)!=0){
127 stop("\n- - - - - - - - -\n",err.stock,"\n- - - - - - - - -\n\n") 128 stop("\n- - - - - - - - -\n",err.stock,"\n- - - - - - - - -\n\n")
128 } 129 }
129 130
130 131
132 # Check whether tab1=tab2
133
134 err.msg <- NULL
135
136 if((ncol(tab1)==ncol(tab2))&&(sum(tab1!=tab2,na.rm=TRUE)==0)){
137 autocor <- TRUE
138 err.msg <- c(err.msg, "\nYou chose the same table for the two dataset inputs. \nTo allow filtering options,",
139 "we will turn the diagonal to 0 in the correlation matrix during the filtering process.\n")
140 }else{
141 autocor <- FALSE
142 }
143
144
131 # Check qualitative variables in each input tables 145 # Check qualitative variables in each input tables
132 err.msg <- NULL
133 146
134 var1.quali <- vector() 147 var1.quali <- vector()
135 var2.quali <- vector() 148 var2.quali <- vector()
136 149
137 for (i in 1:dim(tab1)[2]){ 150 for (i in 1:dim(tab1)[2]){
165 " ",paste(colnames(tab2)[var2.quali[1:3]],collapse="\n "),"\n") 178 " ",paste(colnames(tab2)[var2.quali[1:3]],collapse="\n "),"\n")
166 } 179 }
167 } 180 }
168 181
169 if(length(var1.quali) != 0){ 182 if(length(var1.quali) != 0){
170 tab1 <- tab1[,-var1.quali] 183 tab1 <- tab1[,-var1.quali,drop=FALSE]
171 } 184 }
172 if(length(var2.quali) != 0){ 185 if(length(var2.quali) != 0){
173 tab2 <- tab2[,-var2.quali] 186 tab2 <- tab2[,-var2.quali,drop=FALSE]
174 } 187 }
188
189
190 # Check whether there are constant variables
191
192 var1.cons <- vector()
193 var2.cons <- vector()
194
195 for (i in 1:dim(tab1)[2]){
196 if(length(levels(as.factor(tab1[,i])))==1){ var1.cons <- c(var1.cons,i) }
197 }
198
199 for (j in 1:dim(tab2)[2]){
200 if(length(levels(as.factor(tab2[,j])))==1){ var2.cons <- c(var2.cons, j) }
201 }
202
203 if (length(var1.cons) != 0 | length(var2.cons) != 0){
204 err.msg <- c(err.msg, "\nThere are constant variables in your input tables which have been removed to compute the correlation table.\n\n")
205
206 if(length(var1.cons) != 0 && length(var1.cons) < 4){
207 err.msg <- c(err.msg, "In table 1, the following constant variables have been removed:\n",
208 " ",paste(colnames(tab1)[var1.cons],collapse="\n "),"\n")
209 } else if(length(var1.cons) != 0 && length(var1.cons) > 3){
210 err.msg <- c(err.msg, "For example, in table 1, the following constant variables have been removed:\n",
211 " ",paste(colnames(tab1)[var1.cons[1:3]],collapse="\n "),"\n")
212 }
213
214 if(length(var2.cons) != 0 && length(var2.cons) < 4){
215 err.msg <- c(err.msg, "In table 2, the following constant variables have been removed:\n",
216 " ",paste(colnames(tab2)[var2.cons],collapse="\n "),"\n")
217 } else if(length(var2.cons) != 0 && length(var2.cons) > 3){
218 err.msg <- c(err.msg, "For example, in table 2, the following constant variables have been removed:\n",
219 " ",paste(colnames(tab2)[var2.cons[1:3]],collapse="\n "),"\n")
220 }
221 }
222
223 if(length(var1.cons) != 0){
224 tab1 <- tab1[,-var1.cons,drop=FALSE]
225 }
226 if(length(var2.cons) != 0){
227 tab2 <- tab2[,-var2.cons,drop=FALSE]
228 }
229
230
231 # Print info message
175 232
176 if(length(err.msg) != 0){ 233 if(length(err.msg) != 0){
177 cat("\n- - - - - - - - -\n",err.msg,"\n- - - - - - - - -\n\n") 234 cat("\n- - - - - - - - -\n",err.msg,"\n- - - - - - - - -\n\n")
178 } 235 }
179 236
237 rm(err.stock,var1.quali,var2.quali,var1.cons,var2.cons,err.msg)
238
239
180 # Correlation table --------------------------------------------------------------------------------- 240 # Correlation table ---------------------------------------------------------------------------------
181 241
182 tab.corr <- matrix(nrow = dim(tab2)[2], ncol = dim(tab1)[2]) 242 tab.corr <- matrix(nrow = dim(tab2)[2], ncol = dim(tab1)[2])
183 for (i in 1:dim(tab2)[2]){ 243 for (i in 1:dim(tab2)[2]){
184 for (j in 1:dim(tab1)[2]){ 244 for (j in 1:dim(tab1)[2]){
186 } 246 }
187 } 247 }
188 248
189 colnames(tab.corr) <- colnames(tab1) 249 colnames(tab.corr) <- colnames(tab1)
190 rownames(tab.corr) <- colnames(tab2) 250 rownames(tab.corr) <- colnames(tab2)
191
192 251
193 252
194 # Significance of correlation test ------------------------------------------------------------------ 253 # Significance of correlation test ------------------------------------------------------------------
195 254
196 if (test.corr == "yes"){ 255 if (test.corr == "yes"){
216 275
217 276
218 # Filter settings ------------------------------------------------------------------------------------ 277 # Filter settings ------------------------------------------------------------------------------------
219 278
220 if (filter == "yes"){ 279 if (filter == "yes"){
280
281 # Turn diagonal from 1 to 0 if autocorrelation
282 if(autocor){
283 for(i in 1:(ncol(tab.corr))){ tab.corr[i,i] <- 0 }
284 }
221 285
222 # Remove variables with all their correlation coefficients = 0 : 286 # Remove variables with all their correlation coefficients = 0 :
223 if (filters.choice == "filter_0"){ 287 if (filters.choice == "filter_0"){
224 threshold <- 0 288 threshold <- 0
225 } 289 }
230 var2.thres <- c(var2.thres, i) 294 var2.thres <- c(var2.thres, i)
231 } 295 }
232 } 296 }
233 297
234 if (length(var2.thres) != 0){ 298 if (length(var2.thres) != 0){
235 tab.corr <- tab.corr[-var2.thres,] 299 tab.corr <- tab.corr[-var2.thres,,drop=FALSE]
236 tab2 <- tab2[, -var2.thres] 300 tab2 <- tab2[, -var2.thres,drop=FALSE]
237 } 301 }
238 302
239 var1.thres <- vector() 303 var1.thres <- vector()
240 for (i in 1:dim(tab.corr)[2]){ 304 for (i in 1:dim(tab.corr)[2]){
241 if (length(which(abs(tab.corr[,i]) <= threshold)) == dim(tab.corr)[1]){ 305 if (length(which(abs(tab.corr[,i]) <= threshold)) == dim(tab.corr)[1]){
242 var1.thres <- c(var1.thres, i) 306 var1.thres <- c(var1.thres, i)
243 } 307 }
244 } 308 }
245 309
246 if (length(var1.thres) != 0){ 310 if (length(var1.thres) != 0){
247 tab.corr <- tab.corr[,-var1.thres] 311 tab.corr <- tab.corr[,-var1.thres,drop=FALSE]
248 tab1 <- tab1[,-var1.thres] 312 tab1 <- tab1[,-var1.thres,drop=FALSE]
249 } 313 }
314
315 # Turn diagonal from 0 back to 1 if autocorrelation
316 if(autocor){
317 for(i in 1:(ncol(tab.corr))){ tab.corr[i,i] <- 1 }
318 }
250 319
251 } 320 }
252 321
253 322
254 # Reorder variables in the correlation table (with the HCA) ------------------------------------------ 323 # Reorder variables in the correlation table (with the HCA) ------------------------------------------
256 325
257 cormat.tab2 <- cor(tab2, method = corr.method, use = "pairwise.complete.obs") 326 cormat.tab2 <- cor(tab2, method = corr.method, use = "pairwise.complete.obs")
258 dist.tab2 <- as.dist(1 - cormat.tab2) 327 dist.tab2 <- as.dist(1 - cormat.tab2)
259 hc.tab2 <- hclust(dist.tab2, method = "ward.D2") 328 hc.tab2 <- hclust(dist.tab2, method = "ward.D2")
260 tab.corr <- tab.corr[hc.tab2$order,] 329 tab.corr <- tab.corr[hc.tab2$order,]
330 rm(cormat.tab2)
261 331
262 cormat.tab1 <- cor(tab1, method = corr.method, use = "pairwise.complete.obs") 332 cormat.tab1 <- cor(tab1, method = corr.method, use = "pairwise.complete.obs")
263 dist.tab1 <- as.dist(1 - cormat.tab1) 333 dist.tab1 <- as.dist(1 - cormat.tab1)
264 hc.tab1 <- hclust(dist.tab1, method = "ward.D2") 334 hc.tab1 <- hclust(dist.tab1, method = "ward.D2")
265 tab.corr <- tab.corr[,hc.tab1$order] 335 tab.corr <- tab.corr[,hc.tab1$order]
266 336 rm(cormat.tab1)
267 } 337
268 338 }
269 339
270 340
271 # Output 1 : Correlation table ----------------------------------------------------------------------- 341 # Output 1 : Correlation table -----------------------------------------------------------------------
342
272 343
273 # Export correlation table 344 # Export correlation table
274 write.table(x = data.frame(name = rownames(tab.corr), tab.corr), file = output1, sep = "\t", quote = FALSE, row.names = FALSE) 345 write.table(x = data.frame(name = rownames(tab.corr), tab.corr), file = output1, sep = "\t", quote = FALSE, row.names = FALSE)
275 346
276 347
277 348
278 # Create the heatmap --------------------------------------------------------------------------------- 349 # Create the heatmap ---------------------------------------------------------------------------------
279 350
351 if(plot.choice != "none"){
352
280 # A message if no variable kept 353 # A message if no variable kept
281 if(length(tab.corr)==0){ 354 if(length(tab.corr)==0){
282 pdf(output2) 355 pdf(output2)
283 plot.new() 356 plot.new()
284 legend("center","Filtering leads to no remaining correlation coefficient.") 357 legend("center","Filtering leads to no remaining correlation coefficient.")
358 dev.off()
359 } else {
360
361 # A message if more than 1000 variable in auto mode
362 if((plot.choice=="auto")&&(max(dim(tab.corr))>1000)){
363 pdf(output2)
364 war.msg <- paste0("In 'default' mode, the colored table is not provided when\none of the tables contains more than ",
365 "a thousand\nvariables after the filter step.\n\nOne of your table still contains ",max(dim(tab.corr))," variables.\n",
366 "Please consider more filtering, or use the 'Always plot a\ncolored table' mode to obtain your colored table.")
367 plot.new()
368 legend("center",war.msg,adj=c(0.05,0.075))
285 dev.off() 369 dev.off()
286 } else { 370 } else {
287 371
288 372
289 library(ggplot2) 373 library(ggplot2)
397 481
398 482
399 ggsave(output2, device = "pdf", width = 10+0.075*dim(tab.corr)[2], height = 5+0.075*dim(tab.corr)[1], limitsize = FALSE) 483 ggsave(output2, device = "pdf", width = 10+0.075*dim(tab.corr)[2], height = 5+0.075*dim(tab.corr)[1], limitsize = FALSE)
400 484
401 485
402 } # End if(length(tab.corr)==0)else 486 } # End of if((plot.choice=="auto")&&(max(dim(tab.corr))>1000))else
487 } # End of if(length(tab.corr)==0)else
488 } # End of if(plot.choice != "auto")
489
403 490
404 } # End of correlation.tab 491 } # End of correlation.tab
405 492
406 493
407 # Function call 494 # Function call