diff CorrTable/Corr_Script_samples_row.R @ 1:29ec7e3afdd4 draft

Uploaded
author melpetera
date Thu, 01 Aug 2019 11:30:58 -0400
parents b22c453e4cf4
children 2173ad5e7750
line wrap: on
line diff
--- a/CorrTable/Corr_Script_samples_row.R	Thu Oct 11 05:35:55 2018 -0400
+++ b/CorrTable/Corr_Script_samples_row.R	Thu Aug 01 11:30:58 2019 -0400
@@ -2,7 +2,7 @@
   # CORRELATION TABLE                                                                             #
   #                                                                                               #
   #                                                                                               #
-  # Input : 2 tables with common samples                                                          #
+  # Input : 2 tables with shared samples                                                          #
   # Output : Correlation table ; Heatmap (pdf)                                                    #
   #                                                                                               #
   # Dependencies : Libraries "ggplot2" and "reshape2"                                             #
@@ -13,11 +13,8 @@
   # Parameters (for dev)
   if(FALSE){
     
-    rm(list = ls())
-    setwd(dir = "Y:/Developpement")
-
-    tab1.name <- "Test/Ressources/Inputs/CT2_DM.tabular"
-    tab2.name <- "Test/Ressources/Inputs/CT2_base_Diapason_14ClinCES_PRIN.txt"
+    tab1.name <- "test/ressources/inputs/CT/CT2_DM.tabular"
+    tab2.name <- "test/ressources/inputs/CT/CT2_base_Diapason_14ClinCES_PRIN.txt"
     param1.samples <- "column"
     param2.samples <- "row"
     corr.method <- "pearson"
@@ -28,6 +25,7 @@
     filters.choice <- "filters_0_thr"
     threshold <- 0.2
     reorder.var <- "yes"
+	plot.choice <- "auto"
     color.heatmap <- "yes"
     type.classes <-"irregular"
     reg.value <- 1/3
@@ -40,8 +38,8 @@
   
   
   correlation.tab <- function(tab1.name, tab2.name, param1.samples, param2.samples, corr.method, test.corr, alpha, 
-                              multi.name, filter, filters.choice, threshold, reorder.var, color.heatmap, type.classes, 
-                              reg.value, irreg.vect, output1, output2){
+                              multi.name, filter, filters.choice, threshold, reorder.var, plot.choice, color.heatmap, 
+                              type.classes, reg.value, irreg.vect, output1, output2){
 
     # This function allows to visualize the correlation between two tables
     # 
@@ -59,6 +57,7 @@
     #   and threshold filter remove variables with all their correlation coefficients in abs < threshold
     # - threshold (value between 0 and 1): threshold for filter threshold
     # - reorder.var ("yes" or "no"): reorder variables in the correlation table thanks to the HCA
+	# - plot.choice ("auto", "forced" or "none"): determine whether a heatmap is plotted
     # - color.heatmap ("yes" or "no"): color the heatmap with classes defined by the user
     # - type.classes ("regular" or "irregular"): choose to color the heatmap with regular or irregular classes
     # - reg.value (value between 0 and 1): value for regular classes
@@ -85,7 +84,9 @@
   tab1 <- tab1[order(rownames(tab1)),]
   tab2 <- tab2[order(rownames(tab2)),]
   
-  
+
+  # Checks ---------------------------------------------------------------------------------------------
+
   # Check if the 2 datasets match regarding samples identifiers
   # Adapt from functions "check.err" and "match2", RcheckLibrary.R
     
@@ -128,8 +129,20 @@
   }
   
     
+  # Check whether tab1=tab2
+  
+  err.msg <- NULL
+  
+  if((ncol(tab1)==ncol(tab2))&&(sum(tab1!=tab2,na.rm=TRUE)==0)){
+    autocor <- TRUE
+	err.msg <- c(err.msg, "\nYou chose the same table for the two dataset inputs. \nTo allow filtering options,",
+	             "we will turn the diagonal to 0 in the correlation matrix during the filtering process.\n")
+  }else{
+    autocor <- FALSE
+  }
+  
+  
   # Check qualitative variables in each input tables
-  err.msg <- NULL
   
   var1.quali <- vector()
   var2.quali <- vector()
@@ -167,16 +180,63 @@
   }
   
   if(length(var1.quali) != 0){
-    tab1 <- tab1[,-var1.quali]
+    tab1 <- tab1[,-var1.quali,drop=FALSE]
   }
   if(length(var2.quali) != 0){
-    tab2 <- tab2[,-var2.quali]
+    tab2 <- tab2[,-var2.quali,drop=FALSE]
+  }
+  
+  
+  # Check whether there are constant variables
+  
+  var1.cons <- vector()
+  var2.cons <- vector()
+  
+  for (i in 1:dim(tab1)[2]){
+    if(length(levels(as.factor(tab1[,i])))==1){ var1.cons <- c(var1.cons,i) }
   }
   
+  for (j in 1:dim(tab2)[2]){
+    if(length(levels(as.factor(tab2[,j])))==1){ var2.cons <- c(var2.cons, j) }
+  } 
+  
+  if (length(var1.cons) != 0 | length(var2.cons) != 0){
+    err.msg <- c(err.msg, "\nThere are constant variables in your input tables which have been removed to compute the correlation table.\n\n")
+    
+    if(length(var1.cons) != 0 && length(var1.cons) < 4){
+      err.msg <- c(err.msg, "In table 1, the following constant variables have been removed:\n",
+                   "  ",paste(colnames(tab1)[var1.cons],collapse="\n    "),"\n")
+    } else if(length(var1.cons) != 0 && length(var1.cons) > 3){
+      err.msg <- c(err.msg, "For example, in table 1, the following constant variables have been removed:\n",
+                   "  ",paste(colnames(tab1)[var1.cons[1:3]],collapse="\n    "),"\n")
+    }
+    
+    if(length(var2.cons) != 0 && length(var2.cons) < 4){
+      err.msg <- c(err.msg, "In table 2, the following constant variables have been removed:\n", 
+                   "  ",paste(colnames(tab2)[var2.cons],collapse="\n    "),"\n")
+    } else if(length(var2.cons) != 0 && length(var2.cons) > 3){
+      err.msg <- c(err.msg, "For example, in table 2, the following constant variables have been removed:\n", 
+                   "  ",paste(colnames(tab2)[var2.cons[1:3]],collapse="\n    "),"\n")
+    }  
+  }
+  
+  if(length(var1.cons) != 0){
+    tab1 <- tab1[,-var1.cons,drop=FALSE]
+  }
+  if(length(var2.cons) != 0){
+    tab2 <- tab2[,-var2.cons,drop=FALSE]
+  }
+  
+  
+  # Print info message
+  
   if(length(err.msg) != 0){ 
     cat("\n- - - - - - - - -\n",err.msg,"\n- - - - - - - - -\n\n")
   }
   
+  rm(err.stock,var1.quali,var2.quali,var1.cons,var2.cons,err.msg) 
+   
+   
   # Correlation table ---------------------------------------------------------------------------------
   
   tab.corr <- matrix(nrow = dim(tab2)[2], ncol = dim(tab1)[2])
@@ -190,7 +250,6 @@
   rownames(tab.corr) <- colnames(tab2)
   
   
-  
   # Significance of correlation test  ------------------------------------------------------------------
   
   if (test.corr == "yes"){
@@ -218,6 +277,11 @@
   # Filter settings ------------------------------------------------------------------------------------
   
   if (filter == "yes"){
+  
+    # Turn diagonal from 1 to 0 if autocorrelation
+	if(autocor){
+	  for(i in 1:(ncol(tab.corr))){ tab.corr[i,i] <- 0 }
+	}
     
     # Remove variables with all their correlation coefficients = 0 :
     if (filters.choice == "filter_0"){
@@ -232,8 +296,8 @@
     }
     
     if (length(var2.thres) != 0){
-      tab.corr <- tab.corr[-var2.thres,]
-      tab2 <- tab2[, -var2.thres]
+      tab.corr <- tab.corr[-var2.thres,,drop=FALSE]
+      tab2 <- tab2[, -var2.thres,drop=FALSE]
     }
     
     var1.thres <- vector()
@@ -244,9 +308,14 @@
     }
       
     if (length(var1.thres) != 0){
-      tab.corr <- tab.corr[,-var1.thres]
-      tab1 <- tab1[,-var1.thres]
+      tab.corr <- tab.corr[,-var1.thres,drop=FALSE]
+      tab1 <- tab1[,-var1.thres,drop=FALSE]
     }
+	
+	# Turn diagonal from 0 back to 1 if autocorrelation
+	if(autocor){
+	  for(i in 1:(ncol(tab.corr))){ tab.corr[i,i] <- 1 }
+	}
     
   } 
   
@@ -258,17 +327,19 @@
     dist.tab2 <- as.dist(1 - cormat.tab2)
     hc.tab2 <- hclust(dist.tab2, method = "ward.D2")
     tab.corr <- tab.corr[hc.tab2$order,]
+	rm(cormat.tab2)
     
     cormat.tab1 <- cor(tab1, method = corr.method, use = "pairwise.complete.obs")
     dist.tab1 <- as.dist(1 - cormat.tab1)
     hc.tab1 <- hclust(dist.tab1, method = "ward.D2")
     tab.corr <- tab.corr[,hc.tab1$order]
+	rm(cormat.tab1)
     
   }
   
   
+  # Output 1 : Correlation table -----------------------------------------------------------------------
   
-  # Output 1 : Correlation table -----------------------------------------------------------------------
   
   # Export correlation table
   write.table(x = data.frame(name = rownames(tab.corr), tab.corr), file = output1, sep = "\t", quote = FALSE, row.names = FALSE)
@@ -276,7 +347,9 @@
   
   
   # Create the heatmap ---------------------------------------------------------------------------------
-  
+
+  if(plot.choice != "none"){
+ 
   # A message if no variable kept
   if(length(tab.corr)==0){
 	pdf(output2)
@@ -285,6 +358,17 @@
 	dev.off()
   } else {
   
+  # A message if more than 1000 variable in auto mode
+  if((plot.choice=="auto")&&(max(dim(tab.corr))>1000)){
+	pdf(output2)
+	war.msg <- paste0("In 'default' mode, the colored table is not provided when\none of the tables contains more than ",
+	                  "a thousand\nvariables after the filter step.\n\nOne of your table still contains ",max(dim(tab.corr))," variables.\n",
+					  "Please consider more filtering, or use the 'Always plot a\ncolored table' mode to obtain your colored table.")
+	plot.new()
+	legend("center",war.msg,adj=c(0.05,0.075))
+	dev.off()
+  } else {
+  
   
   library(ggplot2)
   library(reshape2)
@@ -399,7 +483,10 @@
   ggsave(output2, device = "pdf", width = 10+0.075*dim(tab.corr)[2], height = 5+0.075*dim(tab.corr)[1], limitsize = FALSE)
   
   
-  } # End if(length(tab.corr)==0)else
+  } # End of if((plot.choice=="auto")&&(max(dim(tab.corr))>1000))else
+  } # End of if(length(tab.corr)==0)else
+  } # End of if(plot.choice != "auto")
+  
   
   } # End of correlation.tab