corr_table: CorrTable/Corr_Script_samples

comparison CorrTable/Corr_Script_samples_row.R @ 1:29ec7e3afdd4 draft

Uploaded

author	melpetera
date	Thu, 01 Aug 2019 11:30:58 -0400
parents	b22c453e4cf4
children	2173ad5e7750

comparison

equal deleted inserted replaced

-:b22c453e4cf4
+:29ec7e3afdd4
 #################################################################################################
 # CORRELATION TABLE                                                                             #
 #                                                                                               #
 #                                                                                               #
-# Input : 2 tables with common samples                                                          #
+# Input : 2 tables with shared samples                                                          #
 # Output : Correlation table ; Heatmap (pdf)                                                    #
 #                                                                                               #
 # Dependencies : Libraries "ggplot2" and "reshape2"                                             #
 #                                                                                               #
 #################################################################################################
 # Parameters (for dev)
 if(FALSE){
-rm(list = ls())
+tab1.name <- "test/ressources/inputs/CT/CT2_DM.tabular"
-setwd(dir = "Y:/Developpement")
+tab2.name <- "test/ressources/inputs/CT/CT2_base_Diapason_14ClinCES_PRIN.txt"
-tab1.name <- "Test/Ressources/Inputs/CT2_DM.tabular"
-tab2.name <- "Test/Ressources/Inputs/CT2_base_Diapason_14ClinCES_PRIN.txt"
 param1.samples <- "column"
 param2.samples <- "row"
 corr.method <- "pearson"
 test.corr <- "yes"
 alpha <- 0.05
 multi.name <- "none"
 filter <- "yes"
 filters.choice <- "filters_0_thr"
 threshold <- 0.2
 reorder.var <- "yes"
+	plot.choice <- "auto"
 color.heatmap <- "yes"
 type.classes <-"irregular"
 reg.value <- 1/3
 irreg.vect <- c(-0.3, -0.2, -0.1, 0, 0.3, 0.4)
 output1 <- "Correlation_table.txt"
 }
 correlation.tab <- function(tab1.name, tab2.name, param1.samples, param2.samples, corr.method, test.corr, alpha,
-multi.name, filter, filters.choice, threshold, reorder.var, color.heatmap, type.classes,
+multi.name, filter, filters.choice, threshold, reorder.var, plot.choice, color.heatmap,
-reg.value, irreg.vect, output1, output2){
+type.classes, reg.value, irreg.vect, output1, output2){
 # This function allows to visualize the correlation between two tables
 #
 # Parameters:
 # - tab1.name: table 1 file's access
 # - filter ("yes", "no"): use filter.0 or/and filter.threshold
 # - filters.choice ("filter_0" or "filters_0_thr"): zero filter removes variables with all their correlation coefficients = 0
 #   and threshold filter remove variables with all their correlation coefficients in abs < threshold
 # - threshold (value between 0 and 1): threshold for filter threshold
 # - reorder.var ("yes" or "no"): reorder variables in the correlation table thanks to the HCA
+	# - plot.choice ("auto", "forced" or "none"): determine whether a heatmap is plotted
 # - color.heatmap ("yes" or "no"): color the heatmap with classes defined by the user
 # - type.classes ("regular" or "irregular"): choose to color the heatmap with regular or irregular classes
 # - reg.value (value between 0 and 1): value for regular classes
 # - irreg.vect (vector with values between -1 and 1): vector which indicates values for intervals (irregular classes)
 # - output1: correlation table file's access
 # Sorting tables in alphabetical order of the samples
 tab1 <- tab1[order(rownames(tab1)),]
 tab2 <- tab2[order(rownames(tab2)),]
+# Checks ---------------------------------------------------------------------------------------------
 # Check if the 2 datasets match regarding samples identifiers
 # Adapt from functions "check.err" and "match2", RcheckLibrary.R
 err.stock <- NULL
 if(length(err.stock)!=0){
 stop("\n- - - - - - - - -\n",err.stock,"\n- - - - - - - - -\n\n")
 }
+# Check whether tab1=tab2
+err.msg <- NULL
+if((ncol(tab1)==ncol(tab2))&&(sum(tab1!=tab2,na.rm=TRUE)==0)){
+autocor <- TRUE
+	err.msg <- c(err.msg, "\nYou chose the same table for the two dataset inputs. \nTo allow filtering options,",
+	             "we will turn the diagonal to 0 in the correlation matrix during the filtering process.\n")
+}else{
+autocor <- FALSE
+}
 # Check qualitative variables in each input tables
-err.msg <- NULL
 var1.quali <- vector()
 var2.quali <- vector()
 for (i in 1:dim(tab1)[2]){
 "  ",paste(colnames(tab2)[var2.quali[1:3]],collapse="\n    "),"\n")
 }
 }
 if(length(var1.quali) != 0){
-tab1 <- tab1[,-var1.quali]
+tab1 <- tab1[,-var1.quali,drop=FALSE]
 }
 if(length(var2.quali) != 0){
-tab2 <- tab2[,-var2.quali]
+tab2 <- tab2[,-var2.quali,drop=FALSE]
 }
+# Check whether there are constant variables
+var1.cons <- vector()
+var2.cons <- vector()
+for (i in 1:dim(tab1)[2]){
+if(length(levels(as.factor(tab1[,i])))==1){ var1.cons <- c(var1.cons,i) }
+}
+for (j in 1:dim(tab2)[2]){
+if(length(levels(as.factor(tab2[,j])))==1){ var2.cons <- c(var2.cons, j) }
+}
+if (length(var1.cons) != 0 | length(var2.cons) != 0){
+err.msg <- c(err.msg, "\nThere are constant variables in your input tables which have been removed to compute the correlation table.\n\n")
+if(length(var1.cons) != 0 && length(var1.cons) < 4){
+err.msg <- c(err.msg, "In table 1, the following constant variables have been removed:\n",
+"  ",paste(colnames(tab1)[var1.cons],collapse="\n    "),"\n")
+} else if(length(var1.cons) != 0 && length(var1.cons) > 3){
+err.msg <- c(err.msg, "For example, in table 1, the following constant variables have been removed:\n",
+"  ",paste(colnames(tab1)[var1.cons[1:3]],collapse="\n    "),"\n")
+}
+if(length(var2.cons) != 0 && length(var2.cons) < 4){
+err.msg <- c(err.msg, "In table 2, the following constant variables have been removed:\n",
+"  ",paste(colnames(tab2)[var2.cons],collapse="\n    "),"\n")
+} else if(length(var2.cons) != 0 && length(var2.cons) > 3){
+err.msg <- c(err.msg, "For example, in table 2, the following constant variables have been removed:\n",
+"  ",paste(colnames(tab2)[var2.cons[1:3]],collapse="\n    "),"\n")
+}
+}
+if(length(var1.cons) != 0){
+tab1 <- tab1[,-var1.cons,drop=FALSE]
+}
+if(length(var2.cons) != 0){
+tab2 <- tab2[,-var2.cons,drop=FALSE]
+}
+# Print info message
 if(length(err.msg) != 0){
 cat("\n- - - - - - - - -\n",err.msg,"\n- - - - - - - - -\n\n")
 }
+rm(err.stock,var1.quali,var2.quali,var1.cons,var2.cons,err.msg)
 # Correlation table ---------------------------------------------------------------------------------
 tab.corr <- matrix(nrow = dim(tab2)[2], ncol = dim(tab1)[2])
 for (i in 1:dim(tab2)[2]){
 for (j in 1:dim(tab1)[2]){
 }
 }
 colnames(tab.corr) <- colnames(tab1)
 rownames(tab.corr) <- colnames(tab2)
 # Significance of correlation test  ------------------------------------------------------------------
 if (test.corr == "yes"){
 # Filter settings ------------------------------------------------------------------------------------
 if (filter == "yes"){
+# Turn diagonal from 1 to 0 if autocorrelation
+	if(autocor){
+	  for(i in 1:(ncol(tab.corr))){ tab.corr[i,i] <- 0 }
+	}
 # Remove variables with all their correlation coefficients = 0 :
 if (filters.choice == "filter_0"){
 threshold <- 0
 }
 var2.thres <- c(var2.thres, i)
 }
 }
 if (length(var2.thres) != 0){
-tab.corr <- tab.corr[-var2.thres,]
+tab.corr <- tab.corr[-var2.thres,,drop=FALSE]
-tab2 <- tab2[, -var2.thres]
+tab2 <- tab2[, -var2.thres,drop=FALSE]
 }
 var1.thres <- vector()
 for (i in 1:dim(tab.corr)[2]){
 if (length(which(abs(tab.corr[,i]) <= threshold)) == dim(tab.corr)[1]){
 var1.thres <- c(var1.thres, i)
 }
 }
 if (length(var1.thres) != 0){
-tab.corr <- tab.corr[,-var1.thres]
+tab.corr <- tab.corr[,-var1.thres,drop=FALSE]
-tab1 <- tab1[,-var1.thres]
+tab1 <- tab1[,-var1.thres,drop=FALSE]
 }
+	# Turn diagonal from 0 back to 1 if autocorrelation
+	if(autocor){
+	  for(i in 1:(ncol(tab.corr))){ tab.corr[i,i] <- 1 }
+	}
 }
 # Reorder variables in the correlation table (with the HCA) ------------------------------------------
 cormat.tab2 <- cor(tab2, method = corr.method, use = "pairwise.complete.obs")
 dist.tab2 <- as.dist(1 - cormat.tab2)
 hc.tab2 <- hclust(dist.tab2, method = "ward.D2")
 tab.corr <- tab.corr[hc.tab2$order,]
+	rm(cormat.tab2)
 cormat.tab1 <- cor(tab1, method = corr.method, use = "pairwise.complete.obs")
 dist.tab1 <- as.dist(1 - cormat.tab1)
 hc.tab1 <- hclust(dist.tab1, method = "ward.D2")
 tab.corr <- tab.corr[,hc.tab1$order]
+	rm(cormat.tab1)
-}
+}
 # Output 1 : Correlation table -----------------------------------------------------------------------
 # Export correlation table
 write.table(x = data.frame(name = rownames(tab.corr), tab.corr), file = output1, sep = "\t", quote = FALSE, row.names = FALSE)
 # Create the heatmap ---------------------------------------------------------------------------------
+if(plot.choice != "none"){
 # A message if no variable kept
 if(length(tab.corr)==0){
 	pdf(output2)
 	plot.new()
 	legend("center","Filtering leads to no remaining correlation coefficient.")
+	dev.off()
+} else {
+# A message if more than 1000 variable in auto mode
+if((plot.choice=="auto")&&(max(dim(tab.corr))>1000)){
+	pdf(output2)
+	war.msg <- paste0("In 'default' mode, the colored table is not provided when\none of the tables contains more than ",
+	                  "a thousand\nvariables after the filter step.\n\nOne of your table still contains ",max(dim(tab.corr))," variables.\n",
+					  "Please consider more filtering, or use the 'Always plot a\ncolored table' mode to obtain your colored table.")
+	plot.new()
+	legend("center",war.msg,adj=c(0.05,0.075))
 	dev.off()
 } else {
 library(ggplot2)
 ggsave(output2, device = "pdf", width = 10+0.075*dim(tab.corr)[2], height = 5+0.075*dim(tab.corr)[1], limitsize = FALSE)
-} # End if(length(tab.corr)==0)else
+} # End of if((plot.choice=="auto")&&(max(dim(tab.corr))>1000))else
+} # End of if(length(tab.corr)==0)else
+} # End of if(plot.choice != "auto")
 } # End of correlation.tab
 # Function call

Mercurial > repos > melpetera > corr_table

comparison CorrTable/Corr_Script_samples_row.R @ 1:29ec7e3afdd4 draft