Mercurial > repos > melpetera > corr_table
diff CorrTable/Corr_Script_samples_row.R @ 1:29ec7e3afdd4 draft
Uploaded
author | melpetera |
---|---|
date | Thu, 01 Aug 2019 11:30:58 -0400 |
parents | b22c453e4cf4 |
children | 2173ad5e7750 |
line wrap: on
line diff
--- a/CorrTable/Corr_Script_samples_row.R Thu Oct 11 05:35:55 2018 -0400 +++ b/CorrTable/Corr_Script_samples_row.R Thu Aug 01 11:30:58 2019 -0400 @@ -2,7 +2,7 @@ # CORRELATION TABLE # # # # # - # Input : 2 tables with common samples # + # Input : 2 tables with shared samples # # Output : Correlation table ; Heatmap (pdf) # # # # Dependencies : Libraries "ggplot2" and "reshape2" # @@ -13,11 +13,8 @@ # Parameters (for dev) if(FALSE){ - rm(list = ls()) - setwd(dir = "Y:/Developpement") - - tab1.name <- "Test/Ressources/Inputs/CT2_DM.tabular" - tab2.name <- "Test/Ressources/Inputs/CT2_base_Diapason_14ClinCES_PRIN.txt" + tab1.name <- "test/ressources/inputs/CT/CT2_DM.tabular" + tab2.name <- "test/ressources/inputs/CT/CT2_base_Diapason_14ClinCES_PRIN.txt" param1.samples <- "column" param2.samples <- "row" corr.method <- "pearson" @@ -28,6 +25,7 @@ filters.choice <- "filters_0_thr" threshold <- 0.2 reorder.var <- "yes" + plot.choice <- "auto" color.heatmap <- "yes" type.classes <-"irregular" reg.value <- 1/3 @@ -40,8 +38,8 @@ correlation.tab <- function(tab1.name, tab2.name, param1.samples, param2.samples, corr.method, test.corr, alpha, - multi.name, filter, filters.choice, threshold, reorder.var, color.heatmap, type.classes, - reg.value, irreg.vect, output1, output2){ + multi.name, filter, filters.choice, threshold, reorder.var, plot.choice, color.heatmap, + type.classes, reg.value, irreg.vect, output1, output2){ # This function allows to visualize the correlation between two tables # @@ -59,6 +57,7 @@ # and threshold filter remove variables with all their correlation coefficients in abs < threshold # - threshold (value between 0 and 1): threshold for filter threshold # - reorder.var ("yes" or "no"): reorder variables in the correlation table thanks to the HCA + # - plot.choice ("auto", "forced" or "none"): determine whether a heatmap is plotted # - color.heatmap ("yes" or "no"): color the heatmap with classes defined by the user # - type.classes ("regular" or "irregular"): choose to color the heatmap with regular or irregular classes # - reg.value (value between 0 and 1): value for regular classes @@ -85,7 +84,9 @@ tab1 <- tab1[order(rownames(tab1)),] tab2 <- tab2[order(rownames(tab2)),] - + + # Checks --------------------------------------------------------------------------------------------- + # Check if the 2 datasets match regarding samples identifiers # Adapt from functions "check.err" and "match2", RcheckLibrary.R @@ -128,8 +129,20 @@ } + # Check whether tab1=tab2 + + err.msg <- NULL + + if((ncol(tab1)==ncol(tab2))&&(sum(tab1!=tab2,na.rm=TRUE)==0)){ + autocor <- TRUE + err.msg <- c(err.msg, "\nYou chose the same table for the two dataset inputs. \nTo allow filtering options,", + "we will turn the diagonal to 0 in the correlation matrix during the filtering process.\n") + }else{ + autocor <- FALSE + } + + # Check qualitative variables in each input tables - err.msg <- NULL var1.quali <- vector() var2.quali <- vector() @@ -167,16 +180,63 @@ } if(length(var1.quali) != 0){ - tab1 <- tab1[,-var1.quali] + tab1 <- tab1[,-var1.quali,drop=FALSE] } if(length(var2.quali) != 0){ - tab2 <- tab2[,-var2.quali] + tab2 <- tab2[,-var2.quali,drop=FALSE] + } + + + # Check whether there are constant variables + + var1.cons <- vector() + var2.cons <- vector() + + for (i in 1:dim(tab1)[2]){ + if(length(levels(as.factor(tab1[,i])))==1){ var1.cons <- c(var1.cons,i) } } + for (j in 1:dim(tab2)[2]){ + if(length(levels(as.factor(tab2[,j])))==1){ var2.cons <- c(var2.cons, j) } + } + + if (length(var1.cons) != 0 | length(var2.cons) != 0){ + err.msg <- c(err.msg, "\nThere are constant variables in your input tables which have been removed to compute the correlation table.\n\n") + + if(length(var1.cons) != 0 && length(var1.cons) < 4){ + err.msg <- c(err.msg, "In table 1, the following constant variables have been removed:\n", + " ",paste(colnames(tab1)[var1.cons],collapse="\n "),"\n") + } else if(length(var1.cons) != 0 && length(var1.cons) > 3){ + err.msg <- c(err.msg, "For example, in table 1, the following constant variables have been removed:\n", + " ",paste(colnames(tab1)[var1.cons[1:3]],collapse="\n "),"\n") + } + + if(length(var2.cons) != 0 && length(var2.cons) < 4){ + err.msg <- c(err.msg, "In table 2, the following constant variables have been removed:\n", + " ",paste(colnames(tab2)[var2.cons],collapse="\n "),"\n") + } else if(length(var2.cons) != 0 && length(var2.cons) > 3){ + err.msg <- c(err.msg, "For example, in table 2, the following constant variables have been removed:\n", + " ",paste(colnames(tab2)[var2.cons[1:3]],collapse="\n "),"\n") + } + } + + if(length(var1.cons) != 0){ + tab1 <- tab1[,-var1.cons,drop=FALSE] + } + if(length(var2.cons) != 0){ + tab2 <- tab2[,-var2.cons,drop=FALSE] + } + + + # Print info message + if(length(err.msg) != 0){ cat("\n- - - - - - - - -\n",err.msg,"\n- - - - - - - - -\n\n") } + rm(err.stock,var1.quali,var2.quali,var1.cons,var2.cons,err.msg) + + # Correlation table --------------------------------------------------------------------------------- tab.corr <- matrix(nrow = dim(tab2)[2], ncol = dim(tab1)[2]) @@ -190,7 +250,6 @@ rownames(tab.corr) <- colnames(tab2) - # Significance of correlation test ------------------------------------------------------------------ if (test.corr == "yes"){ @@ -218,6 +277,11 @@ # Filter settings ------------------------------------------------------------------------------------ if (filter == "yes"){ + + # Turn diagonal from 1 to 0 if autocorrelation + if(autocor){ + for(i in 1:(ncol(tab.corr))){ tab.corr[i,i] <- 0 } + } # Remove variables with all their correlation coefficients = 0 : if (filters.choice == "filter_0"){ @@ -232,8 +296,8 @@ } if (length(var2.thres) != 0){ - tab.corr <- tab.corr[-var2.thres,] - tab2 <- tab2[, -var2.thres] + tab.corr <- tab.corr[-var2.thres,,drop=FALSE] + tab2 <- tab2[, -var2.thres,drop=FALSE] } var1.thres <- vector() @@ -244,9 +308,14 @@ } if (length(var1.thres) != 0){ - tab.corr <- tab.corr[,-var1.thres] - tab1 <- tab1[,-var1.thres] + tab.corr <- tab.corr[,-var1.thres,drop=FALSE] + tab1 <- tab1[,-var1.thres,drop=FALSE] } + + # Turn diagonal from 0 back to 1 if autocorrelation + if(autocor){ + for(i in 1:(ncol(tab.corr))){ tab.corr[i,i] <- 1 } + } } @@ -258,17 +327,19 @@ dist.tab2 <- as.dist(1 - cormat.tab2) hc.tab2 <- hclust(dist.tab2, method = "ward.D2") tab.corr <- tab.corr[hc.tab2$order,] + rm(cormat.tab2) cormat.tab1 <- cor(tab1, method = corr.method, use = "pairwise.complete.obs") dist.tab1 <- as.dist(1 - cormat.tab1) hc.tab1 <- hclust(dist.tab1, method = "ward.D2") tab.corr <- tab.corr[,hc.tab1$order] + rm(cormat.tab1) } + # Output 1 : Correlation table ----------------------------------------------------------------------- - # Output 1 : Correlation table ----------------------------------------------------------------------- # Export correlation table write.table(x = data.frame(name = rownames(tab.corr), tab.corr), file = output1, sep = "\t", quote = FALSE, row.names = FALSE) @@ -276,7 +347,9 @@ # Create the heatmap --------------------------------------------------------------------------------- - + + if(plot.choice != "none"){ + # A message if no variable kept if(length(tab.corr)==0){ pdf(output2) @@ -285,6 +358,17 @@ dev.off() } else { + # A message if more than 1000 variable in auto mode + if((plot.choice=="auto")&&(max(dim(tab.corr))>1000)){ + pdf(output2) + war.msg <- paste0("In 'default' mode, the colored table is not provided when\none of the tables contains more than ", + "a thousand\nvariables after the filter step.\n\nOne of your table still contains ",max(dim(tab.corr))," variables.\n", + "Please consider more filtering, or use the 'Always plot a\ncolored table' mode to obtain your colored table.") + plot.new() + legend("center",war.msg,adj=c(0.05,0.075)) + dev.off() + } else { + library(ggplot2) library(reshape2) @@ -399,7 +483,10 @@ ggsave(output2, device = "pdf", width = 10+0.075*dim(tab.corr)[2], height = 5+0.075*dim(tab.corr)[1], limitsize = FALSE) - } # End if(length(tab.corr)==0)else + } # End of if((plot.choice=="auto")&&(max(dim(tab.corr))>1000))else + } # End of if(length(tab.corr)==0)else + } # End of if(plot.choice != "auto") + } # End of correlation.tab