Mercurial > repos > melpetera > intensity_checks
diff Intchecks/Script_intensity_check.R @ 1:4973a2104cfd draft
Uploaded
author | melpetera |
---|---|
date | Wed, 05 Dec 2018 10:27:45 -0500 |
parents | c2c2e1be904a |
children | bdee2c2c484b |
line wrap: on
line diff
--- a/Intchecks/Script_intensity_check.R Thu Oct 11 05:33:19 2018 -0400 +++ b/Intchecks/Script_intensity_check.R Wed Dec 05 10:27:45 2018 -0500 @@ -1,13 +1,12 @@ -#################################################################### -# SCRIPT INTENSITY CHECK -# V1: Fold and NA -# -# Input: Data Matrix, VariableMetadata, SampleMetadata -# Output: VariableMetadata, Graphics (barplots and boxplots) -# -# Dependencies: RcheckLibrary.R -# -#################################################################### +######################################################################### +# SCRIPT INTENSITY CHECK # +# # +# Input: Data Matrix, VariableMetadata, SampleMetadata # +# Output: VariableMetadata, Graphics (barplots and boxplots) # +# # +# Dependencies: RcheckLibrary.R # +# # +######################################################################### # Parameters (for dev) @@ -19,16 +18,24 @@ DM.name <- "DM_NA.tabular" SM.name <- "SM_NA.tabular" VM.name <- "vM_NA.tabular" + class.col <- "2" type <- "One_class" - class.col <- "2" class1 <- "Blanks" + fold.frac <- "Top" + logarithm <- "log2" VM.output <- "new_VM.txt" graphs.output <- "Barplots_and_Boxplots.pdf" } -intens_check <- function(DM.name, SM.name, VM.name, type, class.col, class1, VM.output, graphs.output){ - # This function allows to check the intensities considering classes with a fold calculation, the number and - # the proportion of NA + + + +intens_check <- function(DM.name, SM.name, VM.name, class.col, type, class1, fold.frac, logarithm, + VM.output, graphs.output){ + + + # This function allows to check the intensities considering classes with a mean fold change calculation, + # the number and the proportion of missing values (NA) in dataMatrix # # Two options: # - one class (selected by the user) against all the remaining samples ("One_class") @@ -36,16 +43,19 @@ # # Parameters: # DM.name, SM.name, VM.name: dataMatrix, sampleMetadata, variableMetadata files access + # class.col: number of the sampleMetadata's column with classes # type: "One_class" or "Each_class" - # class.col: number of the sampleMetadata's column with classes - # class1: name of the class if type="One_class" - # VM.output: output file's access (VM with the new columns) - # graphs.output: pdf with barplots for the proportion of NA and boxplots with the folds values + # class1: name of the class, only if type="One_class" + # fold.frac: if type="One class": class1/other ("Top") or other/class1 ("Bottom") + # logarithm: "log2", "log10" or "none" for log mean fold change + # VM.output: output file's access (VM with new columns) + # graphs.output: pdf file's access with barplots for the proportion of NA and boxplots with the folds values + - # Input --------------------------------------------------------- + # Input --------------------------------------------------------------------------------------------------- DM <- read.table(DM.name, header=TRUE, sep="\t", check.names=FALSE) SM <- read.table(SM.name, header=TRUE, sep="\t", check.names=FALSE) @@ -58,6 +68,7 @@ check.err(table.check) + rownames(DM) <- DM[,1] var_names <- DM[,1] DM <- DM[,-1] @@ -65,7 +76,8 @@ class.col <- colnames(SM)[as.numeric(class.col)] - # check class.col, class1 and the number of classes--------------- + + # check class.col, class1 and the number of classes --------------------------------------------------------- if(!(class.col %in% colnames(SM))){ stop("\n- - - - - - - - -\n", "The column ",class.col, " is not a part of the specify sample Metadata","\n- - - - - - - - -\n") @@ -76,6 +88,10 @@ nb_class <- nlevels(c_class) classnames <- levels(c_class) + if(nb_class < 2){ + err.1class <- c("\n The column",class.col, "contains only one class, fold calculation could not be executed \n") + cat(err.1class) + } if((nb_class > (nrow(SM))/3)){ class.err <- c("\n There are too many classes, think about reducing the number of classes and excluding those @@ -83,6 +99,7 @@ cat(class.err) } + if(type == "One_class"){ if(!(class1 %in% classnames)){ list.class1 <- c("\n Classes:",classnames,"\n") @@ -92,6 +109,7 @@ } } + #If type is "one_class", change others classes in "other" if(type == "One_class"){ for(i in 1:length(c_class)){ @@ -100,7 +118,7 @@ c_class[i] <- "Other" c_class <- as.factor(c_class) nb_class <- nlevels(c_class) - classnames <- levels(c_class) + classnames <- c(class1,"Other") } } @@ -108,19 +126,53 @@ DM <- cbind(DM,c_class) - # fold ------------------------------------------------------- - n <- 1 - fold <- data.frame() - for(j in 1:(nb_class-1)){ - for(k in (j+1):nb_class) { - for (i in 1:(length(DM)-1)){ - fold[i,n] <- mean(DM[which(DM$c_class==classnames[k]),i], na.rm=TRUE)/ - mean(DM[which(DM$c_class==classnames[j]),i], na.rm=TRUE)} - names(fold)[n] <- paste("fold",classnames[k],"VS", classnames[j], sep="_") - n <- n + 1} + + + # fold calculation ------------------------------------------------------------------------------------------- + + if(nb_class >= 2){ + + + fold <- data.frame() + n <- 1 + ratio1 <- NULL + ratio2 <- NULL + + if(type=="Each_class"){ + fold.frac <- "Top" + } + + for(j in 1:(nb_class-1)){ + for(k in (j+1):nb_class) { + + if(fold.frac=="Bottom"){ + ratio1 <- classnames[k] + ratio2 <- classnames[j] + }else{ + ratio1 <- classnames[j] + ratio2 <- classnames[k] + } + + for (i in 1:(length(DM)-1)){ + fold[i,n] <- mean(DM[which(DM$c_class==ratio1),i], na.rm=TRUE)/ + mean(DM[which(DM$c_class==ratio2),i], na.rm=TRUE) + if(logarithm=="log2"){ + fold[i,n] <- log2(fold[i,n]) + }else if(logarithm=="log10"){ + fold[i,n] <- log10(fold[i,n]) + } + } + names(fold)[n] <- paste("fold",ratio1,"VS", ratio2, sep="_") + if(logarithm != "none"){ + names(fold)[n] <- paste(logarithm,names(fold)[n], sep="_") + } + n <- n + 1} + } + } - # NA --------------------------------------------------------- + # number and proportion of NA --------------------------------------------------------------------------------- + calcul_NA <- data.frame() pct_NA <- data.frame() for (i in 1:(length(DM)-1)){ @@ -134,14 +186,13 @@ pct_NA[i,j] <- (calcul_NA[i,j]/length(new_DM))*100} } } - names(calcul_NA) <- paste("Nb_NA",classnames, sep="_") + names(calcul_NA) <- paste("NA",classnames, sep="_") names(pct_NA) <- paste("Pct_NA", classnames, sep="_") - # Alert message if there is no NA + # Alert message if there is no NA in data matrix sumNA <- colSums(calcul_NA) sum_total <- sum(sumNA) - alerte <- NULL if(sum_total==0){ alerte <- c(alerte, "Data Matrix contains no NA.\n") @@ -150,23 +201,31 @@ if(length(alerte) != 0){ cat(alerte,"\n") } - table_NA <- cbind(calcul_NA, pct_NA) - # check columns names ---------------------------------------- + + + # check columns names --------------------------------------------------------------------------------------- + + + VM.names <- colnames(VM) # Fold - VM.names <- colnames(VM) - fold.names <- colnames(fold) + + if(nb_class >=2){ + fold.names <- colnames(fold) - for (i in 1:length(VM.names)){ - for (j in 1:length(fold.names)){ - if (VM.names[i]==fold.names[j]){ - fold.names[j] <- paste(fold.names[j],"2", sep="_") + for (i in 1:length(VM.names)){ + for (j in 1:length(fold.names)){ + if (VM.names[i]==fold.names[j]){ + fold.names[j] <- paste(fold.names[j],"2", sep="_") + } } } + colnames(fold) <- fold.names + + VM <- cbind(VM,fold) } - colnames(fold) <- fold.names # NA NA.names <- colnames(table_NA) @@ -179,8 +238,10 @@ } } colnames(table_NA) <- NA.names + VM <- cbind(VM,table_NA) - #for NA barplots --------------------------------------------- + + #for NA barplots ------------------------------------------------------------------------------------------- data_bp <- data.frame() @@ -214,14 +275,13 @@ data_bp[4,j] <- Nb_NA_60_80 data_bp[5,j] <- Nb_NA_80_100 } - rownames(data_bp) <- c("0-20%", "20-40%", "40-60%", "60-80%", "80-100%") + rownames(data_bp) <- c("0%-20%", "20%-40%", "40%-60%", "60%-80%", "80%-100%") colnames(data_bp) <- classnames data_bp <- as.matrix(data_bp) - # Output-------------------------------------------------------- + # Output --------------------------------------------------------------------------------------------------- - VM <- cbind(VM,fold,table_NA) write.table(VM, VM.output,sep="\t", quote=FALSE, row.names=FALSE) @@ -229,36 +289,39 @@ pdf(graphs.output) - #Boxplots for NA - + #Barplots for NA par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) bp=barplot(data_bp, col=rainbow(nrow(data_bp)), main="Proportion of NA", xlab="Classes", ylab="Variables") legend("topright", fill=rainbow(nrow(data_bp)),rownames(data_bp), inset=c(-0.3,0)) - stock=0 for (i in 1:nrow(data_bp)){ - text(bp, stock+data_bp[i,]/2, data_bp[i,], col="white") + text(bp, stock+data_bp[i,]/2, data_bp[i,], col="white", cex=0.7) stock <- stock+data_bp[i,] } #Boxplots for fold test - for (j in 1:ncol(fold)){ - title=paste(fold.names[j]) - boxplot(fold[j], main=title) - } + + if(nb_class >= 2){ + + clean_fold <- fold + for(i in 1:nrow(clean_fold)){ + for(j in 1:ncol(clean_fold)){ + if(is.infinite(clean_fold[i,j])){ + clean_fold[i,j] <- NA + } + } + } + for (j in 1:ncol(clean_fold)){ + title <- paste(fold.names[j]) + boxplot(clean_fold[j], main=title) + } + } dev.off() } - -# Function call--------------- - -#setwd("Y:\\Developpement\\Intensity check\\Pour tests") -#intens_check("DM_NA.tabular", "SM_NA.tabular", "VM_NA.tabular", "One_class", "class", "Blanks", "VM_oneclass_NA.txt", -#"Barplots_and_Boxplots") - \ No newline at end of file