Mercurial > repos > ethevenot > qualitymetrics
comparison qualitymetrics_script.R @ 0:b4f5b5bc01dd draft
planemo upload for repository https://github.com/workflow4metabolomics/qualitymetrics.git commit 73366dd3473c509341ab9ba1df8ba748d08a50a1
| author | ethevenot |
|---|---|
| date | Sat, 06 Aug 2016 12:01:17 -0400 |
| parents | |
| children | 6d3b7b6573d8 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b4f5b5bc01dd |
|---|---|
| 1 ################################################################################################ | |
| 2 # ANALYSES FOR QUALITY CONTROL # | |
| 3 # # | |
| 4 # Author: Melanie PETERA # | |
| 5 # User: Galaxy # | |
| 6 # Starting date: 04-09-2014 # | |
| 7 # V-1.0: Restriction of old filter script to CV filter # | |
| 8 # V-1.1: Addition of data check # | |
| 9 # V-1.2: Substitution of deletion by addition of indicator variable # | |
| 10 # V-1.3: Handling special characters # | |
| 11 # # | |
| 12 # # | |
| 13 # Input files: dataMatrix ; sampleMetadata ; variableMetadata # | |
| 14 # Output files: dataMatrix ; sampleMetadata ; variableMetadata # | |
| 15 # # | |
| 16 ################################################################################################ | |
| 17 | |
| 18 # Parameters (for dev) | |
| 19 if(FALSE){ | |
| 20 | |
| 21 ion.file.in <- "test/ressources/inputs/ex_data_IONS.txt" #tab file | |
| 22 meta.samp.file.in <- "test/ressources/inputs/ex_data_PROTOCOLE1.txt" #tab file | |
| 23 meta.ion.file.in <- "test/ressources/inputs/ex_data_METAION.txt" #tab file | |
| 24 | |
| 25 ## ion.file.out <- "test/ressources/outputs/QCtest_ex_data_IONS.txt" #tab file | |
| 26 meta.samp.file.out <- "test/ressources/outputs/QCtest_ex_data_PROTOCOLE1.txt" #tab file | |
| 27 meta.ion.file.out <- "test/ressources/outputs/QCtest_ex_data_METAION.txt" #tab file | |
| 28 | |
| 29 CV <- TRUE ; if(CV){Compa<-TRUE;seuil<-1.25}else{Compa<-NULL;seuil<-NULL} | |
| 30 | |
| 31 poolAsPool1L <- FALSE | |
| 32 | |
| 33 if(FALSE) { ## Sacuri dataset | |
| 34 | |
| 35 ## 'example' input dir | |
| 36 exaDirInpC <- "example/input" | |
| 37 | |
| 38 ion.file.in <- file.path(exaDirInpC, "dataMatrix.tsv") | |
| 39 meta.samp.file.in <- file.path(exaDirInpC, "sampleMetadata.tsv") | |
| 40 meta.ion.file.in <- file.path(exaDirInpC, "variableMetadata.tsv") | |
| 41 | |
| 42 poolAsPool1L <- FALSE | |
| 43 | |
| 44 ## 'example' output dir | |
| 45 exaDirOutC <- gsub("input", "output", exaDirInpC) | |
| 46 | |
| 47 mata.samp.file.out <- file.path(exaDirOutC, "sampleMetadata.tsv") | |
| 48 meta.ion.file_out <- file.path(exaDirOutC, "variableMetadata.tsv") | |
| 49 fig.out <- file.path(exaDirOutC, "figure.pdf") | |
| 50 log.out <- file.path(exaDirOutC, "information.txt") | |
| 51 | |
| 52 stopifnot(file.exists(exaDirOutC)) | |
| 53 | |
| 54 } | |
| 55 | |
| 56 } | |
| 57 | |
| 58 QualityControl <- function(ion.file.in, meta.samp.file.in, meta.ion.file.in, | |
| 59 CV, Compa, seuil, poolAsPool1L, | |
| 60 ion.file.out, meta.samp.file.out, meta.ion.file.out, fig.out, log.out){ | |
| 61 # This function allows to analyse data to check its quality | |
| 62 # It needs 3 datasets: the data matrix, the variables' metadata, the samples' metadata. | |
| 63 # It generates 3 new datasets corresponding to the 3 inputs with additional columns. | |
| 64 # | |
| 65 # Parameters: | |
| 66 # - xxx.in: input files' names | |
| 67 # - xxx.out: output files' names | |
| 68 # - CV: CV calculation yes/no | |
| 69 # | > Compa: comparing pool and sample CVs (TRUE) or simple pool CV calculation (FALSE) | |
| 70 # | > seuil: maximum ratio tolerated between pool and sample CVs or maximum pool CV | |
| 71 | |
| 72 | |
| 73 # Input ----------------------------------------------------------------------------------- | |
| 74 | |
| 75 ion.data <- read.table(ion.file.in,sep="\t",header=TRUE,check.names=FALSE, stringsAsFactors = FALSE) | |
| 76 meta.samp.data <- read.table(meta.samp.file.in,sep="\t",header=TRUE,check.names=FALSE, stringsAsFactors = FALSE) | |
| 77 meta.ion.data <- read.table(meta.ion.file.in,sep="\t",header=TRUE,check.names=FALSE, stringsAsFactors = FALSE) | |
| 78 | |
| 79 # Error vector | |
| 80 err.stock <- "\n" | |
| 81 | |
| 82 # Table match check | |
| 83 table.check <- match3(ion.data,meta.samp.data,meta.ion.data) | |
| 84 check.err(table.check) | |
| 85 | |
| 86 # StockID | |
| 87 samp.id <- stockID(ion.data,meta.samp.data,"sample") | |
| 88 ion.data <- samp.id$dataMatrix | |
| 89 meta.samp.data <- samp.id$Metadata | |
| 90 samp.id <- samp.id$id.match | |
| 91 | |
| 92 | |
| 93 # Function 1: CV calculation -------------------------------------------------------------- | |
| 94 # Allows to class ions according to the Coefficient of Variation (CV): | |
| 95 # Compa=TRUE: | |
| 96 # CV of pools and CV of samples are compared (ration between pools' one and samples' one) | |
| 97 # and confronted to a given ration. | |
| 98 # Compa=FALSE: | |
| 99 # only CV of pools are considered ; compared to a given threshold | |
| 100 | |
| 101 if(CV){ | |
| 102 | |
| 103 # Checking the sampleType variable | |
| 104 if(is.null(meta.samp.data$sampleType)){ | |
| 105 err.stock <- c(err.stock,"\n-------", | |
| 106 "\nWarning : no 'sampleType' variable detected in sample meta-data !", | |
| 107 "\nCV can not be calculated.\n-------\n") | |
| 108 }else{ | |
| 109 if(!("pool"%in%levels(factor(meta.samp.data$sampleType)))){ | |
| 110 err.stock <- c(err.stock,"\n-------", | |
| 111 "\nWarning : no 'pool' detected in 'sampleType' variable (sample meta-data) !", | |
| 112 "\nCV can not be calculated.\n-------\n") | |
| 113 }else{ | |
| 114 if((!("sample"%in%levels(factor(meta.samp.data$sampleType))))&(Compa)){ | |
| 115 err.stock <- c(err.stock,"\n-------", | |
| 116 "\nWarning : no 'sample' detected in 'sampleType' variable (sample meta-data) !", | |
| 117 "\nCV can not be calculated.\n-------\n") | |
| 118 }else{ | |
| 119 | |
| 120 # Statement | |
| 121 tmp.ion <- data.frame(CV.ind=rep(NA,nrow(ion.data)),CV.samp=rep(NA,nrow(ion.data)), | |
| 122 CV.pool=rep(NA,nrow(ion.data)),ion.data,stringsAsFactors=FALSE) | |
| 123 # CV samples | |
| 124 tmp.samp <- which(colnames(tmp.ion)%in%meta.samp.data[which(meta.samp.data$sampleType=="sample"),1]) | |
| 125 tmp.ion$CV.samp <- apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE)) / rowMeans(tmp.ion[,tmp.samp], na.rm = TRUE) | |
| 126 tmp.ion$CV.samp[which(apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE))==0)] <- 0 | |
| 127 # CV pools | |
| 128 tmp.samp <- which(colnames(tmp.ion)%in%meta.samp.data[which(meta.samp.data$sampleType=="pool"),1]) | |
| 129 tmp.ion$CV.pool <- apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE)) / rowMeans(tmp.ion[,tmp.samp], na.rm = TRUE) | |
| 130 tmp.ion$CV.pool[which(apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE))==0)] <- 0 | |
| 131 # CV indicator | |
| 132 if(Compa){tmp.ion$CV.ind <- ifelse((tmp.ion$CV.pool)/(tmp.ion$CV.samp)>seuil,0,1) | |
| 133 }else{tmp.ion$CV.ind <- ifelse((tmp.ion$CV.pool)>seuil,0,1)} | |
| 134 # Addition of new columns in meta.ion.data | |
| 135 if(Compa){tmp.ion<-tmp.ion[,c(4,2,3,1,1)]}else{tmp.ion<-tmp.ion[,c(4,3,1,1)]} | |
| 136 tmp.ion[,ncol(tmp.ion)] <- 1:nrow(tmp.ion) | |
| 137 meta.ion.data <- merge(x=meta.ion.data,y=tmp.ion,by.x=1,by.y=1) | |
| 138 meta.ion.data <- meta.ion.data[order(meta.ion.data[,ncol(meta.ion.data)]),][,-ncol(meta.ion.data)] | |
| 139 rownames(meta.ion.data) <- NULL | |
| 140 | |
| 141 rm(tmp.ion,tmp.samp) | |
| 142 | |
| 143 }}} | |
| 144 | |
| 145 } # end if(CV) | |
| 146 | |
| 147 ## complementary metrics (ET) | |
| 148 | |
| 149 datMN <- t(as.matrix(ion.data[, -1])) | |
| 150 colnames(datMN) <- ion.data[, 1] | |
| 151 datMN <- datMN[, meta.ion.data[, 1]] ## in case meta.ion.data has been re-ordered during the CV = TRUE computations | |
| 152 quaLs <- qualityMetricsF(datMN, | |
| 153 meta.samp.data, | |
| 154 meta.ion.data, | |
| 155 poolAsPool1L, | |
| 156 fig.out, | |
| 157 log.out) | |
| 158 meta.samp.data <- quaLs[["samDF"]] | |
| 159 meta.ion.data <- quaLs[["varDF"]] | |
| 160 | |
| 161 | |
| 162 # Output ---------------------------------------------------------------------------------- | |
| 163 | |
| 164 # Getting back original identifiers | |
| 165 id.ori <- reproduceID(ion.data,meta.samp.data,"sample",samp.id) | |
| 166 ion.data <- id.ori$dataMatrix | |
| 167 meta.samp.data <- id.ori$Metadata | |
| 168 | |
| 169 | |
| 170 # Error checking | |
| 171 if(length(err.stock)>1){ | |
| 172 stop(err.stock) | |
| 173 }else{ | |
| 174 | |
| 175 ## write.table(ion.data, ion.file.out, sep="\t", row.names=FALSE, quote=FALSE) | |
| 176 write.table(meta.samp.data, meta.samp.file.out, sep="\t", row.names=FALSE, quote=FALSE) | |
| 177 write.table(meta.ion.data, meta.ion.file.out, sep="\t", row.names=FALSE, quote=FALSE) | |
| 178 | |
| 179 } | |
| 180 | |
| 181 | |
| 182 } # end of QualityControl function | |
| 183 | |
| 184 | |
| 185 # Typical function call | |
| 186 # QualityControl(ion.file.in, meta.samp.file.in, meta.ion.file.in, | |
| 187 # CV, Compa, seuil, | |
| 188 # ion.file.out, meta.samp.file.out, meta.ion.file.out) | |
| 189 | |
| 190 | |
| 191 qualityMetricsF <- function(datMN, | |
| 192 samDF, | |
| 193 varDF, | |
| 194 pooAsPo1L = TRUE, | |
| 195 fig.pdfC = NULL, | |
| 196 log.txtC = NULL) { | |
| 197 | |
| 198 optWrnN <- options()$warn | |
| 199 options(warn = -1) | |
| 200 | |
| 201 | |
| 202 ##------------------------------ | |
| 203 ## Functions | |
| 204 ##------------------------------ | |
| 205 | |
| 206 | |
| 207 allDigF <- function (string) { ## from the Hmisc package (all.digits) | |
| 208 k <- length(string) | |
| 209 result <- logical(k) | |
| 210 for (i in 1:k) { | |
| 211 st <- string[i] | |
| 212 ls <- nchar(st) | |
| 213 ex <- substring(st, 1:ls, 1:ls) | |
| 214 result[i] <- all(match(ex, c("0", "1", "2", "3", "4", | |
| 215 "5", "6", "7", "8", "9"), nomatch = 0) > 0) | |
| 216 } | |
| 217 result | |
| 218 } | |
| 219 | |
| 220 datPloF <- function() { ## ploting data matrix | |
| 221 | |
| 222 thrVn <- c(pvalue=0.001, | |
| 223 poolCv=0.3) | |
| 224 | |
| 225 ## Constants | |
| 226 | |
| 227 marLs <- list(dri = c(2.1, 2.6, 1.1, 1.1), | |
| 228 ima = c(1.1, 2.6, 4.1, 1.1), | |
| 229 msd = c(2.1, 2.6, 1.1, 0.6), | |
| 230 sam = c(3.1, 3.6, 1.1, 0.6), | |
| 231 pca = c(2.6, 3.6, 1.1, 0.6), | |
| 232 sca = c(1.1, 4.1, 4.1, 0.6), | |
| 233 tit = c(0.1, 0.6, 1.1, 0.6)) | |
| 234 palHeaVc <- rev(rainbow(ceiling(256 * 1.5))[1:256]) | |
| 235 | |
| 236 ## Functions | |
| 237 | |
| 238 axiPreF <- function(valVn, | |
| 239 lenN) { | |
| 240 | |
| 241 if(NA %in% valVn) { | |
| 242 warning("NA in valVn") | |
| 243 valVn <- as.vector(na.omit(valVn)) | |
| 244 } | |
| 245 | |
| 246 if(lenN < length(valVn)) | |
| 247 stop("The length of in vector must be inferior to the length of the length parameter.") | |
| 248 | |
| 249 if(length(valVn) < lenN) | |
| 250 valVn <- seq(from = min(valVn), to = max(valVn), length.out = lenN) | |
| 251 | |
| 252 preValVn <- pretty(valVn) | |
| 253 | |
| 254 preLabVn <- preAtVn <- c() | |
| 255 | |
| 256 for(n in 1:length(preValVn)) | |
| 257 if(min(valVn) < preValVn[n] && preValVn[n] < max(valVn)) { | |
| 258 preLabVn <- c(preLabVn, preValVn[n]) | |
| 259 preAtVn <- c(preAtVn, which(abs(valVn - preValVn[n]) == min(abs(valVn - preValVn[n])))[1]) | |
| 260 } | |
| 261 | |
| 262 return(list(atVn = preAtVn, | |
| 263 labVn = preLabVn)) | |
| 264 | |
| 265 } | |
| 266 | |
| 267 colF <- function(vecVn) | |
| 268 sapply(vecVn, | |
| 269 function(outN) { | |
| 270 if(outN < ploRgeVn[1]) | |
| 271 return(palHeaVc[1]) | |
| 272 else if(outN > ploRgeVn[2]) | |
| 273 return(palHeaVc[256]) | |
| 274 else return(palHeaVc[round((outN - ploRgeVn[1]) / diff(ploRgeVn) * 256 + 1)])}) | |
| 275 | |
| 276 obsColF <- function(typVc) { | |
| 277 | |
| 278 ## available color palette | |
| 279 palVc <- palette() | |
| 280 | |
| 281 ## colors for common types are set aside | |
| 282 palVc <- palVc[!(palVc %in% c("black", "red", "green3"))] | |
| 283 | |
| 284 ## filling in the types with dedicated colors | |
| 285 samTypVc <- sort(unique(samDF[, "sampleType"])) | |
| 286 samColVc <- character(length(samTypVc)) | |
| 287 if("blank" %in% samTypVc) | |
| 288 samColVc[grepl("blank", samTypVc)] <- "black" | |
| 289 if("pool" %in% samTypVc) | |
| 290 samColVc[grepl("pool", samTypVc)] <- "red" | |
| 291 if("sample" %in% samTypVc) | |
| 292 samColVc[grepl("sample", samTypVc)] <- "green4" | |
| 293 | |
| 294 ## filling in the other types | |
| 295 palColI <- 1 | |
| 296 palColMaxI <- length(palVc) | |
| 297 | |
| 298 while(any(samColVc == "")) { | |
| 299 typToColI <- which(samColVc == "")[1] | |
| 300 if(palColI <= palColMaxI) | |
| 301 samColVc[typToColI] <- palVc[palColI] | |
| 302 else | |
| 303 samColVc[typToColI] <- "gray" | |
| 304 palColI <- palColI + 1 | |
| 305 } | |
| 306 | |
| 307 names(samColVc) <- samTypVc | |
| 308 | |
| 309 samColVc[typVc] | |
| 310 | |
| 311 } | |
| 312 | |
| 313 par(font = 2, | |
| 314 font.axis = 2, | |
| 315 font.lab = 2, | |
| 316 pch=18) | |
| 317 | |
| 318 layout(matrix(c(1, 3, 4, 5, 5, | |
| 319 1, 7, 7, 7, 6, | |
| 320 2, 7, 7, 7, 6), | |
| 321 byrow = TRUE, | |
| 322 nrow = 3), | |
| 323 heights = c(1.8, 1.2, 2.5), | |
| 324 widths = c(3.5, 1.8, 2.8, 1, 0.8)) | |
| 325 | |
| 326 ## Colors | |
| 327 ##------- | |
| 328 | |
| 329 if("sampleType" %in% colnames(samDF)) { | |
| 330 obsColVc <- obsColF(samDF[, "sampleType"]) | |
| 331 } else | |
| 332 obsColVc <- rep("black", nrow(samDF)) | |
| 333 | |
| 334 ## PCA and Hotelling ellipse | |
| 335 ##-------------------------- | |
| 336 | |
| 337 vVn <- getPcaVarVn(ropLs) | |
| 338 vRelVn <- vVn / ncol(datMN) | |
| 339 | |
| 340 par(mar = marLs[["pca"]]) | |
| 341 | |
| 342 plot(ropScoreMN, | |
| 343 type = "n", | |
| 344 xlab = "", | |
| 345 ylab = "", | |
| 346 xlim = range(ropScoreMN[, 1]) * 1.1) | |
| 347 mtext(paste("t1 (", round(vRelVn[1] * 100), "%)", sep = ""), | |
| 348 cex = 0.7, | |
| 349 line = 2, | |
| 350 side = 1) | |
| 351 mtext(paste("t2 (", round(vRelVn[2] * 100), "%)", sep = ""), | |
| 352 cex = 0.7, | |
| 353 las = 0, | |
| 354 line = 2, | |
| 355 side = 2) | |
| 356 abline(h = 0, lty = "dashed") | |
| 357 abline(v = 0, lty = "dashed") | |
| 358 radVn <- seq(0, 2 * pi, length.out = 100) | |
| 359 | |
| 360 hotFisN <- hotN * qf(1 - thrVn["pvalue"], 2, n - 2) | |
| 361 lines(sqrt(var(ropScoreMN[, 1]) * hotFisN) * cos(radVn), | |
| 362 sqrt(var(ropScoreMN[, 2]) * hotFisN) * sin(radVn)) | |
| 363 | |
| 364 text(ropScoreMN[, 1], | |
| 365 ropScoreMN[, 2], | |
| 366 cex = 0.7, | |
| 367 col = obsColVc, | |
| 368 labels = rownames(datMN)) | |
| 369 | |
| 370 if("sampleType" %in% colnames(samDF)) { | |
| 371 obsColVuc <- obsColVc[sort(unique(names(obsColVc)))] | |
| 372 legOrdVc <- c("blank", paste0("pool", 8:1), "pool", "other", "sample") | |
| 373 obsColVuc <- obsColVuc[legOrdVc[legOrdVc %in% names(obsColVuc)]] | |
| 374 | |
| 375 text(rep(par("usr")[1], times = length(obsColVuc)), | |
| 376 par("usr")[3] + (0.97 - length(obsColVuc) * 0.03 + 1:length(obsColVuc) * 0.03) * diff(par("usr")[3:4]), | |
| 377 col = obsColVuc, | |
| 378 font = 2, | |
| 379 labels = names(obsColVuc), | |
| 380 pos = 4) | |
| 381 } | |
| 382 | |
| 383 ## Missing/low intensities and decile values | |
| 384 ##------------------------------------------ | |
| 385 | |
| 386 par(mar = marLs[["sam"]]) | |
| 387 | |
| 388 plot(missZscoVn, | |
| 389 deciZscoMaxVn, | |
| 390 type = "n", | |
| 391 xlab = "", | |
| 392 ylab = "", | |
| 393 xlim = c(min(missZscoVn), | |
| 394 max(missZscoVn) + 0.5)) | |
| 395 mtext("amount of missing values (z-score)", | |
| 396 cex = 0.7, | |
| 397 line = 2, | |
| 398 side = 1) | |
| 399 mtext("deciles (zscore)", | |
| 400 cex = 0.7, | |
| 401 las = 0, | |
| 402 line = 2, | |
| 403 side = 2) | |
| 404 abline(h = qnorm(1 - thrVn["pvalue"] / 2) * c(-1, 1), lty = "dashed") | |
| 405 abline(v = qnorm(1 - thrVn["pvalue"] / 2) * c(-1, 1), lty = "dashed") | |
| 406 text(missZscoVn, | |
| 407 deciZscoMaxVn, | |
| 408 cex = 0.7, | |
| 409 col = obsColVc, | |
| 410 labels = rownames(datMN)) | |
| 411 | |
| 412 ## tit: Title | |
| 413 ##----------- | |
| 414 | |
| 415 par(mar = marLs[["tit"]]) | |
| 416 plot(0:1, bty = "n", type = "n", xaxt = "n", yaxt = "n", xlab = "", ylab = "") | |
| 417 text(1.5, 1, cex = 1.3, labels = "Quality Metrics") | |
| 418 text(1, 0.85, adj=0, cex = 1.1, labels = paste0("NAs: ", | |
| 419 round(length(which(is.na(c(datMN)))) / cumprod(dim(datMN))[2] * 100), "%")) | |
| 420 text(1, 0.75, adj=0, cex = 1.1, labels = paste0("0 values: ", | |
| 421 round(sum(abs(datMN) < epsN, na.rm=TRUE) / cumprod(dim(datMN))[2] * 100, 2), "%")) | |
| 422 text(1, 0.65, adj=0, cex = 1.1, labels = paste0("min: ", signif(min(datMN, na.rm=TRUE), 2))) | |
| 423 text(1, 0.55, adj=0, cex = 1.1, labels = paste0("median: ", signif(median(datMN, na.rm=TRUE), 2))) | |
| 424 text(1, 0.45, adj=0, cex = 1.1, labels = paste0("mean: ", signif(mean(datMN, na.rm=TRUE), 2))) | |
| 425 text(1, 0.35, adj=0, cex = 1.1, labels = paste0("max: ", signif(max(datMN, na.rm=TRUE), 2))) | |
| 426 if("sampleType" %in% colnames(samDF) && | |
| 427 "pool" %in% samDF[, "sampleType"]) | |
| 428 text(1, | |
| 429 0.25, | |
| 430 adj=0, cex = 1.1, | |
| 431 labels = paste0("pool CV < ", | |
| 432 round(thrVn["poolCv"] * 100), "%: ", | |
| 433 round(sum(varDF[, "pool_CV"] < thrVn["poolCv"]) / nrow(varDF) * 100), | |
| 434 "%")) | |
| 435 | |
| 436 text(1, 0.1, adj=0, labels = paste0("Thresholds used in plots:")) | |
| 437 text(1, 0, adj=0, labels = paste0(" p-value = ", thrVn["pvalue"])) | |
| 438 | |
| 439 ## dri: Analytical drift | |
| 440 ##---------------------- | |
| 441 | |
| 442 par(mar = marLs[["dri"]]) | |
| 443 | |
| 444 ## ordering | |
| 445 | |
| 446 driDatMN <- datMN | |
| 447 driSamDF <- samDF | |
| 448 | |
| 449 driSamDF[, "ordIniVi"] <- 1:nrow(driDatMN) | |
| 450 | |
| 451 if("injectionOrder" %in% colnames(driSamDF)) { | |
| 452 if("batch" %in% colnames(driSamDF)) | |
| 453 ordVi <- order(driSamDF[, "batch"], | |
| 454 driSamDF[, "injectionOrder"]) | |
| 455 else | |
| 456 ordVi <- order(driSamDF[, "injectionOrder"]) | |
| 457 } else | |
| 458 ordVi <- 1:nrow(driDatMN) | |
| 459 | |
| 460 driDatMN <- driDatMN[ordVi, ] | |
| 461 driSamDF <- driSamDF[ordVi, ] | |
| 462 | |
| 463 driColVc <- rep("black", nrow(driDatMN)) | |
| 464 if("sampleType" %in% colnames(driSamDF)) | |
| 465 driColVc <- obsColF(driSamDF[, "sampleType"]) | |
| 466 | |
| 467 plot(rowSums(driDatMN, na.rm=TRUE), | |
| 468 col = driColVc, | |
| 469 pch = 18, | |
| 470 xlab = "", | |
| 471 ylab = "") | |
| 472 | |
| 473 mtext("injection order", | |
| 474 cex = 0.7, | |
| 475 line = 2, | |
| 476 side = 1) | |
| 477 | |
| 478 mtext("Sum of intens. for all variables", | |
| 479 cex = 0.7, | |
| 480 line = 2, | |
| 481 side = 2) | |
| 482 | |
| 483 ## msd: Sd vs Mean plot | |
| 484 ##--------------------- | |
| 485 | |
| 486 par(mar = marLs[["msd"]]) | |
| 487 plot(apply(datMN, 2, function(y) mean(y, na.rm = TRUE)), | |
| 488 apply(datMN, 2, function(y) sd(y, na.rm = TRUE)), | |
| 489 col=obsColVc, | |
| 490 pch = 18, | |
| 491 xlab = "", | |
| 492 ylab = "") | |
| 493 mtext("mean", | |
| 494 cex = 0.7, | |
| 495 line = 2, | |
| 496 side = 1) | |
| 497 mtext("sd", | |
| 498 cex = 0.7, | |
| 499 line = 2, | |
| 500 side = 2) | |
| 501 | |
| 502 ## sca-6: Color scale | |
| 503 ##------------------- | |
| 504 | |
| 505 par(mar = marLs[["sca"]]) | |
| 506 | |
| 507 ylimVn <- c(0, 256) | |
| 508 ybottomVn <- 0:255 | |
| 509 ytopVn <- 1:256 | |
| 510 | |
| 511 plot(x = 0, | |
| 512 y = 0, | |
| 513 font.axis = 2, | |
| 514 font.lab = 2, | |
| 515 type = "n", | |
| 516 xlim = c(0, 1), | |
| 517 ylim = ylimVn, | |
| 518 xlab = "", | |
| 519 ylab = "", | |
| 520 xaxs = "i", | |
| 521 yaxs = "i", | |
| 522 xaxt = "n", | |
| 523 yaxt = "n") | |
| 524 | |
| 525 rect(xleft = 0, | |
| 526 ybottom = ybottomVn, | |
| 527 xright = 1, | |
| 528 ytop = ytopVn, | |
| 529 col = palHeaVc, | |
| 530 border = NA) | |
| 531 | |
| 532 eval(parse(text = paste("axis(at = axiPreF(c(ifelse(min(datMN, na.rm = TRUE) == -Inf, yes = 0, no = min(datMN, na.rm = TRUE)) , max(datMN, na.rm = TRUE)), 256)$atVn, | |
| 533 font = 2, | |
| 534 font.axis = 2, | |
| 535 labels = axiPreF(c(ifelse(min(datMN, na.rm = TRUE) == -Inf, yes = 0, no = min(datMN, na.rm = TRUE)), max(datMN, na.rm = TRUE)), 256)$labVn, | |
| 536 las = 1, | |
| 537 lwd = 2, | |
| 538 lwd.ticks = 2, | |
| 539 side = 2, | |
| 540 xpd = TRUE)", sep = ""))) | |
| 541 | |
| 542 arrows(par("usr")[1], | |
| 543 par("usr")[4], | |
| 544 par("usr")[1], | |
| 545 par("usr")[3], | |
| 546 code = 0, | |
| 547 lwd = 2, | |
| 548 xpd = TRUE) | |
| 549 | |
| 550 ## ima: Image | |
| 551 ##----------- | |
| 552 | |
| 553 par(mar = marLs[["ima"]]) | |
| 554 | |
| 555 ploRgeVn <- range(datMN, na.rm = TRUE) | |
| 556 | |
| 557 imaMN <- t(datMN)[, rev(1:nrow(datMN)), drop = FALSE] | |
| 558 | |
| 559 image(x = 1:nrow(imaMN), | |
| 560 y = 1:ncol(imaMN), | |
| 561 z = imaMN, | |
| 562 col = palHeaVc, | |
| 563 font.axis = 2, | |
| 564 font.lab = 2, | |
| 565 xaxt = "n", | |
| 566 yaxt = "n", | |
| 567 xlab = "", | |
| 568 ylab = "") | |
| 569 | |
| 570 if(length(rownames(datMN)) == 0) { | |
| 571 rowNamVc <- rep("", times = nrow(datMN)) | |
| 572 } else | |
| 573 rowNamVc <- rownames(datMN) | |
| 574 | |
| 575 if(length(colnames(datMN)) == 0) { | |
| 576 colNamVc <- rep("", times = ncol(datMN)) | |
| 577 } else | |
| 578 colNamVc <- colnames(datMN) | |
| 579 | |
| 580 xlaVc <- paste(paste(rep("[", 2), | |
| 581 c(1, nrow(imaMN)), | |
| 582 rep("] ", 2), | |
| 583 sep = ""), | |
| 584 rep("\n", times = 2), | |
| 585 c(colNamVc[1], tail(colNamVc, 1)), | |
| 586 sep = "") | |
| 587 | |
| 588 for(k in 1:2) | |
| 589 axis(side = 3, | |
| 590 hadj = c(0, 1)[k], | |
| 591 at = c(1, nrow(imaMN))[k], | |
| 592 cex = 0.8, | |
| 593 font = 2, | |
| 594 labels = xlaVc[k], | |
| 595 line = -0.5, | |
| 596 tick = FALSE) | |
| 597 | |
| 598 | |
| 599 ylaVc <- paste(paste(rep("[", times = 2), | |
| 600 c(ncol(imaMN), 1), | |
| 601 rep("]", times = 2), | |
| 602 sep = ""), | |
| 603 rep("\n", times = 2), | |
| 604 c(tail(rowNamVc, 1), rowNamVc[1]), | |
| 605 sep = "") | |
| 606 | |
| 607 for(k in 1:2) | |
| 608 axis(side = 2, | |
| 609 at = c(1, ncol(imaMN))[k], | |
| 610 cex = 0.8, | |
| 611 font = 2, | |
| 612 hadj = c(0, 1)[k], | |
| 613 labels = ylaVc[k], | |
| 614 las = 0, | |
| 615 line = -0.5, | |
| 616 lty = "blank", | |
| 617 tick = FALSE) | |
| 618 | |
| 619 box(lwd = 2) | |
| 620 | |
| 621 | |
| 622 } | |
| 623 | |
| 624 | |
| 625 zScoreF <- function(x) { | |
| 626 sdxN <- sd(x, na.rm = TRUE) | |
| 627 if(sdxN < epsN) | |
| 628 return(rep(0, length(x))) | |
| 629 else | |
| 630 return((x - mean(x, na.rm = TRUE)) / sdxN) | |
| 631 } | |
| 632 | |
| 633 | |
| 634 ## Option | |
| 635 ##------- | |
| 636 | |
| 637 strAsFacL <- options()$stringsAsFactors | |
| 638 options(stingsAsFactors = FALSE) | |
| 639 | |
| 640 ## Constants | |
| 641 ##---------- | |
| 642 | |
| 643 epsN <- .Machine[["double.eps"]] ## [1] 2.22e-16 | |
| 644 | |
| 645 | |
| 646 ##------------------------------ | |
| 647 ## Start | |
| 648 ##------------------------------ | |
| 649 | |
| 650 if(!is.null(log.txtC)) | |
| 651 sink(log.txtC) | |
| 652 | |
| 653 ## Description | |
| 654 ##------------ | |
| 655 | |
| 656 cat("\n\nData description:\n\n", sep = "") | |
| 657 cat("observations:", nrow(datMN), "\n") | |
| 658 cat("variables:", ncol(datMN), "\n") | |
| 659 cat("missing:", sum(is.na(datMN)), "\n") | |
| 660 cat("0 values (%):", | |
| 661 sum(abs(datMN) < epsN, na.rm=TRUE) / cumprod(dim(datMN))[2] * 100, "\n") | |
| 662 cat("min:", min(datMN, na.rm=TRUE), "\n") | |
| 663 cat("mean:", signif(mean(datMN, na.rm=TRUE), 2), "\n") | |
| 664 cat("median:", signif(median(datMN, na.rm=TRUE), 2), "\n") | |
| 665 cat("max:", signif(max(datMN, na.rm=TRUE), 2), "\n") | |
| 666 | |
| 667 if("sampleType" %in% colnames(samDF)) { | |
| 668 cat("\nSample types:\n", sep = "") | |
| 669 print(table(samDF[, "sampleType"])) | |
| 670 cat("\n", sep="") | |
| 671 } | |
| 672 | |
| 673 | |
| 674 ##------------------------------ | |
| 675 ## Variable metrics | |
| 676 ##------------------------------ | |
| 677 | |
| 678 | |
| 679 ## 'blank' observations | |
| 680 | |
| 681 if("sampleType" %in% colnames(samDF) && "blank" %in% samDF[, "sampleType"]) { | |
| 682 | |
| 683 cat("\nVariables: Blank mean, sd, and CV\n", sep="") | |
| 684 | |
| 685 blkVl <- samDF[, "sampleType"] == "blank" | |
| 686 | |
| 687 if(sum(blkVl) == 1) | |
| 688 varDF[, "blank_mean"] <- datMN[blkVl, ] | |
| 689 else | |
| 690 varDF[, "blank_mean"] <- apply(datMN[blkVl, , drop=FALSE], 2, function(varVn) mean(varVn, na.rm=TRUE)) | |
| 691 | |
| 692 if(sum(blkVl) == 1) | |
| 693 varDF[, "blank_sd"] <- rep(0, nrow(varDF)) | |
| 694 else | |
| 695 varDF[, "blank_sd"] <- apply(datMN[blkVl, , drop=FALSE], 2, function(varVn) sd(varVn, na.rm=TRUE)) | |
| 696 | |
| 697 varDF[, "blank_CV"] <- varDF[, "blank_sd"] / varDF[, "blank_mean"] | |
| 698 | |
| 699 } | |
| 700 | |
| 701 | |
| 702 ## 'sample' observations | |
| 703 | |
| 704 if("sampleType" %in% colnames(samDF) && "sample" %in% samDF[, "sampleType"]) { | |
| 705 | |
| 706 cat("\nVariables: Sample mean, sd, and CV\n", sep="") | |
| 707 | |
| 708 samVl <- samDF[, "sampleType"] == "sample" | |
| 709 | |
| 710 if(sum(samVl) == 1) | |
| 711 varDF[, "sample_mean"] <- datMN[samVl, ] | |
| 712 else | |
| 713 varDF[, "sample_mean"] <- apply(datMN[samVl, , drop=FALSE], 2, function(varVn) mean(varVn, na.rm=TRUE)) | |
| 714 | |
| 715 if(sum(samVl) == 1) | |
| 716 varDF[, "sample_sd"] <- rep(0, nrow(varDF)) | |
| 717 else | |
| 718 varDF[, "sample_sd"] <- apply(datMN[samVl, , drop=FALSE], 2, function(varVn) sd(varVn, na.rm=TRUE)) | |
| 719 | |
| 720 varDF[, "sample_CV"] <- varDF[, "sample_sd"] / varDF[, "sample_mean"] | |
| 721 | |
| 722 } | |
| 723 | |
| 724 ## 'blank' mean / 'sample' mean ratio | |
| 725 | |
| 726 if(all(c("blank_mean", "sample_mean") %in% colnames(varDF))) { | |
| 727 | |
| 728 cat("\nVariables: Blank mean over sample mean\n", sep="") | |
| 729 | |
| 730 varDF[, "blankMean_over_sampleMean"] <- varDF[, "blank_mean"] / varDF[, "sample_mean"] | |
| 731 | |
| 732 } | |
| 733 | |
| 734 ## 'pool' observations | |
| 735 | |
| 736 if("sampleType" %in% colnames(samDF) && "pool" %in% samDF[, "sampleType"]) { | |
| 737 | |
| 738 cat("\nVariables: Pool mean, sd, and CV\n", sep="") | |
| 739 | |
| 740 pooVl <- samDF[, "sampleType"] == "pool" | |
| 741 | |
| 742 if(sum(pooVl) == 1) | |
| 743 varDF[, "pool_mean"] <- datMN[pooVl, ] | |
| 744 else | |
| 745 varDF[, "pool_mean"] <- apply(datMN[pooVl, , drop=FALSE], 2, function(varVn) mean(varVn, na.rm=TRUE)) | |
| 746 | |
| 747 if(sum(pooVl) == 1) | |
| 748 varDF[, "pool_sd"] <- rep(0, nrow(varDF)) | |
| 749 else | |
| 750 varDF[, "pool_sd"] <- apply(datMN[pooVl, , drop=FALSE], 2, function(varVn) sd(varVn, na.rm=TRUE)) | |
| 751 | |
| 752 varDF[, "pool_CV"] <- varDF[, "pool_sd"] / varDF[, "pool_mean"] | |
| 753 | |
| 754 } | |
| 755 | |
| 756 ## 'pool' CV / 'sample' CV ratio | |
| 757 | |
| 758 if(all(c("pool_CV", "sample_CV") %in% colnames(varDF))) { | |
| 759 | |
| 760 cat("\nVariables: Pool CV over sample CV\n", sep="") | |
| 761 | |
| 762 varDF[, "poolCV_over_sampleCV"] <- varDF[, "pool_CV"] / varDF[, "sample_CV"] | |
| 763 | |
| 764 } | |
| 765 | |
| 766 | |
| 767 ## 'pool' dilutions | |
| 768 | |
| 769 if("sampleType" %in% colnames(samDF) && any(grepl("pool.+", samDF[, "sampleType"]))) { | |
| 770 | |
| 771 pooVi <- grep("pool.*", samDF[, "sampleType"]) ## pool, pool2, pool4, poolInter, ... | |
| 772 | |
| 773 pooNamVc <- samDF[pooVi, "sampleType"] | |
| 774 | |
| 775 if(pooAsPo1L) { | |
| 776 | |
| 777 pooNamVc[pooNamVc == "pool"] <- "pool1" ## 'pool' -> 'pool1' | |
| 778 | |
| 779 } else { | |
| 780 | |
| 781 pooVl <- pooNamVc == "pool" | |
| 782 pooVi <- pooVi[!pooVl] | |
| 783 pooNamVc <- pooNamVc[!pooVl] | |
| 784 | |
| 785 } | |
| 786 | |
| 787 pooDilVc <- gsub("pool", "", pooNamVc) | |
| 788 | |
| 789 pooDilVl <- sapply(pooDilVc, allDigF) | |
| 790 | |
| 791 if(sum(pooDilVl)) { | |
| 792 | |
| 793 cat("\nVariables: Pool dilutions\n", sep="") | |
| 794 | |
| 795 pooNamVc <- pooNamVc[pooDilVl] ## for the plot | |
| 796 | |
| 797 pooVi <- pooVi[pooDilVl] | |
| 798 | |
| 799 dilVn <- 1 / as.numeric(pooDilVc[pooDilVl]) | |
| 800 | |
| 801 varDF[, "poolDil_correl"] <- apply(datMN[pooVi, , drop=FALSE], 2, | |
| 802 function(varVn) cor(dilVn, varVn)) | |
| 803 | |
| 804 varDF[, "poolDil_pval"] <- apply(datMN[pooVi, , drop=FALSE], 2, | |
| 805 function(varVn) cor.test(dilVn, varVn)[["p.value"]]) | |
| 806 | |
| 807 } | |
| 808 | |
| 809 } | |
| 810 | |
| 811 | |
| 812 ##------------------------------ | |
| 813 ## Sample metrics | |
| 814 ##------------------------------ | |
| 815 | |
| 816 | |
| 817 ## Hotelling: p-value associated to the distance from the center in the first PCA score plane | |
| 818 | |
| 819 cat("\nObservations: Hotelling ellipse\n", sep="") | |
| 820 | |
| 821 ropLs <- opls(datMN, predI = 2, plotL = FALSE, printL = FALSE) | |
| 822 | |
| 823 ropScoreMN <- getScoreMN(ropLs) | |
| 824 | |
| 825 invCovScoMN <- solve(cov(ropScoreMN)) | |
| 826 | |
| 827 n <- nrow(datMN) | |
| 828 hotN <- 2 * (n - 1) * (n^2 - 1) / (n^2 * (n - 2)) | |
| 829 | |
| 830 hotPvaVn <- apply(ropScoreMN, | |
| 831 1, | |
| 832 function(x) | |
| 833 1 - pf(1 / hotN * t(as.matrix(x)) %*% invCovScoMN %*% as.matrix(x), 2, n - 2)) | |
| 834 | |
| 835 samDF[, "hotelling_pval"] <- hotPvaVn | |
| 836 | |
| 837 ## p-value associated to number of missing values | |
| 838 | |
| 839 cat("\nObservations: Missing values\n", sep="") | |
| 840 | |
| 841 missZscoVn <- zScoreF(apply(datMN, | |
| 842 1, | |
| 843 function(rowVn) { | |
| 844 sum(is.na(rowVn)) | |
| 845 })) | |
| 846 | |
| 847 samDF[, "missing_pval"] <- sapply(missZscoVn, function(zscoN) 2 * (1 - pnorm(abs(zscoN)))) | |
| 848 | |
| 849 ## p-value associated to the deciles of the profiles | |
| 850 | |
| 851 cat("\nObservations: Profile deciles\n", sep="") | |
| 852 | |
| 853 deciMN <- t(as.matrix(apply(datMN, | |
| 854 1, | |
| 855 function(x) quantile(x, 0.1 * 1:9, na.rm = TRUE)))) | |
| 856 | |
| 857 deciZscoMN <- apply(deciMN, 2, zScoreF) | |
| 858 | |
| 859 deciZscoMaxVn <- apply(deciZscoMN, 1, function(rowVn) rowVn[which.max(abs(rowVn))]) | |
| 860 | |
| 861 samDF[, "decile_pval"] <- sapply(deciZscoMaxVn, function(zscoN) 2 * (1 - pnorm(abs(zscoN)))) | |
| 862 | |
| 863 | |
| 864 ##------------------------------ | |
| 865 ## Figure | |
| 866 ##------------------------------ | |
| 867 | |
| 868 cat("\nPlotting\n") | |
| 869 | |
| 870 if(!is.null(fig.pdfC)) { | |
| 871 pdf(fig.pdfC, width=11, height=7) | |
| 872 } else | |
| 873 dev.new(width=11, height=7) | |
| 874 | |
| 875 datPloF() | |
| 876 | |
| 877 if(!is.null(fig.pdfC)) | |
| 878 dev.off() | |
| 879 | |
| 880 | |
| 881 ##------------------------------ | |
| 882 ## End | |
| 883 ##------------------------------ | |
| 884 | |
| 885 | |
| 886 if(!is.null(log.txtC)) | |
| 887 sink() | |
| 888 | |
| 889 options(stingsAsFactors = strAsFacL) | |
| 890 options(warn = optWrnN) | |
| 891 | |
| 892 return(list(samDF=samDF, | |
| 893 varDF=varDF)) | |
| 894 | |
| 895 | |
| 896 } ## qualityMetricsF |
