Mercurial > repos > ethevenot > qualitymetrics
comparison qualitymetrics_script.R @ 0:b4f5b5bc01dd draft
planemo upload for repository https://github.com/workflow4metabolomics/qualitymetrics.git commit 73366dd3473c509341ab9ba1df8ba748d08a50a1
author | ethevenot |
---|---|
date | Sat, 06 Aug 2016 12:01:17 -0400 |
parents | |
children | 6d3b7b6573d8 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b4f5b5bc01dd |
---|---|
1 ################################################################################################ | |
2 # ANALYSES FOR QUALITY CONTROL # | |
3 # # | |
4 # Author: Melanie PETERA # | |
5 # User: Galaxy # | |
6 # Starting date: 04-09-2014 # | |
7 # V-1.0: Restriction of old filter script to CV filter # | |
8 # V-1.1: Addition of data check # | |
9 # V-1.2: Substitution of deletion by addition of indicator variable # | |
10 # V-1.3: Handling special characters # | |
11 # # | |
12 # # | |
13 # Input files: dataMatrix ; sampleMetadata ; variableMetadata # | |
14 # Output files: dataMatrix ; sampleMetadata ; variableMetadata # | |
15 # # | |
16 ################################################################################################ | |
17 | |
18 # Parameters (for dev) | |
19 if(FALSE){ | |
20 | |
21 ion.file.in <- "test/ressources/inputs/ex_data_IONS.txt" #tab file | |
22 meta.samp.file.in <- "test/ressources/inputs/ex_data_PROTOCOLE1.txt" #tab file | |
23 meta.ion.file.in <- "test/ressources/inputs/ex_data_METAION.txt" #tab file | |
24 | |
25 ## ion.file.out <- "test/ressources/outputs/QCtest_ex_data_IONS.txt" #tab file | |
26 meta.samp.file.out <- "test/ressources/outputs/QCtest_ex_data_PROTOCOLE1.txt" #tab file | |
27 meta.ion.file.out <- "test/ressources/outputs/QCtest_ex_data_METAION.txt" #tab file | |
28 | |
29 CV <- TRUE ; if(CV){Compa<-TRUE;seuil<-1.25}else{Compa<-NULL;seuil<-NULL} | |
30 | |
31 poolAsPool1L <- FALSE | |
32 | |
33 if(FALSE) { ## Sacuri dataset | |
34 | |
35 ## 'example' input dir | |
36 exaDirInpC <- "example/input" | |
37 | |
38 ion.file.in <- file.path(exaDirInpC, "dataMatrix.tsv") | |
39 meta.samp.file.in <- file.path(exaDirInpC, "sampleMetadata.tsv") | |
40 meta.ion.file.in <- file.path(exaDirInpC, "variableMetadata.tsv") | |
41 | |
42 poolAsPool1L <- FALSE | |
43 | |
44 ## 'example' output dir | |
45 exaDirOutC <- gsub("input", "output", exaDirInpC) | |
46 | |
47 mata.samp.file.out <- file.path(exaDirOutC, "sampleMetadata.tsv") | |
48 meta.ion.file_out <- file.path(exaDirOutC, "variableMetadata.tsv") | |
49 fig.out <- file.path(exaDirOutC, "figure.pdf") | |
50 log.out <- file.path(exaDirOutC, "information.txt") | |
51 | |
52 stopifnot(file.exists(exaDirOutC)) | |
53 | |
54 } | |
55 | |
56 } | |
57 | |
58 QualityControl <- function(ion.file.in, meta.samp.file.in, meta.ion.file.in, | |
59 CV, Compa, seuil, poolAsPool1L, | |
60 ion.file.out, meta.samp.file.out, meta.ion.file.out, fig.out, log.out){ | |
61 # This function allows to analyse data to check its quality | |
62 # It needs 3 datasets: the data matrix, the variables' metadata, the samples' metadata. | |
63 # It generates 3 new datasets corresponding to the 3 inputs with additional columns. | |
64 # | |
65 # Parameters: | |
66 # - xxx.in: input files' names | |
67 # - xxx.out: output files' names | |
68 # - CV: CV calculation yes/no | |
69 # | > Compa: comparing pool and sample CVs (TRUE) or simple pool CV calculation (FALSE) | |
70 # | > seuil: maximum ratio tolerated between pool and sample CVs or maximum pool CV | |
71 | |
72 | |
73 # Input ----------------------------------------------------------------------------------- | |
74 | |
75 ion.data <- read.table(ion.file.in,sep="\t",header=TRUE,check.names=FALSE, stringsAsFactors = FALSE) | |
76 meta.samp.data <- read.table(meta.samp.file.in,sep="\t",header=TRUE,check.names=FALSE, stringsAsFactors = FALSE) | |
77 meta.ion.data <- read.table(meta.ion.file.in,sep="\t",header=TRUE,check.names=FALSE, stringsAsFactors = FALSE) | |
78 | |
79 # Error vector | |
80 err.stock <- "\n" | |
81 | |
82 # Table match check | |
83 table.check <- match3(ion.data,meta.samp.data,meta.ion.data) | |
84 check.err(table.check) | |
85 | |
86 # StockID | |
87 samp.id <- stockID(ion.data,meta.samp.data,"sample") | |
88 ion.data <- samp.id$dataMatrix | |
89 meta.samp.data <- samp.id$Metadata | |
90 samp.id <- samp.id$id.match | |
91 | |
92 | |
93 # Function 1: CV calculation -------------------------------------------------------------- | |
94 # Allows to class ions according to the Coefficient of Variation (CV): | |
95 # Compa=TRUE: | |
96 # CV of pools and CV of samples are compared (ration between pools' one and samples' one) | |
97 # and confronted to a given ration. | |
98 # Compa=FALSE: | |
99 # only CV of pools are considered ; compared to a given threshold | |
100 | |
101 if(CV){ | |
102 | |
103 # Checking the sampleType variable | |
104 if(is.null(meta.samp.data$sampleType)){ | |
105 err.stock <- c(err.stock,"\n-------", | |
106 "\nWarning : no 'sampleType' variable detected in sample meta-data !", | |
107 "\nCV can not be calculated.\n-------\n") | |
108 }else{ | |
109 if(!("pool"%in%levels(factor(meta.samp.data$sampleType)))){ | |
110 err.stock <- c(err.stock,"\n-------", | |
111 "\nWarning : no 'pool' detected in 'sampleType' variable (sample meta-data) !", | |
112 "\nCV can not be calculated.\n-------\n") | |
113 }else{ | |
114 if((!("sample"%in%levels(factor(meta.samp.data$sampleType))))&(Compa)){ | |
115 err.stock <- c(err.stock,"\n-------", | |
116 "\nWarning : no 'sample' detected in 'sampleType' variable (sample meta-data) !", | |
117 "\nCV can not be calculated.\n-------\n") | |
118 }else{ | |
119 | |
120 # Statement | |
121 tmp.ion <- data.frame(CV.ind=rep(NA,nrow(ion.data)),CV.samp=rep(NA,nrow(ion.data)), | |
122 CV.pool=rep(NA,nrow(ion.data)),ion.data,stringsAsFactors=FALSE) | |
123 # CV samples | |
124 tmp.samp <- which(colnames(tmp.ion)%in%meta.samp.data[which(meta.samp.data$sampleType=="sample"),1]) | |
125 tmp.ion$CV.samp <- apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE)) / rowMeans(tmp.ion[,tmp.samp], na.rm = TRUE) | |
126 tmp.ion$CV.samp[which(apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE))==0)] <- 0 | |
127 # CV pools | |
128 tmp.samp <- which(colnames(tmp.ion)%in%meta.samp.data[which(meta.samp.data$sampleType=="pool"),1]) | |
129 tmp.ion$CV.pool <- apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE)) / rowMeans(tmp.ion[,tmp.samp], na.rm = TRUE) | |
130 tmp.ion$CV.pool[which(apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE))==0)] <- 0 | |
131 # CV indicator | |
132 if(Compa){tmp.ion$CV.ind <- ifelse((tmp.ion$CV.pool)/(tmp.ion$CV.samp)>seuil,0,1) | |
133 }else{tmp.ion$CV.ind <- ifelse((tmp.ion$CV.pool)>seuil,0,1)} | |
134 # Addition of new columns in meta.ion.data | |
135 if(Compa){tmp.ion<-tmp.ion[,c(4,2,3,1,1)]}else{tmp.ion<-tmp.ion[,c(4,3,1,1)]} | |
136 tmp.ion[,ncol(tmp.ion)] <- 1:nrow(tmp.ion) | |
137 meta.ion.data <- merge(x=meta.ion.data,y=tmp.ion,by.x=1,by.y=1) | |
138 meta.ion.data <- meta.ion.data[order(meta.ion.data[,ncol(meta.ion.data)]),][,-ncol(meta.ion.data)] | |
139 rownames(meta.ion.data) <- NULL | |
140 | |
141 rm(tmp.ion,tmp.samp) | |
142 | |
143 }}} | |
144 | |
145 } # end if(CV) | |
146 | |
147 ## complementary metrics (ET) | |
148 | |
149 datMN <- t(as.matrix(ion.data[, -1])) | |
150 colnames(datMN) <- ion.data[, 1] | |
151 datMN <- datMN[, meta.ion.data[, 1]] ## in case meta.ion.data has been re-ordered during the CV = TRUE computations | |
152 quaLs <- qualityMetricsF(datMN, | |
153 meta.samp.data, | |
154 meta.ion.data, | |
155 poolAsPool1L, | |
156 fig.out, | |
157 log.out) | |
158 meta.samp.data <- quaLs[["samDF"]] | |
159 meta.ion.data <- quaLs[["varDF"]] | |
160 | |
161 | |
162 # Output ---------------------------------------------------------------------------------- | |
163 | |
164 # Getting back original identifiers | |
165 id.ori <- reproduceID(ion.data,meta.samp.data,"sample",samp.id) | |
166 ion.data <- id.ori$dataMatrix | |
167 meta.samp.data <- id.ori$Metadata | |
168 | |
169 | |
170 # Error checking | |
171 if(length(err.stock)>1){ | |
172 stop(err.stock) | |
173 }else{ | |
174 | |
175 ## write.table(ion.data, ion.file.out, sep="\t", row.names=FALSE, quote=FALSE) | |
176 write.table(meta.samp.data, meta.samp.file.out, sep="\t", row.names=FALSE, quote=FALSE) | |
177 write.table(meta.ion.data, meta.ion.file.out, sep="\t", row.names=FALSE, quote=FALSE) | |
178 | |
179 } | |
180 | |
181 | |
182 } # end of QualityControl function | |
183 | |
184 | |
185 # Typical function call | |
186 # QualityControl(ion.file.in, meta.samp.file.in, meta.ion.file.in, | |
187 # CV, Compa, seuil, | |
188 # ion.file.out, meta.samp.file.out, meta.ion.file.out) | |
189 | |
190 | |
191 qualityMetricsF <- function(datMN, | |
192 samDF, | |
193 varDF, | |
194 pooAsPo1L = TRUE, | |
195 fig.pdfC = NULL, | |
196 log.txtC = NULL) { | |
197 | |
198 optWrnN <- options()$warn | |
199 options(warn = -1) | |
200 | |
201 | |
202 ##------------------------------ | |
203 ## Functions | |
204 ##------------------------------ | |
205 | |
206 | |
207 allDigF <- function (string) { ## from the Hmisc package (all.digits) | |
208 k <- length(string) | |
209 result <- logical(k) | |
210 for (i in 1:k) { | |
211 st <- string[i] | |
212 ls <- nchar(st) | |
213 ex <- substring(st, 1:ls, 1:ls) | |
214 result[i] <- all(match(ex, c("0", "1", "2", "3", "4", | |
215 "5", "6", "7", "8", "9"), nomatch = 0) > 0) | |
216 } | |
217 result | |
218 } | |
219 | |
220 datPloF <- function() { ## ploting data matrix | |
221 | |
222 thrVn <- c(pvalue=0.001, | |
223 poolCv=0.3) | |
224 | |
225 ## Constants | |
226 | |
227 marLs <- list(dri = c(2.1, 2.6, 1.1, 1.1), | |
228 ima = c(1.1, 2.6, 4.1, 1.1), | |
229 msd = c(2.1, 2.6, 1.1, 0.6), | |
230 sam = c(3.1, 3.6, 1.1, 0.6), | |
231 pca = c(2.6, 3.6, 1.1, 0.6), | |
232 sca = c(1.1, 4.1, 4.1, 0.6), | |
233 tit = c(0.1, 0.6, 1.1, 0.6)) | |
234 palHeaVc <- rev(rainbow(ceiling(256 * 1.5))[1:256]) | |
235 | |
236 ## Functions | |
237 | |
238 axiPreF <- function(valVn, | |
239 lenN) { | |
240 | |
241 if(NA %in% valVn) { | |
242 warning("NA in valVn") | |
243 valVn <- as.vector(na.omit(valVn)) | |
244 } | |
245 | |
246 if(lenN < length(valVn)) | |
247 stop("The length of in vector must be inferior to the length of the length parameter.") | |
248 | |
249 if(length(valVn) < lenN) | |
250 valVn <- seq(from = min(valVn), to = max(valVn), length.out = lenN) | |
251 | |
252 preValVn <- pretty(valVn) | |
253 | |
254 preLabVn <- preAtVn <- c() | |
255 | |
256 for(n in 1:length(preValVn)) | |
257 if(min(valVn) < preValVn[n] && preValVn[n] < max(valVn)) { | |
258 preLabVn <- c(preLabVn, preValVn[n]) | |
259 preAtVn <- c(preAtVn, which(abs(valVn - preValVn[n]) == min(abs(valVn - preValVn[n])))[1]) | |
260 } | |
261 | |
262 return(list(atVn = preAtVn, | |
263 labVn = preLabVn)) | |
264 | |
265 } | |
266 | |
267 colF <- function(vecVn) | |
268 sapply(vecVn, | |
269 function(outN) { | |
270 if(outN < ploRgeVn[1]) | |
271 return(palHeaVc[1]) | |
272 else if(outN > ploRgeVn[2]) | |
273 return(palHeaVc[256]) | |
274 else return(palHeaVc[round((outN - ploRgeVn[1]) / diff(ploRgeVn) * 256 + 1)])}) | |
275 | |
276 obsColF <- function(typVc) { | |
277 | |
278 ## available color palette | |
279 palVc <- palette() | |
280 | |
281 ## colors for common types are set aside | |
282 palVc <- palVc[!(palVc %in% c("black", "red", "green3"))] | |
283 | |
284 ## filling in the types with dedicated colors | |
285 samTypVc <- sort(unique(samDF[, "sampleType"])) | |
286 samColVc <- character(length(samTypVc)) | |
287 if("blank" %in% samTypVc) | |
288 samColVc[grepl("blank", samTypVc)] <- "black" | |
289 if("pool" %in% samTypVc) | |
290 samColVc[grepl("pool", samTypVc)] <- "red" | |
291 if("sample" %in% samTypVc) | |
292 samColVc[grepl("sample", samTypVc)] <- "green4" | |
293 | |
294 ## filling in the other types | |
295 palColI <- 1 | |
296 palColMaxI <- length(palVc) | |
297 | |
298 while(any(samColVc == "")) { | |
299 typToColI <- which(samColVc == "")[1] | |
300 if(palColI <= palColMaxI) | |
301 samColVc[typToColI] <- palVc[palColI] | |
302 else | |
303 samColVc[typToColI] <- "gray" | |
304 palColI <- palColI + 1 | |
305 } | |
306 | |
307 names(samColVc) <- samTypVc | |
308 | |
309 samColVc[typVc] | |
310 | |
311 } | |
312 | |
313 par(font = 2, | |
314 font.axis = 2, | |
315 font.lab = 2, | |
316 pch=18) | |
317 | |
318 layout(matrix(c(1, 3, 4, 5, 5, | |
319 1, 7, 7, 7, 6, | |
320 2, 7, 7, 7, 6), | |
321 byrow = TRUE, | |
322 nrow = 3), | |
323 heights = c(1.8, 1.2, 2.5), | |
324 widths = c(3.5, 1.8, 2.8, 1, 0.8)) | |
325 | |
326 ## Colors | |
327 ##------- | |
328 | |
329 if("sampleType" %in% colnames(samDF)) { | |
330 obsColVc <- obsColF(samDF[, "sampleType"]) | |
331 } else | |
332 obsColVc <- rep("black", nrow(samDF)) | |
333 | |
334 ## PCA and Hotelling ellipse | |
335 ##-------------------------- | |
336 | |
337 vVn <- getPcaVarVn(ropLs) | |
338 vRelVn <- vVn / ncol(datMN) | |
339 | |
340 par(mar = marLs[["pca"]]) | |
341 | |
342 plot(ropScoreMN, | |
343 type = "n", | |
344 xlab = "", | |
345 ylab = "", | |
346 xlim = range(ropScoreMN[, 1]) * 1.1) | |
347 mtext(paste("t1 (", round(vRelVn[1] * 100), "%)", sep = ""), | |
348 cex = 0.7, | |
349 line = 2, | |
350 side = 1) | |
351 mtext(paste("t2 (", round(vRelVn[2] * 100), "%)", sep = ""), | |
352 cex = 0.7, | |
353 las = 0, | |
354 line = 2, | |
355 side = 2) | |
356 abline(h = 0, lty = "dashed") | |
357 abline(v = 0, lty = "dashed") | |
358 radVn <- seq(0, 2 * pi, length.out = 100) | |
359 | |
360 hotFisN <- hotN * qf(1 - thrVn["pvalue"], 2, n - 2) | |
361 lines(sqrt(var(ropScoreMN[, 1]) * hotFisN) * cos(radVn), | |
362 sqrt(var(ropScoreMN[, 2]) * hotFisN) * sin(radVn)) | |
363 | |
364 text(ropScoreMN[, 1], | |
365 ropScoreMN[, 2], | |
366 cex = 0.7, | |
367 col = obsColVc, | |
368 labels = rownames(datMN)) | |
369 | |
370 if("sampleType" %in% colnames(samDF)) { | |
371 obsColVuc <- obsColVc[sort(unique(names(obsColVc)))] | |
372 legOrdVc <- c("blank", paste0("pool", 8:1), "pool", "other", "sample") | |
373 obsColVuc <- obsColVuc[legOrdVc[legOrdVc %in% names(obsColVuc)]] | |
374 | |
375 text(rep(par("usr")[1], times = length(obsColVuc)), | |
376 par("usr")[3] + (0.97 - length(obsColVuc) * 0.03 + 1:length(obsColVuc) * 0.03) * diff(par("usr")[3:4]), | |
377 col = obsColVuc, | |
378 font = 2, | |
379 labels = names(obsColVuc), | |
380 pos = 4) | |
381 } | |
382 | |
383 ## Missing/low intensities and decile values | |
384 ##------------------------------------------ | |
385 | |
386 par(mar = marLs[["sam"]]) | |
387 | |
388 plot(missZscoVn, | |
389 deciZscoMaxVn, | |
390 type = "n", | |
391 xlab = "", | |
392 ylab = "", | |
393 xlim = c(min(missZscoVn), | |
394 max(missZscoVn) + 0.5)) | |
395 mtext("amount of missing values (z-score)", | |
396 cex = 0.7, | |
397 line = 2, | |
398 side = 1) | |
399 mtext("deciles (zscore)", | |
400 cex = 0.7, | |
401 las = 0, | |
402 line = 2, | |
403 side = 2) | |
404 abline(h = qnorm(1 - thrVn["pvalue"] / 2) * c(-1, 1), lty = "dashed") | |
405 abline(v = qnorm(1 - thrVn["pvalue"] / 2) * c(-1, 1), lty = "dashed") | |
406 text(missZscoVn, | |
407 deciZscoMaxVn, | |
408 cex = 0.7, | |
409 col = obsColVc, | |
410 labels = rownames(datMN)) | |
411 | |
412 ## tit: Title | |
413 ##----------- | |
414 | |
415 par(mar = marLs[["tit"]]) | |
416 plot(0:1, bty = "n", type = "n", xaxt = "n", yaxt = "n", xlab = "", ylab = "") | |
417 text(1.5, 1, cex = 1.3, labels = "Quality Metrics") | |
418 text(1, 0.85, adj=0, cex = 1.1, labels = paste0("NAs: ", | |
419 round(length(which(is.na(c(datMN)))) / cumprod(dim(datMN))[2] * 100), "%")) | |
420 text(1, 0.75, adj=0, cex = 1.1, labels = paste0("0 values: ", | |
421 round(sum(abs(datMN) < epsN, na.rm=TRUE) / cumprod(dim(datMN))[2] * 100, 2), "%")) | |
422 text(1, 0.65, adj=0, cex = 1.1, labels = paste0("min: ", signif(min(datMN, na.rm=TRUE), 2))) | |
423 text(1, 0.55, adj=0, cex = 1.1, labels = paste0("median: ", signif(median(datMN, na.rm=TRUE), 2))) | |
424 text(1, 0.45, adj=0, cex = 1.1, labels = paste0("mean: ", signif(mean(datMN, na.rm=TRUE), 2))) | |
425 text(1, 0.35, adj=0, cex = 1.1, labels = paste0("max: ", signif(max(datMN, na.rm=TRUE), 2))) | |
426 if("sampleType" %in% colnames(samDF) && | |
427 "pool" %in% samDF[, "sampleType"]) | |
428 text(1, | |
429 0.25, | |
430 adj=0, cex = 1.1, | |
431 labels = paste0("pool CV < ", | |
432 round(thrVn["poolCv"] * 100), "%: ", | |
433 round(sum(varDF[, "pool_CV"] < thrVn["poolCv"]) / nrow(varDF) * 100), | |
434 "%")) | |
435 | |
436 text(1, 0.1, adj=0, labels = paste0("Thresholds used in plots:")) | |
437 text(1, 0, adj=0, labels = paste0(" p-value = ", thrVn["pvalue"])) | |
438 | |
439 ## dri: Analytical drift | |
440 ##---------------------- | |
441 | |
442 par(mar = marLs[["dri"]]) | |
443 | |
444 ## ordering | |
445 | |
446 driDatMN <- datMN | |
447 driSamDF <- samDF | |
448 | |
449 driSamDF[, "ordIniVi"] <- 1:nrow(driDatMN) | |
450 | |
451 if("injectionOrder" %in% colnames(driSamDF)) { | |
452 if("batch" %in% colnames(driSamDF)) | |
453 ordVi <- order(driSamDF[, "batch"], | |
454 driSamDF[, "injectionOrder"]) | |
455 else | |
456 ordVi <- order(driSamDF[, "injectionOrder"]) | |
457 } else | |
458 ordVi <- 1:nrow(driDatMN) | |
459 | |
460 driDatMN <- driDatMN[ordVi, ] | |
461 driSamDF <- driSamDF[ordVi, ] | |
462 | |
463 driColVc <- rep("black", nrow(driDatMN)) | |
464 if("sampleType" %in% colnames(driSamDF)) | |
465 driColVc <- obsColF(driSamDF[, "sampleType"]) | |
466 | |
467 plot(rowSums(driDatMN, na.rm=TRUE), | |
468 col = driColVc, | |
469 pch = 18, | |
470 xlab = "", | |
471 ylab = "") | |
472 | |
473 mtext("injection order", | |
474 cex = 0.7, | |
475 line = 2, | |
476 side = 1) | |
477 | |
478 mtext("Sum of intens. for all variables", | |
479 cex = 0.7, | |
480 line = 2, | |
481 side = 2) | |
482 | |
483 ## msd: Sd vs Mean plot | |
484 ##--------------------- | |
485 | |
486 par(mar = marLs[["msd"]]) | |
487 plot(apply(datMN, 2, function(y) mean(y, na.rm = TRUE)), | |
488 apply(datMN, 2, function(y) sd(y, na.rm = TRUE)), | |
489 col=obsColVc, | |
490 pch = 18, | |
491 xlab = "", | |
492 ylab = "") | |
493 mtext("mean", | |
494 cex = 0.7, | |
495 line = 2, | |
496 side = 1) | |
497 mtext("sd", | |
498 cex = 0.7, | |
499 line = 2, | |
500 side = 2) | |
501 | |
502 ## sca-6: Color scale | |
503 ##------------------- | |
504 | |
505 par(mar = marLs[["sca"]]) | |
506 | |
507 ylimVn <- c(0, 256) | |
508 ybottomVn <- 0:255 | |
509 ytopVn <- 1:256 | |
510 | |
511 plot(x = 0, | |
512 y = 0, | |
513 font.axis = 2, | |
514 font.lab = 2, | |
515 type = "n", | |
516 xlim = c(0, 1), | |
517 ylim = ylimVn, | |
518 xlab = "", | |
519 ylab = "", | |
520 xaxs = "i", | |
521 yaxs = "i", | |
522 xaxt = "n", | |
523 yaxt = "n") | |
524 | |
525 rect(xleft = 0, | |
526 ybottom = ybottomVn, | |
527 xright = 1, | |
528 ytop = ytopVn, | |
529 col = palHeaVc, | |
530 border = NA) | |
531 | |
532 eval(parse(text = paste("axis(at = axiPreF(c(ifelse(min(datMN, na.rm = TRUE) == -Inf, yes = 0, no = min(datMN, na.rm = TRUE)) , max(datMN, na.rm = TRUE)), 256)$atVn, | |
533 font = 2, | |
534 font.axis = 2, | |
535 labels = axiPreF(c(ifelse(min(datMN, na.rm = TRUE) == -Inf, yes = 0, no = min(datMN, na.rm = TRUE)), max(datMN, na.rm = TRUE)), 256)$labVn, | |
536 las = 1, | |
537 lwd = 2, | |
538 lwd.ticks = 2, | |
539 side = 2, | |
540 xpd = TRUE)", sep = ""))) | |
541 | |
542 arrows(par("usr")[1], | |
543 par("usr")[4], | |
544 par("usr")[1], | |
545 par("usr")[3], | |
546 code = 0, | |
547 lwd = 2, | |
548 xpd = TRUE) | |
549 | |
550 ## ima: Image | |
551 ##----------- | |
552 | |
553 par(mar = marLs[["ima"]]) | |
554 | |
555 ploRgeVn <- range(datMN, na.rm = TRUE) | |
556 | |
557 imaMN <- t(datMN)[, rev(1:nrow(datMN)), drop = FALSE] | |
558 | |
559 image(x = 1:nrow(imaMN), | |
560 y = 1:ncol(imaMN), | |
561 z = imaMN, | |
562 col = palHeaVc, | |
563 font.axis = 2, | |
564 font.lab = 2, | |
565 xaxt = "n", | |
566 yaxt = "n", | |
567 xlab = "", | |
568 ylab = "") | |
569 | |
570 if(length(rownames(datMN)) == 0) { | |
571 rowNamVc <- rep("", times = nrow(datMN)) | |
572 } else | |
573 rowNamVc <- rownames(datMN) | |
574 | |
575 if(length(colnames(datMN)) == 0) { | |
576 colNamVc <- rep("", times = ncol(datMN)) | |
577 } else | |
578 colNamVc <- colnames(datMN) | |
579 | |
580 xlaVc <- paste(paste(rep("[", 2), | |
581 c(1, nrow(imaMN)), | |
582 rep("] ", 2), | |
583 sep = ""), | |
584 rep("\n", times = 2), | |
585 c(colNamVc[1], tail(colNamVc, 1)), | |
586 sep = "") | |
587 | |
588 for(k in 1:2) | |
589 axis(side = 3, | |
590 hadj = c(0, 1)[k], | |
591 at = c(1, nrow(imaMN))[k], | |
592 cex = 0.8, | |
593 font = 2, | |
594 labels = xlaVc[k], | |
595 line = -0.5, | |
596 tick = FALSE) | |
597 | |
598 | |
599 ylaVc <- paste(paste(rep("[", times = 2), | |
600 c(ncol(imaMN), 1), | |
601 rep("]", times = 2), | |
602 sep = ""), | |
603 rep("\n", times = 2), | |
604 c(tail(rowNamVc, 1), rowNamVc[1]), | |
605 sep = "") | |
606 | |
607 for(k in 1:2) | |
608 axis(side = 2, | |
609 at = c(1, ncol(imaMN))[k], | |
610 cex = 0.8, | |
611 font = 2, | |
612 hadj = c(0, 1)[k], | |
613 labels = ylaVc[k], | |
614 las = 0, | |
615 line = -0.5, | |
616 lty = "blank", | |
617 tick = FALSE) | |
618 | |
619 box(lwd = 2) | |
620 | |
621 | |
622 } | |
623 | |
624 | |
625 zScoreF <- function(x) { | |
626 sdxN <- sd(x, na.rm = TRUE) | |
627 if(sdxN < epsN) | |
628 return(rep(0, length(x))) | |
629 else | |
630 return((x - mean(x, na.rm = TRUE)) / sdxN) | |
631 } | |
632 | |
633 | |
634 ## Option | |
635 ##------- | |
636 | |
637 strAsFacL <- options()$stringsAsFactors | |
638 options(stingsAsFactors = FALSE) | |
639 | |
640 ## Constants | |
641 ##---------- | |
642 | |
643 epsN <- .Machine[["double.eps"]] ## [1] 2.22e-16 | |
644 | |
645 | |
646 ##------------------------------ | |
647 ## Start | |
648 ##------------------------------ | |
649 | |
650 if(!is.null(log.txtC)) | |
651 sink(log.txtC) | |
652 | |
653 ## Description | |
654 ##------------ | |
655 | |
656 cat("\n\nData description:\n\n", sep = "") | |
657 cat("observations:", nrow(datMN), "\n") | |
658 cat("variables:", ncol(datMN), "\n") | |
659 cat("missing:", sum(is.na(datMN)), "\n") | |
660 cat("0 values (%):", | |
661 sum(abs(datMN) < epsN, na.rm=TRUE) / cumprod(dim(datMN))[2] * 100, "\n") | |
662 cat("min:", min(datMN, na.rm=TRUE), "\n") | |
663 cat("mean:", signif(mean(datMN, na.rm=TRUE), 2), "\n") | |
664 cat("median:", signif(median(datMN, na.rm=TRUE), 2), "\n") | |
665 cat("max:", signif(max(datMN, na.rm=TRUE), 2), "\n") | |
666 | |
667 if("sampleType" %in% colnames(samDF)) { | |
668 cat("\nSample types:\n", sep = "") | |
669 print(table(samDF[, "sampleType"])) | |
670 cat("\n", sep="") | |
671 } | |
672 | |
673 | |
674 ##------------------------------ | |
675 ## Variable metrics | |
676 ##------------------------------ | |
677 | |
678 | |
679 ## 'blank' observations | |
680 | |
681 if("sampleType" %in% colnames(samDF) && "blank" %in% samDF[, "sampleType"]) { | |
682 | |
683 cat("\nVariables: Blank mean, sd, and CV\n", sep="") | |
684 | |
685 blkVl <- samDF[, "sampleType"] == "blank" | |
686 | |
687 if(sum(blkVl) == 1) | |
688 varDF[, "blank_mean"] <- datMN[blkVl, ] | |
689 else | |
690 varDF[, "blank_mean"] <- apply(datMN[blkVl, , drop=FALSE], 2, function(varVn) mean(varVn, na.rm=TRUE)) | |
691 | |
692 if(sum(blkVl) == 1) | |
693 varDF[, "blank_sd"] <- rep(0, nrow(varDF)) | |
694 else | |
695 varDF[, "blank_sd"] <- apply(datMN[blkVl, , drop=FALSE], 2, function(varVn) sd(varVn, na.rm=TRUE)) | |
696 | |
697 varDF[, "blank_CV"] <- varDF[, "blank_sd"] / varDF[, "blank_mean"] | |
698 | |
699 } | |
700 | |
701 | |
702 ## 'sample' observations | |
703 | |
704 if("sampleType" %in% colnames(samDF) && "sample" %in% samDF[, "sampleType"]) { | |
705 | |
706 cat("\nVariables: Sample mean, sd, and CV\n", sep="") | |
707 | |
708 samVl <- samDF[, "sampleType"] == "sample" | |
709 | |
710 if(sum(samVl) == 1) | |
711 varDF[, "sample_mean"] <- datMN[samVl, ] | |
712 else | |
713 varDF[, "sample_mean"] <- apply(datMN[samVl, , drop=FALSE], 2, function(varVn) mean(varVn, na.rm=TRUE)) | |
714 | |
715 if(sum(samVl) == 1) | |
716 varDF[, "sample_sd"] <- rep(0, nrow(varDF)) | |
717 else | |
718 varDF[, "sample_sd"] <- apply(datMN[samVl, , drop=FALSE], 2, function(varVn) sd(varVn, na.rm=TRUE)) | |
719 | |
720 varDF[, "sample_CV"] <- varDF[, "sample_sd"] / varDF[, "sample_mean"] | |
721 | |
722 } | |
723 | |
724 ## 'blank' mean / 'sample' mean ratio | |
725 | |
726 if(all(c("blank_mean", "sample_mean") %in% colnames(varDF))) { | |
727 | |
728 cat("\nVariables: Blank mean over sample mean\n", sep="") | |
729 | |
730 varDF[, "blankMean_over_sampleMean"] <- varDF[, "blank_mean"] / varDF[, "sample_mean"] | |
731 | |
732 } | |
733 | |
734 ## 'pool' observations | |
735 | |
736 if("sampleType" %in% colnames(samDF) && "pool" %in% samDF[, "sampleType"]) { | |
737 | |
738 cat("\nVariables: Pool mean, sd, and CV\n", sep="") | |
739 | |
740 pooVl <- samDF[, "sampleType"] == "pool" | |
741 | |
742 if(sum(pooVl) == 1) | |
743 varDF[, "pool_mean"] <- datMN[pooVl, ] | |
744 else | |
745 varDF[, "pool_mean"] <- apply(datMN[pooVl, , drop=FALSE], 2, function(varVn) mean(varVn, na.rm=TRUE)) | |
746 | |
747 if(sum(pooVl) == 1) | |
748 varDF[, "pool_sd"] <- rep(0, nrow(varDF)) | |
749 else | |
750 varDF[, "pool_sd"] <- apply(datMN[pooVl, , drop=FALSE], 2, function(varVn) sd(varVn, na.rm=TRUE)) | |
751 | |
752 varDF[, "pool_CV"] <- varDF[, "pool_sd"] / varDF[, "pool_mean"] | |
753 | |
754 } | |
755 | |
756 ## 'pool' CV / 'sample' CV ratio | |
757 | |
758 if(all(c("pool_CV", "sample_CV") %in% colnames(varDF))) { | |
759 | |
760 cat("\nVariables: Pool CV over sample CV\n", sep="") | |
761 | |
762 varDF[, "poolCV_over_sampleCV"] <- varDF[, "pool_CV"] / varDF[, "sample_CV"] | |
763 | |
764 } | |
765 | |
766 | |
767 ## 'pool' dilutions | |
768 | |
769 if("sampleType" %in% colnames(samDF) && any(grepl("pool.+", samDF[, "sampleType"]))) { | |
770 | |
771 pooVi <- grep("pool.*", samDF[, "sampleType"]) ## pool, pool2, pool4, poolInter, ... | |
772 | |
773 pooNamVc <- samDF[pooVi, "sampleType"] | |
774 | |
775 if(pooAsPo1L) { | |
776 | |
777 pooNamVc[pooNamVc == "pool"] <- "pool1" ## 'pool' -> 'pool1' | |
778 | |
779 } else { | |
780 | |
781 pooVl <- pooNamVc == "pool" | |
782 pooVi <- pooVi[!pooVl] | |
783 pooNamVc <- pooNamVc[!pooVl] | |
784 | |
785 } | |
786 | |
787 pooDilVc <- gsub("pool", "", pooNamVc) | |
788 | |
789 pooDilVl <- sapply(pooDilVc, allDigF) | |
790 | |
791 if(sum(pooDilVl)) { | |
792 | |
793 cat("\nVariables: Pool dilutions\n", sep="") | |
794 | |
795 pooNamVc <- pooNamVc[pooDilVl] ## for the plot | |
796 | |
797 pooVi <- pooVi[pooDilVl] | |
798 | |
799 dilVn <- 1 / as.numeric(pooDilVc[pooDilVl]) | |
800 | |
801 varDF[, "poolDil_correl"] <- apply(datMN[pooVi, , drop=FALSE], 2, | |
802 function(varVn) cor(dilVn, varVn)) | |
803 | |
804 varDF[, "poolDil_pval"] <- apply(datMN[pooVi, , drop=FALSE], 2, | |
805 function(varVn) cor.test(dilVn, varVn)[["p.value"]]) | |
806 | |
807 } | |
808 | |
809 } | |
810 | |
811 | |
812 ##------------------------------ | |
813 ## Sample metrics | |
814 ##------------------------------ | |
815 | |
816 | |
817 ## Hotelling: p-value associated to the distance from the center in the first PCA score plane | |
818 | |
819 cat("\nObservations: Hotelling ellipse\n", sep="") | |
820 | |
821 ropLs <- opls(datMN, predI = 2, plotL = FALSE, printL = FALSE) | |
822 | |
823 ropScoreMN <- getScoreMN(ropLs) | |
824 | |
825 invCovScoMN <- solve(cov(ropScoreMN)) | |
826 | |
827 n <- nrow(datMN) | |
828 hotN <- 2 * (n - 1) * (n^2 - 1) / (n^2 * (n - 2)) | |
829 | |
830 hotPvaVn <- apply(ropScoreMN, | |
831 1, | |
832 function(x) | |
833 1 - pf(1 / hotN * t(as.matrix(x)) %*% invCovScoMN %*% as.matrix(x), 2, n - 2)) | |
834 | |
835 samDF[, "hotelling_pval"] <- hotPvaVn | |
836 | |
837 ## p-value associated to number of missing values | |
838 | |
839 cat("\nObservations: Missing values\n", sep="") | |
840 | |
841 missZscoVn <- zScoreF(apply(datMN, | |
842 1, | |
843 function(rowVn) { | |
844 sum(is.na(rowVn)) | |
845 })) | |
846 | |
847 samDF[, "missing_pval"] <- sapply(missZscoVn, function(zscoN) 2 * (1 - pnorm(abs(zscoN)))) | |
848 | |
849 ## p-value associated to the deciles of the profiles | |
850 | |
851 cat("\nObservations: Profile deciles\n", sep="") | |
852 | |
853 deciMN <- t(as.matrix(apply(datMN, | |
854 1, | |
855 function(x) quantile(x, 0.1 * 1:9, na.rm = TRUE)))) | |
856 | |
857 deciZscoMN <- apply(deciMN, 2, zScoreF) | |
858 | |
859 deciZscoMaxVn <- apply(deciZscoMN, 1, function(rowVn) rowVn[which.max(abs(rowVn))]) | |
860 | |
861 samDF[, "decile_pval"] <- sapply(deciZscoMaxVn, function(zscoN) 2 * (1 - pnorm(abs(zscoN)))) | |
862 | |
863 | |
864 ##------------------------------ | |
865 ## Figure | |
866 ##------------------------------ | |
867 | |
868 cat("\nPlotting\n") | |
869 | |
870 if(!is.null(fig.pdfC)) { | |
871 pdf(fig.pdfC, width=11, height=7) | |
872 } else | |
873 dev.new(width=11, height=7) | |
874 | |
875 datPloF() | |
876 | |
877 if(!is.null(fig.pdfC)) | |
878 dev.off() | |
879 | |
880 | |
881 ##------------------------------ | |
882 ## End | |
883 ##------------------------------ | |
884 | |
885 | |
886 if(!is.null(log.txtC)) | |
887 sink() | |
888 | |
889 options(stingsAsFactors = strAsFacL) | |
890 options(warn = optWrnN) | |
891 | |
892 return(list(samDF=samDF, | |
893 varDF=varDF)) | |
894 | |
895 | |
896 } ## qualityMetricsF |