comparison diagmfl.R @ 0:a4d89d47646f draft default tip

planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 8d2ca678d973501b60479a8dc3f212eecd56eab8
author workflow4metabolomics
date Mon, 16 May 2022 09:25:01 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a4d89d47646f
1 #' Calcul des grandeurs "diagnostiques"
2 #'
3 #' Script adapte de http://www.ime.unicamp.br/~cnaber/residdiag_nlme_v22.R pour fonctionner
4 #' avec un modele lmer (et non lme), des sujets avec des identifiants non numeriques,
5 #' et des observations non ordonnees sujet par sujet (dernier point a verifier.)
6 #'
7 #' @detail Les graphiques, les calculs associƩs et les notations utilisees dans le script suivent
8 #' l'article de Singer et al (2016) Graphical Tools for detedcting departures from linear
9 #' mixed model assumptions and some remedial measures, International Statistical Review
10 #' (doi:10.1111/insr.12178)
11 #'
12 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data
13 #' @return A list
14 #' @author Natacha Lenuzza
15 #' @examples
16 #' print("hello !")
17 #'
18 #' @export lmer.computeDiag
19
20
21
22 lmer.computeDiag <- function(mfl) {
23
24 ## Check arguments ---------------------------------------------------------
25
26 if (length(mfl@flist) > 1)
27 stop("Several 'grouping level' for random effect not implemented yet.")
28
29
30 ## extracting information from mfl models -------------------------------------------------------------
31 # data
32 df <- mfl@frame
33 responseC <- names(df)[1]
34 unitC <- names(mfl@flist)[1]
35 # observations
36 yVn <- df[, responseC]
37 nobsN <- length(yVn)
38 # units
39 idunitVc <- levels(mfl@flist[[1]])
40 nunitN <- length(unique(idunitVc))
41 #X
42 xMN <- mfl@pp$X
43 pN <- ncol(xMN)
44 #Z
45 zMN <- t(as.matrix(mfl@pp$Zt))
46 # Estimated covariance matrix of random effects (Gam)
47 aux <- VarCorr(mfl)[[1]] ## assuming only one level of grouping
48 aux2 <- attr(aux, "stddev")
49 gMN <- attr(aux, "correlation") * (aux2 %*% t(aux2))
50 gammaMN <- as.matrix(kronecker(diag(nunitN), gMN))
51 q <- dim(gMN)[1]
52 # Estimated covariance matrix of conditonal error (homoskedastic conditional independance model)
53 sigsqN <- attr(VarCorr(mfl), "sc")^2
54 rMN <- sigsqN * diag(nobsN)
55 # Estimated covariance matrix of Y
56 vMN <- (zMN %*% gammaMN %*% t(zMN)) + rMN
57 invvMN <- MASS::ginv(vMN)
58 # H and Q matrix
59 hMN <- MASS::ginv(t(xMN) %*% invvMN %*% xMN)
60 qMN <- invvMN - invvMN %*% xMN %*% (hMN) %*% t(xMN) %*% invvMN
61 # eblue and eblup
62 eblueVn <- mfl@beta
63 eblupVn <- gammaMN %*% t(zMN) %*% invvMN %*% (yVn - xMN %*% eblueVn) ## equivalent de ranef(mfl)
64 rownames(eblupVn) <- colnames(zMN)
65 ## Calculs of matrices and vectors used in graph diagnosics ---------------------------------------------
66 ## Marginal and individual predictions, residuals and variances
67 marpredVn <- xMN %*% eblueVn
68 marresVn <- yVn - marpredVn
69 marvarMN <- vMN - xMN %*% hMN %*% t(xMN)
70 condpredVn <- marpredVn + zMN %*% eblupVn
71 condresVn <- yVn - condpredVn
72 condvarMN <- rMN %*% qMN %*% rMN
73 ## Analysis of marginal and conditional residuals
74 stmarresVn <- stcondresVn <- rep(0, nobsN)
75 lesverVn <- rep(0, nunitN)
76 names(lesverVn) <- idunitVc
77 for (i in 1:nunitN) {
78 idxiVn <- which(df[, unitC] == idunitVc[i]) ## position des observations du sujet i
79 miN <- length(idxiVn)
80 ## standardization of marginal residual
81 stmarresVn[idxiVn] <- as.vector(solve(sqrtmF(marvarMN[idxiVn, idxiVn])) %*% marresVn[idxiVn])
82 ##Standardized Lessafre and Verbeke's measure
83 auxMN <- diag(1, ncol = miN, nrow = miN) - stmarresVn[idxiVn] %*% t(stmarresVn[idxiVn])
84 lesverVn[i] <- sum(diag(auxMN %*% t(auxMN)))
85 ## standardization of conditional residual
86 stcondresVn[idxiVn] <- as.vector(solve(sqrtmF(condvarMN[idxiVn, idxiVn])) %*% condresVn[idxiVn])
87 }
88 lesverVn <- lesverVn / sum(lesverVn)
89 ## Least confounded conditional residuals
90 ## EBLUP analysis (Mahalanobis' distance)
91 varbMN <- gammaMN %*% t(zMN) %*% qMN %*% zMN %*% gammaMN
92 mdistVn <- rep(0, nunitN)
93 qm <- q - 1
94 for (j in 1:nunitN) {
95 gbi <- varbMN[(q * j - qm):(q * j), (q * j - qm):(q * j)]
96 eblupi <- eblupVn[(q * j - qm):(q * j)]
97 mdistVn[j] <- t(eblupi) %*% ginv(gbi) %*% eblupi
98 }
99 names(mdistVn) <- levels(mfl@flist[[1]])
100 ## output ----------------------------------------------
101 return(list(
102 data = df,
103 q = q,
104 eblue = eblueVn,
105 eblup = eblupVn,
106 marginal.prediction = marpredVn,
107 conditional.prediction = condpredVn,
108 std.marginal.residuals = stmarresVn,
109 std.conditional.residuals = stcondresVn,
110 mahalanobis.distance = mdistVn,
111 std.mahalanobis.distance = mdistVn / sum(mdistVn),
112 std.lesaffreverbeke.measure = lesverVn
113 ))
114 }
115
116
117 #' Wrapper function for diagnostic plots of 'lmer' linear mixed models
118 #'
119 #' (W4M mixmod)
120 #'
121 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data
122 #' @param title aa
123 #' @param outlier.limit aa
124 #' @param pvalCutof aa
125 #' @param resC aa
126 #' @param uniC aa
127 #' @param fixC aa
128 #' @param lest.confounded Not used yet.
129 #' @return NULL
130 #' @author Natacha Lenuzza
131 #' @examples
132 #' print("hello !")
133 #'
134 #' @export diagmflF
135
136
137 diagmflF <- function(mfl,
138 title = "",
139 outlier.limit = 3,
140 pvalCutof = 0.05,
141 resC = "vd",
142 uniC = "subject",
143 timC = "time",
144 fixC = "fixfact",
145 least.confounded = FALSE) {
146 ## diagnostics
147 diagLs <- lmer.computeDiag(mfl)
148 ## plots
149 blank <- rectGrob(gp = gpar(col = "white"))
150 rectspacer <- rectGrob(height = unit(0.1, "npc"), gp = gpar(col = "grey"))
151 grid.arrange(blank,
152 plot_linearity(diagLs, hlimitN = outlier.limit, plotL = FALSE,
153 label_factor = c(uniC, fixC, timC)),
154 blank,
155 plot_conditionalResiduals(diagLs, hlimitN = outlier.limit, plotL = FALSE,
156 label_factor = c(uniC, fixC, timC)),
157 blank,
158 plot_condresQQplot(diagLs, plotL = FALSE),
159 blank,
160 plot_lesaffreVeerbeke(diagLs, plotL = FALSE),
161 blank,
162 plot_randomEffect(mfl, plotL = FALSE)[[1]],
163 blank,
164 plot_mahalanobisKhi2(diagLs, plotL = FALSE),
165 blank,
166 plot_mahalanobis(diagLs, plotL = FALSE),
167 blank,
168 blank,
169 blank,
170 top = textGrob(title, gp = gpar(fontsize = 40, font = 4)),
171 layout_matrix = matrix(c(rep(1, 7),
172 2, 3, rep(4, 3), 20, 21,
173 rep(5, 7),
174 6:12,
175 rep(13, 7),
176 14:18, rep(19, 2)),
177 ncol = 7, nrow = 6, byrow = TRUE),
178 heights = c(0.1 / 3, 0.3, 0.1 / 3, 0.3, 0.1 / 3, 0.3),
179 widths = c(0.22, 0.04, 0.22, 0.04, 0.22, 0.04, 0.22))
180 }
181
182 #######################################################################################################
183 ## Raw data time courses
184 #######################################################################################################
185
186 #' Visualization of raw time course
187 #'
188 #' Une
189 #'
190 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data
191 #' @param responseC Name of the 'response' variable
192 #' @param timeC Name of the 'time' variable
193 #' @param subjectC Name of the 'subject' variable
194 #' @param fixfactC Name of the 'fixed factor' variable (e.g.treatment)
195 #' @param offset_subject Boolean indicating if an offset value (subject's mean) should substracted to each data point. Default is FALSE
196 #' @param plotL Boolean
197 #' @param colorType One of NA, FIXFACT or SUBJECT
198 #' @param shapeType One of NA, FIXFACT or SUBJECT
199 #' @param lineType One of NA, FIXFACT or SUBJECT
200 #' @return A plot
201 #' @author Natacha Lenuzza
202 #' @examples
203 #' print("hello !")
204 #'
205 #' @export plot_timeCourse
206
207
208
209 plot_timeCourse <- function(mfl,
210 responseC,
211 timeC,
212 subjectC,
213 fixfactC = NULL,
214 offset_subject = FALSE,
215 plotL = TRUE,
216 colorType = NA, ## subject, fixfact, none or NA
217 shapeType = NA, ## subject, fixfact, none or NA
218 lineType = NA ## subject, fixfact, none or NA
219 ) {
220 ## Data -----
221 if (class(mfl) %in% c("merModLmerTest", "lmerMod", "lmerModLmerTest")) {
222 DF <- mfl@frame
223 } else if (class(mfl) == "data.frame") {
224 DF <- mfl
225 } else {
226 stop("'mfl' argument must be a linear mixed effect model or a data frame.")
227 }
228 ## Format data -----
229 if (is.null(fixfactC)) {
230 DF <- DF[, c(responseC, timeC, subjectC)]
231 colnames(DF) <- c("DV", "TIME", "SUBJECT")
232 meanDF <- aggregate(DF$DV,
233 by = list(SUBJECT = DF$SUBJECT,
234 TIME = DF$TIME),
235 FUN = mean,
236 na.rm = TRUE)
237 colnames(meanDF) <- c("SUBJECT", "TIME", "DV")
238 meanDF$GROUP <- meanDF$SUBJECT
239 } else{
240 DF <- DF[, c(responseC, fixfactC, timeC, subjectC)]
241 colnames(DF) <- c("DV", "FIXFACT", "TIME", "SUBJECT")
242 meanDF <- aggregate(DF$DV,
243 by = list(SUBJECT = DF$SUBJECT,
244 TIME = DF$TIME,
245 FIXFACT = DF$FIXFACT),
246 FUN = mean,
247 na.rm = TRUE)
248 colnames(meanDF) <- c("SUBJECT", "TIME", "FIXFACT", "DV")
249 meanDF$GROUP <- paste(meanDF$SUBJECT, meanDF$FIXFACT, sep = "_")
250 }
251 ## Offset -----
252 if (offset_subject) {
253 offsetMN <- aggregate(DF$DV, by = list(DF$SUBJECT), mean, na.rm = TRUE)
254 offsetVn <- offsetMN[, 2]
255 names(offsetVn) <- offsetMN[, 1]
256 rm(offsetMN)
257 DF$DV <- DF$DV - offsetVn[DF$SUBJECT]
258 meanDF$DV <- meanDF$DV - offsetVn[as.character(meanDF$SUBJECT)]
259 }
260 ## Graphical parameters -----
261 xlabC <- timeC
262 ylabC <- responseC
263 titC <- "Individual time-courses"
264 if (offset_subject) {
265 ylabC <- paste(ylabC, "minus 'within-subject' empirical mean")
266 titC <- paste(titC, "('within-subject' empirical mean offset)")
267 }
268 ## color
269 if (is.na(colorType)) { ## automaticatical attribution
270 if (is.null(fixfactC)) {
271 colorType <- "SUBJECT"
272 } else {
273 colorType <- "FIXFACT"
274 }
275 colTxt <- paste(", colour=", colorType)
276 } else if (colorType == "none") {
277 colTxt <- ""
278 } else {
279 colTxt <- paste(", colour=", colorType)
280 }
281 ## lineType
282 if (is.na(lineType)) { ## automaticatical attribution
283 if (is.null(fixfactC)) {
284 linTxt <- ""
285 } else {
286 linTxt <- paste(", linetype=",
287 ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT"))
288 }
289 } else if (lineType == "none") {
290 linTxt <- ""
291 } else {
292 linTxt <- paste(", linetype=", lineType)
293 }
294 ## shapeType
295 if (is.na(shapeType)) { ## automaticatical attribution
296 if (is.null(fixfactC)) {
297 shaTxt <- ""
298 } else {
299 shaTxt <- paste(", shape=",
300 ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT"))
301 }
302 } else if (shapeType == "none") {
303 shaTxt <- ""
304 } else {
305 shaTxt <- paste(", shape=", shapeType)
306 }
307 ## aes mapping
308 txtMap <- paste("aes(x = TIME, y = DV",
309 colTxt, shaTxt, ")", sep = "")
310 txtLineMap <- paste("aes(x = TIME, y = DV, group = GROUP ",
311 colTxt, linTxt, ")", sep = "")
312 ## plot and output
313 p <- ggplot(data = DF, mapping = eval(parse(text = txtMap))) +
314 ggtitle(titC) +
315 xlab(xlabC) + ylab(ylabC) +
316 theme(legend.position = "left",
317 plot.title = element_text(size = rel(1.2), face = "bold")) +
318 geom_point() +
319 geom_line(eval(parse(text = txtLineMap)), data = meanDF) +
320 theme_bw() +
321 NULL
322 if (plotL) plot(p)
323 invisible(p)
324 }
325
326 #######################################################################################################
327 ## Post-hoc estimate
328 #######################################################################################################
329 #' Visualization of fixed effects (post-hoc estimates)
330 #'
331 #' Description
332 #'
333 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data
334 #' @param pvalCutof User pvalue cut of
335 #' @param plotL Boolean
336 #' @param titC Title of the plot
337 #' @return A plot
338 #' @author Natacha Lenuzza
339 #' @examples
340 #' print("hello !")
341 #'
342 #' @export plot_posthoc
343 plot_posthoc <- function(mfl, pvalCutof = 0.05, plotL = TRUE, titC = "Post-hoc estimates") {
344 ddlsm1 <- as.data.frame(difflsmeans(mfl, test.effs = NULL))
345 colnames(ddlsm1)[ncol(ddlsm1)] <- "pvalue"
346 ddlsm1$Significance <- rep("NS", nrow(ddlsm1))
347 ## modif JF pour tenir compte du seuil de pvalues defini par le user
348 ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof)] <- paste("p-value < ", pvalCutof, sep = "")
349 ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 5)] <- paste("p-value < ", pvalCutof / 5, sep = "")
350 ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 10)] <- paste("p-value < ", pvalCutof / 10, sep = "")
351 ddlsm1$levels <- rownames(ddlsm1)
352 ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) {
353 strsplit(namC, split = " ", fixed = TRUE)[[1]][1]
354 })
355 colValue <- c("grey", "yellow", "orange", "red")
356 names(colValue) <- c("NS",
357 paste("p-value < ", pvalCutof, sep = ""),
358 paste("p-value < ", pvalCutof / 5, sep = ""),
359 paste("p-value < ", pvalCutof / 10, sep = ""))
360 p <- ggplot(ddlsm1, aes(x = levels, y = Estimate)) +
361 facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") +
362 geom_bar(aes(fill = Significance), stat = "identity") +
363 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
364 scale_fill_manual(values = colValue) +
365 geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.25) +
366 ggtitle(titC) + xlab("") +
367 NULL
368 if (plotL) plot(p)
369 invisible(p)
370 }
371
372 #######################################################################################################
373 ## Visualisation des effets alƩatoires
374 #######################################################################################################
375 #' Visualization of random effects
376 #'
377 #' Equivalent of dotplot(ranef)
378 #'
379 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data
380 #' @param plotL Logical
381 #' @return A plot
382 #' @author Natacha Lenuzza
383 #' @examples
384 #' print("hello !")
385 #'
386 #' @export plot_randomEffect
387
388
389 plot_randomEffect <- function(mfl, plotL = TRUE) {
390 ## Estimation et format des effets alƩatoires
391 randomEffect <- ranef(mfl, condVar = TRUE)
392 DF <- data.frame(randomEffect = rep(names(randomEffect),
393 times = sapply(seq_len(length(randomEffect)),
394 function(lsi) {
395 return(length(unlist(randomEffect[[lsi]])))})))
396 DF$condVar <- DF$estimate <- DF$x2 <- DF$x1 <- rep(NA, nrow(DF))
397 for (rafC in names(randomEffect)) {
398 eff <- randomEffect[[rafC]]
399 DF$x1[which(DF$randomEffect == rafC)] <- rep(colnames(eff), each = nrow(eff))
400 DF$x2[which(DF$randomEffect == rafC)] <- rep(rownames(eff), ncol(eff))
401 DF$estimate[which(DF$randomEffect == rafC)] <- unlist(eff)
402 condvar <- attr(randomEffect[[rafC]], "postVar")
403 se <- NULL
404 for (coli in seq_len(ncol(eff))) {
405 se <- c(se,
406 sapply(seq_len(nrow(eff)),
407 function(i) {
408 return(condvar[coli, coli, i])}))
409 }
410 DF$condVar[which(DF$randomEffect == rafC)] <- se
411 }
412 DF$se <- sqrt(DF$condVar)
413 DF$lower <- DF$estimate - 1.96 * DF$se
414 DF$upper <- DF$estimate + 1.96 * DF$se
415 ## Plot
416 plotLs <- vector("list", length(randomEffect))
417 names(plotLs) <- names(randomEffect)
418 for (pi in seq_len(length(plotLs))) {
419 subDF <- DF[DF$randomEffect == names(plotLs)[pi], ]
420 subDF <- subDF[order(subDF$x1, subDF$estimate, decreasing = FALSE), ]
421 p <- ggplot(data = subDF,
422 mapping = aes(x = estimate, y = reorder(x2, estimate))
423 ) +
424 geom_point(size = 3) +
425 geom_segment(aes(xend = lower, yend = x2)) +
426 geom_segment(aes(xend = upper, yend = x2)) +
427 facet_wrap(~x1, ncol = length(unique(subDF$x1))) +
428 ylab("") + xlab("") +
429 ggtitle(paste("Random effect - ", names(plotLs)[pi], sep = "")) +
430 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
431 geom_vline(xintercept = 0, linetype = "dashed") +
432 theme_bw()
433 plotLs[[pi]] <- p
434 if (plotL) plot(p)
435 }
436 invisible(plotLs)
437 }
438
439 #######################################################################################################
440 ## LinearitƩ des effets et outlying observations
441 #######################################################################################################
442 #' Linarity of the fixed effect with regard to the continuous time
443 #'
444 #' @param diagLs diagnostic list
445 #' @param hlimitN Limit value for outliers (e.g.2 or 3)
446 #' @param plotL Boolean
447 #' @param label_factor Column of observation names used to label outlying values
448 #' @return A plot
449 #' @author Natacha Lenuzza
450 #' @examples
451 #' print("hello !")
452 #'
453 #' @export plot_linearity
454 #'
455
456 plot_linearity <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) {
457 df <- cbind.data.frame(diagLs$data,
458 marginal.prediction = diagLs$marginal.prediction,
459 standardized.marginal.residuals = diagLs$std.marginal.residuals)
460 # outlier annotation
461 df$outliers <- rep("", nrow(df))
462 outidx <- which(abs(df$standardized.marginal.residuals) > hlimitN)
463 df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx]
464 if (length(label_factor) >= 1) {
465 df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
466 df[outidx, label_factor[1]],
467 sep = "_")
468 if (length(label_factor) > 1) {
469 for (li in 2:length(label_factor)) {
470 df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
471 df[outidx, label_factor[li]],
472 sep = ".")
473 }
474 }
475 }
476 p <- ggplot(data = df,
477 aes(x = marginal.prediction,
478 y = standardized.marginal.residuals)) +
479 geom_point(size = 2) +
480 geom_hline(yintercept = 0, col = "grey") +
481 geom_smooth(aes(x = marginal.prediction,
482 y = standardized.marginal.residuals), data = df, se = FALSE, col = "blue", method = "loess") +
483 ggtitle("Linearity of effects/outlying obervations") +
484 xlab("Marginal predictions") +
485 ylab("Standardized marginal residuals") +
486 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
487 geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") +
488 geom_text(aes(label = outliers), hjust = 0, vjust = 0)
489 if (plotL) plot(p)
490 invisible(p)
491 }
492
493
494 #######################################################################################################
495 ## EBLUP
496 #######################################################################################################
497
498
499 #' Mahalanobis distance
500 #'
501 #' @param diagLs diagnostic list
502 #' @param plotL Boolean
503 #' @return A plot
504 #' @author Natacha Lenuzza
505 #' @examples
506 #' print("hello !")
507 #'
508 #' @export plot_mahalanobis
509 #'
510
511
512 plot_mahalanobis <- function(diagLs, plotL = TRUE) {
513 unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance),
514 mal = diagLs$std.mahalanobis.distance)
515 ## Outlying subjects
516 p <-
517 ggplot(aes(y = mal, x = unit), data = unitDf) +
518 geom_point(size = 3) +
519 ylab("Standardized Mahalanobis distance") +
520 geom_vline(xintercept = 0, linetype = "dashed") +
521 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
522 geom_hline(yintercept = 2 * mean(unitDf$mal), linetype = "dashed") +
523 geom_text(aes(label = unit),
524 data = unitDf[unitDf$mal > 2 * mean(unitDf$mal), ],
525 hjust = 1, vjust = 0) +
526 ggtitle("Outlying unit") +
527 xlab("unit")
528 if (plotL) plot(p)
529 invisible(p)
530 }
531
532
533
534
535
536
537 #' Mahalanobis distance (Chi2)
538 #'
539 #' @param diagLs diagnostic list
540 #' @param plotL aa
541 #' @return A plot
542 #' @author Natacha Lenuzza
543 #' @examples
544 #' print("hello !")
545 #'
546 #' @export plot_mahalanobisKhi2
547 #'
548
549
550 plot_mahalanobisKhi2 <- function(diagLs, plotL = TRUE) {
551 unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance),
552 mal = diagLs$mahalanobis.distance)
553 p <- qqplotF(x = unitDf$mal,
554 distribution = "chisq",
555 df = diagLs$q,
556 line.estimate = NULL,
557 conf = 0.95) +
558 xlab("Chi-squared quantiles") +
559 ylab("Mahalanobis distance") +
560 ggtitle("Normality of random effect") +
561 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
562 if (plotL) plot(p)
563 invisible(p)
564 }
565
566
567
568
569
570
571
572 #######################################################################################################
573 ## Residus conditionels
574 #######################################################################################################
575
576 ## Presence of outlying observations and homoscedacity of residuals
577
578 #' Homoskedacity of conditionalresiduals
579 #'
580 #' @param diagLs diagnostic list
581 #' @param hlimitN Limit value for outliers (e.g.2 or 3)
582 #' @param plotL Boolean
583 #' @param label_factor Column of observation names used to label outlying values
584 #' @return A plot
585 #' @author Natacha Lenuzza
586 #' @examples
587 #' print("hello !")
588 #'
589 #' @export plot_conditionalResiduals
590 #'
591
592
593 plot_conditionalResiduals <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) {
594 df <- cbind.data.frame(diagLs$data,
595 conditional.prediction = diagLs$conditional.prediction,
596 standardized.conditional.residuals = diagLs$std.conditional.residuals)
597 # outlier annotation
598 df$outliers <- rep("", nrow(df))
599 outidx <- which(abs(df$standardized.conditional.residuals) > hlimitN)
600 df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx]
601 if (length(label_factor) >= 1) {
602 df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
603 df[outidx, label_factor[1]],
604 sep = "_")
605 if (length(label_factor) > 1) {
606 for (li in 2:length(label_factor)) {
607 df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
608 df[outidx, label_factor[li]],
609 sep = ".")
610 }
611 }
612 }
613 p <- ggplot(data = df,
614 aes(x = conditional.prediction,
615 y = standardized.conditional.residuals)) +
616 geom_point(size = 2) +
617 geom_hline(yintercept = 0, col = "grey") +
618 geom_smooth(aes(x = conditional.prediction,
619 y = standardized.conditional.residuals),
620 data = df, se = FALSE, col = "blue", method = "loess") +
621 ggtitle("Homoscedasticity of conditional residuals/outlying observations") +
622 xlab("Individual predictions") +
623 ylab("Standardized conditional residuals") +
624 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
625 geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") +
626 geom_text(aes(label = outliers), hjust = 0, vjust = 0)
627 if (plotL) plot(p)
628 invisible(p)
629 }
630
631
632
633
634 #' Normality of conditionalresiduals
635 #'
636 #' @param diagLs diagnostic list
637 #' @param plotL aa
638 #' @return A plot
639 #' @author Natacha Lenuzza
640 #' @examples
641 #' print("hello !")
642 #'
643 #' @export plot_condresQQplot
644 #'
645
646
647 plot_condresQQplot <- function(diagLs, plotL = TRUE) {
648 df <- cbind.data.frame(diagLs$data,
649 conditional.prediction = diagLs$conditional.prediction,
650 standardized.conditional.residuals = diagLs$std.conditional.residuals)
651 p <- qqplotF(x = df$standardized.conditional.residuals,
652 distribution = "norm",
653 line.estimate = NULL,
654 conf = 0.95) +
655 xlab("Standard normal quantiles") +
656 ylab("Standardized conditional residual quantiles") +
657 ggtitle("Normality of conditional error") +
658 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
659 if (plotL) plot(p)
660 invisible(p)
661 }
662
663
664
665
666
667 #######################################################################################################
668 ## Within-units covariance structure
669 #######################################################################################################
670
671
672 #' Lesaffre-Veerbeke measure
673 #'
674 #' @param diagLs diagnostic list
675 #' @param plotL aa
676 #' @return A plot
677 #' @author Natacha Lenuzza
678 #' @examples
679 #' print("hello !")
680 #'
681 #' @export plot_lesaffreVeerbeke
682 #'
683
684
685 plot_lesaffreVeerbeke <- function(diagLs, plotL = TRUE) {
686 unitDf <- data.frame(unit = names(diagLs$std.lesaffreverbeke.measure),
687 lvm = diagLs$std.lesaffreverbeke.measure)
688 p <- ggplot(data = unitDf,
689 aes(x = unit,
690 y = lvm)) +
691 geom_point(size = 2) +
692 theme(legend.position = "none") +
693 xlab("units") +
694 ylab("Standardized Lesaffre-Verbeke measure") +
695 geom_hline(yintercept = 2 * mean(unitDf$lvm), linetype = "dashed") +
696 geom_text(aes(label = unit),
697 data = unitDf[unitDf$lvm > 2 * mean(unitDf$lvm), ],
698 hjust = 0, vjust = 0) +
699 ggtitle("Within-units covariance matrice") +
700 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
701 if (plotL) plot(p)
702 invisible(p)
703 }
704
705 ##-------------------------------------------------------------------------------------------------##
706 ## Helpers
707 ##-------------------------------------------------------------------------------------------------##
708
709
710 ## square root of a matrix
711 ## From Rocha, Singer and Nobre
712
713 #' square root of a matrix (Rocha)
714 #'
715 #' Description
716 #'
717 #' @param mat Matrix
718 #' @return A list
719 #' @author Natacha Lenuzza
720 #' @examples
721 #' print("hello !")
722 #'
723 #' @export sqrt.matrix
724
725 sqrt.matrix <- function(mat) {
726 mat <- as.matrix(mat) # new line of code
727 singular_dec <- svd(mat, LINPACK = F)
728 U <- singular_dec$u
729 V <- singular_dec$v
730 D <- diag(singular_dec$d)
731 sqrtmatrix <- U %*% sqrt(D) %*% t(V)
732 }
733
734
735 ## square root of a matrix
736 ## http://www.cs.toronto.edu/~jepson/csc420/notes/introSVD.pdf (page 6)
737 ## (for matMN a n x n matrix that symetric and non-negative definite)
738
739 #' square root of a matrix (Rocha)
740 #'
741 #' @param mat Matrix
742 #' @return A list
743 #' @author Natacha Lenuzza
744 #' @examples
745 #' print("hello !")
746 #'
747 #' @export sqrtmF
748
749 sqrtmF <- function(matMN) {
750 matMN <- as.matrix(matMN)
751 ## check that matMN is symetric: if (!all(t(matMN == matMN))) stop("matMN must be symetric.")
752 svd_dec <- svd(matMN)
753 invisible(svd_dec$u %*% sqrt(diag(svd_dec$d)) %*% t(svd_dec$v))
754 }
755
756
757 ## qqplotF
758 ## adapted from https://gist.github.com/rentrop/d39a8406ad8af2a1066c
759
760 qqplotF <- function(x,
761 distribution = "norm", ...,
762 line.estimate = NULL,
763 conf = 0.95,
764 labels = names(x)) {
765 q.function <- eval(parse(text = paste0("q", distribution)))
766 d.function <- eval(parse(text = paste0("d", distribution)))
767 x <- na.omit(x)
768 ord <- order(x)
769 n <- length(x)
770 P <- ppoints(length(x))
771 daf <- data.frame(ord.x = x[ord], z = q.function(P, ...))
772 if (is.null(line.estimate)) {
773 Q.x <- quantile(daf$ord.x, c(0.25, 0.75))
774 Q.z <- q.function(c(0.25, 0.75), ...)
775 b <- diff(Q.x) / diff(Q.z)
776 coef <- c(Q.x[1] - b * Q.z[1], b)
777 } else {
778 coef <- coef(line.estimate(ord.x ~ z))
779 }
780 zz <- qnorm(1 - (1 - conf) / 2)
781 SE <- (coef[2] / d.function(daf$z, ...)) * sqrt(P * (1 - P) / n)
782 fit.value <- coef[1] + coef[2] * daf$z
783 daf$upper <- fit.value + zz * SE
784 daf$lower <- fit.value - zz * SE
785 if (!is.null(labels)) {
786 daf$label <- ifelse(daf$ord.x > daf$upper | daf$ord.x < daf$lower, labels[ord], "")
787 }
788 p <- ggplot(daf, aes(x = z, y = ord.x)) +
789 geom_point() +
790 geom_abline(intercept = coef[1], slope = coef[2], col = "red") +
791 geom_line(aes(x = z, y = lower), daf, col = "red", linetype = "dashed") +
792 geom_line(aes(x = z, y = upper), daf, col = "red", linetype = "dashed") +
793 xlab("") + ylab("")
794 if (!is.null(labels)) p <- p + geom_text(aes(label = label))
795 return(p)
796 }
797
798
799 ## histogramm
800 histF <- function(x, sd_x = NULL, breaks = "scott") {
801 if (is.null(sd_x))
802 sd_x <- sd(x)
803 ## Bandwith estimation (default is Scott)
804 if (!breaks %in% c("sqrt", "sturges", "rice", "scott", "fd"))
805 breaks <- "scott"
806 if (breaks %in% c("sqrt", "sturges", "rice")) {
807 k <- switch(breaks,
808 sqrt = sqrt(length(x)),
809 sturges = floor(log2(x)) + 1,
810 rice = floor(2 * length(x) ^ (1 / 3))
811 )
812 bw <- diff(range(x)) / k
813 }else{
814 bw <- switch(breaks,
815 scott = 3.5 * sd_x / length(x) ^ (1 / 3),
816 fd = diff(range(x)) / (2 * IQR(x) / length(x) ^ (1 / 3))
817 )
818 }
819
820
821 daf <- data.frame(x = x)
822 ## graph
823 return(ggplot(data = daf, aes(x)) +
824 geom_histogram(aes(y = ..density..),
825 col = "black", fill = "grey", binwidth = bw) +
826 geom_density(size = 1.2,
827 col = "blue",
828 linetype = "blank",
829 fill = rgb(0, 0, 1, 0.1)) +
830 stat_function(fun = dnorm,
831 args = list(mean = 0, sd = sd_x),
832 col = "blue", size = 1.2) +
833 theme(legend.position = "none") +
834 xlab(""))
835 }
836
837
838 plot.res.Lmixed <- function(mfl, df, title = "", pvalCutof = 0.05) {
839
840 ## define subscript of the different columns depending if we have only time (ncol(df)=3) or not
841 if (ncol(df) > 3) {
842 varidx <- 4
843 ffidx <- 1
844 timidx <- 2
845 individx <- 3
846 } else {
847 varidx <- 3
848 ffidx <- 1
849 timidx <- 1
850 individx <- 2
851 }
852 nameVar <- colnames(df)[varidx]
853 fflab <- colnames(df)[ffidx]
854 ## Individual time-course
855 rawPlot <-
856 ggplot(data = df, aes(x = df[[timidx]], y = df[[varidx]], colour = df[[ffidx]], group = df[[individx]])) +
857 geom_point() +
858 geom_line() + ggtitle("Individual time-courses (raw data)") +
859 ylab(nameVar) +
860 xlab(label = colnames(df)[2]) +
861 theme(legend.title = element_blank(), legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
862 ## Boxplot of fixed factor
863 bPlot <-
864 ggplot(data = df, aes(y = df[[varidx]], x = df[[ffidx]], color = df[[ffidx]])) +
865 geom_boxplot(outlier.colour = "red", outlier.shape = 8, outlier.size = 4) +
866 ggtitle(paste("Boxplot by ", fflab, sep = "")) + xlab("") + ylab("") +
867 theme(legend.title = element_blank(), plot.title = element_text(size = rel(1.2), face = "bold"))
868 ## Post-hoc estimates
869 ddlsm1 <- mfl
870 ddlsm1$name <- rownames(ddlsm1)
871 ddlsm1$Significance <- rep("NS", nrow(ddlsm1))
872 ## modif JF pour tenir compte du seuil de pvalues defini par le user
873 options("scipen" = 100, "digits" = 5)
874 pvalCutof <- as.numeric(pvalCutof)
875 bs <- 0.05; bm <- 0.01; bi <- 0.005
876 if (pvalCutof > bm) {
877 bs <- pvalCutof
878 } else
879 if (pvalCutof < bm & pvalCutof > bi) {
880 bm <- pvalCutof; bs <- pvalCutof
881 } else
882 if (pvalCutof < bi) {
883 bi <- pvalCutof; bm <- pvalCutof; bs <- pvalCutof
884 }
885 lbs <- paste("p-value < ", bs, sep = "")
886 lbm <- paste("p-value < ", bm, sep = "")
887 lbi <- paste("p-value < ", bi, sep = "")
888 cols <- paste("p-value < ", bs, sep = "")
889 colm <- paste("p-value < ", bm, sep = "")
890 coli <- paste("p-value < ", bi, sep = "")
891 valcol <- c("grey", "yellow", "orange", "red")
892 names(valcol) <- c("NS", lbs, lbm, lbi)
893 ddlsm1$Significance[which(ddlsm1$p.value <= bs)] <- lbs
894 ddlsm1$Significance[which(ddlsm1$p.value < bs & ddlsm1$p.value >= bm)] <- lbm
895 ddlsm1$Significance[which(ddlsm1$p.value < bi)] <- lbi
896 ddlsm1$levels <- rownames(ddlsm1)
897 ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) {
898 strsplit(namC, split = " ", fixed = TRUE)[[1]][1]
899 })
900
901 phPlot <-
902 ggplot(ddlsm1, aes(x = levels, y = Estimate)) +
903 facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") +
904 geom_bar(aes(fill = Significance), stat = "identity") +
905 theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
906 scale_fill_manual(
907 values = valcol) +
908 geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI), width = 0.25) +
909 ggtitle("Post-hoc estimates ") + xlab("") +
910 theme(plot.title = element_text(size = rel(1.2), face = "bold"))
911
912 ## Final plotting
913 grid.arrange(arrangeGrob(rawPlot, bPlot, ncol = 2),
914 phPlot, nrow = 2,
915 top = textGrob(title, gp = gpar(fontsize = 32, font = 4))
916 )
917
918 }