Mercurial > repos > workflow4metabolomics > mixmodel4repeated_measures
comparison diagmfl.R @ 0:a4d89d47646f draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 8d2ca678d973501b60479a8dc3f212eecd56eab8
author | workflow4metabolomics |
---|---|
date | Mon, 16 May 2022 09:25:01 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a4d89d47646f |
---|---|
1 #' Calcul des grandeurs "diagnostiques" | |
2 #' | |
3 #' Script adapte de http://www.ime.unicamp.br/~cnaber/residdiag_nlme_v22.R pour fonctionner | |
4 #' avec un modele lmer (et non lme), des sujets avec des identifiants non numeriques, | |
5 #' et des observations non ordonnees sujet par sujet (dernier point a verifier.) | |
6 #' | |
7 #' @detail Les graphiques, les calculs associƩs et les notations utilisees dans le script suivent | |
8 #' l'article de Singer et al (2016) Graphical Tools for detedcting departures from linear | |
9 #' mixed model assumptions and some remedial measures, International Statistical Review | |
10 #' (doi:10.1111/insr.12178) | |
11 #' | |
12 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data | |
13 #' @return A list | |
14 #' @author Natacha Lenuzza | |
15 #' @examples | |
16 #' print("hello !") | |
17 #' | |
18 #' @export lmer.computeDiag | |
19 | |
20 | |
21 | |
22 lmer.computeDiag <- function(mfl) { | |
23 | |
24 ## Check arguments --------------------------------------------------------- | |
25 | |
26 if (length(mfl@flist) > 1) | |
27 stop("Several 'grouping level' for random effect not implemented yet.") | |
28 | |
29 | |
30 ## extracting information from mfl models ------------------------------------------------------------- | |
31 # data | |
32 df <- mfl@frame | |
33 responseC <- names(df)[1] | |
34 unitC <- names(mfl@flist)[1] | |
35 # observations | |
36 yVn <- df[, responseC] | |
37 nobsN <- length(yVn) | |
38 # units | |
39 idunitVc <- levels(mfl@flist[[1]]) | |
40 nunitN <- length(unique(idunitVc)) | |
41 #X | |
42 xMN <- mfl@pp$X | |
43 pN <- ncol(xMN) | |
44 #Z | |
45 zMN <- t(as.matrix(mfl@pp$Zt)) | |
46 # Estimated covariance matrix of random effects (Gam) | |
47 aux <- VarCorr(mfl)[[1]] ## assuming only one level of grouping | |
48 aux2 <- attr(aux, "stddev") | |
49 gMN <- attr(aux, "correlation") * (aux2 %*% t(aux2)) | |
50 gammaMN <- as.matrix(kronecker(diag(nunitN), gMN)) | |
51 q <- dim(gMN)[1] | |
52 # Estimated covariance matrix of conditonal error (homoskedastic conditional independance model) | |
53 sigsqN <- attr(VarCorr(mfl), "sc")^2 | |
54 rMN <- sigsqN * diag(nobsN) | |
55 # Estimated covariance matrix of Y | |
56 vMN <- (zMN %*% gammaMN %*% t(zMN)) + rMN | |
57 invvMN <- MASS::ginv(vMN) | |
58 # H and Q matrix | |
59 hMN <- MASS::ginv(t(xMN) %*% invvMN %*% xMN) | |
60 qMN <- invvMN - invvMN %*% xMN %*% (hMN) %*% t(xMN) %*% invvMN | |
61 # eblue and eblup | |
62 eblueVn <- mfl@beta | |
63 eblupVn <- gammaMN %*% t(zMN) %*% invvMN %*% (yVn - xMN %*% eblueVn) ## equivalent de ranef(mfl) | |
64 rownames(eblupVn) <- colnames(zMN) | |
65 ## Calculs of matrices and vectors used in graph diagnosics --------------------------------------------- | |
66 ## Marginal and individual predictions, residuals and variances | |
67 marpredVn <- xMN %*% eblueVn | |
68 marresVn <- yVn - marpredVn | |
69 marvarMN <- vMN - xMN %*% hMN %*% t(xMN) | |
70 condpredVn <- marpredVn + zMN %*% eblupVn | |
71 condresVn <- yVn - condpredVn | |
72 condvarMN <- rMN %*% qMN %*% rMN | |
73 ## Analysis of marginal and conditional residuals | |
74 stmarresVn <- stcondresVn <- rep(0, nobsN) | |
75 lesverVn <- rep(0, nunitN) | |
76 names(lesverVn) <- idunitVc | |
77 for (i in 1:nunitN) { | |
78 idxiVn <- which(df[, unitC] == idunitVc[i]) ## position des observations du sujet i | |
79 miN <- length(idxiVn) | |
80 ## standardization of marginal residual | |
81 stmarresVn[idxiVn] <- as.vector(solve(sqrtmF(marvarMN[idxiVn, idxiVn])) %*% marresVn[idxiVn]) | |
82 ##Standardized Lessafre and Verbeke's measure | |
83 auxMN <- diag(1, ncol = miN, nrow = miN) - stmarresVn[idxiVn] %*% t(stmarresVn[idxiVn]) | |
84 lesverVn[i] <- sum(diag(auxMN %*% t(auxMN))) | |
85 ## standardization of conditional residual | |
86 stcondresVn[idxiVn] <- as.vector(solve(sqrtmF(condvarMN[idxiVn, idxiVn])) %*% condresVn[idxiVn]) | |
87 } | |
88 lesverVn <- lesverVn / sum(lesverVn) | |
89 ## Least confounded conditional residuals | |
90 ## EBLUP analysis (Mahalanobis' distance) | |
91 varbMN <- gammaMN %*% t(zMN) %*% qMN %*% zMN %*% gammaMN | |
92 mdistVn <- rep(0, nunitN) | |
93 qm <- q - 1 | |
94 for (j in 1:nunitN) { | |
95 gbi <- varbMN[(q * j - qm):(q * j), (q * j - qm):(q * j)] | |
96 eblupi <- eblupVn[(q * j - qm):(q * j)] | |
97 mdistVn[j] <- t(eblupi) %*% ginv(gbi) %*% eblupi | |
98 } | |
99 names(mdistVn) <- levels(mfl@flist[[1]]) | |
100 ## output ---------------------------------------------- | |
101 return(list( | |
102 data = df, | |
103 q = q, | |
104 eblue = eblueVn, | |
105 eblup = eblupVn, | |
106 marginal.prediction = marpredVn, | |
107 conditional.prediction = condpredVn, | |
108 std.marginal.residuals = stmarresVn, | |
109 std.conditional.residuals = stcondresVn, | |
110 mahalanobis.distance = mdistVn, | |
111 std.mahalanobis.distance = mdistVn / sum(mdistVn), | |
112 std.lesaffreverbeke.measure = lesverVn | |
113 )) | |
114 } | |
115 | |
116 | |
117 #' Wrapper function for diagnostic plots of 'lmer' linear mixed models | |
118 #' | |
119 #' (W4M mixmod) | |
120 #' | |
121 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data | |
122 #' @param title aa | |
123 #' @param outlier.limit aa | |
124 #' @param pvalCutof aa | |
125 #' @param resC aa | |
126 #' @param uniC aa | |
127 #' @param fixC aa | |
128 #' @param lest.confounded Not used yet. | |
129 #' @return NULL | |
130 #' @author Natacha Lenuzza | |
131 #' @examples | |
132 #' print("hello !") | |
133 #' | |
134 #' @export diagmflF | |
135 | |
136 | |
137 diagmflF <- function(mfl, | |
138 title = "", | |
139 outlier.limit = 3, | |
140 pvalCutof = 0.05, | |
141 resC = "vd", | |
142 uniC = "subject", | |
143 timC = "time", | |
144 fixC = "fixfact", | |
145 least.confounded = FALSE) { | |
146 ## diagnostics | |
147 diagLs <- lmer.computeDiag(mfl) | |
148 ## plots | |
149 blank <- rectGrob(gp = gpar(col = "white")) | |
150 rectspacer <- rectGrob(height = unit(0.1, "npc"), gp = gpar(col = "grey")) | |
151 grid.arrange(blank, | |
152 plot_linearity(diagLs, hlimitN = outlier.limit, plotL = FALSE, | |
153 label_factor = c(uniC, fixC, timC)), | |
154 blank, | |
155 plot_conditionalResiduals(diagLs, hlimitN = outlier.limit, plotL = FALSE, | |
156 label_factor = c(uniC, fixC, timC)), | |
157 blank, | |
158 plot_condresQQplot(diagLs, plotL = FALSE), | |
159 blank, | |
160 plot_lesaffreVeerbeke(diagLs, plotL = FALSE), | |
161 blank, | |
162 plot_randomEffect(mfl, plotL = FALSE)[[1]], | |
163 blank, | |
164 plot_mahalanobisKhi2(diagLs, plotL = FALSE), | |
165 blank, | |
166 plot_mahalanobis(diagLs, plotL = FALSE), | |
167 blank, | |
168 blank, | |
169 blank, | |
170 top = textGrob(title, gp = gpar(fontsize = 40, font = 4)), | |
171 layout_matrix = matrix(c(rep(1, 7), | |
172 2, 3, rep(4, 3), 20, 21, | |
173 rep(5, 7), | |
174 6:12, | |
175 rep(13, 7), | |
176 14:18, rep(19, 2)), | |
177 ncol = 7, nrow = 6, byrow = TRUE), | |
178 heights = c(0.1 / 3, 0.3, 0.1 / 3, 0.3, 0.1 / 3, 0.3), | |
179 widths = c(0.22, 0.04, 0.22, 0.04, 0.22, 0.04, 0.22)) | |
180 } | |
181 | |
182 ####################################################################################################### | |
183 ## Raw data time courses | |
184 ####################################################################################################### | |
185 | |
186 #' Visualization of raw time course | |
187 #' | |
188 #' Une | |
189 #' | |
190 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data | |
191 #' @param responseC Name of the 'response' variable | |
192 #' @param timeC Name of the 'time' variable | |
193 #' @param subjectC Name of the 'subject' variable | |
194 #' @param fixfactC Name of the 'fixed factor' variable (e.g.treatment) | |
195 #' @param offset_subject Boolean indicating if an offset value (subject's mean) should substracted to each data point. Default is FALSE | |
196 #' @param plotL Boolean | |
197 #' @param colorType One of NA, FIXFACT or SUBJECT | |
198 #' @param shapeType One of NA, FIXFACT or SUBJECT | |
199 #' @param lineType One of NA, FIXFACT or SUBJECT | |
200 #' @return A plot | |
201 #' @author Natacha Lenuzza | |
202 #' @examples | |
203 #' print("hello !") | |
204 #' | |
205 #' @export plot_timeCourse | |
206 | |
207 | |
208 | |
209 plot_timeCourse <- function(mfl, | |
210 responseC, | |
211 timeC, | |
212 subjectC, | |
213 fixfactC = NULL, | |
214 offset_subject = FALSE, | |
215 plotL = TRUE, | |
216 colorType = NA, ## subject, fixfact, none or NA | |
217 shapeType = NA, ## subject, fixfact, none or NA | |
218 lineType = NA ## subject, fixfact, none or NA | |
219 ) { | |
220 ## Data ----- | |
221 if (class(mfl) %in% c("merModLmerTest", "lmerMod", "lmerModLmerTest")) { | |
222 DF <- mfl@frame | |
223 } else if (class(mfl) == "data.frame") { | |
224 DF <- mfl | |
225 } else { | |
226 stop("'mfl' argument must be a linear mixed effect model or a data frame.") | |
227 } | |
228 ## Format data ----- | |
229 if (is.null(fixfactC)) { | |
230 DF <- DF[, c(responseC, timeC, subjectC)] | |
231 colnames(DF) <- c("DV", "TIME", "SUBJECT") | |
232 meanDF <- aggregate(DF$DV, | |
233 by = list(SUBJECT = DF$SUBJECT, | |
234 TIME = DF$TIME), | |
235 FUN = mean, | |
236 na.rm = TRUE) | |
237 colnames(meanDF) <- c("SUBJECT", "TIME", "DV") | |
238 meanDF$GROUP <- meanDF$SUBJECT | |
239 } else{ | |
240 DF <- DF[, c(responseC, fixfactC, timeC, subjectC)] | |
241 colnames(DF) <- c("DV", "FIXFACT", "TIME", "SUBJECT") | |
242 meanDF <- aggregate(DF$DV, | |
243 by = list(SUBJECT = DF$SUBJECT, | |
244 TIME = DF$TIME, | |
245 FIXFACT = DF$FIXFACT), | |
246 FUN = mean, | |
247 na.rm = TRUE) | |
248 colnames(meanDF) <- c("SUBJECT", "TIME", "FIXFACT", "DV") | |
249 meanDF$GROUP <- paste(meanDF$SUBJECT, meanDF$FIXFACT, sep = "_") | |
250 } | |
251 ## Offset ----- | |
252 if (offset_subject) { | |
253 offsetMN <- aggregate(DF$DV, by = list(DF$SUBJECT), mean, na.rm = TRUE) | |
254 offsetVn <- offsetMN[, 2] | |
255 names(offsetVn) <- offsetMN[, 1] | |
256 rm(offsetMN) | |
257 DF$DV <- DF$DV - offsetVn[DF$SUBJECT] | |
258 meanDF$DV <- meanDF$DV - offsetVn[as.character(meanDF$SUBJECT)] | |
259 } | |
260 ## Graphical parameters ----- | |
261 xlabC <- timeC | |
262 ylabC <- responseC | |
263 titC <- "Individual time-courses" | |
264 if (offset_subject) { | |
265 ylabC <- paste(ylabC, "minus 'within-subject' empirical mean") | |
266 titC <- paste(titC, "('within-subject' empirical mean offset)") | |
267 } | |
268 ## color | |
269 if (is.na(colorType)) { ## automaticatical attribution | |
270 if (is.null(fixfactC)) { | |
271 colorType <- "SUBJECT" | |
272 } else { | |
273 colorType <- "FIXFACT" | |
274 } | |
275 colTxt <- paste(", colour=", colorType) | |
276 } else if (colorType == "none") { | |
277 colTxt <- "" | |
278 } else { | |
279 colTxt <- paste(", colour=", colorType) | |
280 } | |
281 ## lineType | |
282 if (is.na(lineType)) { ## automaticatical attribution | |
283 if (is.null(fixfactC)) { | |
284 linTxt <- "" | |
285 } else { | |
286 linTxt <- paste(", linetype=", | |
287 ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT")) | |
288 } | |
289 } else if (lineType == "none") { | |
290 linTxt <- "" | |
291 } else { | |
292 linTxt <- paste(", linetype=", lineType) | |
293 } | |
294 ## shapeType | |
295 if (is.na(shapeType)) { ## automaticatical attribution | |
296 if (is.null(fixfactC)) { | |
297 shaTxt <- "" | |
298 } else { | |
299 shaTxt <- paste(", shape=", | |
300 ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT")) | |
301 } | |
302 } else if (shapeType == "none") { | |
303 shaTxt <- "" | |
304 } else { | |
305 shaTxt <- paste(", shape=", shapeType) | |
306 } | |
307 ## aes mapping | |
308 txtMap <- paste("aes(x = TIME, y = DV", | |
309 colTxt, shaTxt, ")", sep = "") | |
310 txtLineMap <- paste("aes(x = TIME, y = DV, group = GROUP ", | |
311 colTxt, linTxt, ")", sep = "") | |
312 ## plot and output | |
313 p <- ggplot(data = DF, mapping = eval(parse(text = txtMap))) + | |
314 ggtitle(titC) + | |
315 xlab(xlabC) + ylab(ylabC) + | |
316 theme(legend.position = "left", | |
317 plot.title = element_text(size = rel(1.2), face = "bold")) + | |
318 geom_point() + | |
319 geom_line(eval(parse(text = txtLineMap)), data = meanDF) + | |
320 theme_bw() + | |
321 NULL | |
322 if (plotL) plot(p) | |
323 invisible(p) | |
324 } | |
325 | |
326 ####################################################################################################### | |
327 ## Post-hoc estimate | |
328 ####################################################################################################### | |
329 #' Visualization of fixed effects (post-hoc estimates) | |
330 #' | |
331 #' Description | |
332 #' | |
333 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data | |
334 #' @param pvalCutof User pvalue cut of | |
335 #' @param plotL Boolean | |
336 #' @param titC Title of the plot | |
337 #' @return A plot | |
338 #' @author Natacha Lenuzza | |
339 #' @examples | |
340 #' print("hello !") | |
341 #' | |
342 #' @export plot_posthoc | |
343 plot_posthoc <- function(mfl, pvalCutof = 0.05, plotL = TRUE, titC = "Post-hoc estimates") { | |
344 ddlsm1 <- as.data.frame(difflsmeans(mfl, test.effs = NULL)) | |
345 colnames(ddlsm1)[ncol(ddlsm1)] <- "pvalue" | |
346 ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) | |
347 ## modif JF pour tenir compte du seuil de pvalues defini par le user | |
348 ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof)] <- paste("p-value < ", pvalCutof, sep = "") | |
349 ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 5)] <- paste("p-value < ", pvalCutof / 5, sep = "") | |
350 ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 10)] <- paste("p-value < ", pvalCutof / 10, sep = "") | |
351 ddlsm1$levels <- rownames(ddlsm1) | |
352 ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) { | |
353 strsplit(namC, split = " ", fixed = TRUE)[[1]][1] | |
354 }) | |
355 colValue <- c("grey", "yellow", "orange", "red") | |
356 names(colValue) <- c("NS", | |
357 paste("p-value < ", pvalCutof, sep = ""), | |
358 paste("p-value < ", pvalCutof / 5, sep = ""), | |
359 paste("p-value < ", pvalCutof / 10, sep = "")) | |
360 p <- ggplot(ddlsm1, aes(x = levels, y = Estimate)) + | |
361 facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") + | |
362 geom_bar(aes(fill = Significance), stat = "identity") + | |
363 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
364 scale_fill_manual(values = colValue) + | |
365 geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.25) + | |
366 ggtitle(titC) + xlab("") + | |
367 NULL | |
368 if (plotL) plot(p) | |
369 invisible(p) | |
370 } | |
371 | |
372 ####################################################################################################### | |
373 ## Visualisation des effets alƩatoires | |
374 ####################################################################################################### | |
375 #' Visualization of random effects | |
376 #' | |
377 #' Equivalent of dotplot(ranef) | |
378 #' | |
379 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data | |
380 #' @param plotL Logical | |
381 #' @return A plot | |
382 #' @author Natacha Lenuzza | |
383 #' @examples | |
384 #' print("hello !") | |
385 #' | |
386 #' @export plot_randomEffect | |
387 | |
388 | |
389 plot_randomEffect <- function(mfl, plotL = TRUE) { | |
390 ## Estimation et format des effets alƩatoires | |
391 randomEffect <- ranef(mfl, condVar = TRUE) | |
392 DF <- data.frame(randomEffect = rep(names(randomEffect), | |
393 times = sapply(seq_len(length(randomEffect)), | |
394 function(lsi) { | |
395 return(length(unlist(randomEffect[[lsi]])))}))) | |
396 DF$condVar <- DF$estimate <- DF$x2 <- DF$x1 <- rep(NA, nrow(DF)) | |
397 for (rafC in names(randomEffect)) { | |
398 eff <- randomEffect[[rafC]] | |
399 DF$x1[which(DF$randomEffect == rafC)] <- rep(colnames(eff), each = nrow(eff)) | |
400 DF$x2[which(DF$randomEffect == rafC)] <- rep(rownames(eff), ncol(eff)) | |
401 DF$estimate[which(DF$randomEffect == rafC)] <- unlist(eff) | |
402 condvar <- attr(randomEffect[[rafC]], "postVar") | |
403 se <- NULL | |
404 for (coli in seq_len(ncol(eff))) { | |
405 se <- c(se, | |
406 sapply(seq_len(nrow(eff)), | |
407 function(i) { | |
408 return(condvar[coli, coli, i])})) | |
409 } | |
410 DF$condVar[which(DF$randomEffect == rafC)] <- se | |
411 } | |
412 DF$se <- sqrt(DF$condVar) | |
413 DF$lower <- DF$estimate - 1.96 * DF$se | |
414 DF$upper <- DF$estimate + 1.96 * DF$se | |
415 ## Plot | |
416 plotLs <- vector("list", length(randomEffect)) | |
417 names(plotLs) <- names(randomEffect) | |
418 for (pi in seq_len(length(plotLs))) { | |
419 subDF <- DF[DF$randomEffect == names(plotLs)[pi], ] | |
420 subDF <- subDF[order(subDF$x1, subDF$estimate, decreasing = FALSE), ] | |
421 p <- ggplot(data = subDF, | |
422 mapping = aes(x = estimate, y = reorder(x2, estimate)) | |
423 ) + | |
424 geom_point(size = 3) + | |
425 geom_segment(aes(xend = lower, yend = x2)) + | |
426 geom_segment(aes(xend = upper, yend = x2)) + | |
427 facet_wrap(~x1, ncol = length(unique(subDF$x1))) + | |
428 ylab("") + xlab("") + | |
429 ggtitle(paste("Random effect - ", names(plotLs)[pi], sep = "")) + | |
430 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + | |
431 geom_vline(xintercept = 0, linetype = "dashed") + | |
432 theme_bw() | |
433 plotLs[[pi]] <- p | |
434 if (plotL) plot(p) | |
435 } | |
436 invisible(plotLs) | |
437 } | |
438 | |
439 ####################################################################################################### | |
440 ## LinearitƩ des effets et outlying observations | |
441 ####################################################################################################### | |
442 #' Linarity of the fixed effect with regard to the continuous time | |
443 #' | |
444 #' @param diagLs diagnostic list | |
445 #' @param hlimitN Limit value for outliers (e.g.2 or 3) | |
446 #' @param plotL Boolean | |
447 #' @param label_factor Column of observation names used to label outlying values | |
448 #' @return A plot | |
449 #' @author Natacha Lenuzza | |
450 #' @examples | |
451 #' print("hello !") | |
452 #' | |
453 #' @export plot_linearity | |
454 #' | |
455 | |
456 plot_linearity <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) { | |
457 df <- cbind.data.frame(diagLs$data, | |
458 marginal.prediction = diagLs$marginal.prediction, | |
459 standardized.marginal.residuals = diagLs$std.marginal.residuals) | |
460 # outlier annotation | |
461 df$outliers <- rep("", nrow(df)) | |
462 outidx <- which(abs(df$standardized.marginal.residuals) > hlimitN) | |
463 df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx] | |
464 if (length(label_factor) >= 1) { | |
465 df[outidx, "outliers"] <- paste(df[outidx, "outliers"], | |
466 df[outidx, label_factor[1]], | |
467 sep = "_") | |
468 if (length(label_factor) > 1) { | |
469 for (li in 2:length(label_factor)) { | |
470 df[outidx, "outliers"] <- paste(df[outidx, "outliers"], | |
471 df[outidx, label_factor[li]], | |
472 sep = ".") | |
473 } | |
474 } | |
475 } | |
476 p <- ggplot(data = df, | |
477 aes(x = marginal.prediction, | |
478 y = standardized.marginal.residuals)) + | |
479 geom_point(size = 2) + | |
480 geom_hline(yintercept = 0, col = "grey") + | |
481 geom_smooth(aes(x = marginal.prediction, | |
482 y = standardized.marginal.residuals), data = df, se = FALSE, col = "blue", method = "loess") + | |
483 ggtitle("Linearity of effects/outlying obervations") + | |
484 xlab("Marginal predictions") + | |
485 ylab("Standardized marginal residuals") + | |
486 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + | |
487 geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") + | |
488 geom_text(aes(label = outliers), hjust = 0, vjust = 0) | |
489 if (plotL) plot(p) | |
490 invisible(p) | |
491 } | |
492 | |
493 | |
494 ####################################################################################################### | |
495 ## EBLUP | |
496 ####################################################################################################### | |
497 | |
498 | |
499 #' Mahalanobis distance | |
500 #' | |
501 #' @param diagLs diagnostic list | |
502 #' @param plotL Boolean | |
503 #' @return A plot | |
504 #' @author Natacha Lenuzza | |
505 #' @examples | |
506 #' print("hello !") | |
507 #' | |
508 #' @export plot_mahalanobis | |
509 #' | |
510 | |
511 | |
512 plot_mahalanobis <- function(diagLs, plotL = TRUE) { | |
513 unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance), | |
514 mal = diagLs$std.mahalanobis.distance) | |
515 ## Outlying subjects | |
516 p <- | |
517 ggplot(aes(y = mal, x = unit), data = unitDf) + | |
518 geom_point(size = 3) + | |
519 ylab("Standardized Mahalanobis distance") + | |
520 geom_vline(xintercept = 0, linetype = "dashed") + | |
521 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + | |
522 geom_hline(yintercept = 2 * mean(unitDf$mal), linetype = "dashed") + | |
523 geom_text(aes(label = unit), | |
524 data = unitDf[unitDf$mal > 2 * mean(unitDf$mal), ], | |
525 hjust = 1, vjust = 0) + | |
526 ggtitle("Outlying unit") + | |
527 xlab("unit") | |
528 if (plotL) plot(p) | |
529 invisible(p) | |
530 } | |
531 | |
532 | |
533 | |
534 | |
535 | |
536 | |
537 #' Mahalanobis distance (Chi2) | |
538 #' | |
539 #' @param diagLs diagnostic list | |
540 #' @param plotL aa | |
541 #' @return A plot | |
542 #' @author Natacha Lenuzza | |
543 #' @examples | |
544 #' print("hello !") | |
545 #' | |
546 #' @export plot_mahalanobisKhi2 | |
547 #' | |
548 | |
549 | |
550 plot_mahalanobisKhi2 <- function(diagLs, plotL = TRUE) { | |
551 unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance), | |
552 mal = diagLs$mahalanobis.distance) | |
553 p <- qqplotF(x = unitDf$mal, | |
554 distribution = "chisq", | |
555 df = diagLs$q, | |
556 line.estimate = NULL, | |
557 conf = 0.95) + | |
558 xlab("Chi-squared quantiles") + | |
559 ylab("Mahalanobis distance") + | |
560 ggtitle("Normality of random effect") + | |
561 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) | |
562 if (plotL) plot(p) | |
563 invisible(p) | |
564 } | |
565 | |
566 | |
567 | |
568 | |
569 | |
570 | |
571 | |
572 ####################################################################################################### | |
573 ## Residus conditionels | |
574 ####################################################################################################### | |
575 | |
576 ## Presence of outlying observations and homoscedacity of residuals | |
577 | |
578 #' Homoskedacity of conditionalresiduals | |
579 #' | |
580 #' @param diagLs diagnostic list | |
581 #' @param hlimitN Limit value for outliers (e.g.2 or 3) | |
582 #' @param plotL Boolean | |
583 #' @param label_factor Column of observation names used to label outlying values | |
584 #' @return A plot | |
585 #' @author Natacha Lenuzza | |
586 #' @examples | |
587 #' print("hello !") | |
588 #' | |
589 #' @export plot_conditionalResiduals | |
590 #' | |
591 | |
592 | |
593 plot_conditionalResiduals <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) { | |
594 df <- cbind.data.frame(diagLs$data, | |
595 conditional.prediction = diagLs$conditional.prediction, | |
596 standardized.conditional.residuals = diagLs$std.conditional.residuals) | |
597 # outlier annotation | |
598 df$outliers <- rep("", nrow(df)) | |
599 outidx <- which(abs(df$standardized.conditional.residuals) > hlimitN) | |
600 df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx] | |
601 if (length(label_factor) >= 1) { | |
602 df[outidx, "outliers"] <- paste(df[outidx, "outliers"], | |
603 df[outidx, label_factor[1]], | |
604 sep = "_") | |
605 if (length(label_factor) > 1) { | |
606 for (li in 2:length(label_factor)) { | |
607 df[outidx, "outliers"] <- paste(df[outidx, "outliers"], | |
608 df[outidx, label_factor[li]], | |
609 sep = ".") | |
610 } | |
611 } | |
612 } | |
613 p <- ggplot(data = df, | |
614 aes(x = conditional.prediction, | |
615 y = standardized.conditional.residuals)) + | |
616 geom_point(size = 2) + | |
617 geom_hline(yintercept = 0, col = "grey") + | |
618 geom_smooth(aes(x = conditional.prediction, | |
619 y = standardized.conditional.residuals), | |
620 data = df, se = FALSE, col = "blue", method = "loess") + | |
621 ggtitle("Homoscedasticity of conditional residuals/outlying observations") + | |
622 xlab("Individual predictions") + | |
623 ylab("Standardized conditional residuals") + | |
624 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + | |
625 geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") + | |
626 geom_text(aes(label = outliers), hjust = 0, vjust = 0) | |
627 if (plotL) plot(p) | |
628 invisible(p) | |
629 } | |
630 | |
631 | |
632 | |
633 | |
634 #' Normality of conditionalresiduals | |
635 #' | |
636 #' @param diagLs diagnostic list | |
637 #' @param plotL aa | |
638 #' @return A plot | |
639 #' @author Natacha Lenuzza | |
640 #' @examples | |
641 #' print("hello !") | |
642 #' | |
643 #' @export plot_condresQQplot | |
644 #' | |
645 | |
646 | |
647 plot_condresQQplot <- function(diagLs, plotL = TRUE) { | |
648 df <- cbind.data.frame(diagLs$data, | |
649 conditional.prediction = diagLs$conditional.prediction, | |
650 standardized.conditional.residuals = diagLs$std.conditional.residuals) | |
651 p <- qqplotF(x = df$standardized.conditional.residuals, | |
652 distribution = "norm", | |
653 line.estimate = NULL, | |
654 conf = 0.95) + | |
655 xlab("Standard normal quantiles") + | |
656 ylab("Standardized conditional residual quantiles") + | |
657 ggtitle("Normality of conditional error") + | |
658 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) | |
659 if (plotL) plot(p) | |
660 invisible(p) | |
661 } | |
662 | |
663 | |
664 | |
665 | |
666 | |
667 ####################################################################################################### | |
668 ## Within-units covariance structure | |
669 ####################################################################################################### | |
670 | |
671 | |
672 #' Lesaffre-Veerbeke measure | |
673 #' | |
674 #' @param diagLs diagnostic list | |
675 #' @param plotL aa | |
676 #' @return A plot | |
677 #' @author Natacha Lenuzza | |
678 #' @examples | |
679 #' print("hello !") | |
680 #' | |
681 #' @export plot_lesaffreVeerbeke | |
682 #' | |
683 | |
684 | |
685 plot_lesaffreVeerbeke <- function(diagLs, plotL = TRUE) { | |
686 unitDf <- data.frame(unit = names(diagLs$std.lesaffreverbeke.measure), | |
687 lvm = diagLs$std.lesaffreverbeke.measure) | |
688 p <- ggplot(data = unitDf, | |
689 aes(x = unit, | |
690 y = lvm)) + | |
691 geom_point(size = 2) + | |
692 theme(legend.position = "none") + | |
693 xlab("units") + | |
694 ylab("Standardized Lesaffre-Verbeke measure") + | |
695 geom_hline(yintercept = 2 * mean(unitDf$lvm), linetype = "dashed") + | |
696 geom_text(aes(label = unit), | |
697 data = unitDf[unitDf$lvm > 2 * mean(unitDf$lvm), ], | |
698 hjust = 0, vjust = 0) + | |
699 ggtitle("Within-units covariance matrice") + | |
700 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) | |
701 if (plotL) plot(p) | |
702 invisible(p) | |
703 } | |
704 | |
705 ##-------------------------------------------------------------------------------------------------## | |
706 ## Helpers | |
707 ##-------------------------------------------------------------------------------------------------## | |
708 | |
709 | |
710 ## square root of a matrix | |
711 ## From Rocha, Singer and Nobre | |
712 | |
713 #' square root of a matrix (Rocha) | |
714 #' | |
715 #' Description | |
716 #' | |
717 #' @param mat Matrix | |
718 #' @return A list | |
719 #' @author Natacha Lenuzza | |
720 #' @examples | |
721 #' print("hello !") | |
722 #' | |
723 #' @export sqrt.matrix | |
724 | |
725 sqrt.matrix <- function(mat) { | |
726 mat <- as.matrix(mat) # new line of code | |
727 singular_dec <- svd(mat, LINPACK = F) | |
728 U <- singular_dec$u | |
729 V <- singular_dec$v | |
730 D <- diag(singular_dec$d) | |
731 sqrtmatrix <- U %*% sqrt(D) %*% t(V) | |
732 } | |
733 | |
734 | |
735 ## square root of a matrix | |
736 ## http://www.cs.toronto.edu/~jepson/csc420/notes/introSVD.pdf (page 6) | |
737 ## (for matMN a n x n matrix that symetric and non-negative definite) | |
738 | |
739 #' square root of a matrix (Rocha) | |
740 #' | |
741 #' @param mat Matrix | |
742 #' @return A list | |
743 #' @author Natacha Lenuzza | |
744 #' @examples | |
745 #' print("hello !") | |
746 #' | |
747 #' @export sqrtmF | |
748 | |
749 sqrtmF <- function(matMN) { | |
750 matMN <- as.matrix(matMN) | |
751 ## check that matMN is symetric: if (!all(t(matMN == matMN))) stop("matMN must be symetric.") | |
752 svd_dec <- svd(matMN) | |
753 invisible(svd_dec$u %*% sqrt(diag(svd_dec$d)) %*% t(svd_dec$v)) | |
754 } | |
755 | |
756 | |
757 ## qqplotF | |
758 ## adapted from https://gist.github.com/rentrop/d39a8406ad8af2a1066c | |
759 | |
760 qqplotF <- function(x, | |
761 distribution = "norm", ..., | |
762 line.estimate = NULL, | |
763 conf = 0.95, | |
764 labels = names(x)) { | |
765 q.function <- eval(parse(text = paste0("q", distribution))) | |
766 d.function <- eval(parse(text = paste0("d", distribution))) | |
767 x <- na.omit(x) | |
768 ord <- order(x) | |
769 n <- length(x) | |
770 P <- ppoints(length(x)) | |
771 daf <- data.frame(ord.x = x[ord], z = q.function(P, ...)) | |
772 if (is.null(line.estimate)) { | |
773 Q.x <- quantile(daf$ord.x, c(0.25, 0.75)) | |
774 Q.z <- q.function(c(0.25, 0.75), ...) | |
775 b <- diff(Q.x) / diff(Q.z) | |
776 coef <- c(Q.x[1] - b * Q.z[1], b) | |
777 } else { | |
778 coef <- coef(line.estimate(ord.x ~ z)) | |
779 } | |
780 zz <- qnorm(1 - (1 - conf) / 2) | |
781 SE <- (coef[2] / d.function(daf$z, ...)) * sqrt(P * (1 - P) / n) | |
782 fit.value <- coef[1] + coef[2] * daf$z | |
783 daf$upper <- fit.value + zz * SE | |
784 daf$lower <- fit.value - zz * SE | |
785 if (!is.null(labels)) { | |
786 daf$label <- ifelse(daf$ord.x > daf$upper | daf$ord.x < daf$lower, labels[ord], "") | |
787 } | |
788 p <- ggplot(daf, aes(x = z, y = ord.x)) + | |
789 geom_point() + | |
790 geom_abline(intercept = coef[1], slope = coef[2], col = "red") + | |
791 geom_line(aes(x = z, y = lower), daf, col = "red", linetype = "dashed") + | |
792 geom_line(aes(x = z, y = upper), daf, col = "red", linetype = "dashed") + | |
793 xlab("") + ylab("") | |
794 if (!is.null(labels)) p <- p + geom_text(aes(label = label)) | |
795 return(p) | |
796 } | |
797 | |
798 | |
799 ## histogramm | |
800 histF <- function(x, sd_x = NULL, breaks = "scott") { | |
801 if (is.null(sd_x)) | |
802 sd_x <- sd(x) | |
803 ## Bandwith estimation (default is Scott) | |
804 if (!breaks %in% c("sqrt", "sturges", "rice", "scott", "fd")) | |
805 breaks <- "scott" | |
806 if (breaks %in% c("sqrt", "sturges", "rice")) { | |
807 k <- switch(breaks, | |
808 sqrt = sqrt(length(x)), | |
809 sturges = floor(log2(x)) + 1, | |
810 rice = floor(2 * length(x) ^ (1 / 3)) | |
811 ) | |
812 bw <- diff(range(x)) / k | |
813 }else{ | |
814 bw <- switch(breaks, | |
815 scott = 3.5 * sd_x / length(x) ^ (1 / 3), | |
816 fd = diff(range(x)) / (2 * IQR(x) / length(x) ^ (1 / 3)) | |
817 ) | |
818 } | |
819 | |
820 | |
821 daf <- data.frame(x = x) | |
822 ## graph | |
823 return(ggplot(data = daf, aes(x)) + | |
824 geom_histogram(aes(y = ..density..), | |
825 col = "black", fill = "grey", binwidth = bw) + | |
826 geom_density(size = 1.2, | |
827 col = "blue", | |
828 linetype = "blank", | |
829 fill = rgb(0, 0, 1, 0.1)) + | |
830 stat_function(fun = dnorm, | |
831 args = list(mean = 0, sd = sd_x), | |
832 col = "blue", size = 1.2) + | |
833 theme(legend.position = "none") + | |
834 xlab("")) | |
835 } | |
836 | |
837 | |
838 plot.res.Lmixed <- function(mfl, df, title = "", pvalCutof = 0.05) { | |
839 | |
840 ## define subscript of the different columns depending if we have only time (ncol(df)=3) or not | |
841 if (ncol(df) > 3) { | |
842 varidx <- 4 | |
843 ffidx <- 1 | |
844 timidx <- 2 | |
845 individx <- 3 | |
846 } else { | |
847 varidx <- 3 | |
848 ffidx <- 1 | |
849 timidx <- 1 | |
850 individx <- 2 | |
851 } | |
852 nameVar <- colnames(df)[varidx] | |
853 fflab <- colnames(df)[ffidx] | |
854 ## Individual time-course | |
855 rawPlot <- | |
856 ggplot(data = df, aes(x = df[[timidx]], y = df[[varidx]], colour = df[[ffidx]], group = df[[individx]])) + | |
857 geom_point() + | |
858 geom_line() + ggtitle("Individual time-courses (raw data)") + | |
859 ylab(nameVar) + | |
860 xlab(label = colnames(df)[2]) + | |
861 theme(legend.title = element_blank(), legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) | |
862 ## Boxplot of fixed factor | |
863 bPlot <- | |
864 ggplot(data = df, aes(y = df[[varidx]], x = df[[ffidx]], color = df[[ffidx]])) + | |
865 geom_boxplot(outlier.colour = "red", outlier.shape = 8, outlier.size = 4) + | |
866 ggtitle(paste("Boxplot by ", fflab, sep = "")) + xlab("") + ylab("") + | |
867 theme(legend.title = element_blank(), plot.title = element_text(size = rel(1.2), face = "bold")) | |
868 ## Post-hoc estimates | |
869 ddlsm1 <- mfl | |
870 ddlsm1$name <- rownames(ddlsm1) | |
871 ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) | |
872 ## modif JF pour tenir compte du seuil de pvalues defini par le user | |
873 options("scipen" = 100, "digits" = 5) | |
874 pvalCutof <- as.numeric(pvalCutof) | |
875 bs <- 0.05; bm <- 0.01; bi <- 0.005 | |
876 if (pvalCutof > bm) { | |
877 bs <- pvalCutof | |
878 } else | |
879 if (pvalCutof < bm & pvalCutof > bi) { | |
880 bm <- pvalCutof; bs <- pvalCutof | |
881 } else | |
882 if (pvalCutof < bi) { | |
883 bi <- pvalCutof; bm <- pvalCutof; bs <- pvalCutof | |
884 } | |
885 lbs <- paste("p-value < ", bs, sep = "") | |
886 lbm <- paste("p-value < ", bm, sep = "") | |
887 lbi <- paste("p-value < ", bi, sep = "") | |
888 cols <- paste("p-value < ", bs, sep = "") | |
889 colm <- paste("p-value < ", bm, sep = "") | |
890 coli <- paste("p-value < ", bi, sep = "") | |
891 valcol <- c("grey", "yellow", "orange", "red") | |
892 names(valcol) <- c("NS", lbs, lbm, lbi) | |
893 ddlsm1$Significance[which(ddlsm1$p.value <= bs)] <- lbs | |
894 ddlsm1$Significance[which(ddlsm1$p.value < bs & ddlsm1$p.value >= bm)] <- lbm | |
895 ddlsm1$Significance[which(ddlsm1$p.value < bi)] <- lbi | |
896 ddlsm1$levels <- rownames(ddlsm1) | |
897 ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) { | |
898 strsplit(namC, split = " ", fixed = TRUE)[[1]][1] | |
899 }) | |
900 | |
901 phPlot <- | |
902 ggplot(ddlsm1, aes(x = levels, y = Estimate)) + | |
903 facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") + | |
904 geom_bar(aes(fill = Significance), stat = "identity") + | |
905 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
906 scale_fill_manual( | |
907 values = valcol) + | |
908 geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI), width = 0.25) + | |
909 ggtitle("Post-hoc estimates ") + xlab("") + | |
910 theme(plot.title = element_text(size = rel(1.2), face = "bold")) | |
911 | |
912 ## Final plotting | |
913 grid.arrange(arrangeGrob(rawPlot, bPlot, ncol = 2), | |
914 phPlot, nrow = 2, | |
915 top = textGrob(title, gp = gpar(fontsize = 32, font = 4)) | |
916 ) | |
917 | |
918 } |