Mercurial > repos > workflow4metabolomics > mixmodel4repeated_measures
view diagmfl.R @ 0:a4d89d47646f draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 8d2ca678d973501b60479a8dc3f212eecd56eab8
author | workflow4metabolomics |
---|---|
date | Mon, 16 May 2022 09:25:01 +0000 |
parents | |
children |
line wrap: on
line source
#' Calcul des grandeurs "diagnostiques" #' #' Script adapte de http://www.ime.unicamp.br/~cnaber/residdiag_nlme_v22.R pour fonctionner #' avec un modele lmer (et non lme), des sujets avec des identifiants non numeriques, #' et des observations non ordonnees sujet par sujet (dernier point a verifier.) #' #' @detail Les graphiques, les calculs associés et les notations utilisees dans le script suivent #' l'article de Singer et al (2016) Graphical Tools for detedcting departures from linear #' mixed model assumptions and some remedial measures, International Statistical Review #' (doi:10.1111/insr.12178) #' #' @param mfl A linear mixed model fitted via lmer or a data frame containing data #' @return A list #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export lmer.computeDiag lmer.computeDiag <- function(mfl) { ## Check arguments --------------------------------------------------------- if (length(mfl@flist) > 1) stop("Several 'grouping level' for random effect not implemented yet.") ## extracting information from mfl models ------------------------------------------------------------- # data df <- mfl@frame responseC <- names(df)[1] unitC <- names(mfl@flist)[1] # observations yVn <- df[, responseC] nobsN <- length(yVn) # units idunitVc <- levels(mfl@flist[[1]]) nunitN <- length(unique(idunitVc)) #X xMN <- mfl@pp$X pN <- ncol(xMN) #Z zMN <- t(as.matrix(mfl@pp$Zt)) # Estimated covariance matrix of random effects (Gam) aux <- VarCorr(mfl)[[1]] ## assuming only one level of grouping aux2 <- attr(aux, "stddev") gMN <- attr(aux, "correlation") * (aux2 %*% t(aux2)) gammaMN <- as.matrix(kronecker(diag(nunitN), gMN)) q <- dim(gMN)[1] # Estimated covariance matrix of conditonal error (homoskedastic conditional independance model) sigsqN <- attr(VarCorr(mfl), "sc")^2 rMN <- sigsqN * diag(nobsN) # Estimated covariance matrix of Y vMN <- (zMN %*% gammaMN %*% t(zMN)) + rMN invvMN <- MASS::ginv(vMN) # H and Q matrix hMN <- MASS::ginv(t(xMN) %*% invvMN %*% xMN) qMN <- invvMN - invvMN %*% xMN %*% (hMN) %*% t(xMN) %*% invvMN # eblue and eblup eblueVn <- mfl@beta eblupVn <- gammaMN %*% t(zMN) %*% invvMN %*% (yVn - xMN %*% eblueVn) ## equivalent de ranef(mfl) rownames(eblupVn) <- colnames(zMN) ## Calculs of matrices and vectors used in graph diagnosics --------------------------------------------- ## Marginal and individual predictions, residuals and variances marpredVn <- xMN %*% eblueVn marresVn <- yVn - marpredVn marvarMN <- vMN - xMN %*% hMN %*% t(xMN) condpredVn <- marpredVn + zMN %*% eblupVn condresVn <- yVn - condpredVn condvarMN <- rMN %*% qMN %*% rMN ## Analysis of marginal and conditional residuals stmarresVn <- stcondresVn <- rep(0, nobsN) lesverVn <- rep(0, nunitN) names(lesverVn) <- idunitVc for (i in 1:nunitN) { idxiVn <- which(df[, unitC] == idunitVc[i]) ## position des observations du sujet i miN <- length(idxiVn) ## standardization of marginal residual stmarresVn[idxiVn] <- as.vector(solve(sqrtmF(marvarMN[idxiVn, idxiVn])) %*% marresVn[idxiVn]) ##Standardized Lessafre and Verbeke's measure auxMN <- diag(1, ncol = miN, nrow = miN) - stmarresVn[idxiVn] %*% t(stmarresVn[idxiVn]) lesverVn[i] <- sum(diag(auxMN %*% t(auxMN))) ## standardization of conditional residual stcondresVn[idxiVn] <- as.vector(solve(sqrtmF(condvarMN[idxiVn, idxiVn])) %*% condresVn[idxiVn]) } lesverVn <- lesverVn / sum(lesverVn) ## Least confounded conditional residuals ## EBLUP analysis (Mahalanobis' distance) varbMN <- gammaMN %*% t(zMN) %*% qMN %*% zMN %*% gammaMN mdistVn <- rep(0, nunitN) qm <- q - 1 for (j in 1:nunitN) { gbi <- varbMN[(q * j - qm):(q * j), (q * j - qm):(q * j)] eblupi <- eblupVn[(q * j - qm):(q * j)] mdistVn[j] <- t(eblupi) %*% ginv(gbi) %*% eblupi } names(mdistVn) <- levels(mfl@flist[[1]]) ## output ---------------------------------------------- return(list( data = df, q = q, eblue = eblueVn, eblup = eblupVn, marginal.prediction = marpredVn, conditional.prediction = condpredVn, std.marginal.residuals = stmarresVn, std.conditional.residuals = stcondresVn, mahalanobis.distance = mdistVn, std.mahalanobis.distance = mdistVn / sum(mdistVn), std.lesaffreverbeke.measure = lesverVn )) } #' Wrapper function for diagnostic plots of 'lmer' linear mixed models #' #' (W4M mixmod) #' #' @param mfl A linear mixed model fitted via lmer or a data frame containing data #' @param title aa #' @param outlier.limit aa #' @param pvalCutof aa #' @param resC aa #' @param uniC aa #' @param fixC aa #' @param lest.confounded Not used yet. #' @return NULL #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export diagmflF diagmflF <- function(mfl, title = "", outlier.limit = 3, pvalCutof = 0.05, resC = "vd", uniC = "subject", timC = "time", fixC = "fixfact", least.confounded = FALSE) { ## diagnostics diagLs <- lmer.computeDiag(mfl) ## plots blank <- rectGrob(gp = gpar(col = "white")) rectspacer <- rectGrob(height = unit(0.1, "npc"), gp = gpar(col = "grey")) grid.arrange(blank, plot_linearity(diagLs, hlimitN = outlier.limit, plotL = FALSE, label_factor = c(uniC, fixC, timC)), blank, plot_conditionalResiduals(diagLs, hlimitN = outlier.limit, plotL = FALSE, label_factor = c(uniC, fixC, timC)), blank, plot_condresQQplot(diagLs, plotL = FALSE), blank, plot_lesaffreVeerbeke(diagLs, plotL = FALSE), blank, plot_randomEffect(mfl, plotL = FALSE)[[1]], blank, plot_mahalanobisKhi2(diagLs, plotL = FALSE), blank, plot_mahalanobis(diagLs, plotL = FALSE), blank, blank, blank, top = textGrob(title, gp = gpar(fontsize = 40, font = 4)), layout_matrix = matrix(c(rep(1, 7), 2, 3, rep(4, 3), 20, 21, rep(5, 7), 6:12, rep(13, 7), 14:18, rep(19, 2)), ncol = 7, nrow = 6, byrow = TRUE), heights = c(0.1 / 3, 0.3, 0.1 / 3, 0.3, 0.1 / 3, 0.3), widths = c(0.22, 0.04, 0.22, 0.04, 0.22, 0.04, 0.22)) } ####################################################################################################### ## Raw data time courses ####################################################################################################### #' Visualization of raw time course #' #' Une #' #' @param mfl A linear mixed model fitted via lmer or a data frame containing data #' @param responseC Name of the 'response' variable #' @param timeC Name of the 'time' variable #' @param subjectC Name of the 'subject' variable #' @param fixfactC Name of the 'fixed factor' variable (e.g.treatment) #' @param offset_subject Boolean indicating if an offset value (subject's mean) should substracted to each data point. Default is FALSE #' @param plotL Boolean #' @param colorType One of NA, FIXFACT or SUBJECT #' @param shapeType One of NA, FIXFACT or SUBJECT #' @param lineType One of NA, FIXFACT or SUBJECT #' @return A plot #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export plot_timeCourse plot_timeCourse <- function(mfl, responseC, timeC, subjectC, fixfactC = NULL, offset_subject = FALSE, plotL = TRUE, colorType = NA, ## subject, fixfact, none or NA shapeType = NA, ## subject, fixfact, none or NA lineType = NA ## subject, fixfact, none or NA ) { ## Data ----- if (class(mfl) %in% c("merModLmerTest", "lmerMod", "lmerModLmerTest")) { DF <- mfl@frame } else if (class(mfl) == "data.frame") { DF <- mfl } else { stop("'mfl' argument must be a linear mixed effect model or a data frame.") } ## Format data ----- if (is.null(fixfactC)) { DF <- DF[, c(responseC, timeC, subjectC)] colnames(DF) <- c("DV", "TIME", "SUBJECT") meanDF <- aggregate(DF$DV, by = list(SUBJECT = DF$SUBJECT, TIME = DF$TIME), FUN = mean, na.rm = TRUE) colnames(meanDF) <- c("SUBJECT", "TIME", "DV") meanDF$GROUP <- meanDF$SUBJECT } else{ DF <- DF[, c(responseC, fixfactC, timeC, subjectC)] colnames(DF) <- c("DV", "FIXFACT", "TIME", "SUBJECT") meanDF <- aggregate(DF$DV, by = list(SUBJECT = DF$SUBJECT, TIME = DF$TIME, FIXFACT = DF$FIXFACT), FUN = mean, na.rm = TRUE) colnames(meanDF) <- c("SUBJECT", "TIME", "FIXFACT", "DV") meanDF$GROUP <- paste(meanDF$SUBJECT, meanDF$FIXFACT, sep = "_") } ## Offset ----- if (offset_subject) { offsetMN <- aggregate(DF$DV, by = list(DF$SUBJECT), mean, na.rm = TRUE) offsetVn <- offsetMN[, 2] names(offsetVn) <- offsetMN[, 1] rm(offsetMN) DF$DV <- DF$DV - offsetVn[DF$SUBJECT] meanDF$DV <- meanDF$DV - offsetVn[as.character(meanDF$SUBJECT)] } ## Graphical parameters ----- xlabC <- timeC ylabC <- responseC titC <- "Individual time-courses" if (offset_subject) { ylabC <- paste(ylabC, "minus 'within-subject' empirical mean") titC <- paste(titC, "('within-subject' empirical mean offset)") } ## color if (is.na(colorType)) { ## automaticatical attribution if (is.null(fixfactC)) { colorType <- "SUBJECT" } else { colorType <- "FIXFACT" } colTxt <- paste(", colour=", colorType) } else if (colorType == "none") { colTxt <- "" } else { colTxt <- paste(", colour=", colorType) } ## lineType if (is.na(lineType)) { ## automaticatical attribution if (is.null(fixfactC)) { linTxt <- "" } else { linTxt <- paste(", linetype=", ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT")) } } else if (lineType == "none") { linTxt <- "" } else { linTxt <- paste(", linetype=", lineType) } ## shapeType if (is.na(shapeType)) { ## automaticatical attribution if (is.null(fixfactC)) { shaTxt <- "" } else { shaTxt <- paste(", shape=", ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT")) } } else if (shapeType == "none") { shaTxt <- "" } else { shaTxt <- paste(", shape=", shapeType) } ## aes mapping txtMap <- paste("aes(x = TIME, y = DV", colTxt, shaTxt, ")", sep = "") txtLineMap <- paste("aes(x = TIME, y = DV, group = GROUP ", colTxt, linTxt, ")", sep = "") ## plot and output p <- ggplot(data = DF, mapping = eval(parse(text = txtMap))) + ggtitle(titC) + xlab(xlabC) + ylab(ylabC) + theme(legend.position = "left", plot.title = element_text(size = rel(1.2), face = "bold")) + geom_point() + geom_line(eval(parse(text = txtLineMap)), data = meanDF) + theme_bw() + NULL if (plotL) plot(p) invisible(p) } ####################################################################################################### ## Post-hoc estimate ####################################################################################################### #' Visualization of fixed effects (post-hoc estimates) #' #' Description #' #' @param mfl A linear mixed model fitted via lmer or a data frame containing data #' @param pvalCutof User pvalue cut of #' @param plotL Boolean #' @param titC Title of the plot #' @return A plot #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export plot_posthoc plot_posthoc <- function(mfl, pvalCutof = 0.05, plotL = TRUE, titC = "Post-hoc estimates") { ddlsm1 <- as.data.frame(difflsmeans(mfl, test.effs = NULL)) colnames(ddlsm1)[ncol(ddlsm1)] <- "pvalue" ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) ## modif JF pour tenir compte du seuil de pvalues defini par le user ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof)] <- paste("p-value < ", pvalCutof, sep = "") ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 5)] <- paste("p-value < ", pvalCutof / 5, sep = "") ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 10)] <- paste("p-value < ", pvalCutof / 10, sep = "") ddlsm1$levels <- rownames(ddlsm1) ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) { strsplit(namC, split = " ", fixed = TRUE)[[1]][1] }) colValue <- c("grey", "yellow", "orange", "red") names(colValue) <- c("NS", paste("p-value < ", pvalCutof, sep = ""), paste("p-value < ", pvalCutof / 5, sep = ""), paste("p-value < ", pvalCutof / 10, sep = "")) p <- ggplot(ddlsm1, aes(x = levels, y = Estimate)) + facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") + geom_bar(aes(fill = Significance), stat = "identity") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + scale_fill_manual(values = colValue) + geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.25) + ggtitle(titC) + xlab("") + NULL if (plotL) plot(p) invisible(p) } ####################################################################################################### ## Visualisation des effets aléatoires ####################################################################################################### #' Visualization of random effects #' #' Equivalent of dotplot(ranef) #' #' @param mfl A linear mixed model fitted via lmer or a data frame containing data #' @param plotL Logical #' @return A plot #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export plot_randomEffect plot_randomEffect <- function(mfl, plotL = TRUE) { ## Estimation et format des effets aléatoires randomEffect <- ranef(mfl, condVar = TRUE) DF <- data.frame(randomEffect = rep(names(randomEffect), times = sapply(seq_len(length(randomEffect)), function(lsi) { return(length(unlist(randomEffect[[lsi]])))}))) DF$condVar <- DF$estimate <- DF$x2 <- DF$x1 <- rep(NA, nrow(DF)) for (rafC in names(randomEffect)) { eff <- randomEffect[[rafC]] DF$x1[which(DF$randomEffect == rafC)] <- rep(colnames(eff), each = nrow(eff)) DF$x2[which(DF$randomEffect == rafC)] <- rep(rownames(eff), ncol(eff)) DF$estimate[which(DF$randomEffect == rafC)] <- unlist(eff) condvar <- attr(randomEffect[[rafC]], "postVar") se <- NULL for (coli in seq_len(ncol(eff))) { se <- c(se, sapply(seq_len(nrow(eff)), function(i) { return(condvar[coli, coli, i])})) } DF$condVar[which(DF$randomEffect == rafC)] <- se } DF$se <- sqrt(DF$condVar) DF$lower <- DF$estimate - 1.96 * DF$se DF$upper <- DF$estimate + 1.96 * DF$se ## Plot plotLs <- vector("list", length(randomEffect)) names(plotLs) <- names(randomEffect) for (pi in seq_len(length(plotLs))) { subDF <- DF[DF$randomEffect == names(plotLs)[pi], ] subDF <- subDF[order(subDF$x1, subDF$estimate, decreasing = FALSE), ] p <- ggplot(data = subDF, mapping = aes(x = estimate, y = reorder(x2, estimate)) ) + geom_point(size = 3) + geom_segment(aes(xend = lower, yend = x2)) + geom_segment(aes(xend = upper, yend = x2)) + facet_wrap(~x1, ncol = length(unique(subDF$x1))) + ylab("") + xlab("") + ggtitle(paste("Random effect - ", names(plotLs)[pi], sep = "")) + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + geom_vline(xintercept = 0, linetype = "dashed") + theme_bw() plotLs[[pi]] <- p if (plotL) plot(p) } invisible(plotLs) } ####################################################################################################### ## Linearité des effets et outlying observations ####################################################################################################### #' Linarity of the fixed effect with regard to the continuous time #' #' @param diagLs diagnostic list #' @param hlimitN Limit value for outliers (e.g.2 or 3) #' @param plotL Boolean #' @param label_factor Column of observation names used to label outlying values #' @return A plot #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export plot_linearity #' plot_linearity <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) { df <- cbind.data.frame(diagLs$data, marginal.prediction = diagLs$marginal.prediction, standardized.marginal.residuals = diagLs$std.marginal.residuals) # outlier annotation df$outliers <- rep("", nrow(df)) outidx <- which(abs(df$standardized.marginal.residuals) > hlimitN) df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx] if (length(label_factor) >= 1) { df[outidx, "outliers"] <- paste(df[outidx, "outliers"], df[outidx, label_factor[1]], sep = "_") if (length(label_factor) > 1) { for (li in 2:length(label_factor)) { df[outidx, "outliers"] <- paste(df[outidx, "outliers"], df[outidx, label_factor[li]], sep = ".") } } } p <- ggplot(data = df, aes(x = marginal.prediction, y = standardized.marginal.residuals)) + geom_point(size = 2) + geom_hline(yintercept = 0, col = "grey") + geom_smooth(aes(x = marginal.prediction, y = standardized.marginal.residuals), data = df, se = FALSE, col = "blue", method = "loess") + ggtitle("Linearity of effects/outlying obervations") + xlab("Marginal predictions") + ylab("Standardized marginal residuals") + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") + geom_text(aes(label = outliers), hjust = 0, vjust = 0) if (plotL) plot(p) invisible(p) } ####################################################################################################### ## EBLUP ####################################################################################################### #' Mahalanobis distance #' #' @param diagLs diagnostic list #' @param plotL Boolean #' @return A plot #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export plot_mahalanobis #' plot_mahalanobis <- function(diagLs, plotL = TRUE) { unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance), mal = diagLs$std.mahalanobis.distance) ## Outlying subjects p <- ggplot(aes(y = mal, x = unit), data = unitDf) + geom_point(size = 3) + ylab("Standardized Mahalanobis distance") + geom_vline(xintercept = 0, linetype = "dashed") + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + geom_hline(yintercept = 2 * mean(unitDf$mal), linetype = "dashed") + geom_text(aes(label = unit), data = unitDf[unitDf$mal > 2 * mean(unitDf$mal), ], hjust = 1, vjust = 0) + ggtitle("Outlying unit") + xlab("unit") if (plotL) plot(p) invisible(p) } #' Mahalanobis distance (Chi2) #' #' @param diagLs diagnostic list #' @param plotL aa #' @return A plot #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export plot_mahalanobisKhi2 #' plot_mahalanobisKhi2 <- function(diagLs, plotL = TRUE) { unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance), mal = diagLs$mahalanobis.distance) p <- qqplotF(x = unitDf$mal, distribution = "chisq", df = diagLs$q, line.estimate = NULL, conf = 0.95) + xlab("Chi-squared quantiles") + ylab("Mahalanobis distance") + ggtitle("Normality of random effect") + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) if (plotL) plot(p) invisible(p) } ####################################################################################################### ## Residus conditionels ####################################################################################################### ## Presence of outlying observations and homoscedacity of residuals #' Homoskedacity of conditionalresiduals #' #' @param diagLs diagnostic list #' @param hlimitN Limit value for outliers (e.g.2 or 3) #' @param plotL Boolean #' @param label_factor Column of observation names used to label outlying values #' @return A plot #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export plot_conditionalResiduals #' plot_conditionalResiduals <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) { df <- cbind.data.frame(diagLs$data, conditional.prediction = diagLs$conditional.prediction, standardized.conditional.residuals = diagLs$std.conditional.residuals) # outlier annotation df$outliers <- rep("", nrow(df)) outidx <- which(abs(df$standardized.conditional.residuals) > hlimitN) df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx] if (length(label_factor) >= 1) { df[outidx, "outliers"] <- paste(df[outidx, "outliers"], df[outidx, label_factor[1]], sep = "_") if (length(label_factor) > 1) { for (li in 2:length(label_factor)) { df[outidx, "outliers"] <- paste(df[outidx, "outliers"], df[outidx, label_factor[li]], sep = ".") } } } p <- ggplot(data = df, aes(x = conditional.prediction, y = standardized.conditional.residuals)) + geom_point(size = 2) + geom_hline(yintercept = 0, col = "grey") + geom_smooth(aes(x = conditional.prediction, y = standardized.conditional.residuals), data = df, se = FALSE, col = "blue", method = "loess") + ggtitle("Homoscedasticity of conditional residuals/outlying observations") + xlab("Individual predictions") + ylab("Standardized conditional residuals") + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") + geom_text(aes(label = outliers), hjust = 0, vjust = 0) if (plotL) plot(p) invisible(p) } #' Normality of conditionalresiduals #' #' @param diagLs diagnostic list #' @param plotL aa #' @return A plot #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export plot_condresQQplot #' plot_condresQQplot <- function(diagLs, plotL = TRUE) { df <- cbind.data.frame(diagLs$data, conditional.prediction = diagLs$conditional.prediction, standardized.conditional.residuals = diagLs$std.conditional.residuals) p <- qqplotF(x = df$standardized.conditional.residuals, distribution = "norm", line.estimate = NULL, conf = 0.95) + xlab("Standard normal quantiles") + ylab("Standardized conditional residual quantiles") + ggtitle("Normality of conditional error") + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) if (plotL) plot(p) invisible(p) } ####################################################################################################### ## Within-units covariance structure ####################################################################################################### #' Lesaffre-Veerbeke measure #' #' @param diagLs diagnostic list #' @param plotL aa #' @return A plot #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export plot_lesaffreVeerbeke #' plot_lesaffreVeerbeke <- function(diagLs, plotL = TRUE) { unitDf <- data.frame(unit = names(diagLs$std.lesaffreverbeke.measure), lvm = diagLs$std.lesaffreverbeke.measure) p <- ggplot(data = unitDf, aes(x = unit, y = lvm)) + geom_point(size = 2) + theme(legend.position = "none") + xlab("units") + ylab("Standardized Lesaffre-Verbeke measure") + geom_hline(yintercept = 2 * mean(unitDf$lvm), linetype = "dashed") + geom_text(aes(label = unit), data = unitDf[unitDf$lvm > 2 * mean(unitDf$lvm), ], hjust = 0, vjust = 0) + ggtitle("Within-units covariance matrice") + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) if (plotL) plot(p) invisible(p) } ##-------------------------------------------------------------------------------------------------## ## Helpers ##-------------------------------------------------------------------------------------------------## ## square root of a matrix ## From Rocha, Singer and Nobre #' square root of a matrix (Rocha) #' #' Description #' #' @param mat Matrix #' @return A list #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export sqrt.matrix sqrt.matrix <- function(mat) { mat <- as.matrix(mat) # new line of code singular_dec <- svd(mat, LINPACK = F) U <- singular_dec$u V <- singular_dec$v D <- diag(singular_dec$d) sqrtmatrix <- U %*% sqrt(D) %*% t(V) } ## square root of a matrix ## http://www.cs.toronto.edu/~jepson/csc420/notes/introSVD.pdf (page 6) ## (for matMN a n x n matrix that symetric and non-negative definite) #' square root of a matrix (Rocha) #' #' @param mat Matrix #' @return A list #' @author Natacha Lenuzza #' @examples #' print("hello !") #' #' @export sqrtmF sqrtmF <- function(matMN) { matMN <- as.matrix(matMN) ## check that matMN is symetric: if (!all(t(matMN == matMN))) stop("matMN must be symetric.") svd_dec <- svd(matMN) invisible(svd_dec$u %*% sqrt(diag(svd_dec$d)) %*% t(svd_dec$v)) } ## qqplotF ## adapted from https://gist.github.com/rentrop/d39a8406ad8af2a1066c qqplotF <- function(x, distribution = "norm", ..., line.estimate = NULL, conf = 0.95, labels = names(x)) { q.function <- eval(parse(text = paste0("q", distribution))) d.function <- eval(parse(text = paste0("d", distribution))) x <- na.omit(x) ord <- order(x) n <- length(x) P <- ppoints(length(x)) daf <- data.frame(ord.x = x[ord], z = q.function(P, ...)) if (is.null(line.estimate)) { Q.x <- quantile(daf$ord.x, c(0.25, 0.75)) Q.z <- q.function(c(0.25, 0.75), ...) b <- diff(Q.x) / diff(Q.z) coef <- c(Q.x[1] - b * Q.z[1], b) } else { coef <- coef(line.estimate(ord.x ~ z)) } zz <- qnorm(1 - (1 - conf) / 2) SE <- (coef[2] / d.function(daf$z, ...)) * sqrt(P * (1 - P) / n) fit.value <- coef[1] + coef[2] * daf$z daf$upper <- fit.value + zz * SE daf$lower <- fit.value - zz * SE if (!is.null(labels)) { daf$label <- ifelse(daf$ord.x > daf$upper | daf$ord.x < daf$lower, labels[ord], "") } p <- ggplot(daf, aes(x = z, y = ord.x)) + geom_point() + geom_abline(intercept = coef[1], slope = coef[2], col = "red") + geom_line(aes(x = z, y = lower), daf, col = "red", linetype = "dashed") + geom_line(aes(x = z, y = upper), daf, col = "red", linetype = "dashed") + xlab("") + ylab("") if (!is.null(labels)) p <- p + geom_text(aes(label = label)) return(p) } ## histogramm histF <- function(x, sd_x = NULL, breaks = "scott") { if (is.null(sd_x)) sd_x <- sd(x) ## Bandwith estimation (default is Scott) if (!breaks %in% c("sqrt", "sturges", "rice", "scott", "fd")) breaks <- "scott" if (breaks %in% c("sqrt", "sturges", "rice")) { k <- switch(breaks, sqrt = sqrt(length(x)), sturges = floor(log2(x)) + 1, rice = floor(2 * length(x) ^ (1 / 3)) ) bw <- diff(range(x)) / k }else{ bw <- switch(breaks, scott = 3.5 * sd_x / length(x) ^ (1 / 3), fd = diff(range(x)) / (2 * IQR(x) / length(x) ^ (1 / 3)) ) } daf <- data.frame(x = x) ## graph return(ggplot(data = daf, aes(x)) + geom_histogram(aes(y = ..density..), col = "black", fill = "grey", binwidth = bw) + geom_density(size = 1.2, col = "blue", linetype = "blank", fill = rgb(0, 0, 1, 0.1)) + stat_function(fun = dnorm, args = list(mean = 0, sd = sd_x), col = "blue", size = 1.2) + theme(legend.position = "none") + xlab("")) } plot.res.Lmixed <- function(mfl, df, title = "", pvalCutof = 0.05) { ## define subscript of the different columns depending if we have only time (ncol(df)=3) or not if (ncol(df) > 3) { varidx <- 4 ffidx <- 1 timidx <- 2 individx <- 3 } else { varidx <- 3 ffidx <- 1 timidx <- 1 individx <- 2 } nameVar <- colnames(df)[varidx] fflab <- colnames(df)[ffidx] ## Individual time-course rawPlot <- ggplot(data = df, aes(x = df[[timidx]], y = df[[varidx]], colour = df[[ffidx]], group = df[[individx]])) + geom_point() + geom_line() + ggtitle("Individual time-courses (raw data)") + ylab(nameVar) + xlab(label = colnames(df)[2]) + theme(legend.title = element_blank(), legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) ## Boxplot of fixed factor bPlot <- ggplot(data = df, aes(y = df[[varidx]], x = df[[ffidx]], color = df[[ffidx]])) + geom_boxplot(outlier.colour = "red", outlier.shape = 8, outlier.size = 4) + ggtitle(paste("Boxplot by ", fflab, sep = "")) + xlab("") + ylab("") + theme(legend.title = element_blank(), plot.title = element_text(size = rel(1.2), face = "bold")) ## Post-hoc estimates ddlsm1 <- mfl ddlsm1$name <- rownames(ddlsm1) ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) ## modif JF pour tenir compte du seuil de pvalues defini par le user options("scipen" = 100, "digits" = 5) pvalCutof <- as.numeric(pvalCutof) bs <- 0.05; bm <- 0.01; bi <- 0.005 if (pvalCutof > bm) { bs <- pvalCutof } else if (pvalCutof < bm & pvalCutof > bi) { bm <- pvalCutof; bs <- pvalCutof } else if (pvalCutof < bi) { bi <- pvalCutof; bm <- pvalCutof; bs <- pvalCutof } lbs <- paste("p-value < ", bs, sep = "") lbm <- paste("p-value < ", bm, sep = "") lbi <- paste("p-value < ", bi, sep = "") cols <- paste("p-value < ", bs, sep = "") colm <- paste("p-value < ", bm, sep = "") coli <- paste("p-value < ", bi, sep = "") valcol <- c("grey", "yellow", "orange", "red") names(valcol) <- c("NS", lbs, lbm, lbi) ddlsm1$Significance[which(ddlsm1$p.value <= bs)] <- lbs ddlsm1$Significance[which(ddlsm1$p.value < bs & ddlsm1$p.value >= bm)] <- lbm ddlsm1$Significance[which(ddlsm1$p.value < bi)] <- lbi ddlsm1$levels <- rownames(ddlsm1) ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) { strsplit(namC, split = " ", fixed = TRUE)[[1]][1] }) phPlot <- ggplot(ddlsm1, aes(x = levels, y = Estimate)) + facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") + geom_bar(aes(fill = Significance), stat = "identity") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + scale_fill_manual( values = valcol) + geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI), width = 0.25) + ggtitle("Post-hoc estimates ") + xlab("") + theme(plot.title = element_text(size = rel(1.2), face = "bold")) ## Final plotting grid.arrange(arrangeGrob(rawPlot, bPlot, ncol = 2), phPlot, nrow = 2, top = textGrob(title, gp = gpar(fontsize = 32, font = 4)) ) }