Mercurial > repos > workflow4metabolomics > mixmodel4repeated_measures

diff diagmfl.R @ 0:a4d89d47646f draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 8d2ca678d973501b60479a8dc3f212eecd56eab8
author: workflow4metabolomics
date: Mon, 16 May 2022 09:25:01 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/diagmfl.R	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,918 @@
+#' Calcul des grandeurs "diagnostiques"
+#'
+#'  Script adapte de http://www.ime.unicamp.br/~cnaber/residdiag_nlme_v22.R pour fonctionner
+#'  avec un modele lmer (et non lme), des sujets avec des identifiants non numeriques,
+#'  et des observations non ordonnees sujet par sujet (dernier point a verifier.)
+#'
+#'  @detail Les graphiques, les calculs associés et les notations utilisees dans le script suivent
+#'   l'article de Singer et al (2016) Graphical Tools for detedcting departures from linear
+#'    mixed model assumptions and some remedial measures, International Statistical Review
+#'       (doi:10.1111/insr.12178)
+#'
+#' @param mfl A linear mixed model fitted via lmer or a data frame containing data
+#' @return A list
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export lmer.computeDiag
+
+
+
+lmer.computeDiag <- function(mfl) {
+
+  ## Check arguments ---------------------------------------------------------
+
+  if (length(mfl@flist) > 1)
+    stop("Several 'grouping level' for random effect not implemented yet.")
+
+
+  ## extracting information from mfl models -------------------------------------------------------------
+  # data
+  df <- mfl@frame
+  responseC <- names(df)[1]
+  unitC <- names(mfl@flist)[1]
+  # observations
+  yVn <- df[, responseC]
+  nobsN <- length(yVn)
+  # units
+  idunitVc <- levels(mfl@flist[[1]])
+  nunitN <- length(unique(idunitVc))
+  #X
+  xMN <- mfl@pp$X
+  pN <- ncol(xMN)
+  #Z
+  zMN <- t(as.matrix(mfl@pp$Zt))
+  # Estimated covariance matrix of random effects (Gam)
+  aux <- VarCorr(mfl)[[1]] ## assuming only one level of grouping
+  aux2 <- attr(aux, "stddev")
+  gMN <- attr(aux, "correlation") * (aux2 %*% t(aux2))
+  gammaMN <- as.matrix(kronecker(diag(nunitN), gMN))
+  q <- dim(gMN)[1]
+  # Estimated covariance matrix of conditonal error (homoskedastic conditional independance model)
+  sigsqN <- attr(VarCorr(mfl), "sc")^2
+  rMN <- sigsqN * diag(nobsN)
+  # Estimated covariance matrix of Y
+  vMN <- (zMN %*% gammaMN %*% t(zMN)) + rMN
+  invvMN <- MASS::ginv(vMN)
+  # H and Q matrix
+  hMN <- MASS::ginv(t(xMN) %*% invvMN %*% xMN)
+  qMN <- invvMN - invvMN %*% xMN %*% (hMN) %*% t(xMN) %*% invvMN
+  # eblue and eblup
+  eblueVn <- mfl@beta
+  eblupVn <- gammaMN %*% t(zMN) %*% invvMN %*% (yVn - xMN %*% eblueVn) ## equivalent de ranef(mfl)
+  rownames(eblupVn) <- colnames(zMN)
+  ##  Calculs of matrices and vectors used in graph diagnosics ---------------------------------------------
+  ## Marginal and individual predictions, residuals and variances
+  marpredVn <- xMN %*% eblueVn
+  marresVn <- yVn - marpredVn
+  marvarMN <- vMN - xMN %*% hMN %*% t(xMN)
+  condpredVn <- marpredVn + zMN %*% eblupVn
+  condresVn <- yVn - condpredVn
+  condvarMN <- rMN %*% qMN %*% rMN
+  ## Analysis of marginal and conditional residuals
+  stmarresVn <- stcondresVn <- rep(0, nobsN)
+  lesverVn <- rep(0, nunitN)
+  names(lesverVn) <- idunitVc
+  for (i in 1:nunitN) {
+      idxiVn <- which(df[, unitC] == idunitVc[i]) ## position des observations du sujet i
+    miN <- length(idxiVn)
+      ## standardization of marginal residual
+    stmarresVn[idxiVn] <- as.vector(solve(sqrtmF(marvarMN[idxiVn, idxiVn])) %*% marresVn[idxiVn])
+      ##Standardized Lessafre and Verbeke's measure
+    auxMN <- diag(1, ncol = miN, nrow = miN) - stmarresVn[idxiVn] %*% t(stmarresVn[idxiVn])
+    lesverVn[i] <- sum(diag(auxMN %*% t(auxMN)))
+      ## standardization of conditional residual
+    stcondresVn[idxiVn] <- as.vector(solve(sqrtmF(condvarMN[idxiVn, idxiVn])) %*% condresVn[idxiVn])
+  }
+  lesverVn <- lesverVn / sum(lesverVn)
+  ## Least confounded conditional residuals
+  ## EBLUP analysis (Mahalanobis' distance)
+  varbMN <- gammaMN %*% t(zMN) %*% qMN %*% zMN %*% gammaMN
+  mdistVn <- rep(0, nunitN)
+  qm <- q - 1
+  for (j in 1:nunitN) {
+    gbi <- varbMN[(q * j - qm):(q * j), (q * j - qm):(q * j)]
+    eblupi <- eblupVn[(q * j - qm):(q * j)]
+    mdistVn[j] <- t(eblupi) %*% ginv(gbi) %*% eblupi
+  }
+  names(mdistVn) <- levels(mfl@flist[[1]])
+  ## output ----------------------------------------------
+  return(list(
+    data = df,
+    q = q,
+    eblue = eblueVn,
+    eblup = eblupVn,
+    marginal.prediction = marpredVn,
+    conditional.prediction = condpredVn,
+    std.marginal.residuals = stmarresVn,
+    std.conditional.residuals = stcondresVn,
+    mahalanobis.distance = mdistVn,
+    std.mahalanobis.distance = mdistVn / sum(mdistVn),
+    std.lesaffreverbeke.measure = lesverVn
+  ))
+}
+
+
+#' Wrapper function for diagnostic plots of 'lmer' linear mixed models
+#'
+#' (W4M mixmod)
+#'
+#' @param mfl A linear mixed model fitted via lmer or a data frame containing data
+#' @param title aa
+#' @param outlier.limit aa
+#' @param pvalCutof aa
+#' @param resC aa
+#' @param uniC aa
+#' @param fixC aa
+#' @param lest.confounded Not used yet.
+#' @return NULL
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export diagmflF
+
+
+diagmflF <- function(mfl,
+                     title = "",
+                     outlier.limit = 3,
+                     pvalCutof = 0.05,
+                     resC = "vd",
+                     uniC = "subject",
+                     timC = "time",
+                     fixC = "fixfact",
+                     least.confounded = FALSE) {
+  ## diagnostics
+  diagLs <- lmer.computeDiag(mfl)
+  ## plots
+  blank <- rectGrob(gp = gpar(col = "white"))
+  rectspacer <- rectGrob(height = unit(0.1, "npc"), gp = gpar(col = "grey"))
+  grid.arrange(blank,
+               plot_linearity(diagLs, hlimitN = outlier.limit, plotL = FALSE,
+                              label_factor = c(uniC, fixC, timC)),
+               blank,
+               plot_conditionalResiduals(diagLs, hlimitN = outlier.limit, plotL = FALSE,
+                                         label_factor = c(uniC, fixC, timC)),
+               blank,
+               plot_condresQQplot(diagLs,  plotL = FALSE),
+               blank,
+               plot_lesaffreVeerbeke(diagLs,  plotL = FALSE),
+               blank,
+               plot_randomEffect(mfl, plotL = FALSE)[[1]],
+               blank,
+               plot_mahalanobisKhi2(diagLs,  plotL = FALSE),
+               blank,
+               plot_mahalanobis(diagLs,  plotL = FALSE),
+               blank,
+               blank,
+               blank,
+               top = textGrob(title, gp = gpar(fontsize = 40, font = 4)),
+               layout_matrix = matrix(c(rep(1, 7),
+                                        2, 3, rep(4, 3), 20, 21,
+                                        rep(5, 7),
+                                        6:12,
+                                        rep(13, 7),
+                                        14:18, rep(19, 2)),
+                                      ncol = 7, nrow = 6, byrow = TRUE),
+               heights = c(0.1 / 3, 0.3, 0.1 / 3, 0.3, 0.1 / 3, 0.3),
+               widths = c(0.22, 0.04, 0.22, 0.04, 0.22, 0.04, 0.22))
+}
+
+#######################################################################################################
+## Raw data time courses
+#######################################################################################################
+
+#' Visualization of raw time course
+#'
+#' Une
+#'
+#' @param mfl A linear mixed model fitted via lmer or a data frame containing data
+#' @param responseC Name of the 'response' variable
+#' @param timeC Name of the 'time' variable
+#' @param subjectC  Name of the 'subject' variable
+#' @param fixfactC  Name of the 'fixed factor' variable (e.g.treatment)
+#' @param offset_subject Boolean indicating if an offset value (subject's mean) should substracted to each data point. Default is FALSE
+#' @param plotL Boolean
+#' @param colorType One of NA, FIXFACT or SUBJECT
+#' @param shapeType One of NA, FIXFACT or SUBJECT
+#' @param lineType One of NA, FIXFACT or SUBJECT
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_timeCourse
+
+
+
+plot_timeCourse <- function(mfl,
+                             responseC,
+                             timeC,
+                             subjectC,
+                             fixfactC = NULL,
+                             offset_subject = FALSE,
+                             plotL = TRUE,
+                             colorType = NA, ## subject, fixfact, none or NA
+                             shapeType = NA, ## subject, fixfact, none or NA
+                             lineType = NA ## subject, fixfact, none or NA
+) {
+  ## Data -----
+  if (class(mfl) %in% c("merModLmerTest", "lmerMod", "lmerModLmerTest")) {
+    DF <- mfl@frame
+  } else if (class(mfl) == "data.frame") {
+    DF <- mfl
+  } else {
+    stop("'mfl' argument must be a linear mixed effect model or a data frame.")
+  }
+  ## Format data -----
+  if (is.null(fixfactC)) {
+    DF <- DF[, c(responseC,  timeC, subjectC)]
+    colnames(DF) <- c("DV", "TIME", "SUBJECT")
+    meanDF <- aggregate(DF$DV,
+                        by = list(SUBJECT = DF$SUBJECT,
+                                  TIME = DF$TIME),
+                        FUN = mean,
+                        na.rm = TRUE)
+    colnames(meanDF) <- c("SUBJECT", "TIME", "DV")
+    meanDF$GROUP <- meanDF$SUBJECT
+    } else{
+      DF <- DF[, c(responseC, fixfactC, timeC, subjectC)]
+    colnames(DF) <- c("DV", "FIXFACT", "TIME", "SUBJECT")
+      meanDF <- aggregate(DF$DV,
+                        by = list(SUBJECT = DF$SUBJECT,
+                                  TIME = DF$TIME,
+                                  FIXFACT = DF$FIXFACT),
+                        FUN = mean,
+                        na.rm = TRUE)
+    colnames(meanDF) <- c("SUBJECT", "TIME", "FIXFACT", "DV")
+    meanDF$GROUP <- paste(meanDF$SUBJECT, meanDF$FIXFACT, sep = "_")
+    }
+  ## Offset -----
+  if (offset_subject) {
+      offsetMN <- aggregate(DF$DV, by = list(DF$SUBJECT), mean, na.rm = TRUE)
+    offsetVn <- offsetMN[, 2]
+    names(offsetVn) <- offsetMN[, 1]
+    rm(offsetMN)
+    DF$DV <- DF$DV - offsetVn[DF$SUBJECT]
+    meanDF$DV <- meanDF$DV - offsetVn[as.character(meanDF$SUBJECT)]
+  }
+  ## Graphical parameters -----
+  xlabC <-  timeC
+  ylabC <- responseC
+  titC <- "Individual time-courses"
+  if (offset_subject) {
+    ylabC <- paste(ylabC, "minus 'within-subject' empirical mean")
+    titC <- paste(titC, "('within-subject' empirical mean offset)")
+  }
+  ## color
+  if (is.na(colorType)) { ## automaticatical attribution
+      if (is.null(fixfactC)) {
+      colorType <- "SUBJECT"
+    } else {
+      colorType <- "FIXFACT"
+    }
+    colTxt <- paste(", colour=", colorType)
+    } else if (colorType == "none") {
+    colTxt  <- ""
+  } else {
+    colTxt <- paste(", colour=", colorType)
+  }
+  ## lineType
+  if (is.na(lineType)) { ## automaticatical attribution
+      if (is.null(fixfactC)) {
+      linTxt  <- ""
+    } else {
+      linTxt <- paste(", linetype=",
+                      ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT"))
+    }
+    } else if (lineType == "none") {
+    linTxt  <- ""
+  } else {
+    linTxt  <-  paste(", linetype=", lineType)
+  }
+  ## shapeType
+  if (is.na(shapeType)) { ## automaticatical attribution
+      if (is.null(fixfactC)) {
+      shaTxt  <- ""
+    } else {
+      shaTxt <- paste(", shape=",
+                      ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT"))
+    }
+    } else if (shapeType == "none") {
+    shaTxt  <- ""
+  } else {
+    shaTxt  <-  paste(", shape=", shapeType)
+  }
+  ## aes mapping
+  txtMap <- paste("aes(x = TIME, y = DV",
+                  colTxt, shaTxt, ")", sep = "")
+  txtLineMap <- paste("aes(x = TIME, y = DV, group = GROUP ",
+                      colTxt, linTxt,  ")", sep = "")
+  ## plot and output
+  p <- ggplot(data = DF, mapping = eval(parse(text = txtMap))) +
+    ggtitle(titC) +
+    xlab(xlabC) + ylab(ylabC) +
+    theme(legend.position = "left",
+          plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_point() +
+    geom_line(eval(parse(text = txtLineMap)), data = meanDF) +
+    theme_bw() +
+    NULL
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+#######################################################################################################
+## Post-hoc estimate
+#######################################################################################################
+#' Visualization of fixed effects (post-hoc estimates)
+#'
+#' Description
+#'
+#' @param mfl A linear mixed model fitted via lmer or a data frame containing data
+#' @param pvalCutof User pvalue cut of
+#' @param plotL Boolean
+#' @param titC Title of the plot
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_posthoc
+plot_posthoc <- function(mfl, pvalCutof = 0.05, plotL = TRUE, titC = "Post-hoc estimates") {
+  ddlsm1 <- as.data.frame(difflsmeans(mfl, test.effs = NULL))
+  colnames(ddlsm1)[ncol(ddlsm1)] <- "pvalue"
+  ddlsm1$Significance <- rep("NS", nrow(ddlsm1))
+  ## modif JF pour tenir compte du seuil de pvalues defini par le user
+  ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof)] <- paste("p-value < ", pvalCutof, sep = "")
+  ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 5)] <- paste("p-value < ", pvalCutof / 5, sep = "")
+  ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 10)] <- paste("p-value < ", pvalCutof / 10, sep = "")
+  ddlsm1$levels <- rownames(ddlsm1)
+  ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) {
+    strsplit(namC, split = " ", fixed = TRUE)[[1]][1]
+  })
+  colValue <- c("grey", "yellow", "orange", "red")
+  names(colValue) <- c("NS",
+                       paste("p-value < ", pvalCutof, sep = ""),
+                       paste("p-value < ", pvalCutof / 5, sep = ""),
+                       paste("p-value < ", pvalCutof / 10, sep = ""))
+  p <- ggplot(ddlsm1, aes(x = levels, y = Estimate)) +
+    facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") +
+    geom_bar(aes(fill = Significance), stat = "identity") +
+    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
+    scale_fill_manual(values = colValue) +
+    geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.25) +
+    ggtitle(titC) + xlab("") +
+    NULL
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+#######################################################################################################
+## Visualisation des effets aléatoires
+#######################################################################################################
+#' Visualization of random effects
+#'
+#' Equivalent of dotplot(ranef)
+#'
+#' @param mfl A linear mixed model fitted via lmer or a data frame containing data
+#' @param  plotL Logical
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_randomEffect
+
+
+plot_randomEffect <- function(mfl, plotL = TRUE) {
+  ## Estimation et format des effets aléatoires
+  randomEffect <- ranef(mfl, condVar = TRUE)
+  DF <- data.frame(randomEffect = rep(names(randomEffect),
+                                      times = sapply(seq_len(length(randomEffect)),
+                                                      function(lsi) {
+                                                        return(length(unlist(randomEffect[[lsi]])))})))
+  DF$condVar <- DF$estimate <- DF$x2 <- DF$x1 <- rep(NA, nrow(DF))
+  for (rafC in names(randomEffect)) {
+      eff <- randomEffect[[rafC]]
+      DF$x1[which(DF$randomEffect == rafC)] <- rep(colnames(eff), each = nrow(eff))
+    DF$x2[which(DF$randomEffect == rafC)] <- rep(rownames(eff), ncol(eff))
+    DF$estimate[which(DF$randomEffect == rafC)] <- unlist(eff)
+      condvar <- attr(randomEffect[[rafC]], "postVar")
+      se <- NULL
+    for (coli in seq_len(ncol(eff))) {
+      se <- c(se,
+              sapply(seq_len(nrow(eff)),
+                     function(i) {
+                       return(condvar[coli, coli, i])}))
+    }
+    DF$condVar[which(DF$randomEffect == rafC)] <- se
+    }
+  DF$se <- sqrt(DF$condVar)
+  DF$lower <- DF$estimate - 1.96 * DF$se
+  DF$upper <- DF$estimate + 1.96 * DF$se
+  ## Plot
+  plotLs <- vector("list", length(randomEffect))
+  names(plotLs) <- names(randomEffect)
+  for (pi in seq_len(length(plotLs))) {
+      subDF <- DF[DF$randomEffect == names(plotLs)[pi], ]
+      subDF <- subDF[order(subDF$x1, subDF$estimate, decreasing = FALSE), ]
+        p <- ggplot(data = subDF,
+                mapping = aes(x = estimate, y = reorder(x2, estimate))
+    ) +
+      geom_point(size = 3) +
+      geom_segment(aes(xend = lower, yend = x2)) +
+      geom_segment(aes(xend = upper, yend = x2)) +
+      facet_wrap(~x1, ncol = length(unique(subDF$x1))) +
+      ylab("") + xlab("") +
+      ggtitle(paste("Random effect - ", names(plotLs)[pi], sep = "")) +
+      theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+      geom_vline(xintercept = 0, linetype = "dashed") +
+      theme_bw()
+        plotLs[[pi]] <- p
+      if (plotL) plot(p)
+  }
+  invisible(plotLs)
+}
+
+#######################################################################################################
+## Linearité des effets et outlying observations
+#######################################################################################################
+#' Linarity of the fixed effect with regard to the continuous time
+#'
+#' @param diagLs diagnostic list
+#' @param hlimitN Limit value for outliers (e.g.2 or 3)
+#' @param plotL Boolean
+#' @param label_factor Column of observation names used to label outlying values
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_linearity
+#'
+
+plot_linearity <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) {
+  df <- cbind.data.frame(diagLs$data,
+                         marginal.prediction = diagLs$marginal.prediction,
+                         standardized.marginal.residuals = diagLs$std.marginal.residuals)
+  # outlier annotation
+  df$outliers <- rep("", nrow(df))
+  outidx <- which(abs(df$standardized.marginal.residuals) > hlimitN)
+  df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx]
+  if (length(label_factor) >= 1) {
+    df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
+                                    df[outidx, label_factor[1]],
+                                    sep = "_")
+    if (length(label_factor) > 1) {
+      for (li in 2:length(label_factor)) {
+        df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
+                                        df[outidx, label_factor[li]],
+                                        sep = ".")
+      }
+    }
+    }
+  p <- ggplot(data = df,
+              aes(x = marginal.prediction,
+                  y = standardized.marginal.residuals)) +
+    geom_point(size = 2) +
+    geom_hline(yintercept = 0, col = "grey") +
+    geom_smooth(aes(x = marginal.prediction,
+                    y = standardized.marginal.residuals), data = df,  se = FALSE, col = "blue", method = "loess") +
+    ggtitle("Linearity of effects/outlying obervations") +
+    xlab("Marginal predictions") +
+    ylab("Standardized marginal residuals") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") +
+    geom_text(aes(label = outliers), hjust = 0, vjust = 0)
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+
+#######################################################################################################
+## EBLUP
+#######################################################################################################
+
+
+#' Mahalanobis distance
+#'
+#' @param diagLs diagnostic list
+#' @param plotL Boolean
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_mahalanobis
+#'
+
+
+plot_mahalanobis <- function(diagLs,  plotL = TRUE) {
+  unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance),
+                       mal = diagLs$std.mahalanobis.distance)
+  ## Outlying subjects
+  p <-
+    ggplot(aes(y = mal, x = unit), data = unitDf) +
+    geom_point(size = 3) +
+    ylab("Standardized Mahalanobis distance") +
+    geom_vline(xintercept = 0, linetype = "dashed") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_hline(yintercept = 2 * mean(unitDf$mal), linetype = "dashed") +
+    geom_text(aes(label = unit),
+              data = unitDf[unitDf$mal > 2 * mean(unitDf$mal), ],
+              hjust = 1, vjust = 0) +
+    ggtitle("Outlying unit") +
+    xlab("unit")
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+
+
+
+
+
+#' Mahalanobis distance (Chi2)
+#'
+#' @param diagLs diagnostic list
+#' @param plotL aa
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_mahalanobisKhi2
+#'
+
+
+plot_mahalanobisKhi2 <- function(diagLs,  plotL = TRUE) {
+  unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance),
+                       mal = diagLs$mahalanobis.distance)
+  p <- qqplotF(x = unitDf$mal,
+              distribution = "chisq",
+              df = diagLs$q,
+              line.estimate = NULL,
+              conf = 0.95) +
+    xlab("Chi-squared quantiles") +
+    ylab("Mahalanobis distance") +
+    ggtitle("Normality of random effect") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+
+
+
+
+
+
+#######################################################################################################
+## Residus conditionels
+#######################################################################################################
+
+## Presence of outlying observations and homoscedacity of residuals
+
+#' Homoskedacity of conditionalresiduals
+#'
+#' @param diagLs diagnostic list
+#' @param hlimitN Limit value for outliers (e.g.2 or 3)
+#' @param plotL Boolean
+#' @param label_factor Column of observation names used to label outlying values
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_conditionalResiduals
+#'
+
+
+plot_conditionalResiduals <-  function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) {
+  df <- cbind.data.frame(diagLs$data,
+                         conditional.prediction = diagLs$conditional.prediction,
+                         standardized.conditional.residuals = diagLs$std.conditional.residuals)
+  # outlier annotation
+  df$outliers <- rep("", nrow(df))
+  outidx <- which(abs(df$standardized.conditional.residuals) > hlimitN)
+  df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx]
+  if (length(label_factor) >= 1) {
+    df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
+                                    df[outidx, label_factor[1]],
+                                    sep = "_")
+    if (length(label_factor) > 1) {
+      for (li in 2:length(label_factor)) {
+        df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
+                                        df[outidx, label_factor[li]],
+                                        sep = ".")
+      }
+    }
+    }
+  p <- ggplot(data = df,
+              aes(x = conditional.prediction,
+                  y = standardized.conditional.residuals)) +
+    geom_point(size = 2) +
+    geom_hline(yintercept = 0, col = "grey") +
+    geom_smooth(aes(x = conditional.prediction,
+                    y = standardized.conditional.residuals),
+                data = df,  se = FALSE, col = "blue", method = "loess") +
+    ggtitle("Homoscedasticity of conditional residuals/outlying observations") +
+    xlab("Individual predictions") +
+    ylab("Standardized conditional residuals") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") +
+    geom_text(aes(label = outliers), hjust = 0, vjust = 0)
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+
+
+
+#' Normality of conditionalresiduals
+#'
+#' @param diagLs diagnostic list
+#' @param plotL aa
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_condresQQplot
+#'
+
+
+plot_condresQQplot <-  function(diagLs, plotL = TRUE) {
+  df <- cbind.data.frame(diagLs$data,
+                         conditional.prediction = diagLs$conditional.prediction,
+                         standardized.conditional.residuals = diagLs$std.conditional.residuals)
+  p <- qqplotF(x = df$standardized.conditional.residuals,
+              distribution = "norm",
+              line.estimate = NULL,
+              conf = 0.95) +
+    xlab("Standard normal quantiles") +
+    ylab("Standardized conditional residual quantiles") +
+    ggtitle("Normality of conditional error") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+
+
+
+
+#######################################################################################################
+## Within-units covariance structure
+#######################################################################################################
+
+
+#' Lesaffre-Veerbeke measure
+#'
+#' @param diagLs diagnostic list
+#' @param plotL aa
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_lesaffreVeerbeke
+#'
+
+
+plot_lesaffreVeerbeke <- function(diagLs,  plotL = TRUE) {
+  unitDf <- data.frame(unit = names(diagLs$std.lesaffreverbeke.measure),
+                       lvm = diagLs$std.lesaffreverbeke.measure)
+  p <- ggplot(data = unitDf,
+              aes(x = unit,
+                  y = lvm)) +
+    geom_point(size = 2) +
+    theme(legend.position = "none") +
+    xlab("units") +
+    ylab("Standardized Lesaffre-Verbeke measure") +
+    geom_hline(yintercept = 2 * mean(unitDf$lvm), linetype = "dashed") +
+    geom_text(aes(label = unit),
+              data = unitDf[unitDf$lvm > 2 * mean(unitDf$lvm), ],
+              hjust = 0, vjust = 0) +
+    ggtitle("Within-units covariance matrice") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+##-------------------------------------------------------------------------------------------------##
+## Helpers
+##-------------------------------------------------------------------------------------------------##
+
+
+## square root of a matrix
+## From Rocha, Singer and Nobre
+
+#' square root of a matrix (Rocha)
+#'
+#' Description
+#'
+#' @param mat Matrix
+#' @return A list
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export sqrt.matrix
+
+sqrt.matrix <- function(mat) {
+  mat <- as.matrix(mat)  # new line of code
+  singular_dec <- svd(mat, LINPACK = F)
+  U <- singular_dec$u
+  V <- singular_dec$v
+  D <- diag(singular_dec$d)
+  sqrtmatrix <- U %*% sqrt(D) %*% t(V)
+}
+
+
+## square root of a matrix
+## http://www.cs.toronto.edu/~jepson/csc420/notes/introSVD.pdf (page 6)
+## (for matMN a n x n matrix that symetric and non-negative definite)
+
+#' square root of a matrix (Rocha)
+#'
+#' @param mat Matrix
+#' @return A list
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export sqrtmF
+
+sqrtmF <- function(matMN) {
+  matMN <- as.matrix(matMN)
+  ## check that matMN is symetric: if (!all(t(matMN == matMN))) stop("matMN must be symetric.")
+  svd_dec <- svd(matMN)
+  invisible(svd_dec$u %*% sqrt(diag(svd_dec$d)) %*% t(svd_dec$v))
+}
+
+
+## qqplotF
+## adapted from https://gist.github.com/rentrop/d39a8406ad8af2a1066c
+
+qqplotF <- function(x,
+                    distribution = "norm", ...,
+                    line.estimate = NULL,
+                    conf = 0.95,
+                    labels = names(x)) {
+  q.function <- eval(parse(text = paste0("q", distribution)))
+  d.function <- eval(parse(text = paste0("d", distribution)))
+  x <- na.omit(x)
+  ord <- order(x)
+  n <- length(x)
+  P <- ppoints(length(x))
+  daf <- data.frame(ord.x = x[ord], z = q.function(P, ...))
+  if (is.null(line.estimate)) {
+    Q.x <- quantile(daf$ord.x, c(0.25, 0.75))
+    Q.z <- q.function(c(0.25, 0.75), ...)
+    b <- diff(Q.x) / diff(Q.z)
+    coef <- c(Q.x[1] - b * Q.z[1], b)
+  } else {
+    coef <- coef(line.estimate(ord.x ~ z))
+  }
+  zz <- qnorm(1 - (1 - conf) / 2)
+  SE <- (coef[2] / d.function(daf$z, ...)) * sqrt(P * (1 - P) / n)
+  fit.value <- coef[1] + coef[2] * daf$z
+  daf$upper <- fit.value + zz * SE
+  daf$lower <- fit.value - zz * SE
+  if (!is.null(labels)) {
+    daf$label <- ifelse(daf$ord.x > daf$upper | daf$ord.x < daf$lower, labels[ord], "")
+  }
+  p <- ggplot(daf, aes(x = z, y = ord.x)) +
+    geom_point() +
+    geom_abline(intercept = coef[1], slope = coef[2], col = "red") +
+    geom_line(aes(x = z, y = lower), daf,  col = "red", linetype = "dashed") +
+    geom_line(aes(x = z, y = upper), daf,  col = "red", linetype = "dashed") +
+    xlab("") + ylab("")
+  if (!is.null(labels)) p <- p + geom_text(aes(label = label))
+  return(p)
+}
+
+
+## histogramm
+histF <- function(x, sd_x = NULL, breaks = "scott") {
+  if (is.null(sd_x))
+    sd_x <- sd(x)
+  ## Bandwith estimation (default is Scott)
+  if (!breaks %in% c("sqrt", "sturges", "rice", "scott", "fd"))
+    breaks <- "scott"
+  if (breaks %in% c("sqrt", "sturges", "rice")) {
+    k <- switch(breaks,
+                sqrt = sqrt(length(x)),
+                sturges = floor(log2(x)) + 1,
+                rice = floor(2 * length(x) ^ (1 / 3))
+    )
+    bw <- diff(range(x)) / k
+  }else{
+    bw <- switch(breaks,
+                 scott = 3.5 * sd_x / length(x) ^ (1 / 3),
+                 fd = diff(range(x)) / (2 * IQR(x) / length(x) ^ (1 / 3))
+    )
+  }
+
+
+  daf <- data.frame(x = x)
+  ## graph
+  return(ggplot(data = daf, aes(x)) +
+           geom_histogram(aes(y = ..density..),
+                          col = "black", fill = "grey", binwidth = bw) +
+           geom_density(size = 1.2,
+                        col = "blue",
+                        linetype = "blank",
+                        fill = rgb(0, 0, 1, 0.1)) +
+           stat_function(fun = dnorm,
+                         args = list(mean = 0, sd = sd_x),
+                         col = "blue", size = 1.2) +
+           theme(legend.position = "none") +
+           xlab(""))
+}
+
+
+plot.res.Lmixed <- function(mfl, df, title = "", pvalCutof = 0.05) {
+
+  ## define subscript of the different columns depending if we have only time (ncol(df)=3) or not
+  if (ncol(df) > 3) {
+    varidx <- 4
+    ffidx <- 1
+    timidx <- 2
+    individx <- 3
+    } else {
+    varidx <- 3
+    ffidx <- 1
+    timidx <- 1
+    individx <- 2
+  }
+  nameVar <- colnames(df)[varidx]
+  fflab <- colnames(df)[ffidx]
+  ## Individual time-course
+  rawPlot <-
+    ggplot(data = df, aes(x = df[[timidx]], y = df[[varidx]], colour = df[[ffidx]], group = df[[individx]])) +
+    geom_point() +
+    geom_line() +  ggtitle("Individual time-courses (raw data)") +
+    ylab(nameVar) +
+    xlab(label = colnames(df)[2]) +
+    theme(legend.title = element_blank(), legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
+  ## Boxplot of fixed factor
+  bPlot <-
+    ggplot(data = df, aes(y = df[[varidx]], x = df[[ffidx]], color = df[[ffidx]])) +
+    geom_boxplot(outlier.colour = "red", outlier.shape = 8, outlier.size = 4) +
+    ggtitle(paste("Boxplot by ", fflab, sep = "")) + xlab("") + ylab("") +
+    theme(legend.title = element_blank(), plot.title = element_text(size = rel(1.2), face = "bold"))
+  ## Post-hoc estimates
+  ddlsm1  <- mfl
+  ddlsm1$name <- rownames(ddlsm1)
+  ddlsm1$Significance <- rep("NS", nrow(ddlsm1))
+  ## modif JF pour tenir compte du seuil de pvalues defini par le user
+  options("scipen" = 100, "digits" = 5)
+  pvalCutof <- as.numeric(pvalCutof)
+  bs <- 0.05; bm <- 0.01; bi <- 0.005
+  if (pvalCutof > bm) {
+    bs <- pvalCutof
+  } else
+    if (pvalCutof < bm & pvalCutof > bi) {
+      bm <- pvalCutof; bs <- pvalCutof
+    } else
+      if (pvalCutof < bi) {
+        bi <- pvalCutof; bm <- pvalCutof; bs <- pvalCutof
+      }
+  lbs <- paste("p-value < ", bs, sep = "")
+  lbm <- paste("p-value < ", bm, sep = "")
+  lbi <- paste("p-value < ", bi, sep = "")
+  cols <- paste("p-value < ", bs, sep = "")
+  colm <- paste("p-value < ", bm, sep = "")
+  coli <- paste("p-value < ", bi, sep = "")
+  valcol <- c("grey", "yellow", "orange", "red")
+  names(valcol) <- c("NS", lbs, lbm, lbi)
+  ddlsm1$Significance[which(ddlsm1$p.value <= bs)] <- lbs
+  ddlsm1$Significance[which(ddlsm1$p.value < bs & ddlsm1$p.value >= bm)] <- lbm
+  ddlsm1$Significance[which(ddlsm1$p.value < bi)] <- lbi
+  ddlsm1$levels <- rownames(ddlsm1)
+  ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) {
+    strsplit(namC, split = " ", fixed = TRUE)[[1]][1]
+  })
+
+  phPlot <-
+    ggplot(ddlsm1, aes(x = levels, y = Estimate)) +
+    facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") +
+    geom_bar(aes(fill = Significance), stat = "identity") +
+    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
+    scale_fill_manual(
+      values = valcol) +
+    geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI), width = 0.25) +
+    ggtitle("Post-hoc estimates ") + xlab("") +
+    theme(plot.title = element_text(size = rel(1.2), face = "bold"))
+
+  ## Final plotting
+  grid.arrange(arrangeGrob(rawPlot, bPlot, ncol = 2),
+               phPlot, nrow = 2,
+               top = textGrob(title, gp = gpar(fontsize = 32, font = 4))
+  )
+
+}
author	workflow4metabolomics
date	Mon, 16 May 2022 09:25:01 +0000
parents
children