# HG changeset patch # User melpetera # Date 1652704318 0 # Node ID a3147e3d66e28d9bba8c284d7835bb6779924f5f # Parent 1422de1812044926d971cbdd1ce24606aa9617e9 Uploaded diff -r 1422de181204 -r a3147e3d66e2 diagmfl.R --- a/diagmfl.R Wed Oct 10 05:18:42 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,568 +0,0 @@ -############################################################################################### -## Diagnostics graphics pour les modèles linéaires mixtes de type "mfl" ## -############################################################################################### ## -## Input: ## -## mfl, un modèle linéaire mixte généré par le module lmixedm de JF Martin (fonction ## -## lmRepeated2FF), i.e. : un modèle linéaire mixte de type lmer créé par la formule ## -## mfl <- lmer( vd ~ time + fixfact + time:fixfact + (1| subject), ids) ## -## => noms de colonnes importants ## -## => 1 seul effet aléatoire sur l'intercept ## -############################################################################################### -## Output : ## -## Les graphics, les calculs associés et les notations* utilisées dans le script suivent ## -## l'article de Singer et al (2016) Graphical Tools for detedcting departures from linear ## -## mixed model assumptions and some remedial measures, International Statistical Review ## -## (doi:10.1111/insr.12178) ## -## * ajout d'une ou 2 lettres de typage de la variables ## -## -## Script adapté de http://www.ime.unicamp.br/~cnaber/residdiag_nlme_v22.R pour fonction- ## -## ner avec un modèle lmer (et non lme), des sujets avec des identifiants non numériques, ## -## et des observations non ordonnées sujet par sujet (dernier point à vérifier.) ## -## ############################################################################################ -## Remarques sur les calculs numériques ## -## - l'inverse d'une matrice est calculée à partir de la fonction ginv du package MASS ## -## (Moore- Penrose generalized inverse) au lieu de la fonction "solve" ## -## - la racine carrée des matrices sont calculées grâce à une déomposition SVD (car ## -## s'applique normalement ici uniquement à des matrices symétriques positives). A ## -## remplacer par la fonction sqrtm{expm} si des erreurs apparaissent ??? ## -############################################################################################### - - -library(ggplot2) -library(gridExtra) -library(grid) - - -##-------------------------------------------------------------------------------------------------## -## Helpers -##-------------------------------------------------------------------------------------------------## - -## square root of a matrix -## http://www.cs.toronto.edu/~jepson/csc420/notes/introSVD.pdf (page 6) -## (for matMN a n x n matrix that symetric and non-negative definite) - -sqrtmF<-function(matMN){ - matMN <- as.matrix(matMN) - # ## check that matMN is symetric - # if(!all(t(matMN==matMN))) - # stop("matMN must be symetric.") - svd_dec <- svd(matMN) - invisible(svd_dec$u%*%sqrt(diag(svd_dec$d))%*%t(svd_dec$v)) -} - - -## qqplotF -## adapted from https://gist.github.com/rentrop/d39a8406ad8af2a1066c - - -qqplotF <- function(x, - distribution = "norm", ..., - line.estimate = NULL, - conf = 0.95, - labels = names(x)){ - q.function <- eval(parse(text = paste0("q", distribution))) - d.function <- eval(parse(text = paste0("d", distribution))) - x <- na.omit(x) - ord <- order(x) - n <- length(x) - P <- ppoints(length(x)) - daf <- data.frame(ord.x = x[ord], z = q.function(P, ...)) - - if(is.null(line.estimate)){ - Q.x <- quantile(daf$ord.x, c(0.25, 0.75)) - Q.z <- q.function(c(0.25, 0.75), ...) - b <- diff(Q.x)/diff(Q.z) - coef <- c(Q.x[1] - b * Q.z[1], b) - } else { - coef <- coef(line.estimate(ord.x ~ z)) - } - - zz <- qnorm(1 - (1 - conf)/2) - SE <- (coef[2]/d.function(daf$z,...)) * sqrt(P * (1 - P)/n) - fit.value <- coef[1] + coef[2] * daf$z - daf$upper <- fit.value + zz * SE - daf$lower <- fit.value - zz * SE - - if(!is.null(labels)){ - daf$label <- ifelse(daf$ord.x > daf$upper | daf$ord.x < daf$lower, labels[ord],"") - } - - p <- ggplot(daf, aes(x=z, y=ord.x)) + - geom_point() + - geom_abline(intercept = coef[1], slope = coef[2], col = "red") + - geom_line(aes(x=z, y = lower),daf, col = "red", linetype = "dashed") + - geom_line(aes(x=z, y = upper),daf, col = "red", linetype = "dashed") + - #geom_ribbon(aes(ymin = lower, ymax = upper), alpha=0.2)+ - xlab("")+ylab("") - if(!is.null(labels)) p <- p + geom_text( aes(label = label)) - - return(p) - #print(p) - #coef -} - - -## histogramm -histF <- function(x, sd_x = NULL, breaks = "scott"){ - - if(is.null(sd_x)){ sd_x <- sd(x)} - - ## Bandwith estimation (default is Scott) - if(!breaks %in% c("sqrt", "sturges", "rice", "scott", "fd")) - breaks <- "scott" - - if(breaks %in% c("sqrt", "sturges", "rice")){ - k <- switch(breaks, - sqrt = sqrt(length(x)), - sturges = floor(log2(x))+1, - rice = floor(2*length(x)^(1/3)) - ) - bw <- diff(range(x))/k - }else{ - bw <- switch(breaks, - scott = 3.5*sd_x/length(x)^(1/3), - fd = diff(range(x))/(2*IQR(x)/length(x)^(1/3)) - ) - } - - - daf <- data.frame(x=x) - ## graph - return(ggplot(data=daf, aes(x)) + - geom_histogram(aes(y = ..density..), - col="black", fill = "grey", binwidth = bw)+ - geom_density(size = 1.2, - col = "blue", - linetype = "blank", - fill = rgb(0,0,1, 0.1))+ - stat_function(fun = dnorm, - args = list(mean = 0, sd = sd_x), - col = "blue", size = 1.2)+ - theme(legend.position="none")+ - xlab("")) -} - - - -##-------------------------------------------------------------------------------------------------## -## Main function -##-------------------------------------------------------------------------------------------------## - - -diagmflF <- function(mfl, title = "", - outlier.limit = 3, - pvalCutof = 0.05, - least.confounded = FALSE){ - - ## initialisation ------------------------------------------------------------------------------------- - responseC<- "vd" - unitC <- "subject" - timeC<- "time" - fixfactC<-"fixfact" - hlimitN <- outlier.limit - - - - ## extracting information from mfl models ------------------------------------------------------------- - df <- mfl@frame - yVn <- df[, responseC] - nobsN <- length(yVn) - idunitVc <- unique(df[, unitC]) - nunitN <- length(unique(idunitVc)) - xMN <- mfl@pp$X - pN <- ncol(xMN) - zMN <- as.matrix(t(mfl@pp$Zt)) - gMN <- as.matrix(as.data.frame(VarCorr(mfl))[1, "vcov"]) ## Valable seulement pour 1 seul effet aléatoire - gammaMN <- as.matrix(kronecker(diag(nunitN), gMN)) - sigsqN <- as.data.frame(VarCorr(mfl))[2, "vcov"] - rMN <- sigsqN*diag(nobsN) - vMN <- (zMN%*%gammaMN%*%t(zMN)) + rMN - invvMN <- MASS::ginv(vMN) - hMN <- MASS::ginv(t(xMN)%*%invvMN%*%xMN) - qMN <- invvMN - invvMN%*%xMN%*%(hMN)%*%t(xMN)%*%invvMN - eblueVn<-mfl@beta - eblupVn<-gammaMN%*%t(zMN)%*%invvMN%*%(yVn-xMN%*%eblueVn) ## equivalent de ranef(mfl) - rownames(eblupVn) <- colnames(zMN) - - - ## Calculs of matrices and vectors used in graph diagnosics --------------------------------------------- - - ## Marginal and individual predictions, residuals and variances - marpredVn <- xMN%*%eblueVn - marresVn <- yVn - marpredVn - marvarMN <- vMN - xMN%*%hMN%*%t(xMN) - - condpredVn <- marpredVn + zMN%*%eblupVn - condresVn <- yVn - condpredVn - condvarMN <- rMN%*%qMN%*%rMN - - - ## Analysis of marginal and conditional residuals - stmarresVn <-stcondresVn <- rep(0,nobsN) - lesverVn <- rep(0, nunitN) - names(lesverVn )<- idunitVc - - for(i in 1:nunitN){ - - idxiVn <- which(df[, unitC] == idunitVc[i]) ## position des observations du sujet i - miN <- length(idxiVn) - - ## standardization of marginal residual - stmarresVn[idxiVn] <- as.vector(solve(sqrtmF(marvarMN[idxiVn,idxiVn]))%*%marresVn[idxiVn]) - - ##Standardized Lessafre and Verbeke's measure - auxMN <- diag(1, ncol = miN, nrow =miN)- stmarresVn[idxiVn]%*%t(stmarresVn[idxiVn]) - lesverVn[i] <- sum(diag(auxMN%*%t(auxMN))) - - ## standardization of conditional residual - stcondresVn[idxiVn] <- as.vector(solve(sqrtmF(condvarMN[idxiVn,idxiVn]))%*%condresVn[idxiVn]) - } - lesverVn <- lesverVn/sum(lesverVn) - - - ## Least confounded residuals (à valider !) - ## résultats différents des valeurs estimées via le script de Singer, car - ## non unicité des vecteurs propres de la décomposition spectrale ? - if(least.confounded){ - sqrMN <- sqrtmF(rMN) - specDec <- eigen((sqrMN%*%qMN%*%sqrMN), symmetric = TRUE, only.values = FALSE) - - cMN <- t(sqrt(solve(diag((specDec$values[1:(nobsN -pN)])))) %*% t(specDec$vectors[1:(nobsN -pN),1:(nobsN -pN)]) %*% - solve(sqrtmF(rMN[1:(nobsN -pN),1:(nobsN -pN)])) ) - - lccondresVn <- (cMN%*%condresVn[1:(nobsN -pN)])/sqrt(diag(cMN%*%condvarMN[1:(nobsN -pN),1:(nobsN -pN)]%*%t(cMN))) - } - - - ## EBLUP analysis (Mahalanobis' distance) - varbMN <- gammaMN%*%t(zMN)%*%qMN%*%zMN%*%gammaMN - mdistVn <- rep(0, nunitN) - for(i in 1:nunitN){ - mdistVn[i] <- eblupVn[i]^2/varbMN[i, i] - } - mdistVn <- mdistVn/sum(mdistVn) - - - ## Combine data and results in 2 data frames for easy plotting with ggplot2 ----------------------------- - - ## long data frame (all observations) - - df <- data.frame(df, - mar.pred = marpredVn, - mar.res = marresVn, - st.mar.res = stmarresVn, - cond.pred = condpredVn, - cond.res = condresVn, - st.cond.res = stcondresVn) - - if(!("fixfact" %in% colnames(df))) - df$fixfact <- rep(1, nrow(df)) - df$numTime <- as.numeric(levels(as.factor(df$time))) - - - ## short data frame (1 row per unit) - - unitDf <- data.frame(unit = idunitVc, - eblup = eblupVn, - lvm = lesverVn, - mal = mdistVn) - - unitDf$fixfact <- sapply(1:nrow(unitDf), - function(i){ - unique(df[which(df[, unitC] == unitDf$unit[i]), - fixfactC]) - }) - - unitDf$se <- rep(NA, nrow(unitDf)) - re <- ranef(mfl, condVar =TRUE) - for(i in 1:nrow(unitDf)) - unitDf$se[i] <- sqrt(attr(re[[unitC]], "postVar")[1,1,i]) - unitDf$upper <- unitDf$eblup+unitDf$se*1.96 - unitDf$lower <- unitDf$eblup-unitDf$se*1.96 - - - ## Outliers "annotations" - df$marres.out <- rep("", nrow(df)) - df$marres.out[abs(df$st.mar.res)>hlimitN] <- - paste(df[abs(df$st.mar.res)>hlimitN, unitC], - df[abs(df$st.mar.res)>hlimitN, timeC], - sep = ".") - df$marres.out <- paste(" ", df$marres.out, sep ="") - - df$condres.out <- rep("", nrow(df)) - df$condres.out[abs(df$st.cond.res)>hlimitN] <- - paste(df[abs(df$st.cond.res)>hlimitN, unitC], - df[abs(df$st.cond.res)>hlimitN, timeC], - sep = ".") - df$condres.out <- paste(" ", df$condres.out, sep ="") - - ## Diagnostic Plots ------------------------------------------------------------------------------------- - - ## Linearity of effect and outlying observations - p1 <- ggplot(data = df, - aes(x=mar.pred, - y=st.mar.res, - colour=fixfact)) + - geom_point(size =2) + - geom_hline(yintercept = 0, col = "grey")+ - geom_smooth(aes(x=mar.pred, y=st.mar.res), data = df, se = FALSE, col = "blue", method = "loess")+ - ggtitle("Linearity of effects/outlying obervations")+ - xlab("Marginal predictions")+ - ylab("Standardized marginal residuals")+ - theme(legend.position="none", plot.title = element_text(size = rel(1.2), face = "bold"))+ - geom_hline(yintercept = c(-1,1)*hlimitN, linetype = "dashed")+ - geom_text(aes(label = marres.out, col = fixfact), hjust=0, vjust=0) - - # p1hist <- histF(df$st.mar.res, sd_x =1)+ - # xlab("Standardized marginal residuals") - - - ## Presence of outlying observations and homoscedacity of residuals - p2 <- ggplot(data = df, - aes(x=cond.pred, - y=st.cond.res, - colour=fixfact)) + - geom_point(size =2) + - geom_hline(yintercept = 0, col = "grey")+ - geom_smooth(aes(x=cond.pred, y=st.cond.res), data = df, se = FALSE, col = "blue", method = "loess")+ - ggtitle("Homosedasticity of conditional residuals/outlying observations")+ - xlab("Individual predictions")+ - ylab("Standardized conditional residuals")+ - theme(legend.position="none", plot.title = element_text(size = rel(1.2), face = "bold"))+ - geom_hline(yintercept = c(-1,1)*hlimitN, linetype = "dashed")+ - geom_text(aes(label = condres.out, col = fixfact), hjust=0, vjust=0) - - # p2hist <- histF(df$st.cond.res, sd_x =1)+ - # xlab("Standardized conditional residuals") - - - ## Normality of residuals - if(least.confounded){ - p3 <- qqplotF(x = lccondresVn, - distribution = "norm", - line.estimate = NULL, - conf = 0.95)+ - xlab("Standard normal quantiles")+ - ylab("Least confounded conditional residual quantiles")+ - ggtitle("Normality of conditional error (least confounded)")+ - theme(legend.position="none", plot.title = element_text(size = rel(1.2), face = "bold")) - - # p3hist <- histF(lccondresVn, sd_x =1)+ - # xlab("Least confounded conditional residuals") - }else{ - p3 <-qqplotF(x = df$st.cond.res, - distribution = "norm", - line.estimate = NULL, - conf = 0.95)+ - xlab("Standard normal quantiles")+ - ylab("Standardized conditional residual quantiles")+ - ggtitle("Normality of conditional error")+ - theme(legend.position="none", plot.title = element_text(size = rel(1.2), face = "bold")) - # p3hist <-p2hist - } - - ## Within-units covariance structure - p4 <- ggplot(data = unitDf, - aes(x=unit, - y=lvm, - colour=fixfact)) + - geom_point(size =2) + - theme(legend.position="none")+ - xlab(unitC)+ - ylab("Standardized Lesaffre-Verbeke measure")+ - geom_hline(yintercept = 2*mean(unitDf$lvm), linetype = "dashed")+ - geom_text(aes(label = unit, col = fixfact), - data = unitDf[unitDf$lvm>2*mean(unitDf$lvm), ], - hjust=0, vjust=0)+ - ggtitle("Within-units covariance matrice")+ - theme(legend.position="none", plot.title = element_text(size = rel(1.2), face = "bold")) - - ## EBLUP modif lmerTest v3 mais plante si pas d'effet aléatoire - #pvl <- ranova(model=mfl, reduce.terms = TRUE) - #pvrnd <- pvl[[6]][2] - #ggtitle(paste("Random effect on intercept (LRT p-value = ",round(pvrnd,digits = 5), ")", sep = ""))+ - - p5 <- - ggplot(aes(x = eblup, y = unit, col = fixfact), data = unitDf)+ - geom_point(size =3)+ - geom_segment(aes(xend = lower, yend = unit)) + - geom_segment(aes(xend = upper, yend = unit))+ - ggtitle("Random effect on intercept")+ - theme(legend.position="none", plot.title = element_text(size = rel(1.2), face = "bold"))+ - geom_vline(xintercept = 0, linetype = "dashed")+ - ylab(unitC) - - ## p6 - p6 <-qqplotF(x = unitDf$mal, - distribution = "chisq", - df= 1, - line.estimate = NULL, - conf = 0.95)+ - xlab("Chi-squared quantiles")+ - ylab("Standadized Mahalanobis distance")+ - ggtitle("Normality of random effect")+ - theme(legend.position="none", plot.title = element_text(size = rel(1.2), face = "bold")) - - ## Outlying subjects - p7 <- - ggplot(aes(y = mal, x= unit, col = fixfact), data = unitDf)+ - geom_point(size =3)+ - ylab("Standardized Mahalanobis distance")+ - geom_vline(xintercept = 0, linetype = "dashed")+ - theme(legend.position="none", plot.title = element_text(size = rel(1.2), face = "bold"))+ - geom_hline(yintercept = 2*mean(unitDf$mal), linetype = "dashed")+ - geom_text(aes(label = unit, col = fixfact), - data = unitDf[unitDf$mal>2*mean(unitDf$mal), ], - hjust=1, vjust=0)+ - ggtitle("Outlying subjects")+ - xlab(unitC) - - ## "Data" and "modeling" Plots -------------------------------------------------------------------------- - - ## Individual time-course - rawPlot <- ggplot(data = df, - aes(x=time, y=vd, colour=fixfact, group=subject)) + - geom_line() + ggtitle("Individual time-courses ")+ - theme(legend.position="none", plot.title = element_text(size = rel(1.2), face = "bold")) - - ## Post-hoc estimates (modification due to lmerTest v3) - ddlsm1 <- data.frame(difflsmeans(mfl,test.effs=NULL)) - colnames(ddlsm1)[9] <- "pvalue" - # ddlsm1$name <- sapply(rownames(ddlsm1), - # function(nam){ - # strsplit(nam, split = " ", fixed =TRUE)[[1]][1] - # }) - # ddlsm1$detail <- sapply(rownames(ddlsm1), - # function(nam){ - # paste(strsplit(nam, split = " ", fixed =TRUE)[[1]][-1], - # collapse= "") - # }) - # - # colnames(ddlsm1)<- make.names(colnames(ddlsm1)) - ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) - - ## modif JF pour tenir compte du seuil de pvalues defini par le user - ddlsm1$Significance[which(ddlsm1$pvalue =0.01)] <- "p-value < threshold" - ddlsm1$Significance[which(ddlsm1$pvalue <0.01 & ddlsm1$pvalue >=0.005)] <- "p-value < 0.01" - ddlsm1$Significance[which(ddlsm1$pvalue <0.005)] <- "p-value < 0.005" - - phPlot <- ggplot(ddlsm1, aes(x = levels, y = Estimate))+ - facet_grid(facets = ~term, ddlsm1,scales = "free", space = "free")+ - geom_bar( aes(fill = Significance), stat="identity")+ - theme(axis.text.x = element_text(angle = 90, hjust = 1))+ - scale_fill_manual( - values = c("NS" = "grey", - "p-value < threshold" = "yellow", - "p-value < 0.01" = "orange", - "p-value < 0.005" = "red"))+ - geom_errorbar(aes(ymin = lower, ymax =upper ), width=0.25)+ - ggtitle("Post-hoc estimates")+xlab("")+ - theme(plot.title = element_text(size = rel(1.2), face = "bold")) - - ## Final plotting - - blank<-rectGrob(gp=gpar(col="white")) - rectspacer<-rectGrob(height = unit(0.1, "npc"), gp=gpar(col="grey")) - - grid.arrange(blank, - rawPlot, blank, phPlot, - rectspacer, - p1,blank, p2, blank, p3, blank, p4, - blank, - p5,blank, p6, blank, p7, blank, - blank,blank, - top = textGrob(title,gp=gpar(fontsize=40,font=4)), - layout_matrix = matrix(c(rep(1,7), - 2, 3, rep(4,3), 20,21, - rep(5,7), - 6:12, - rep(13,7), - 14:18, rep(19,2)), - ncol=7, nrow=6, byrow=TRUE), - heights= c(0.1/3, 0.3, 0.1/3, 0.3, 0.1/3, 0.3 ), - widths = c(0.22, 0.04, 0.22,0.04 , 0.22, 0.04, 0.22)) - - invisible(NULL) - -} - -#diagmflF(mfl, title = "", outlier.limit = 3, least.confounded =TRUE) - -plot.res.Lmixed <- function(mfl, df, title = "", pvalCutof = 0.05) { - - nameVar <- colnames(df)[4] - fflab <- colnames(df)[1] - ## Individual time-course - rawPlot <- - ggplot(data = df, aes(x=df[[2]], y=df[[4]], colour=df[[1]], group=df[[3]])) + - geom_line() + ggtitle("Individual time-courses (raw data)")+ - ylab(nameVar) + - xlab(label = colnames(df)[2])+ - theme(legend.title = element_blank() , legend.position="none", plot.title = element_text(size = rel(1.2), face = "bold")) - - ## Boxplot of fixed factor - bPlot <- - ggplot(data = df, aes(y=df[[4]], x=df[[1]], color=df[[1]]))+ - geom_boxplot(outlier.colour="red", outlier.shape=8, outlier.size=4)+ - ggtitle(paste("Boxplot by ",fflab,sep=""))+xlab("")+ylab("")+ - theme(legend.title = element_blank(), plot.title = element_text(size = rel(1.2), face = "bold")) - - ## Post-hoc estimates - - ddlsm1 <- mfl - ddlsm1$name <- rownames(ddlsm1) - # ddlsm1$name <- sapply(rownames(ddlsm1), - # function(nam){ - # strsplit(nam, split = " ", fixed =TRUE)[[1]][1] - # }) - # ddlsm1$detail <- sapply(rownames(ddlsm1), - # function(nam){ - # paste(strsplit(nam, split = " ", fixed =TRUE)[[1]][-1], - # collapse= "") - # }) - # - #colnames(ddlsm1)<- make.names(colnames(ddlsm1)) - ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) - - ## modif JF pour tenir compte du seuil de pvalues defini par le user - options("scipen"=100, "digits"=5) - pvalCutof <- as.numeric(pvalCutof) - bs=0.05; bm=0.01; bi=0.005 - if (pvalCutof >bm) {bs <- pvalCutof} else - if (pvalCutof bi) {bm <- pvalCutof; bs <- pvalCutof} else - if (pvalCutof < bi) {bi <- pvalCutof; bm <- pvalCutof; bs <- pvalCutof} - lbs <- paste("p-value < ",bs, sep="") - lbm <- paste("p-value < ",bm, sep="") - lbi <- paste("p-value < ",bi, sep="") - - cols <- paste("p-value < ",bs, sep="") - colm <- paste("p-value < ",bm, sep="") - coli <- paste("p-value < ",bi, sep="") - valcol <- c("grey","yellow","orange","red") - names (valcol) <- c("NS",lbs,lbm,lbi) - ddlsm1$Significance[which(ddlsm1$p.value<= bs)] <- lbs - ddlsm1$Significance[which(ddlsm1$p.value= bm)] <- lbm - ddlsm1$Significance[which(ddlsm1$p.value - ANOVA for repeated measures statistics - - - r-batch - r-lme4 - r-lmertest - bioconductor-multtest - r-gridextra - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 10.18637/jss.v082.i13. - @ARTICLE{fisher, - author = {Benjamini Y. and Hochberg Y., - title = {Controlling the false discovery rate: a practical and powerful approach for multiple testing. Journal of the Royal Statistical Society}, - journal = {Series B (Methodological)}, - year = {1995}, - volume = {57}, - pages = {289-300} - } - 10.1093/bioinformatics/btu813 - - - diff -r 1422de181204 -r a3147e3d66e2 mixmodel4repeated_measures/diagmfl.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel4repeated_measures/diagmfl.R Mon May 16 12:31:58 2022 +0000 @@ -0,0 +1,918 @@ +#' Calcul des grandeurs "diagnostiques" +#' +#' Script adapte de http://www.ime.unicamp.br/~cnaber/residdiag_nlme_v22.R pour fonctionner +#' avec un modele lmer (et non lme), des sujets avec des identifiants non numeriques, +#' et des observations non ordonnees sujet par sujet (dernier point a verifier.) +#' +#' @detail Les graphiques, les calculs associés et les notations utilisees dans le script suivent +#' l'article de Singer et al (2016) Graphical Tools for detedcting departures from linear +#' mixed model assumptions and some remedial measures, International Statistical Review +#' (doi:10.1111/insr.12178) +#' +#' @param mfl A linear mixed model fitted via lmer or a data frame containing data +#' @return A list +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export lmer.computeDiag + + + +lmer.computeDiag <- function(mfl) { + + ## Check arguments --------------------------------------------------------- + + if (length(mfl@flist) > 1) + stop("Several 'grouping level' for random effect not implemented yet.") + + + ## extracting information from mfl models ------------------------------------------------------------- + # data + df <- mfl@frame + responseC <- names(df)[1] + unitC <- names(mfl@flist)[1] + # observations + yVn <- df[, responseC] + nobsN <- length(yVn) + # units + idunitVc <- levels(mfl@flist[[1]]) + nunitN <- length(unique(idunitVc)) + #X + xMN <- mfl@pp$X + pN <- ncol(xMN) + #Z + zMN <- t(as.matrix(mfl@pp$Zt)) + # Estimated covariance matrix of random effects (Gam) + aux <- VarCorr(mfl)[[1]] ## assuming only one level of grouping + aux2 <- attr(aux, "stddev") + gMN <- attr(aux, "correlation") * (aux2 %*% t(aux2)) + gammaMN <- as.matrix(kronecker(diag(nunitN), gMN)) + q <- dim(gMN)[1] + # Estimated covariance matrix of conditonal error (homoskedastic conditional independance model) + sigsqN <- attr(VarCorr(mfl), "sc")^2 + rMN <- sigsqN * diag(nobsN) + # Estimated covariance matrix of Y + vMN <- (zMN %*% gammaMN %*% t(zMN)) + rMN + invvMN <- MASS::ginv(vMN) + # H and Q matrix + hMN <- MASS::ginv(t(xMN) %*% invvMN %*% xMN) + qMN <- invvMN - invvMN %*% xMN %*% (hMN) %*% t(xMN) %*% invvMN + # eblue and eblup + eblueVn <- mfl@beta + eblupVn <- gammaMN %*% t(zMN) %*% invvMN %*% (yVn - xMN %*% eblueVn) ## equivalent de ranef(mfl) + rownames(eblupVn) <- colnames(zMN) + ## Calculs of matrices and vectors used in graph diagnosics --------------------------------------------- + ## Marginal and individual predictions, residuals and variances + marpredVn <- xMN %*% eblueVn + marresVn <- yVn - marpredVn + marvarMN <- vMN - xMN %*% hMN %*% t(xMN) + condpredVn <- marpredVn + zMN %*% eblupVn + condresVn <- yVn - condpredVn + condvarMN <- rMN %*% qMN %*% rMN + ## Analysis of marginal and conditional residuals + stmarresVn <- stcondresVn <- rep(0, nobsN) + lesverVn <- rep(0, nunitN) + names(lesverVn) <- idunitVc + for (i in 1:nunitN) { + idxiVn <- which(df[, unitC] == idunitVc[i]) ## position des observations du sujet i + miN <- length(idxiVn) + ## standardization of marginal residual + stmarresVn[idxiVn] <- as.vector(solve(sqrtmF(marvarMN[idxiVn, idxiVn])) %*% marresVn[idxiVn]) + ##Standardized Lessafre and Verbeke's measure + auxMN <- diag(1, ncol = miN, nrow = miN) - stmarresVn[idxiVn] %*% t(stmarresVn[idxiVn]) + lesverVn[i] <- sum(diag(auxMN %*% t(auxMN))) + ## standardization of conditional residual + stcondresVn[idxiVn] <- as.vector(solve(sqrtmF(condvarMN[idxiVn, idxiVn])) %*% condresVn[idxiVn]) + } + lesverVn <- lesverVn / sum(lesverVn) + ## Least confounded conditional residuals + ## EBLUP analysis (Mahalanobis' distance) + varbMN <- gammaMN %*% t(zMN) %*% qMN %*% zMN %*% gammaMN + mdistVn <- rep(0, nunitN) + qm <- q - 1 + for (j in 1:nunitN) { + gbi <- varbMN[(q * j - qm):(q * j), (q * j - qm):(q * j)] + eblupi <- eblupVn[(q * j - qm):(q * j)] + mdistVn[j] <- t(eblupi) %*% ginv(gbi) %*% eblupi + } + names(mdistVn) <- levels(mfl@flist[[1]]) + ## output ---------------------------------------------- + return(list( + data = df, + q = q, + eblue = eblueVn, + eblup = eblupVn, + marginal.prediction = marpredVn, + conditional.prediction = condpredVn, + std.marginal.residuals = stmarresVn, + std.conditional.residuals = stcondresVn, + mahalanobis.distance = mdistVn, + std.mahalanobis.distance = mdistVn / sum(mdistVn), + std.lesaffreverbeke.measure = lesverVn + )) +} + + +#' Wrapper function for diagnostic plots of 'lmer' linear mixed models +#' +#' (W4M mixmod) +#' +#' @param mfl A linear mixed model fitted via lmer or a data frame containing data +#' @param title aa +#' @param outlier.limit aa +#' @param pvalCutof aa +#' @param resC aa +#' @param uniC aa +#' @param fixC aa +#' @param lest.confounded Not used yet. +#' @return NULL +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export diagmflF + + +diagmflF <- function(mfl, + title = "", + outlier.limit = 3, + pvalCutof = 0.05, + resC = "vd", + uniC = "subject", + timC = "time", + fixC = "fixfact", + least.confounded = FALSE) { + ## diagnostics + diagLs <- lmer.computeDiag(mfl) + ## plots + blank <- rectGrob(gp = gpar(col = "white")) + rectspacer <- rectGrob(height = unit(0.1, "npc"), gp = gpar(col = "grey")) + grid.arrange(blank, + plot_linearity(diagLs, hlimitN = outlier.limit, plotL = FALSE, + label_factor = c(uniC, fixC, timC)), + blank, + plot_conditionalResiduals(diagLs, hlimitN = outlier.limit, plotL = FALSE, + label_factor = c(uniC, fixC, timC)), + blank, + plot_condresQQplot(diagLs, plotL = FALSE), + blank, + plot_lesaffreVeerbeke(diagLs, plotL = FALSE), + blank, + plot_randomEffect(mfl, plotL = FALSE)[[1]], + blank, + plot_mahalanobisKhi2(diagLs, plotL = FALSE), + blank, + plot_mahalanobis(diagLs, plotL = FALSE), + blank, + blank, + blank, + top = textGrob(title, gp = gpar(fontsize = 40, font = 4)), + layout_matrix = matrix(c(rep(1, 7), + 2, 3, rep(4, 3), 20, 21, + rep(5, 7), + 6:12, + rep(13, 7), + 14:18, rep(19, 2)), + ncol = 7, nrow = 6, byrow = TRUE), + heights = c(0.1 / 3, 0.3, 0.1 / 3, 0.3, 0.1 / 3, 0.3), + widths = c(0.22, 0.04, 0.22, 0.04, 0.22, 0.04, 0.22)) +} + +####################################################################################################### +## Raw data time courses +####################################################################################################### + +#' Visualization of raw time course +#' +#' Une +#' +#' @param mfl A linear mixed model fitted via lmer or a data frame containing data +#' @param responseC Name of the 'response' variable +#' @param timeC Name of the 'time' variable +#' @param subjectC Name of the 'subject' variable +#' @param fixfactC Name of the 'fixed factor' variable (e.g.treatment) +#' @param offset_subject Boolean indicating if an offset value (subject's mean) should substracted to each data point. Default is FALSE +#' @param plotL Boolean +#' @param colorType One of NA, FIXFACT or SUBJECT +#' @param shapeType One of NA, FIXFACT or SUBJECT +#' @param lineType One of NA, FIXFACT or SUBJECT +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_timeCourse + + + +plot_timeCourse <- function(mfl, + responseC, + timeC, + subjectC, + fixfactC = NULL, + offset_subject = FALSE, + plotL = TRUE, + colorType = NA, ## subject, fixfact, none or NA + shapeType = NA, ## subject, fixfact, none or NA + lineType = NA ## subject, fixfact, none or NA +) { + ## Data ----- + if (class(mfl) %in% c("merModLmerTest", "lmerMod", "lmerModLmerTest")) { + DF <- mfl@frame + } else if (class(mfl) == "data.frame") { + DF <- mfl + } else { + stop("'mfl' argument must be a linear mixed effect model or a data frame.") + } + ## Format data ----- + if (is.null(fixfactC)) { + DF <- DF[, c(responseC, timeC, subjectC)] + colnames(DF) <- c("DV", "TIME", "SUBJECT") + meanDF <- aggregate(DF$DV, + by = list(SUBJECT = DF$SUBJECT, + TIME = DF$TIME), + FUN = mean, + na.rm = TRUE) + colnames(meanDF) <- c("SUBJECT", "TIME", "DV") + meanDF$GROUP <- meanDF$SUBJECT + } else{ + DF <- DF[, c(responseC, fixfactC, timeC, subjectC)] + colnames(DF) <- c("DV", "FIXFACT", "TIME", "SUBJECT") + meanDF <- aggregate(DF$DV, + by = list(SUBJECT = DF$SUBJECT, + TIME = DF$TIME, + FIXFACT = DF$FIXFACT), + FUN = mean, + na.rm = TRUE) + colnames(meanDF) <- c("SUBJECT", "TIME", "FIXFACT", "DV") + meanDF$GROUP <- paste(meanDF$SUBJECT, meanDF$FIXFACT, sep = "_") + } + ## Offset ----- + if (offset_subject) { + offsetMN <- aggregate(DF$DV, by = list(DF$SUBJECT), mean, na.rm = TRUE) + offsetVn <- offsetMN[, 2] + names(offsetVn) <- offsetMN[, 1] + rm(offsetMN) + DF$DV <- DF$DV - offsetVn[DF$SUBJECT] + meanDF$DV <- meanDF$DV - offsetVn[as.character(meanDF$SUBJECT)] + } + ## Graphical parameters ----- + xlabC <- timeC + ylabC <- responseC + titC <- "Individual time-courses" + if (offset_subject) { + ylabC <- paste(ylabC, "minus 'within-subject' empirical mean") + titC <- paste(titC, "('within-subject' empirical mean offset)") + } + ## color + if (is.na(colorType)) { ## automaticatical attribution + if (is.null(fixfactC)) { + colorType <- "SUBJECT" + } else { + colorType <- "FIXFACT" + } + colTxt <- paste(", colour=", colorType) + } else if (colorType == "none") { + colTxt <- "" + } else { + colTxt <- paste(", colour=", colorType) + } + ## lineType + if (is.na(lineType)) { ## automaticatical attribution + if (is.null(fixfactC)) { + linTxt <- "" + } else { + linTxt <- paste(", linetype=", + ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT")) + } + } else if (lineType == "none") { + linTxt <- "" + } else { + linTxt <- paste(", linetype=", lineType) + } + ## shapeType + if (is.na(shapeType)) { ## automaticatical attribution + if (is.null(fixfactC)) { + shaTxt <- "" + } else { + shaTxt <- paste(", shape=", + ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT")) + } + } else if (shapeType == "none") { + shaTxt <- "" + } else { + shaTxt <- paste(", shape=", shapeType) + } + ## aes mapping + txtMap <- paste("aes(x = TIME, y = DV", + colTxt, shaTxt, ")", sep = "") + txtLineMap <- paste("aes(x = TIME, y = DV, group = GROUP ", + colTxt, linTxt, ")", sep = "") + ## plot and output + p <- ggplot(data = DF, mapping = eval(parse(text = txtMap))) + + ggtitle(titC) + + xlab(xlabC) + ylab(ylabC) + + theme(legend.position = "left", + plot.title = element_text(size = rel(1.2), face = "bold")) + + geom_point() + + geom_line(eval(parse(text = txtLineMap)), data = meanDF) + + theme_bw() + + NULL + if (plotL) plot(p) + invisible(p) +} + +####################################################################################################### +## Post-hoc estimate +####################################################################################################### +#' Visualization of fixed effects (post-hoc estimates) +#' +#' Description +#' +#' @param mfl A linear mixed model fitted via lmer or a data frame containing data +#' @param pvalCutof User pvalue cut of +#' @param plotL Boolean +#' @param titC Title of the plot +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_posthoc +plot_posthoc <- function(mfl, pvalCutof = 0.05, plotL = TRUE, titC = "Post-hoc estimates") { + ddlsm1 <- as.data.frame(difflsmeans(mfl, test.effs = NULL)) + colnames(ddlsm1)[ncol(ddlsm1)] <- "pvalue" + ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) + ## modif JF pour tenir compte du seuil de pvalues defini par le user + ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof)] <- paste("p-value < ", pvalCutof, sep = "") + ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 5)] <- paste("p-value < ", pvalCutof / 5, sep = "") + ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 10)] <- paste("p-value < ", pvalCutof / 10, sep = "") + ddlsm1$levels <- rownames(ddlsm1) + ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) { + strsplit(namC, split = " ", fixed = TRUE)[[1]][1] + }) + colValue <- c("grey", "yellow", "orange", "red") + names(colValue) <- c("NS", + paste("p-value < ", pvalCutof, sep = ""), + paste("p-value < ", pvalCutof / 5, sep = ""), + paste("p-value < ", pvalCutof / 10, sep = "")) + p <- ggplot(ddlsm1, aes(x = levels, y = Estimate)) + + facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") + + geom_bar(aes(fill = Significance), stat = "identity") + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + + scale_fill_manual(values = colValue) + + geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.25) + + ggtitle(titC) + xlab("") + + NULL + if (plotL) plot(p) + invisible(p) +} + +####################################################################################################### +## Visualisation des effets aléatoires +####################################################################################################### +#' Visualization of random effects +#' +#' Equivalent of dotplot(ranef) +#' +#' @param mfl A linear mixed model fitted via lmer or a data frame containing data +#' @param plotL Logical +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_randomEffect + + +plot_randomEffect <- function(mfl, plotL = TRUE) { + ## Estimation et format des effets aléatoires + randomEffect <- ranef(mfl, condVar = TRUE) + DF <- data.frame(randomEffect = rep(names(randomEffect), + times = sapply(seq_len(length(randomEffect)), + function(lsi) { + return(length(unlist(randomEffect[[lsi]])))}))) + DF$condVar <- DF$estimate <- DF$x2 <- DF$x1 <- rep(NA, nrow(DF)) + for (rafC in names(randomEffect)) { + eff <- randomEffect[[rafC]] + DF$x1[which(DF$randomEffect == rafC)] <- rep(colnames(eff), each = nrow(eff)) + DF$x2[which(DF$randomEffect == rafC)] <- rep(rownames(eff), ncol(eff)) + DF$estimate[which(DF$randomEffect == rafC)] <- unlist(eff) + condvar <- attr(randomEffect[[rafC]], "postVar") + se <- NULL + for (coli in seq_len(ncol(eff))) { + se <- c(se, + sapply(seq_len(nrow(eff)), + function(i) { + return(condvar[coli, coli, i])})) + } + DF$condVar[which(DF$randomEffect == rafC)] <- se + } + DF$se <- sqrt(DF$condVar) + DF$lower <- DF$estimate - 1.96 * DF$se + DF$upper <- DF$estimate + 1.96 * DF$se + ## Plot + plotLs <- vector("list", length(randomEffect)) + names(plotLs) <- names(randomEffect) + for (pi in seq_len(length(plotLs))) { + subDF <- DF[DF$randomEffect == names(plotLs)[pi], ] + subDF <- subDF[order(subDF$x1, subDF$estimate, decreasing = FALSE), ] + p <- ggplot(data = subDF, + mapping = aes(x = estimate, y = reorder(x2, estimate)) + ) + + geom_point(size = 3) + + geom_segment(aes(xend = lower, yend = x2)) + + geom_segment(aes(xend = upper, yend = x2)) + + facet_wrap(~x1, ncol = length(unique(subDF$x1))) + + ylab("") + xlab("") + + ggtitle(paste("Random effect - ", names(plotLs)[pi], sep = "")) + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + + geom_vline(xintercept = 0, linetype = "dashed") + + theme_bw() + plotLs[[pi]] <- p + if (plotL) plot(p) + } + invisible(plotLs) +} + +####################################################################################################### +## Linearité des effets et outlying observations +####################################################################################################### +#' Linarity of the fixed effect with regard to the continuous time +#' +#' @param diagLs diagnostic list +#' @param hlimitN Limit value for outliers (e.g.2 or 3) +#' @param plotL Boolean +#' @param label_factor Column of observation names used to label outlying values +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_linearity +#' + +plot_linearity <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) { + df <- cbind.data.frame(diagLs$data, + marginal.prediction = diagLs$marginal.prediction, + standardized.marginal.residuals = diagLs$std.marginal.residuals) + # outlier annotation + df$outliers <- rep("", nrow(df)) + outidx <- which(abs(df$standardized.marginal.residuals) > hlimitN) + df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx] + if (length(label_factor) >= 1) { + df[outidx, "outliers"] <- paste(df[outidx, "outliers"], + df[outidx, label_factor[1]], + sep = "_") + if (length(label_factor) > 1) { + for (li in 2:length(label_factor)) { + df[outidx, "outliers"] <- paste(df[outidx, "outliers"], + df[outidx, label_factor[li]], + sep = ".") + } + } + } + p <- ggplot(data = df, + aes(x = marginal.prediction, + y = standardized.marginal.residuals)) + + geom_point(size = 2) + + geom_hline(yintercept = 0, col = "grey") + + geom_smooth(aes(x = marginal.prediction, + y = standardized.marginal.residuals), data = df, se = FALSE, col = "blue", method = "loess") + + ggtitle("Linearity of effects/outlying obervations") + + xlab("Marginal predictions") + + ylab("Standardized marginal residuals") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + + geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") + + geom_text(aes(label = outliers), hjust = 0, vjust = 0) + if (plotL) plot(p) + invisible(p) +} + + +####################################################################################################### +## EBLUP +####################################################################################################### + + +#' Mahalanobis distance +#' +#' @param diagLs diagnostic list +#' @param plotL Boolean +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_mahalanobis +#' + + +plot_mahalanobis <- function(diagLs, plotL = TRUE) { + unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance), + mal = diagLs$std.mahalanobis.distance) + ## Outlying subjects + p <- + ggplot(aes(y = mal, x = unit), data = unitDf) + + geom_point(size = 3) + + ylab("Standardized Mahalanobis distance") + + geom_vline(xintercept = 0, linetype = "dashed") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + + geom_hline(yintercept = 2 * mean(unitDf$mal), linetype = "dashed") + + geom_text(aes(label = unit), + data = unitDf[unitDf$mal > 2 * mean(unitDf$mal), ], + hjust = 1, vjust = 0) + + ggtitle("Outlying unit") + + xlab("unit") + if (plotL) plot(p) + invisible(p) +} + + + + + + +#' Mahalanobis distance (Chi2) +#' +#' @param diagLs diagnostic list +#' @param plotL aa +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_mahalanobisKhi2 +#' + + +plot_mahalanobisKhi2 <- function(diagLs, plotL = TRUE) { + unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance), + mal = diagLs$mahalanobis.distance) + p <- qqplotF(x = unitDf$mal, + distribution = "chisq", + df = diagLs$q, + line.estimate = NULL, + conf = 0.95) + + xlab("Chi-squared quantiles") + + ylab("Mahalanobis distance") + + ggtitle("Normality of random effect") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + if (plotL) plot(p) + invisible(p) +} + + + + + + + +####################################################################################################### +## Residus conditionels +####################################################################################################### + +## Presence of outlying observations and homoscedacity of residuals + +#' Homoskedacity of conditionalresiduals +#' +#' @param diagLs diagnostic list +#' @param hlimitN Limit value for outliers (e.g.2 or 3) +#' @param plotL Boolean +#' @param label_factor Column of observation names used to label outlying values +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_conditionalResiduals +#' + + +plot_conditionalResiduals <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) { + df <- cbind.data.frame(diagLs$data, + conditional.prediction = diagLs$conditional.prediction, + standardized.conditional.residuals = diagLs$std.conditional.residuals) + # outlier annotation + df$outliers <- rep("", nrow(df)) + outidx <- which(abs(df$standardized.conditional.residuals) > hlimitN) + df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx] + if (length(label_factor) >= 1) { + df[outidx, "outliers"] <- paste(df[outidx, "outliers"], + df[outidx, label_factor[1]], + sep = "_") + if (length(label_factor) > 1) { + for (li in 2:length(label_factor)) { + df[outidx, "outliers"] <- paste(df[outidx, "outliers"], + df[outidx, label_factor[li]], + sep = ".") + } + } + } + p <- ggplot(data = df, + aes(x = conditional.prediction, + y = standardized.conditional.residuals)) + + geom_point(size = 2) + + geom_hline(yintercept = 0, col = "grey") + + geom_smooth(aes(x = conditional.prediction, + y = standardized.conditional.residuals), + data = df, se = FALSE, col = "blue", method = "loess") + + ggtitle("Homoscedasticity of conditional residuals/outlying observations") + + xlab("Individual predictions") + + ylab("Standardized conditional residuals") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + + geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") + + geom_text(aes(label = outliers), hjust = 0, vjust = 0) + if (plotL) plot(p) + invisible(p) +} + + + + +#' Normality of conditionalresiduals +#' +#' @param diagLs diagnostic list +#' @param plotL aa +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_condresQQplot +#' + + +plot_condresQQplot <- function(diagLs, plotL = TRUE) { + df <- cbind.data.frame(diagLs$data, + conditional.prediction = diagLs$conditional.prediction, + standardized.conditional.residuals = diagLs$std.conditional.residuals) + p <- qqplotF(x = df$standardized.conditional.residuals, + distribution = "norm", + line.estimate = NULL, + conf = 0.95) + + xlab("Standard normal quantiles") + + ylab("Standardized conditional residual quantiles") + + ggtitle("Normality of conditional error") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + if (plotL) plot(p) + invisible(p) +} + + + + + +####################################################################################################### +## Within-units covariance structure +####################################################################################################### + + +#' Lesaffre-Veerbeke measure +#' +#' @param diagLs diagnostic list +#' @param plotL aa +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_lesaffreVeerbeke +#' + + +plot_lesaffreVeerbeke <- function(diagLs, plotL = TRUE) { + unitDf <- data.frame(unit = names(diagLs$std.lesaffreverbeke.measure), + lvm = diagLs$std.lesaffreverbeke.measure) + p <- ggplot(data = unitDf, + aes(x = unit, + y = lvm)) + + geom_point(size = 2) + + theme(legend.position = "none") + + xlab("units") + + ylab("Standardized Lesaffre-Verbeke measure") + + geom_hline(yintercept = 2 * mean(unitDf$lvm), linetype = "dashed") + + geom_text(aes(label = unit), + data = unitDf[unitDf$lvm > 2 * mean(unitDf$lvm), ], + hjust = 0, vjust = 0) + + ggtitle("Within-units covariance matrice") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + if (plotL) plot(p) + invisible(p) +} + +##-------------------------------------------------------------------------------------------------## +## Helpers +##-------------------------------------------------------------------------------------------------## + + +## square root of a matrix +## From Rocha, Singer and Nobre + +#' square root of a matrix (Rocha) +#' +#' Description +#' +#' @param mat Matrix +#' @return A list +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export sqrt.matrix + +sqrt.matrix <- function(mat) { + mat <- as.matrix(mat) # new line of code + singular_dec <- svd(mat, LINPACK = F) + U <- singular_dec$u + V <- singular_dec$v + D <- diag(singular_dec$d) + sqrtmatrix <- U %*% sqrt(D) %*% t(V) +} + + +## square root of a matrix +## http://www.cs.toronto.edu/~jepson/csc420/notes/introSVD.pdf (page 6) +## (for matMN a n x n matrix that symetric and non-negative definite) + +#' square root of a matrix (Rocha) +#' +#' @param mat Matrix +#' @return A list +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export sqrtmF + +sqrtmF <- function(matMN) { + matMN <- as.matrix(matMN) + ## check that matMN is symetric: if (!all(t(matMN == matMN))) stop("matMN must be symetric.") + svd_dec <- svd(matMN) + invisible(svd_dec$u %*% sqrt(diag(svd_dec$d)) %*% t(svd_dec$v)) +} + + +## qqplotF +## adapted from https://gist.github.com/rentrop/d39a8406ad8af2a1066c + +qqplotF <- function(x, + distribution = "norm", ..., + line.estimate = NULL, + conf = 0.95, + labels = names(x)) { + q.function <- eval(parse(text = paste0("q", distribution))) + d.function <- eval(parse(text = paste0("d", distribution))) + x <- na.omit(x) + ord <- order(x) + n <- length(x) + P <- ppoints(length(x)) + daf <- data.frame(ord.x = x[ord], z = q.function(P, ...)) + if (is.null(line.estimate)) { + Q.x <- quantile(daf$ord.x, c(0.25, 0.75)) + Q.z <- q.function(c(0.25, 0.75), ...) + b <- diff(Q.x) / diff(Q.z) + coef <- c(Q.x[1] - b * Q.z[1], b) + } else { + coef <- coef(line.estimate(ord.x ~ z)) + } + zz <- qnorm(1 - (1 - conf) / 2) + SE <- (coef[2] / d.function(daf$z, ...)) * sqrt(P * (1 - P) / n) + fit.value <- coef[1] + coef[2] * daf$z + daf$upper <- fit.value + zz * SE + daf$lower <- fit.value - zz * SE + if (!is.null(labels)) { + daf$label <- ifelse(daf$ord.x > daf$upper | daf$ord.x < daf$lower, labels[ord], "") + } + p <- ggplot(daf, aes(x = z, y = ord.x)) + + geom_point() + + geom_abline(intercept = coef[1], slope = coef[2], col = "red") + + geom_line(aes(x = z, y = lower), daf, col = "red", linetype = "dashed") + + geom_line(aes(x = z, y = upper), daf, col = "red", linetype = "dashed") + + xlab("") + ylab("") + if (!is.null(labels)) p <- p + geom_text(aes(label = label)) + return(p) +} + + +## histogramm +histF <- function(x, sd_x = NULL, breaks = "scott") { + if (is.null(sd_x)) + sd_x <- sd(x) + ## Bandwith estimation (default is Scott) + if (!breaks %in% c("sqrt", "sturges", "rice", "scott", "fd")) + breaks <- "scott" + if (breaks %in% c("sqrt", "sturges", "rice")) { + k <- switch(breaks, + sqrt = sqrt(length(x)), + sturges = floor(log2(x)) + 1, + rice = floor(2 * length(x) ^ (1 / 3)) + ) + bw <- diff(range(x)) / k + }else{ + bw <- switch(breaks, + scott = 3.5 * sd_x / length(x) ^ (1 / 3), + fd = diff(range(x)) / (2 * IQR(x) / length(x) ^ (1 / 3)) + ) + } + + + daf <- data.frame(x = x) + ## graph + return(ggplot(data = daf, aes(x)) + + geom_histogram(aes(y = ..density..), + col = "black", fill = "grey", binwidth = bw) + + geom_density(size = 1.2, + col = "blue", + linetype = "blank", + fill = rgb(0, 0, 1, 0.1)) + + stat_function(fun = dnorm, + args = list(mean = 0, sd = sd_x), + col = "blue", size = 1.2) + + theme(legend.position = "none") + + xlab("")) +} + + +plot.res.Lmixed <- function(mfl, df, title = "", pvalCutof = 0.05) { + + ## define subscript of the different columns depending if we have only time (ncol(df)=3) or not + if (ncol(df) > 3) { + varidx <- 4 + ffidx <- 1 + timidx <- 2 + individx <- 3 + } else { + varidx <- 3 + ffidx <- 1 + timidx <- 1 + individx <- 2 + } + nameVar <- colnames(df)[varidx] + fflab <- colnames(df)[ffidx] + ## Individual time-course + rawPlot <- + ggplot(data = df, aes(x = df[[timidx]], y = df[[varidx]], colour = df[[ffidx]], group = df[[individx]])) + + geom_point() + + geom_line() + ggtitle("Individual time-courses (raw data)") + + ylab(nameVar) + + xlab(label = colnames(df)[2]) + + theme(legend.title = element_blank(), legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + ## Boxplot of fixed factor + bPlot <- + ggplot(data = df, aes(y = df[[varidx]], x = df[[ffidx]], color = df[[ffidx]])) + + geom_boxplot(outlier.colour = "red", outlier.shape = 8, outlier.size = 4) + + ggtitle(paste("Boxplot by ", fflab, sep = "")) + xlab("") + ylab("") + + theme(legend.title = element_blank(), plot.title = element_text(size = rel(1.2), face = "bold")) + ## Post-hoc estimates + ddlsm1 <- mfl + ddlsm1$name <- rownames(ddlsm1) + ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) + ## modif JF pour tenir compte du seuil de pvalues defini par le user + options("scipen" = 100, "digits" = 5) + pvalCutof <- as.numeric(pvalCutof) + bs <- 0.05; bm <- 0.01; bi <- 0.005 + if (pvalCutof > bm) { + bs <- pvalCutof + } else + if (pvalCutof < bm & pvalCutof > bi) { + bm <- pvalCutof; bs <- pvalCutof + } else + if (pvalCutof < bi) { + bi <- pvalCutof; bm <- pvalCutof; bs <- pvalCutof + } + lbs <- paste("p-value < ", bs, sep = "") + lbm <- paste("p-value < ", bm, sep = "") + lbi <- paste("p-value < ", bi, sep = "") + cols <- paste("p-value < ", bs, sep = "") + colm <- paste("p-value < ", bm, sep = "") + coli <- paste("p-value < ", bi, sep = "") + valcol <- c("grey", "yellow", "orange", "red") + names(valcol) <- c("NS", lbs, lbm, lbi) + ddlsm1$Significance[which(ddlsm1$p.value <= bs)] <- lbs + ddlsm1$Significance[which(ddlsm1$p.value < bs & ddlsm1$p.value >= bm)] <- lbm + ddlsm1$Significance[which(ddlsm1$p.value < bi)] <- lbi + ddlsm1$levels <- rownames(ddlsm1) + ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) { + strsplit(namC, split = " ", fixed = TRUE)[[1]][1] + }) + + phPlot <- + ggplot(ddlsm1, aes(x = levels, y = Estimate)) + + facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") + + geom_bar(aes(fill = Significance), stat = "identity") + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + + scale_fill_manual( + values = valcol) + + geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI), width = 0.25) + + ggtitle("Post-hoc estimates ") + xlab("") + + theme(plot.title = element_text(size = rel(1.2), face = "bold")) + + ## Final plotting + grid.arrange(arrangeGrob(rawPlot, bPlot, ncol = 2), + phPlot, nrow = 2, + top = textGrob(title, gp = gpar(fontsize = 32, font = 4)) + ) + +} diff -r 1422de181204 -r a3147e3d66e2 mixmodel4repeated_measures/mixmodel.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel4repeated_measures/mixmodel.xml Mon May 16 12:31:58 2022 +0000 @@ -0,0 +1,264 @@ + + ANOVA for repeated measures statistics + + + r-lme4 + r-lmertest + r-ggplot2 + r-gridExtra + bioconductor-multtest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diaR == 'yes' + + + + + + + + + + + + + + + + + + + + + + + + + 10.18637/jss.v082.i13. + @ARTICLE{fisher, + author = {Benjamini Y. and Hochberg Y., + title = {Controlling the false discovery rate: a practical and powerful approach for multiple testing. Journal of the Royal Statistical Society}, + journal = {Series B (Methodological)}, + year = {1995}, + volume = {57}, + pages = {289-300} + } + 10.1093/bioinformatics/btu813 + + + diff -r 1422de181204 -r a3147e3d66e2 mixmodel4repeated_measures/mixmodel_script.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel4repeated_measures/mixmodel_script.R Mon May 16 12:31:58 2022 +0000 @@ -0,0 +1,481 @@ +####### R functions to perform linear mixed model for repeated measures +####### on a multi var dataset using 3 files as used in W4M +############################################################################################################## +lmRepeated2FF <- function(ids, ifixfact, itime, isubject, ivd, ndim, nameVar=colnames(ids)[[ivd]], + pvalCutof = 0.05, dffOption, visu = visu, tit = "", least.confounded = FALSE, outlier.limit = 3) { + ### function to perform linear mixed model with 1 Fixed factor + Time + random factor subject + ### based on lmerTest package providing functions giving the same results as SAS proc mixed + options(scipen = 50, digits = 5) + + if (!is.numeric(ids[[ivd]])) stop("Dependant variable is not numeric") + if (!is.factor(ids[[ifixfact]])) stop("fixed factor is not a factor") + if (!is.factor(ids[[itime]])) stop("Repeated factor is not a factor") + if (!is.factor(ids[[isubject]])) stop("Random factor is not a factor") + + ## factors + time <- ids[[itime]] + fixfact <- ids[[ifixfact]] + subject <- ids[[isubject]] + # dependant variables + vd <- ids[[ivd]] + + # argument of the function instead of re re-running ndim <- defColRes(ids, ifixfact, itime) + # nfp : number of main factors + model infos (REML, varSubject) + normality test + nfp <- ndim[1]; + # ncff number of comparison of the fixed factor + nlff <- ndim[2]; ncff <- ndim[3] + # nct number of comparison of the time factor + nlt <- ndim[4]; nct <- ndim[5] + # nci number of comparison of the interaction + nli <- ndim[6]; nci <- ndim[7] + # number of all lmer results + nresT <- ncff + nct + nci + ## initialization of the result vector (1 line) + ## 4 * because nresf for : pvalues + Etimates + lower CI + Upper CI + res <- data.frame(array(rep(NA, (nfp + 4 * nresT)))) + colnames(res)[1] <- "resultLM" + + ### if at least one subject have data for only 1 time, mixed model is not possible and variable must be skip + ### after excluding NA, table function is used to seek subjects with only 1 data + ids <- ids[!is.na(ids[[ivd]]), ] + skip <- length(which(table(ids[[isubject]]) == 1)) + + if (skip == 0) { + + mfl <- lmer(vd ~ time + fixfact + time:fixfact + (1 | subject), ids) # lmer remix + + rsum <- summary(mfl, ddf = dffOption) + ## test Shapiro Wilks on the residus of the model + rShapiro <- shapiro.test(rsum$residuals) + raov <- anova(mfl, ddf = dffOption) + dlsm1 <- data.frame(difflsmeans(mfl, test.effs = NULL)) + ddlsm1 <- dlsm1 + ## save rownames and factor names + rn <- rownames(ddlsm1) + fn <- ddlsm1[, c(1, 2)] + ## writing the results on a single line + namesFactEstim <- paste("estimate ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "") + namesFactPval <- paste("pvalue ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "") + namesInter <- rownames(ddlsm1)[-c(1:(nct + ncff))] + namesEstimate <- paste("estimate ", namesInter) + namespvalues <- paste("pvalue ", namesInter) + namesFactprinc <- c("pval_time", "pval_trt", "pval_inter") + namesFactEstim <- paste("estimate ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "") + + namesFactLowerCI <- paste("lowerCI ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "") + namesLowerCI <- paste("lowerCI ", namesInter, sep = "") + + namesFactUpperCI <- paste("UpperCI ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "") + namesUpperCI <- paste("UpperCI ", namesInter, sep = "") + + + ### lmer results on 1 vector row + # pvalue of shapiro Wilks test of the residuals + res[1, ] <- rShapiro$p.value; rownames(res)[1] <- "Shapiro.pvalue.residuals" + res[2, ] <- rsum$varcor$subject[1] ;rownames(res)[2] <- "Subject.Variance" + res[3, ] <- rsum$devcomp$cmp[7] ; rownames(res)[3] <- "REML" + ### 3 principal factors pvalues results + shapiro test => nfp <- 4 + res[c((nfp - 2):nfp), ] <- raov[, 6] + rownames(res)[c((nfp - 2):nfp)] <- namesFactprinc + + #################### Residuals diagnostics for significants variables ######################### + ### Il at least 1 factor is significant and visu=TRUE NL graphics add to pdf + ## ajout JF du passage de la valeur de p-value cutoff + if (length(which(raov[, 6] <= pvalCutof)) > 0 & visu == "yes") { + diagmflF(mfl, title = tit, pvalCutof = pvalCutof, least.confounded = least.confounded, + outlier.limit = outlier.limit) + } + + # pvalue of fixed factor comparisons + nresf <- nresT + res[(nfp + 1):(nfp + nct), ] <- ddlsm1[c(1:nct), 9] + res[(nfp + nct + 1):(nfp + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 9] + rownames(res)[(nfp + 1):(nfp + nct + ncff)] <- namesFactPval + res[(nfp + nct + ncff + 1):(nfp + nresf), ] <- ddlsm1[(nct + ncff + 1):(nresT), 9] + rownames(res)[(nfp + nct + ncff + 1):(nfp + nresT)] <- namespvalues + # Estimate of the difference between levels of factors + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 3] + res[(nfp + nresf + nct + 1):(nfp + nresf + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 3] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct + ncff)] <- namesFactEstim + res[(nfp + nresf + nct + ncff + 1):(nfp + 2 * nresf), ] <- ddlsm1[(nct + ncff + 1):(nresT), 3] + rownames(res)[(nfp + nresf + nct + ncff + 1):(nfp + 2 * nresf)] <- namesEstimate + # lower CI of the difference between levels of factors + nresf <- nresf + nresT + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 7] + res[(nfp + nresf + nct + 1):(nfp + nresf + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 7] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct + ncff)] <- namesFactLowerCI + res[(nfp + nresf + nct + ncff + 1):(nfp + 2 * nresf), ] <- ddlsm1[(nct + ncff + 1):(nresf), 7] + rownames(res)[(nfp + nresf + nct + ncff + 1):(nfp + nresf + (nresf / 2))] <- namesLowerCI + # Upper CI of the difference between levels of factors + nresf <- nresf + nresT + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 8] + res[(nfp + nresf + nct + 1):(nfp + nresf + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 8] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct + ncff)] <- namesFactUpperCI + res[(nfp + nresf + nct + ncff + 1):(nfp + nresf + (nresT)), ] <- ddlsm1[(nct + ncff + 1):(nresT), 8] + rownames(res)[(nfp + nresf + nct + ncff + 1):(nfp + nresf + (nresT))] <- namesUpperCI + + } else { + ## one of the subject has only one time, subject can't be a random variable + ## A repeated measure could be run instead function lme of package nlme, in next version? + res[1, ] <- NA + cat("\n Computing impossible for feature ", tit, ": at least one subject has only one time.\n") + } + tres <- data.frame(t(res)) + rownames(tres)[1] <- nameVar + cres <- list(tres, rn, fn) + return(cres) +} + +############################################################################################################## +lmRepeated1FF <- function(ids, ifixfact = 0, itime, isubject, ivd, ndim, nameVar = colnames(ids)[[ivd]], + dffOption, pvalCutof = 0.05) { + ### function to perform linear mixed model with factor Time + random factor subject + ### based on lmerTest package providing functions giving the same results as SAS proc mixed + + + if (!is.numeric(ids[[ivd]])) stop("Dependant variable is not numeric") + if (!is.factor(ids[[itime]])) stop("Repeated factor is not a factor") + if (!is.factor(ids[[isubject]])) stop("Random factor is not a factor") + # Could be interesting here to add an experience plan check to give back a specific error message + # in case time points are missing for some individuals + + time <- ids[[itime]] + subject <- ids[[isubject]] + vd <- ids[[ivd]] ## dependant variables (quatitative) + + # nfp : nombre de facteurs principaux + model infos + normality test + nfp <- ndim[1] + # nlt number of time levels; nct number of comparisons of the time factor + nlt <- ndim[4] + nct <- ndim[5] + # number of all lmer results + nresT <- nct + ## initialization of the result vector (1 line) + res <- data.frame(array(rep(NA, (nfp + 4 * nresT)))) + colnames(res)[1] <- "resultLM" + + ### if at least one subject have data for only 1 time, mixed model is not possible and variable must be skip + ### after excluding NA, table function is used to seek subjects with only 1 data + ids <- ids[!is.na(ids[[ivd]]), ] + skip <- length(which(table(ids[[isubject]]) == 1)) + + if (skip == 0) { + + mfl <- lmer(vd ~ time + (1 | subject), ids) # lmer remix + rsum <- summary(mfl, ddf = dffOption) + ## test Shapiro Wilks on the residus of the model + rShapiro <- shapiro.test(rsum$residuals) + raov <- anova(mfl, ddf = dffOption) + ## Sum of square : aov$'Sum Sq', Mean square : aov$`Mean Sq`, proba : aov$`Pr(>F)` + + ## Test of all differences estimates between levels as SAS proc mixed. + ## results are in diffs.lsmeans.table dataframe + ## test.effs=NULL perform all pairs comparisons including interaction effect + dlsm1 <- data.frame(difflsmeans(mfl, test.effs = NULL)) + ddlsm1 <- dlsm1 + ## save rownames and factor names + rn <- rownames(ddlsm1) + fn <- ddlsm1[, c(1, 2)] + ## writing the results on a single line + namesFactEstim <- paste("estimate ", rownames(ddlsm1)[c(1:(nct))], sep = "") + namesFactPval <- paste("pvalue ", rownames(ddlsm1)[c(1:(nct))], sep = "") + namesFactprinc <- "pval_time" + namesLowerCI <- paste("lowerCI ", rownames(ddlsm1)[c(1:(nct))], sep = "") + namesUpperCI <- paste("upperCI ", rownames(ddlsm1)[c(1:(nct))], sep = "") + + ### lmer results on 1 vector + # pvalue of shapiro Wilks test of the residuals + res[1, ] <- rShapiro$p.value; rownames(res)[1] <- "Shapiro.pvalue.residuals" + res[2, ] <- rsum$varcor$subject[1] ;rownames(res)[2] <- "Subject.Variance" + res[3, ] <- rsum$devcomp$cmp[7] ; rownames(res)[3] <- "REML" + + ### factor time pvalue results + shapiro test + res[nfp, ] <- raov[, 6] + rownames(res)[nfp] <- namesFactprinc + + # pvalues of time factor levels comparisons + res[(nfp + 1):(nfp + nct), ] <- ddlsm1[c(1:nct), 9] + rownames(res)[(nfp + 1):(nfp + nct)] <- namesFactPval + + # Estimates of time factor levels + nresf <- nresT + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 3] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct)] <- namesFactEstim + + # Lower CI of the difference between levels of factors + # nresf is incremeted + nresf <- nresf + nresT + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 7] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct)] <- namesLowerCI + # Lower CI of the difference between levels of factors + # nresf is incremeted + nresf <- nresf + nresT + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 8] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct)] <- namesUpperCI + + + } else { + ## one of the subject has only one time, subject can't be a random variable + ## A repeated measure could be run instead function lme of package nlme, next version + res[1, ] <- NA + cat("\n Computing impossible for feature ", colnames(ids)[4], ": at least one subject has only one time.\n") + } + tres <- data.frame(t(res)) + rownames(tres)[1] <- nameVar + cres <- list(tres, rn, fn) + return(cres) + +} + +############################################################################################################## +defColRes <- function(ids, ifixfact, itime) { + ## define the size of the result file depending on the numbers of levels of the fixed and time factor. + ## Numbers of levels define the numbers of comparisons with pvalue and estimate of the difference. + ## The result file also contains the pvalue of the fixed factor, time factor and interaction + ## plus Shapiro normality test. This is define by nfp + ## subscript of fixed factor=0 means no other fixed factor than "time" + if (ifixfact > 0) { + nfp <- 6 # shapiro + subject variance + REML + time + fixfact + interaction + time <- ids[[itime]] + fixfact <- ids[[ifixfact]] + + cat("\n Levels fixfact: ", levels(fixfact)) + cat("\n Levels time: ", levels(time)) + + # ncff number of comparisons of the fixed factor (nlff number of levels of fixed factor) + nlff <- length(levels(fixfact)) + ncff <- (nlff * (nlff - 1)) / 2 + # nct number of comparison of the time factor (nlt number of levels of time factor) + nlt <- length(levels(time)) + nct <- (nlt * (nlt - 1)) / 2 + # nci number of comparison of the interaction + nli <- nlff * nlt + nci <- (nli * (nli - 1)) / 2 + ndim <- c(NA, NA, NA, NA, NA, NA, NA) + + ndim[1] <- nfp # pvalues of fixed factor, time factor and interaction (3columns) and shapiro test pvalue + ndim[2] <- nlff # number of levels of fixed factor + ndim[3] <- ncff # number of comparisons (2by2) of the fixed factor + ndim[4] <- nlt # number of levels of time factor + ndim[5] <- nct # number of comparisons (2by2) of the time factor + ndim[6] <- nli # number of levels of interaction + ndim[7] <- nci # number of comparisons (2by2) of the interaction + + } + else { + nfp <- 4 # Mandatory columns: shapiro + subject variance + REML + time + time <- ids[[itime]] + # nct number of comparison of the time factor + nlt <- length(levels(time)) + nct <- (nlt * (nlt - 1)) / 2 + ndim <- c(NA, NA, NA, NA, NA, NA, NA) + + ndim[1] <- nfp # pvalues of shapiro test + subject variance + REML + time factor + ndim[4] <- nlt # number of levels of time factor + ndim[5] <- nct # number of comparisons (2by2) of the time factor + } + return(ndim) +} + +############################################################################################################## +lmixedm <- function(datMN, + samDF, + varDF, + fixfact, time, subject, + logtr = "none", + pvalCutof = 0.05, + pvalcorMeth = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7], + dffOption, + visu = "no", + least.confounded = FALSE, + outlier.limit = 3, + pdfC, + pdfE + ) { + sampids <- samDF + dataMatrix <- datMN + varids <- varDF + + options("scipen" = 50, "digits" = 5) + pvalCutof <- as.numeric(pvalCutof) + + cat("\n dff computation method=", dffOption) + ### Function running lmer function on a set of variables described in + ### 3 different dataframes as used by W4M + ### results are merge with the metadata variables varids + ### ifixfact, itime, isubject are subscripts of the dependant variables. if only time factor the ifixfat is set to 0 and no diag is performed (visu="no") + if (fixfact == "none") { + ifixfact <- 0 ; visu <- "no" + } else ifixfact <- which(colnames(sampids) == fixfact) + itime <- which(colnames(sampids) == time) + isubject <- which(colnames(sampids) == subject) + + lmmds <- dataMatrix + if (logtr != "log10" & logtr != "log2") logtr <- "none" + if (logtr == "log10") lmmds <- log10(lmmds + 1) + if (logtr == "log2") lmmds <- log2(lmmds + 1) + + dslm <- cbind(sampids, lmmds) + + nvar <- ncol(lmmds) + firstvar <- ncol(sampids) + 1 + lastvar <- firstvar + ncol(lmmds) - 1 + + if (ifixfact > 0) dslm[[ifixfact]] <- factor(dslm[[ifixfact]]) + dslm[[itime]] <- factor(dslm[[itime]]) + dslm[[isubject]] <- factor(dslm[[isubject]]) + ## call defColres to define the numbers of test and so the number of columns of results + ## depends on whether or not there is a fixed factor with time. If only time factor ifixfact=0 + if (ifixfact > 0) { + ndim <- defColRes(dslm[, c(ifixfact, itime)], ifixfact = 1, itime = 2) + nColRes <- ndim[1] + (4 * (ndim[3] + ndim[5] + ndim[7])) + firstpval <- ndim[1] - 2 + lastpval <- ndim[1] + ndim[3] + ndim[5] + ndim[7] + } else { + ndim <- defColRes(dslm[, itime], ifixfact = 0, itime = 1) + nColRes <- ndim[1] + (4 * (ndim[5])) + firstpval <- ndim[1] + lastpval <- ndim[1] + ndim[5] + } + ## initialisation of the result file + resLM <- data.frame(array(rep(NA, nvar * nColRes), dim = c(nvar, nColRes))) + rownames(resLM) <- rownames(varids) + + ## PDF initialisation + if (visu == "yes") { + pdf(pdfC, onefile = TRUE, height = 15, width = 30) + par(mfrow = c(1, 3)) + } + + + for (i in firstvar:lastvar) { + + subds <- dslm[, c(ifixfact, itime, isubject, i)] + + tryCatch({ + if (ifixfact > 0) + reslmer <- lmRepeated2FF(subds, ifixfact = 1, itime = 2, isubject = 3, ivd = 4, ndim = ndim, visu = visu, + tit = colnames(dslm)[i], pvalCutof = pvalCutof, + dffOption = dffOption, least.confounded = least.confounded, + outlier.limit = outlier.limit) + else + reslmer <- lmRepeated1FF(subds, ifixfact = 0, itime = 1, isubject = 2, ivd = 3, ndim = ndim, + pvalCutof = pvalCutof, dffOption = dffOption) + + resLM[i - firstvar + 1, ] <- reslmer[[1]] + }, error = function(e) { + cat("\nERROR with ", rownames(resLM)[i - firstvar + 1], ": ", conditionMessage(e), "\n") + } + ) + + } + + if (exists("reslmer")) { + colnames(resLM) <- colnames(reslmer[[1]]) + labelRow <- reslmer[[2]] + factorRow <- reslmer[[3]] + } else { + stop("\n- - - - -\nModel computation impossible for every single variables in the dataset: no result returned.\n- - - - -\n") + } + + if (visu == "yes") dev.off() + + + ## pvalue correction with p.adjust library multtest + ## Possible methods of pvalue correction + AdjustMeth <- c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none") + if (length(which(pvalcorMeth == AdjustMeth)) == 0) pvalcorMeth <- "none" + + if (pvalcorMeth != "none") { + for (k in firstpval:lastpval) { + resLM[[k]] <- p.adjust(resLM[[k]], method = pvalcorMeth, n = dim(resLM[k])[[1]]) + + } + } + + ## for each variables, set pvalues to NA and estimates = 0 when pvalue of factor > pvalCutof value define by user + if (ifixfact > 0) { + ## time effect + resLM[which(resLM[, firstpval] > pvalCutof), c((lastpval + 1):(lastpval + ndim[5]))] <- 0 + resLM[which(resLM[, firstpval] > pvalCutof), c((ndim[1] + 1):(ndim[1] + ndim[5]))] <- NA + ## treatment effect + resLM[which(resLM[, firstpval + 1] > pvalCutof), c((lastpval + ndim[5] + 1):(lastpval + ndim[5] + ndim[3]))] <- 0 + resLM[which(resLM[, firstpval + 1] > pvalCutof), c((ndim[1] + ndim[5] + 1):(ndim[1] + ndim[5] + ndim[3]))] <- NA + ## interaction effect + resLM[which(resLM[, firstpval + 2] > pvalCutof), c((lastpval + ndim[5] + ndim[3] + 1):(lastpval + ndim[5] + ndim[3] + ndim[7]))] <- 0 + resLM[which(resLM[, firstpval + 2] > pvalCutof), c((ndim[1] + ndim[5] + ndim[3] + 1):(ndim[1] + ndim[5] + ndim[3] + ndim[7]))] <- NA + } else { + ## time effect only + resLM[which(resLM[, firstpval] > pvalCutof), c((lastpval + 1):(lastpval + ndim[5]))] <- 0 + resLM[which(resLM[, firstpval] > pvalCutof), c((firstpval + 1):(firstpval + ndim[5]))] <- NA + } + + ## for each variable, estimates plots are performed if at least one factor is significant after p-value correction + pdf(pdfE, onefile = TRUE, height = 15, width = 30) + + ## for each variable (in row) + for (i in seq_len(nrow(resLM))) { + + ## if any fixed factor + time factor + if (ifixfact > 0) + + ## if any main factor after p-value correction is significant -> plot estimates and time course + if (length(which(resLM[i, c(4:6)] < pvalCutof)) > 0) { + + ## Plot of time course by fixfact : data prep with factors and quantitative var to be plot + subv <- dslm[, colnames(dslm) == rownames(resLM)[i]] + subds <- data.frame(dslm[[ifixfact]], dslm[[itime]], dslm[[isubject]], subv) + libvar <- c(fixfact, time, subject) + colnames(subds) <- c(libvar, rownames(resLM)[i]) + + ## Plot of estimates with error bars for all fixed factors and interaction + rddlsm1 <- t(resLM[i, ]) + pval <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "pvalue"] + esti <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "estima"] + loci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "lowerC"] + upci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "UpperC"] + rddlsm1 <- data.frame(pval, esti, loci, upci, factorRow) + colnames(rddlsm1) <- c("p.value", "Estimate", "Lower.CI", "Upper.CI", colnames(factorRow)) + rownames(rddlsm1) <- labelRow + + ## function for plotting these 2 graphs + plot.res.Lmixed(rddlsm1, subds, title = rownames(resLM)[i], pvalCutof = pvalCutof) + + } + + ## if only a time factor + if (ifixfact == 0) + + ## if time factor after p-value correction is significant -> plot time course + if (length(which(resLM[i, 4] < pvalCutof)) > 0) { + + ## Plot of time course : data prep with factors and quantitative var to be plot + subv <- dslm[, colnames(dslm) == rownames(resLM)[i]] + subds <- data.frame(dslm[[itime]], dslm[[isubject]], subv) + libvar <- c(time, subject) + colnames(subds) <- c(libvar, rownames(resLM)[i]) + + ## Plot of estimates with error bars for all fixed factors and interaction + rddlsm1 <- t(resLM[i, ]) + pval <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "pvalue"] + esti <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "estima"] + loci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "lowerC"] + upci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "upperC"] + rddlsm1 <- data.frame(pval, esti, loci, upci, factorRow) + colnames(rddlsm1) <- c("p.value", "Estimate", "Lower.CI", "Upper.CI", colnames(factorRow)) + rownames(rddlsm1) <- labelRow + + ## function for plotting these 2 graphs + plot.res.Lmixed(rddlsm1, subds, title = rownames(resLM)[i], pvalCutof = pvalCutof) + + } + + } + dev.off() + + ## return result file with pvalues and estimates (exclude confidence interval used for plotting) + iCI <- which(substr(colnames(resLM), 4, 7) == "erCI") + resLM <- resLM[, - iCI] + resLM <- cbind(varids, resLM) + return(resLM) +} diff -r 1422de181204 -r a3147e3d66e2 mixmodel4repeated_measures/mixmodel_wrapper.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel4repeated_measures/mixmodel_wrapper.R Mon May 16 12:31:58 2022 +0000 @@ -0,0 +1,200 @@ +#!/usr/bin/env Rscript + +library(lme4) ## mixed model computing +library(Matrix) +library(MASS) +library(lmerTest) ## computing pvalue and lsmeans from results of lme4 package +library(multtest) ## multiple testing +library(ggplot2) +library(gridExtra) +library(grid) + +source_local <- function(fname) { + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + source(paste(base_dir, fname, sep = "/")) +} + +source_local("mixmodel_script.R") +source_local("diagmfl.R") + +parse_args <- function() { + args <- commandArgs() + start <- which(args == "--args")[1] + 1 + if (is.na(start)) { + return(list()) + } + seq_by2 <- seq(start, length(args), by = 2) + result <- as.list(args[seq_by2 + 1]) + names(result) <- args[seq_by2] + return(result) +} + +argVc <- unlist(parse_args()) + +##------------------------------ +## Initializing +##------------------------------ + +## options +##-------- + +strAsFacL <- options()$stringsAsFactors +options(stringsAsFactors = FALSE) + +## constants +##---------- + +modNamC <- "mixmodel" ## module name + +topEnvC <- environment() +flagC <- "\n" + +## functions +##---------- + +flgF <- function(tesC, + envC = topEnvC, + txtC = NA) { ## management of warning and error messages + + tesL <- eval(parse(text = tesC), envir = envC) + + if (!tesL) { + + sink(NULL) + stpTxtC <- ifelse(is.na(txtC), + paste0(tesC, " is FALSE"), + txtC) + + stop(stpTxtC, + call. = FALSE) + + } + +} ## flgF + +## log file +##--------- + +sink(argVc["information"]) + +cat("\nStart of the '", modNamC, "' Galaxy module call: ", format(Sys.time(), "%a %d %b %Y %X"), "\n", sep = "") +cat("\nParameters used:\n\n") +print(argVc) +cat("\n\n") + +## loading +##-------- + +datMN <- t(as.matrix(read.table(argVc["dataMatrix_in"], + check.names = FALSE, + header = TRUE, + row.names = 1, + sep = "\t"))) + +samDF <- read.table(argVc["sampleMetadata_in"], + check.names = FALSE, + header = TRUE, + row.names = 1, + sep = "\t") + +varDF <- read.table(argVc["variableMetadata_in"], + check.names = FALSE, + header = TRUE, + row.names = 1, + sep = "\t") + + +## checking +##--------- + +flgF("identical(rownames(datMN), rownames(samDF))", txtC = "Column names of the dataMatrix are not identical to the row names of the sampleMetadata; check your data with the 'Check Format' module in the 'Quality Control' section") +flgF("identical(colnames(datMN), rownames(varDF))", txtC = "Row names of the dataMatrix are not identical to the row names of the variableMetadata; check your data with the 'Check Format' module in the 'Quality Control' section") + +flgF("argVc['time'] %in% colnames(samDF)", txtC = paste0("Required time factor '", argVc["time"], "' could not be found in the column names of the sampleMetadata")) +flgF("argVc['subject'] %in% colnames(samDF)", txtC = paste0("Required subject factor '", argVc["subject"], "' could not be found in the column names of the sampleMetadata")) + +flgF("mode(samDF[, argVc['time']]) %in% c('character', 'numeric')", txtC = paste0("The '", argVc["time"], "' column of the sampleMetadata should contain either number only, or character only")) +flgF("mode(samDF[, argVc['subject']]) %in% c('character', 'numeric')", txtC = paste0("The '", argVc["subject"], "' column of the sampleMetadata should contain either number only, or character only")) + +flgF("argVc['adjC'] %in% c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none')") +flgF("argVc['trf'] %in% c('none', 'log10', 'log2')") + +flgF("0 <= as.numeric(argVc['thrN']) && as.numeric(argVc['thrN']) <= 1", txtC = "(corrected) p-value threshold must be between 0 and 1") +flgF("argVc['diaR'] %in% c('no', 'yes')") + + +##------------------------------ +## Formating +##------------------------------ + +if (argVc["dff"] == "Satt") { + dffmeth <- "Satterthwaite" +} else { + dffmeth <- "Kenward-Roger" +} + + +##------------------------------ +## Computation +##------------------------------ + + +varDFout <- lmixedm(datMN = datMN, + samDF = samDF, + varDF = varDF, + fixfact = argVc["fixfact"], + time = argVc["time"], + subject = argVc["subject"], + logtr = argVc["trf"], + pvalCutof = argVc["thrN"], + pvalcorMeth = argVc["adjC"], + dffOption = dffmeth, + visu = argVc["diaR"], + least.confounded = FALSE, + outlier.limit = 3, + pdfC = argVc["out_graph_pdf"], + pdfE = argVc["out_estim_pdf"] + ) + + + + +##------------------------------ +## Rounding +##------------------------------ + +if (argVc["rounding"] == "yes") { + varDFout[, which(!(colnames(varDFout) %in% colnames(varDF)))] <- apply(varDFout[, which(!(colnames(varDFout) %in% colnames(varDF)))], 2, round, digits = as.numeric(argVc["decplaces"])) +} + +##------------------------------ +## Ending +##------------------------------ + + +## saving +##-------- + +varDFout <- cbind.data.frame(variableMetadata = rownames(varDFout), + varDFout) + +write.table(varDFout, + file = argVc["variableMetadata_out"], + quote = FALSE, + row.names = FALSE, + sep = "\t") + +## closing +##-------- + +cat("\n\nEnd of '", modNamC, "' Galaxy module call: ", + as.character(Sys.time()), "\n", sep = "") +cat("\nInformation about R (version, Operating System, attached or loaded packages):\n\n") +sessionInfo() + +sink() + +options(stringsAsFactors = strAsFacL) + +rm(list = ls()) diff -r 1422de181204 -r a3147e3d66e2 mixmodel4repeated_measures/test-data/OneFactor_dataMatrix.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel4repeated_measures/test-data/OneFactor_dataMatrix.tsv Mon May 16 12:31:58 2022 +0000 @@ -0,0 +1,23 @@ +variableMetadata s1D2 s2D2 s3D2 s1D3 s2D3 s3D3 s1D4 s2D4 s3D4 s1D5 s2D5 s3D5 s1D6 s2D6 s3D6 +V1 1.06E+06 1.07E+06 8.93E+05 9.11E+05 8.07E+05 6.90E+05 7.07E+05 7.66E+05 6.82E+05 6.25E+05 7.25E+05 6.78E+05 7.59E+05 6.70E+05 7.41E+05 +V2 4.67E+06 4.78E+06 4.17E+06 4.84E+06 4.50E+06 4.41E+06 6.96E+06 7.05E+06 6.65E+06 4.12E+06 4.42E+06 4.39E+06 1.26E+07 1.28E+07 1.32E+07 +V3 9.03E+05 8.44E+05 7.31E+05 9.97E+05 7.97E+05 7.47E+05 7.18E+05 8.28E+05 7.81E+05 9.08E+05 8.35E+05 8.37E+05 1.12E+06 1.11E+06 1.16E+06 +V4 1.62E+05 1.27E+05 1.21E+05 1.41E+05 8.48E+04 1.06E+05 9.72E+04 1.52E+05 1.27E+05 1.29E+05 1.42E+05 1.17E+05 1.72E+05 1.67E+05 1.87E+05 +V5 5.22E+04 6.20E+04 5.55E+04 4.78E+04 3.41E+04 5.84E+04 6.30E+04 1.11E+05 1.16E+05 4.59E+04 3.34E+04 4.08E+04 9.14E+04 6.78E+04 1.08E+05 +V6 1.68E+05 1.83E+05 2.76E+05 2.30E+05 1.62E+05 3.13E+05 4.47E+05 5.34E+05 6.59E+05 2.38E+05 2.30E+05 1.81E+05 4.93E+05 3.83E+05 6.30E+05 +V7 8.85E+06 8.39E+06 8.56E+06 5.51E+06 5.07E+06 4.90E+06 2.90E+06 3.10E+06 3.01E+06 2.36E+06 2.40E+06 2.30E+06 5.06E+06 4.94E+06 4.93E+06 +V8 2.53E+05 2.26E+05 1.90E+05 2.75E+05 2.67E+05 2.22E+05 2.12E+05 2.43E+05 1.72E+05 1.51E+05 1.56E+05 1.50E+05 2.49E+05 2.22E+05 2.48E+05 +V9 7.91E+05 7.16E+05 6.57E+05 6.08E+05 4.62E+05 4.88E+05 5.49E+05 6.13E+05 5.54E+05 7.56E+05 7.81E+05 7.59E+05 1.47E+06 1.46E+06 1.60E+06 +V10 1.76E+04 1.79E+04 1.86E+04 4.00E+04 4.27E+04 3.30E+04 5.34E+04 6.26E+04 6.09E+04 1.04E+05 8.24E+04 7.16E+04 2.09E+05 2.07E+05 2.19E+05 +V11 1.03E+05 9.28E+04 9.34E+04 8.70E+04 5.73E+04 8.05E+04 1.82E+06 1.98E+06 1.76E+06 8.41E+04 7.80E+04 7.31E+04 5.62E+04 5.01E+04 7.48E+04 +V12 4.48E+04 6.71E+04 4.27E+04 3.54E+04 3.04E+04 2.73E+04 2.47E+04 2.85E+04 3.26E+04 2.11E+04 1.55E+04 2.71E+04 3.44E+04 4.14E+04 5.87E+04 +V13 4.48E+04 6.71E+04 4.27E+04 3.54E+04 3.04E+04 2.73E+04 2.47E+04 2.85E+04 3.26E+04 2.11E+04 1.55E+04 2.71E+04 3.44E+04 4.14E+04 5.87E+04 +V14 1.16E+04 6.64E+03 1.29E+04 2.59E+04 1.55E+04 2.21E+04 3.15E+04 4.05E+04 3.10E+04 4.37E+04 2.90E+04 2.67E+04 6.62E+04 1.00E+05 8.75E+04 +V15 3.04E+05 3.05E+05 2.69E+05 2.57E+05 1.99E+05 2.16E+05 1.97E+05 1.97E+05 2.03E+05 2.65E+05 2.71E+05 2.60E+05 1.67E+05 1.89E+05 1.99E+05 +V16 2.69E+05 2.45E+05 2.18E+05 1.23E+05 1.27E+05 1.04E+05 2.91E+05 2.82E+05 2.79E+05 2.37E+05 2.11E+05 2.18E+05 5.60E+05 5.89E+05 6.08E+05 +V17 9.30E+04 8.55E+04 7.01E+04 4.84E+04 4.06E+04 3.14E+04 8.77E+04 1.03E+05 9.48E+04 9.88E+04 6.91E+04 7.81E+04 3.19E+05 3.25E+05 3.76E+05 +V18 2.30E+04 3.87E+04 2.01E+04 9.56E+03 1.27E+04 1.69E+04 2.38E+04 2.66E+04 2.99E+04 5.98E+04 4.93E+04 5.13E+04 1.08E+05 1.30E+05 1.36E+05 +V19 6.25E+04 7.43E+04 4.71E+04 1.62E+04 1.05E+04 1.57E+04 3.73E+04 3.08E+04 3.67E+04 3.50E+04 3.90E+04 2.78E+04 4.64E+05 1.60E+05 1.75E+05 +V20 1.72E+04 1.58E+04 1.35E+04 1.20E+04 1.02E+04 8.85E+03 1.35E+04 1.71E+04 1.62E+04 1.64E+04 8.41E+03 6.02E+03 5.07E+04 7.33E+04 6.57E+04 +V21 1.86E+06 1.90E+06 1.90E+06 2.05E+06 2.06E+06 2.07E+06 1.97E+06 1.94E+06 1.90E+06 1.64E+06 1.67E+06 1.78E+06 1.65E+06 1.73E+06 1.77E+06 +V22 1.52E+04 8.17E+03 1.37E+04 1.39E+04 1.34E+04 1.36E+04 1.40E+04 1.67E+04 1.60E+04 1.33E+04 1.69E+04 1.20E+04 1.05E+04 1.15E+04 1.11E+04 diff -r 1422de181204 -r a3147e3d66e2 mixmodel4repeated_measures/test-data/OneFactor_sampleMetadata.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel4repeated_measures/test-data/OneFactor_sampleMetadata.tsv Mon May 16 12:31:58 2022 +0000 @@ -0,0 +1,16 @@ +sampleMetadata time subject +s1D2 D2 s1 +s2D2 D2 s2 +s3D2 D2 s3 +s1D3 D3 s1 +s2D3 D3 s2 +s3D3 D3 s3 +s1D4 D4 s1 +s2D4 D4 s2 +s3D4 D4 s3 +s1D5 D5 s1 +s2D5 D5 s2 +s3D5 D5 s3 +s1D6 D6 s1 +s2D6 D6 s2 +s3D6 D6 s3 diff -r 1422de181204 -r a3147e3d66e2 mixmodel4repeated_measures/test-data/OneFactor_variableMetadata.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel4repeated_measures/test-data/OneFactor_variableMetadata.tsv Mon May 16 12:31:58 2022 +0000 @@ -0,0 +1,23 @@ +variableMetadata name +V1 name1 +V2 name2 +V3 name3 +V4 name4 +V5 name5 +V6 name6 +V7 name7 +V8 name8 +V9 name9 +V10 name10 +V11 name11 +V12 name12 +V13 name13 +V14 name14 +V15 name15 +V16 name16 +V17 name17 +V18 name18 +V19 name19 +V20 name20 +V21 name21 +V22 name22 diff -r 1422de181204 -r a3147e3d66e2 mixmodel4repeated_measures/test-data/TwoFactor_dataMatrix.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel4repeated_measures/test-data/TwoFactor_dataMatrix.txt Mon May 16 12:31:58 2022 +0000 @@ -0,0 +1,6 @@ +variable S01T0 S01T1 S01T2 S01T3 S02T0 S02T1 S02T2 S02T3 S03T0 S03T1 S03T2 S03T3 S04T0 S04T1 S04T2 S04T3 S05T0 S05T1 S05T2 S05T3 S06T0 S06T1 S06T2 S06T3 S07T0 S07T1 S07T2 S07T3 S08T0 S08T1 S08T2 S08T3 S09T0 S09T1 S09T2 S09T3 S10T0 S10T1 S10T2 S10T3 S11T0 S11T1 S11T2 S11T3 S12T0 S12T1 S12T2 S12T3 S13T0 S13T1 S13T2 S13T3 S14T0 S14T1 S14T2 S14T3 S15T0 S15T1 S15T2 S15T3 S16T0 S16T1 S16T2 S16T3 S17T0 S17T1 S17T2 S17T3 S18T0 S18T1 S18T2 S18T3 +v0.4425_385 126.39 234.11 162.68 115.29 172.44 171.53 180.31 114.94 224.14 195.09 189.45 122.79 212.02 243.48 188.42 138.91 241.59 192.95 214.74 98.69 270.03 176.79 208.73 78.04 247.82 176.51 175.78 98.94 260.63 181.93 178.06 111.58 218.95 176.24 200.29 112.77 140.23 157.38 124.62 114.34 147.23 165.29 125.85 128.41 126.35 139.75 127.95 74.86 124.92 155.12 109.4 86.88 145.57 142 135.29 83.08 116.95 142.28 113.43 118.26 80.81 146.03 130.52 103.4 79.58 168 109.35 88.19 138.12 166.61 115.05 83.05 +v0.4498_625 108.9 133.53 128.66 205.38 121.48 112.23 142.97 199.35 127.44 102.45 173.31 252.14 123.96 96.93 118.7 234.54 146.33 109.1 178.69 233.61 154.27 138.81 157.97 213.18 176.06 108.79 142.11 116.14 138.78 131.42 172.65 214.14 159.19 102.17 137.67 211.04 120.1 69.85 65.75 92.68 121.22 70.04 76.06 91.89 81.18 73.14 80.93 92.69 110.68 66.84 79.72 92.89 93.5 62.98 87.39 98.32 84.59 65.18 99.23 36.94 85.1 80.49 95.17 115.22 113.47 65.32 53.29 113.25 106.68 74.04 66.5 88.96 +v0.4711_463 124.1 166.84 156.18 82.57 158.4 190.09 152.89 100.76 155.75 174.85 153.14 112.76 171.54 150.37 163.49 116.06 198.78 161.53 247.61 122.94 167.38 186.12 194.59 93.05 208.24 150.15 140.97 55.62 200.32 138.59 145.99 89.44 192.49 131.44 145.89 93.38 102.94 67.11 83.99 86.86 110.09 98.61 77.91 100.75 136.32 92.56 86.79 75.62 144.39 65.27 110.24 94.26 136.48 77.62 95.43 60.3 135.98 87.54 65.37 92.04 112.09 58.9 92.35 72.92 132.84 97.12 80.71 79.01 144.75 85.78 73.61 83.68 +v0.4715_659 688.13 739.41 529.62 354.46 828.48 660.76 545.32 318.51 798.21 714.6 617.95 400.94 815.2 696.85 611.31 354.28 909.08 792.05 650.85 386.48 897.32 729.1 638.99 420.19 967.92 765.14 579.41 369.82 929.8 759.62 617.5 339.81 989.48 781.37 606.77 386.24 289.67 359.43 240.13 294.74 314.71 338.43 203.29 264.08 349.01 365.76 245.06 283.41 327.8 354.82 228.03 263.83 300.57 362.51 242.41 282.21 280.17 344.99 253.88 273.4 304.37 325.89 292.93 292.98 317.8 318.16 205.93 245.92 310.23 330.51 230.27 270.3 +v0.4766_757 117.8 110.31 114.56 99.99 144.39 155.33 114.47 122 147.16 132.82 129.78 94.86 128.26 161.38 119.12 96.73 177.26 134.94 149.39 134.12 163.22 146.12 148.26 108.68 159.39 140.75 119.11 65.83 170.45 131.79 163.58 104.63 167.33 112.88 142.58 85.38 80.53 84.3 42.33 62.04 73.74 97.78 58 79.33 91.7 82.39 69.71 83.58 77.86 65.56 74.11 82.9 90.53 75.27 80.46 81.88 92.77 52.94 67.99 84.21 91.27 69.54 77.13 85.27 93.18 83.68 57.01 65.09 92.12 82.63 70.9 75.53 diff -r 1422de181204 -r a3147e3d66e2 mixmodel4repeated_measures/test-data/TwoFactor_sampleMetadata.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel4repeated_measures/test-data/TwoFactor_sampleMetadata.txt Mon May 16 12:31:58 2022 +0000 @@ -0,0 +1,73 @@ +sample sujet phenotype time +S01T0 S01 WT T0 +S01T1 S01 WT T1 +S01T2 S01 WT T2 +S01T3 S01 WT T3 +S02T0 S02 WT T0 +S02T1 S02 WT T1 +S02T2 S02 WT T2 +S02T3 S02 WT T3 +S03T0 S03 WT T0 +S03T1 S03 WT T1 +S03T2 S03 WT T2 +S03T3 S03 WT T3 +S04T0 S04 WT T0 +S04T1 S04 WT T1 +S04T2 S04 WT T2 +S04T3 S04 WT T3 +S05T0 S05 WT T0 +S05T1 S05 WT T1 +S05T2 S05 WT T2 +S05T3 S05 WT T3 +S06T0 S06 WT T0 +S06T1 S06 WT T1 +S06T2 S06 WT T2 +S06T3 S06 WT T3 +S07T0 S07 WT T0 +S07T1 S07 WT T1 +S07T2 S07 WT T2 +S07T3 S07 WT T3 +S08T0 S08 WT T0 +S08T1 S08 WT T1 +S08T2 S08 WT T2 +S08T3 S08 WT T3 +S09T0 S09 WT T0 +S09T1 S09 WT T1 +S09T2 S09 WT T2 +S09T3 S09 WT T3 +S10T0 S10 MT T0 +S10T1 S10 MT T1 +S10T2 S10 MT T2 +S10T3 S10 MT T3 +S11T0 S11 MT T0 +S11T1 S11 MT T1 +S11T2 S11 MT T2 +S11T3 S11 MT T3 +S12T0 S12 MT T0 +S12T1 S12 MT T1 +S12T2 S12 MT T2 +S12T3 S12 MT T3 +S13T0 S13 MT T0 +S13T1 S13 MT T1 +S13T2 S13 MT T2 +S13T3 S13 MT T3 +S14T0 S14 MT T0 +S14T1 S14 MT T1 +S14T2 S14 MT T2 +S14T3 S14 MT T3 +S15T0 S15 MT T0 +S15T1 S15 MT T1 +S15T2 S15 MT T2 +S15T3 S15 MT T3 +S16T0 S16 MT T0 +S16T1 S16 MT T1 +S16T2 S16 MT T2 +S16T3 S16 MT T3 +S17T0 S17 MT T0 +S17T1 S17 MT T1 +S17T2 S17 MT T2 +S17T3 S17 MT T3 +S18T0 S18 MT T0 +S18T1 S18 MT T1 +S18T2 S18 MT T2 +S18T3 S18 MT T3 diff -r 1422de181204 -r a3147e3d66e2 mixmodel4repeated_measures/test-data/TwoFactor_variableMetadata.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel4repeated_measures/test-data/TwoFactor_variableMetadata.txt Mon May 16 12:31:58 2022 +0000 @@ -0,0 +1,6 @@ +variable num +v0.4425_385 1 +v0.4498_625 2 +v0.4711_463 3 +v0.4715_659 4 +v0.4766_757 5 diff -r 1422de181204 -r a3147e3d66e2 mixmodel4repeated_measures/test-data/mixmodel_TwoFactor_variableMetadata.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel4repeated_measures/test-data/mixmodel_TwoFactor_variableMetadata.txt Mon May 16 12:31:58 2022 +0000 @@ -0,0 +1,6 @@ +variableMetadata num Shapiro.pvalue.residuals Subject.Variance REML pval_time pval_trt pval_inter pvalue.timeT0...timeT1 pvalue.timeT0...timeT2 pvalue.timeT0...timeT3 pvalue.timeT1...timeT2 pvalue.timeT1...timeT3 pvalue.timeT2...timeT3 pvalue.fixfactMT...fixfactWT pvalue..timeT0.fixfactMT...timeT1.fixfactMT pvalue..timeT0.fixfactMT...timeT2.fixfactMT pvalue..timeT0.fixfactMT...timeT3.fixfactMT pvalue..timeT0.fixfactMT...timeT0.fixfactWT pvalue..timeT0.fixfactMT...timeT1.fixfactWT pvalue..timeT0.fixfactMT...timeT2.fixfactWT pvalue..timeT0.fixfactMT...timeT3.fixfactWT pvalue..timeT1.fixfactMT...timeT2.fixfactMT pvalue..timeT1.fixfactMT...timeT3.fixfactMT pvalue..timeT1.fixfactMT...timeT0.fixfactWT pvalue..timeT1.fixfactMT...timeT1.fixfactWT pvalue..timeT1.fixfactMT...timeT2.fixfactWT pvalue..timeT1.fixfactMT...timeT3.fixfactWT pvalue..timeT2.fixfactMT...timeT3.fixfactMT pvalue..timeT2.fixfactMT...timeT0.fixfactWT pvalue..timeT2.fixfactMT...timeT1.fixfactWT pvalue..timeT2.fixfactMT...timeT2.fixfactWT pvalue..timeT2.fixfactMT...timeT3.fixfactWT pvalue..timeT3.fixfactMT...timeT0.fixfactWT pvalue..timeT3.fixfactMT...timeT1.fixfactWT pvalue..timeT3.fixfactMT...timeT2.fixfactWT pvalue..timeT3.fixfactMT...timeT3.fixfactWT pvalue..timeT0.fixfactWT...timeT1.fixfactWT pvalue..timeT0.fixfactWT...timeT2.fixfactWT pvalue..timeT0.fixfactWT...timeT3.fixfactWT pvalue..timeT1.fixfactWT...timeT2.fixfactWT pvalue..timeT1.fixfactWT...timeT3.fixfactWT pvalue..timeT2.fixfactWT...timeT3.fixfactWT estimate.timeT0...timeT1 estimate.timeT0...timeT2 estimate.timeT0...timeT3 estimate.timeT1...timeT2 estimate.timeT1...timeT3 estimate.timeT2...timeT3 estimate.fixfactMT...fixfactWT estimate..timeT0.fixfactMT...timeT1.fixfactMT estimate..timeT0.fixfactMT...timeT2.fixfactMT estimate..timeT0.fixfactMT...timeT3.fixfactMT estimate..timeT0.fixfactMT...timeT0.fixfactWT estimate..timeT0.fixfactMT...timeT1.fixfactWT estimate..timeT0.fixfactMT...timeT2.fixfactWT estimate..timeT0.fixfactMT...timeT3.fixfactWT estimate..timeT1.fixfactMT...timeT2.fixfactMT estimate..timeT1.fixfactMT...timeT3.fixfactMT estimate..timeT1.fixfactMT...timeT0.fixfactWT estimate..timeT1.fixfactMT...timeT1.fixfactWT estimate..timeT1.fixfactMT...timeT2.fixfactWT estimate..timeT1.fixfactMT...timeT3.fixfactWT estimate..timeT2.fixfactMT...timeT3.fixfactMT estimate..timeT2.fixfactMT...timeT0.fixfactWT estimate..timeT2.fixfactMT...timeT1.fixfactWT estimate..timeT2.fixfactMT...timeT2.fixfactWT estimate..timeT2.fixfactMT...timeT3.fixfactWT estimate..timeT3.fixfactMT...timeT0.fixfactWT estimate..timeT3.fixfactMT...timeT1.fixfactWT estimate..timeT3.fixfactMT...timeT2.fixfactWT estimate..timeT3.fixfactMT...timeT3.fixfactWT estimate..timeT0.fixfactWT...timeT1.fixfactWT estimate..timeT0.fixfactWT...timeT2.fixfactWT estimate..timeT0.fixfactWT...timeT3.fixfactWT estimate..timeT1.fixfactWT...timeT2.fixfactWT estimate..timeT1.fixfactWT...timeT3.fixfactWT estimate..timeT2.fixfactWT...timeT3.fixfactWT +v0.4425_385 1 0.004001 0 -140.931331 0 0 0.000264 0.191909 0.277993 0 0.018695 0 0 0 0.001835 0.869646 0.006728 0 0 0 0.238471 0.002992 0 0.000044 0.003836 0.009486 0.000036 0.004238 0 0 0 0.180279 0 0 0 0.112058 0.170546 0.091717 0 0.745427 0 0 -0.030835 0.02558 0.192247 0.056415 0.223081 0.166666 -0.148875 -0.107496 -0.005448 0.092614 -0.252536 -0.206709 -0.195927 0.039343 0.102048 0.200111 -0.14504 -0.099213 -0.088431 0.146839 0.098062 -0.247088 -0.201261 -0.19048 0.044791 -0.34515 -0.299323 -0.288542 -0.053272 0.045827 0.056608 0.291879 0.010781 0.246052 0.23527 +v0.4498_625 2 0.000002 0 -120.084702 0 0 0.000801 0.000045 0.135374 0.047823 0.005607 0 0.000776 0 0.000128 0.004174 0.135488 0.000855 0.170794 0.000053 0 0.272071 0.012599 0 0.000001 0 0 0.149351 0 0.000049 0 0 0.000005 0.00515 0 0 0.038427 0.408218 0.000047 0.00448 0 0.000769 0.120489 0.041611 -0.055515 -0.078879 -0.176005 -0.097126 -0.249448 0.158724 0.115616 0.058829 -0.136156 -0.053901 -0.16855 -0.306015 -0.043108 -0.099895 -0.29488 -0.212625 -0.327274 -0.46474 -0.056787 -0.251772 -0.169517 -0.284166 -0.421632 -0.194985 -0.11273 -0.227379 -0.364845 0.082255 -0.032394 -0.16986 -0.114649 -0.252114 -0.137466 +v0.4711_463 3 0.474695 0.000093 -135.57425 0 0 0.000006 0.000013 0.000128 0 0.488485 0.000043 0.000004 0 0 0.000004 0.000001 0.000295 0.006152 0.002433 0.000335 0.51173 0.755875 0 0 0 0.046325 0.729076 0 0 0 0.173445 0 0 0 0.089918 0.319884 0.50045 0 0.745698 0 0 0.11757 0.100696 0.22644 -0.016874 0.108869 0.125743 -0.194248 0.200783 0.178185 0.190094 -0.13207 -0.097712 -0.108863 0.130715 -0.022598 -0.010689 -0.332853 -0.298495 -0.309645 -0.070068 0.011909 -0.310255 -0.275897 -0.287048 -0.04747 -0.322164 -0.287806 -0.298957 -0.059379 0.034358 0.023208 0.262785 -0.01115 0.228427 0.239578 +v0.4715_659 4 0.623926 0.000349 -238.084202 0 0 0 0.219999 0 0 0 0 0 0 0.001773 0 0.000326 0 0 0 0.000025 0 0 0 0 0 0.070602 0.000027 0 0 0 0 0 0 0 0 0.000006 0 0 0 0 0 0.012039 0.138015 0.211119 0.125976 0.19908 0.073104 -0.326171 -0.045354 0.116577 0.053052 -0.44462 -0.375188 -0.285168 -0.075434 0.161931 0.098406 -0.399266 -0.329834 -0.239814 -0.03008 -0.063525 -0.561197 -0.491765 -0.401744 -0.192011 -0.497672 -0.42824 -0.33822 -0.128486 0.069432 0.159453 0.369186 0.09002 0.299754 0.209734 +v0.4766_757 5 0.005552 0.000365 -149.179654 0.00001 0 0.00028 0.015826 0.000092 0.000002 0.083272 0.004665 0.23638 0 0.064552 0.000169 0.102895 0 0 0 0.062636 0.033569 0.819707 0 0 0 0.000447 0.019489 0 0 0 0 0 0 0 0.000898 0.106307 0.055879 0 0.755242 0.000055 0.000152 0.052746 0.090042 0.115324 0.037296 0.062579 0.025283 -0.224878 0.056412 0.121655 0.049578 -0.240112 -0.191032 -0.181683 -0.059041 0.065243 -0.006834 -0.296524 -0.247444 -0.238095 -0.115453 -0.072077 -0.361767 -0.312687 -0.303338 -0.180696 -0.28969 -0.24061 -0.231262 -0.10862 0.04908 0.058428 0.181071 0.009349 0.131991 0.122642 diff -r 1422de181204 -r a3147e3d66e2 mixmodel_script.R --- a/mixmodel_script.R Wed Oct 10 05:18:42 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,456 +0,0 @@ -####### R functions to perform linear mixed model for repeated measures -####### on a multi var dataset using 3 files as used in W4M -############################################################################################################## -lmRepeated2FF <- function(ids, ifixfact, itime, isubject, ivd, ndim, nameVar=colnames(ids)[[ivd]], - pvalCutof=0.05,dffOption, visu , tit = "", least.confounded = FALSE, outlier.limit =3) - { - ### function to perform linear mixed model with 1 Fixed factor + Time + random factor subject - ### based on lmerTest package providing functions giving the same results as SAS proc mixed - options(scipen = 50, digits = 5) - - if (!is.numeric(ids[[ivd]])) {stop("Dependant variable is not numeric")} - if (!is.factor(ids[[ifixfact]])) {stop("fixed factor is not a factor")} - if (!is.factor(ids[[itime]])) {stop("Repeated factor is not a factor")} - if (!is.factor(ids[[isubject]])) {stop("Random factor is not a factor")} - # a ce stade, il faudrait pr?voir des tests sur la validit? du plan d'exp?rience - - time <- ids[[itime]] - fixfact <- ids[[ifixfact]] - subject <- ids[[isubject]] - vd <- ids[[ivd]] - - # argument of the function instead of re re-running ndim <- defColRes(ids,ifixfact,itime) - # nfp : number of main factors + model infos (REML, varSubject) + normality test - nfp <- ndim[1]; - # ncff number of comparison of the fixed factor - nlff <- ndim[2]; ncff <- ndim[3] - # nct number of comparison of the time factor - nlt <- ndim[4] ; nct <- ndim[5] - # nci number of comparison of the interaction - nli <- ndim[6]; nci <- ndim[7] - # number of all lmer results - nresT <- ncff+nct+nci - ## initialization of the result vector (1 line) - ## 4 * because nresf for : pvalues + Etimates + lower CI + Upper CI - res <- data.frame(array(rep(NA,(nfp + 4 * nresT)))) - colnames(res)[1] <- "resultLM" - - ### if at least one subject have data for only 1 time, mixed model is not possible and variable must be skip - ### after excluding NA, table function is used to seek subjects with only 1 data - ids <- ids[!is.na(ids[[ivd]]),] - skip <- length(which(table(ids[[isubject]])==1)) - - if (skip==0) { - - mfl <- lmer( vd ~ time + fixfact + time:fixfact + (1| subject), ids) # lmer remix - - # ## NL add - # ### DEPLACE APRES CALCUL PVALUES AJUSTEES ET NE FAIRE QUE SI AU MOINS 1 FACTEUR SIGNIFICATIF - # if(visu) diagmflF(mfl, title = tit, least.confounded = least.confounded, outlier.limit = outlier.limit) - # ## end of NL add - - rsum <- summary(mfl,ddf = dffOption) - ## test Shapiro Wilks on the residus of the model - rShapiro <- shapiro.test(rsum$residuals) - raov <- anova(mfl,ddf = dffOption) - dlsm1 <- data.frame(difflsmeans(mfl,test.effs=NULL)) - ddlsm1 <- dlsm1 - ## save rownames and factor names - rn <- rownames(ddlsm1) - fn <- ddlsm1[,c(1,2)] - ## writing the results on a single line - namesFactEstim <- paste("estimate ",rownames(ddlsm1)[c(1:(nct+ncff))],sep="") - namesFactPval <- paste("pvalue ",rownames(ddlsm1)[c(1:(nct+ncff))],sep="") - namesInter <- rownames(ddlsm1)[-c(1:(nct+ncff))] - #ncI <- nchar(namesInter) - namesEstimate <- paste("estimate ",namesInter) - namespvalues <- paste("pvalue ",namesInter) - namesFactprinc <- c("pval_time","pval_trt","pval_inter") - namesFactEstim <- paste("estimate ",rownames(ddlsm1)[c(1:(nct+ncff))],sep="") - - namesFactLowerCI <- paste("lowerCI ",rownames(ddlsm1)[c(1:(nct+ncff))],sep="") - namesLowerCI <- paste("lowerCI ",namesInter,sep="") - - namesFactUpperCI <- paste("UpperCI ",rownames(ddlsm1)[c(1:(nct+ncff))],sep="") - namesUpperCI <- paste("UpperCI ",namesInter,sep="") - - - ### lmer results on 1 vector row - # pvalue of shapiro Wilks test of the residuals - res[1,] <- rShapiro$p.value; rownames(res)[1] <- "Shapiro.pvalue.residuals" - res[2,] <- rsum$varcor$subject[1] ;rownames(res)[2] <- "Subject.Variance" - res[3,] <- rsum$devcomp$cmp[7] ; rownames(res)[3] <- "REML" - ### 3 principal factors pvalues results + shapiro test => nfp <- 4 - res[c((nfp-2):nfp),] <- raov[,6]; rownames(res)[c((nfp-2):nfp)] <- namesFactprinc - - #################### Residuals diagnostics for significants variables ######################### - ### Il at least 1 factor is significant and visu=TRUE NL graphics add to pdf - ## ajout JF du passage de la valeur de p-value cutoff - if (length(which(raov[,6]<=pvalCutof))>0 & visu == 'yes') { - diagmflF(mfl, title = tit, pvalCutof = pvalCutof, least.confounded = least.confounded, - outlier.limit = outlier.limit) - - cat(" Signif ",pvalCutof) - - - } - - # pvalue of fixed factor comparisons - nresf <- nresT - res[(nfp+1):(nfp+nct),] <- ddlsm1[c(1:nct),9] - res[(nfp+nct+1):(nfp+nct+ncff),] <- ddlsm1[(nct+1):(nct+ncff),9] - rownames(res)[(nfp+1):(nfp+nct+ncff)] <- namesFactPval - res[(nfp+nct+ncff+1):(nfp+nresf),] <- ddlsm1[(nct+ncff+1):(nresT),9] - rownames(res)[(nfp+nct+ncff+1):(nfp+nresT)] <- namespvalues - # Estimate of the difference between levels of factors - res[(nfp+nresf+1):(nfp+nresf+nct),] <- ddlsm1[c(1:nct),3] - res[(nfp+nresf+nct+1):(nfp+nresf+nct+ncff),] <- ddlsm1[(nct+1):(nct+ncff),3] - rownames(res)[(nfp+nresf+1):(nfp+nresf+nct+ncff)] <- namesFactEstim - res[(nfp+nresf+nct+ncff+1):(nfp+2*nresf),] <- ddlsm1[(nct+ncff+1):(nresT),3] - rownames(res)[(nfp+nresf+nct+ncff+1):(nfp+2*nresf)] <- namesEstimate - # lower CI of the difference between levels of factors - nresf <- nresf + nresT - res[(nfp+nresf+1):(nfp+nresf+nct),] <- ddlsm1[c(1:nct),7] - res[(nfp+nresf+nct+1):(nfp+nresf+nct+ncff),] <- ddlsm1[(nct+1):(nct+ncff),7] - rownames(res)[(nfp+nresf+1):(nfp+nresf+nct+ncff)] <- namesFactLowerCI - res[(nfp+nresf+nct+ncff+1):(nfp+2*nresf),] <- ddlsm1[(nct+ncff+1):(nresf),7] - rownames(res)[(nfp+nresf+nct+ncff+1):(nfp+nresf+(nresf/2))] <- namesLowerCI - # Upper CI of the difference between levels of factors - nresf <- nresf + nresT - res[(nfp+nresf+1):(nfp+nresf+nct),] <- ddlsm1[c(1:nct),8] - res[(nfp+nresf+nct+1):(nfp+nresf+nct+ncff),] <- ddlsm1[(nct+1):(nct+ncff),8] - rownames(res)[(nfp+nresf+1):(nfp+nresf+nct+ncff)] <- namesFactUpperCI - res[(nfp+nresf+nct+ncff+1):(nfp+nresf+(nresT)),] <- ddlsm1[(nct+ncff+1):(nresT),8] - rownames(res)[(nfp+nresf+nct+ncff+1):(nfp+nresf+(nresT))] <- namesUpperCI - - - } - else - ## one of the subject has only one time, subject can't be a random variable - ## A repeated measure could be run instead function lme of package nlme, next version - { res[1,] <- NA - #cat("impossible computing\n") - - # # ## NL add (useless) - # if(visu){ - # grid.arrange(ggplot(data.frame()) + geom_point() + xlim(-1, 1) + ylim(-1, 1)+ - # annotate("text", x = 0, y = 0, label = "impossible computing")+ - # xlab(NULL) + theme(axis.text.x=element_blank(),axis.ticks.x=element_blank())+ - # ylab(NULL) + theme(axis.text.y=element_blank(),axis.ticks.y=element_blank())+ - # theme(panel.grid.minor = element_blank() , - # panel.grid.major = element_blank() , - # panel.background = element_rect(fill = "white")) - # , top = textGrob(tit,gp=gpar(fontsize=40,font=4))) - # - # } - # # ## end of NL add - - } - tres <- data.frame(t(res)); rownames(tres)[1] <- nameVar - cres <- list(tres,rn, fn) - return(cres) -} - -############################################################################################################## -lmRepeated1FF <- function(ids, ifixfact=0, itime, isubject, ivd, ndim, nameVar=colnames(ids)[[ivd]], - dffOption,pvalCutof=0.05) - { - ### function to perform linear mixed model with factor Time + random factor subject - ### based on lmerTest package providing functions giving the same results as SAS proc mixed - - if (!is.numeric(ids[[ivd]])) {stop("Dependant variable is not numeric")} - if (!is.factor(ids[[itime]])) {stop("Repeated factor is not a factor")} - if (!is.factor(ids[[isubject]])) {stop("Random factor is not a factor")} - # a ce stade, il faudrait pr?voir des tests sur la validit? du plan d'exp?rience - - time <- ids[[itime]] - subject <- ids[[isubject]] - vd <- ids[[ivd]] ## dependant variables (quatitative) - - # ndim <- defColRes(ids,0,itime) - # nfp : nombre de facteurs principaux + model infos + normality test - nfp <- ndim[1] - # nct number of comparison of the time factor - nlt <- ndim[4] ; nct <- ndim[5] - # number of all lmer results - nresf <- nct - ## initialization of the result vector (1 line) - res <- data.frame(array(rep(NA,(nfp+2*nresf)))) - colnames(res)[1] <- "resultLM" - - ### if at least one subject have data for only 1 time, mixed model is not possible and variable must be skip - ### after excluding NA, table function is used to seek subjects with only 1 data - ids <- ids[!is.na(ids[[ivd]]),] - skip <- length(which(table(ids[[isubject]])==1)) - - if (skip==0) { - - mfl <- lmer( vd ~ time + (1| subject), ids) # lmer remix - rsum <- summary(mfl,ddf = dffOption) - ## test Shapiro Wilks on the residus of the model - rShapiro <- shapiro.test(rsum$residuals) - raov <- anova(mfl,ddf = dffOption) - ## Sum of square : aov$'Sum Sq', Mean square : aov$`Mean Sq`, proba : aov$`Pr(>F)` - - ## Test of all differences estimates between levels as SAS proc mixed. - ## results are in diffs.lsmeans.table dataframe - ## test.effs=NULL perform all pairs comparisons including interaction effect - dlsm1 <- difflsmeans(mfl,test.effs=NULL) - ddlsm1 <- dlsm1$diffs.lsmeans.table - - ## writing the results on a single line - namesFactEstim <- paste("estimate ",rownames(ddlsm1)[c(1:(nct))],sep="") - namesFactPval <- paste("pvalue ",rownames(ddlsm1)[c(1:(nct))],sep="") - namesFactprinc <- "pval_time" - - ### lmer results on 1 vector - # pvalue of shapiro Wilks test of the residuals - res[1,] <- rShapiro$p.value; rownames(res)[1] <- "Shapiro.pvalue.residuals" - res[2,] <- rsum$varcor$subject[1] ;rownames(res)[2] <- "Subject.Variance" - res[3,] <- rsum$devcomp$cmp[7] ; rownames(res)[3] <- "REML" - - ### principal factor time pvalue results + shapiro test - res[nfp,] <- raov[,6]; rownames(res)[nfp] <- namesFactprinc - # pvalue of fixed factor comparisons - res[(nfp+1):(nfp+nct),] <- ddlsm1[c(1:nct),7] - rownames(res)[(nfp+1):(nfp+nct)] <- namesFactPval - - # Estimate of the difference between levels of factors - res[(nfp+nresf+1):(nfp+nresf+nct),] <- ddlsm1[c(1:nct),1] - rownames(res)[(nfp+nresf+1):(nfp+nresf+nct)] <- namesFactEstim - } - else - ## one of the subject has only one time, subject can't be a random variable - ## A repeated measure could be run instead function lme of package nlme, next version - { res[1,] <- NA - #cat("traitement impossible\n") - } - tres <- data.frame(t(res)); rownames(tres)[1] <- nameVar - return(tres) -} - -############################################################################################################## -defColRes <- function(ids, ifixfact, itime) { - ## define the size of the result file depending on the numbers of levels of the fixed and time factor. - ## Numbers of levels define the numbers of comparisons with pvalue and estimate of the difference. - ## The result file also contains the pvalue of the fixed factor, time factor and interaction - ## plus Shapiro normality test. This is define by nfp - ## subscript of fixed factor=0 means no other fixed factor than "time" - if (ifixfact>0){ - nfp <- 6 # shapiro+time+fixfact+interaction+ others.... - time <- ids[[itime]] - fixfact <- ids[[ifixfact]] - - cat("\n levels fixfact",levels(fixfact)) - cat("\n levels time",levels(time)) - - # ncff number of comparisons of the fixed factor (nlff number of levels of fixed factor) - nlff <- length(levels(fixfact)); ncff <- (nlff*(nlff-1))/2 - # nct number of comparison of the time factor (nlt number of levels of time factor) - nlt <- length(levels(time)); nct <- (nlt*(nlt-1))/2 - # nci number of comparison of the interaction - nli <- nlff*nlt; nci <- (nli*(nli-1))/2 - ndim <- c(NA,NA,NA,NA,NA,NA,NA) - - ndim[1] <- nfp # pvalues of fixed factor, time factor and interaction (3columns) and shapiro test pvalue - ndim[2] <- nlff # number of levels of fixed factor - ndim[3] <- ncff # number of comparisons (2by2) of the fixed factor - ndim[4] <- nlt # number of levels of time factor - ndim[5] <- nct # number of comparisons (2by2) of the time factor - ndim[6] <- nli # number of levels of interaction - ndim[7] <- nci # number of comparisons (2by2) of the interaction - - } - else { - nfp <- 4 # shapiro+time - time <- ids[[itime]] - # nct number of comparison of the time factor - nlt <- length(levels(time)); nct <- (nlt*(nlt-1))/2 - ndim <- c(NA,NA,NA,NA,NA,NA,NA) - - ndim[1] <- nfp # pvalues of time factor and shapiro test pvalue - ndim[4] <- nlt # number of levels of time factor - ndim[5] <- nct # number of comparisons (2by2) of the time factor - } - return(ndim) -} - -############################################################################################################## -lmixedm <- function(datMN, - samDF, - varDF, - fixfact, time, subject, - logtr = "none", - pvalCutof = 0.05, - pvalcorMeth = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7], - dffOption, - visu = "no", - least.confounded = FALSE, - outlier.limit = 3, - pdfC, - pdfE - ) - { - sampids <- samDF - dataMatrix <- datMN - varids <- varDF - - options("scipen" = 50, "digits" = 5) - pvalCutof <- as.numeric(pvalCutof) - - cat("\n dff computation method=",dffOption) - ### Function running lmer function on a set of variables described in - ### 3 different dataframes as used by W4M - ### results are merge with the metadata variables varids - ### ifixfact, itime, isubject are subscripts of the dependant variables - if (fixfact=="none") ifixfact <-0 else ifixfact <- which(colnames(sampids)==fixfact) - itime <- which(colnames(sampids)==time) - isubject <- which(colnames(sampids)==subject) - - #lmmds <- dataMatrix[,-1] - - lmmds <- dataMatrix - if (logtr!="log10" & logtr!="log2") logtr <- "none" - if (logtr=="log10") lmmds <- log10(lmmds+1) - if (logtr== "log2") lmmds <- log2(lmmds+1) - - #idsamp <- dataMatrix[,1] - #lmmds <- t(lmmds) - dslm <- cbind(sampids,lmmds) - - nvar <- ncol(lmmds); firstvar <- ncol(sampids)+1; lastvar <- firstvar+ncol(lmmds)-1 - - dslm[[ifixfact]] <- factor(dslm[[ifixfact]]) - dslm[[itime]] <- factor(dslm[[itime]]) - dslm[[isubject]] <- factor(dslm[[isubject]]) - ## call defColres to define the numbers of test and so the number of columns of results - ## depends on whether or not there is a fixed factor with time. If only time factor ifixfact=0 - if (ifixfact>0) { - ndim <- defColRes(dslm[,c(ifixfact,itime)],ifixfact=1,itime=2) - nColRes <- ndim[1]+(4*(ndim[3]+ndim[5]+ndim[7])) - firstpval <- ndim[1]-2 - lastpval <- ndim[1]+ndim[3]+ndim[5]+ndim[7] - } else - { - ndim <- defColRes(dslm[,itime],ifixfact=0,itime=1) - nColRes <- ndim[1]+(2*(ndim[5])) - firstpval <- ndim[1] - lastpval <- ndim[1]+ndim[5] - } - ## initialisation of the result file - resLM <- data.frame(array(rep(NA,nvar*nColRes),dim=c(nvar,nColRes))) - rownames(resLM) <- rownames(varids) - - ############### test ecriture dans pdf - if(visu == "yes") { - pdf(pdfC, onefile=TRUE, height = 15, width = 30) - par(mfrow=c(1,3)) - } - ############### fin test ecriture dans pdf - ## pour test : lastvar <- 15 - cat("\n pvalCutof ", pvalCutof) - - for (i in firstvar:lastvar) { - - ## NL modif - cat("\n[",colnames(dslm)[i],"] ") - ## end of NL modif - - subds <- dslm[,c(ifixfact,itime,isubject,i)] - - ## NL modif - tryCatch({ - if (ifixfact>0) - reslmer <- lmRepeated2FF(subds,ifixfact=1,itime=2,isubject=3, ivd=4, ndim=ndim, visu = visu, - tit = colnames(dslm)[i], pvalCutof=pvalCutof, - dffOption=dffOption,least.confounded = least.confounded, - outlier.limit = outlier.limit) - else - reslmer <- lmRepeated1FF(subds,ifixfact=0,1,2, ivd=3, ndim=ndim, pvalCutof=pvalCutof,dffOption) - ## end of NL modif - resLM[i-firstvar+1,] <- reslmer[[1]] - }, error=function(e){cat("ERROR : ",conditionMessage(e), "\n");}) - if (i==firstvar) { - colnames(resLM) <- colnames(reslmer[[1]]) - labelRow <- reslmer[[2]] - factorRow <- reslmer[[3]] - } - } - ## for debug : ifixfact=1;itime=2;isubject=3; ivd=4;tit = colnames(dslm)[i]; ids <- subds - - - ## NL add - if(visu == "yes") dev.off() - ## end of NL add - - ## pvalue correction with p.adjust library multtest - ## Possible methods of pvalue correction - AdjustMeth <- c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr","none") - if (length(which(pvalcorMeth == AdjustMeth))==0) pvalcorMeth <- "none" - - if (pvalcorMeth !="none") { - for (k in firstpval:lastpval){ - resLM[[k]]=p.adjust(resLM[[k]], method=pvalcorMeth, n=dim(resLM[k])[[1]]) - - } - } - - ## for each variables, set pvalues to NA and estimates = 0 when pvalue of factor > pvalCutof value define by user - if (ifixfact>0) { - ## time effect - resLM[which(resLM[,firstpval]> pvalCutof),c((lastpval+1):(lastpval+ndim[5]))] <- 0 - resLM[which(resLM[,firstpval]> pvalCutof),c((ndim[1]+1):(ndim[1]+ndim[5]))] <- NA - ## treatment effect - resLM[which(resLM[,firstpval+1]> pvalCutof),c((lastpval+ndim[5]+1):(lastpval+ndim[5]+ndim[3]))] <- 0 - resLM[which(resLM[,firstpval+1]> pvalCutof),c((ndim[1]+ndim[5]+1):(ndim[1]+ndim[5]+ndim[3]))] <- NA - ## interaction effect - resLM[which(resLM[,firstpval+2]> pvalCutof),c((lastpval+ndim[5]+ndim[3]+1):(lastpval+ndim[5]+ndim[3]+ndim[7]))] <- 0 - resLM[which(resLM[,firstpval+2]> pvalCutof),c((ndim[1]+ndim[5]+ndim[3]+1):(ndim[1]+ndim[5]+ndim[3]+ndim[7]))] <- NA - } else { - ## time effect only - resLM[which(resLM[,firstpval]> pvalCutof),c((lastpval+1):(lastpval+ndim[5]))] <- 0 - resLM[which(resLM[,firstpval]> pvalCutof),c((firstpval+1):(firstpval+ndim[5]))] <- NA - } - - ## for each variable, estimates plots are performed if at least one factor is significant after p-value correction - pdf(pdfE, onefile=TRUE, height = 15, width = 30) - #par(mfrow=c(2,2)) - - ## for each variable (in row) - for (i in 1:nrow(resLM)) { - #cat("\n",rownames(resLM)[i]) - ## if any main factor after p-value correction is significant -> plot estimates and time course - if (length(which(resLM[i,c(4:6)]0) { - - ## Plot of time course by fixfact : data prep with factors and quantitative var to be plot - subv <- dslm[,colnames(dslm)==rownames(resLM)[i]] - subds <- data.frame(dslm[[ifixfact]],dslm[[itime]], dslm[[isubject]],subv) - #colnames(subds) <- c(colnames(dslm)[ifixfact],colnames(dslm)[itime],colnames(dslm)[isubject],rownames(resLM)[i] <- rownames(resLM)[i] ) - libvar <- c(fixfact,time,subject) - colnames(subds) <- c(libvar,rownames(resLM)[i]) - - ## Plot of estimates with error bars for all fixed factors and interaction - rddlsm1 <- t(resLM[i,]) - pval <- rddlsm1[substr(rownames(rddlsm1),1,6)=="pvalue"] - esti <- rddlsm1[substr(rownames(rddlsm1),1,6)=="estima"] - loci <- rddlsm1[substr(rownames(rddlsm1),1,6)=="lowerC"] - upci <- rddlsm1[substr(rownames(rddlsm1),1,6)=="UpperC"] - rddlsm1 <- data.frame(pval,esti,loci,upci,factorRow) - colnames(rddlsm1) <- c("p.value","Estimate","Lower.CI","Upper.CI",colnames(factorRow)) - rownames(rddlsm1) <- labelRow - - ## function for plotting these 2 graphs - plot.res.Lmixed(rddlsm1, subds, title = rownames(resLM)[i], pvalCutof = pvalCutof) - - } - } - dev.off() - - ## return result file with pvalues and estimates (exclude confidence interval used for plotting) - iCI <- which(substr(colnames(resLM),4,7)=="erCI") - resLM <- resLM[,-iCI] - resLM <- cbind(varids,resLM) - return(resLM) -} - - diff -r 1422de181204 -r a3147e3d66e2 mixmodel_wrapper.R --- a/mixmodel_wrapper.R Wed Oct 10 05:18:42 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,164 +0,0 @@ -#!/usr/bin/env Rscript - -library(batch) ## parseCommandArgs - -library(lme4) ## mixed model computing -library(Matrix) -library(lmerTest) ## computing pvalue and lsmeans from results of lme4 package -library(multtest) ## multiple testing - - -source_local <- function(fname){ - argv <- commandArgs(trailingOnly = FALSE) - base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) - source(paste(base_dir, fname, sep="/")) -} - -#source_local("univariate_script.R") -source_local("mixmodel_script.R") -source_local("diagmfl.R") - -argVc <- unlist(parseCommandArgs(evaluate=FALSE)) - -##------------------------------ -## Initializing -##------------------------------ - -## options -##-------- - -strAsFacL <- options()$stringsAsFactors -options(stringsAsFactors = FALSE) - -## constants -##---------- - -modNamC <- "mixmodel" ## module name - -topEnvC <- environment() -flagC <- "\n" - -## functions -##---------- - -flgF <- function(tesC, - envC = topEnvC, - txtC = NA) { ## management of warning and error messages - - tesL <- eval(parse(text = tesC), envir = envC) - - if(!tesL) { - - sink(NULL) - stpTxtC <- ifelse(is.na(txtC), - paste0(tesC, " is FALSE"), - txtC) - - stop(stpTxtC, - call. = FALSE) - - } - -} ## flgF - -## log file -##--------- - -sink(argVc["information"]) - -cat("\nStart of the '", modNamC, "' Galaxy module call: ", format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="") - -## loading -##-------- - -datMN <- t(as.matrix(read.table(argVc["dataMatrix_in"], - check.names = FALSE, - header = TRUE, - row.names = 1, - sep = "\t"))) - -samDF <- read.table(argVc["sampleMetadata_in"], - check.names = FALSE, - header = TRUE, - row.names = 1, - sep = "\t") - -varDF <- read.table(argVc["variableMetadata_in"], - check.names = FALSE, - header = TRUE, - row.names = 1, - sep = "\t") - - -## checking -##--------- - -flgF("identical(rownames(datMN), rownames(samDF))", txtC = "Column names of the dataMatrix are not identical to the row names of the sampleMetadata; check your data with the 'Check Format' module in the 'Quality Control' section") -flgF("identical(colnames(datMN), rownames(varDF))", txtC = "Row names of the dataMatrix are not identical to the row names of the variableMetadata; check your data with the 'Check Format' module in the 'Quality Control' section") - -flgF("argVc['fixfact'] %in% colnames(samDF)", txtC = paste0("Required fixed factor '" , argVc['fixfact'], "' could not be found in the column names of the sampleMetadata")) -flgF("argVc['time'] %in% colnames(samDF)", txtC = paste0("Required time factor '" , argVc['time'] , "' could not be found in the column names of the sampleMetadata")) -flgF("argVc['subject'] %in% colnames(samDF)", txtC = paste0("Required subject factor '", argVc['subject'], "' could not be found in the column names of the sampleMetadata")) - -flgF("mode(samDF[, argVc['fixfact']]) %in% c('character', 'numeric')", txtC = paste0("The '", argVc['fixfact'], "' column of the sampleMetadata should contain either number only, or character only")) -flgF("mode(samDF[, argVc['time']]) %in% c('character', 'numeric')", txtC = paste0("The '", argVc['time'] , "' column of the sampleMetadata should contain either number only, or character only")) -flgF("mode(samDF[, argVc['subject']]) %in% c('character', 'numeric')", txtC = paste0("The '", argVc['subject'], "' column of the sampleMetadata should contain either number only, or character only")) - -flgF("argVc['adjC'] %in% c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none')") -flgF("argVc['trf'] %in% c('none', 'log10', 'log2')") - -flgF("0 <= as.numeric(argVc['thrN']) && as.numeric(argVc['thrN']) <= 1", txtC = "(corrected) p-value threshold must be between 0 and 1") -flgF("argVc['diaR'] %in% c('no', 'yes')") - - -##------------------------------ -## Computation -##------------------------------ - - -varDF <- lmixedm(datMN = datMN, - samDF = samDF, - varDF = varDF, - fixfact = argVc["fixfact"], - time = argVc["time"], - subject = argVc["subject"], - logtr = argVc['trf'], - pvalCutof = argVc['thrN'], - pvalcorMeth= argVc["adjC"], - dffOption = "Satterthwaite", - visu = argVc["diaR"], - least.confounded = FALSE, - outlier.limit = 3, - pdfC = argVc["out_graph_pdf"], - pdfE = argVc["out_estim_pdf"] - ) - - -##------------------------------ -## Ending -##------------------------------ - - -## saving -##-------- - -varDF <- cbind.data.frame(variableMetadata = rownames(varDF), - varDF) - -write.table(varDF, - file = argVc["variableMetadata_out"], - quote = FALSE, - row.names = FALSE, - sep = "\t") - -## closing -##-------- - -cat("\nEnd of '", modNamC, "' Galaxy module call: ", - as.character(Sys.time()), "\n", sep = "") - -sink() - -options(stringsAsFactors = strAsFacL) - -rm(list = ls()) diff -r 1422de181204 -r a3147e3d66e2 test-data/demo1_Samples.txt --- a/test-data/demo1_Samples.txt Wed Oct 10 05:18:42 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -idsample idsujet treatment time -s1AT1 s1 A T1 -s2AT1 s2 A T1 -s3AT1 s3 A T1 -s4AT1 s4 A T1 -s5BT1 s5 B T1 -s6BT1 s6 B T1 -s7BT1 s7 B T1 -s8BT1 s8 B T1 -s1AT2 s1 A T2 -s2AT2 s2 A T2 -s3AT2 s3 A T2 -s4AT2 s4 A T2 -s5BT2 s5 B T2 -s6BT2 s6 B T2 -s7BT2 s7 B T2 -s8BT2 s8 B T2 -s1AT3 s1 A T3 -s2AT3 s2 A T3 -s3AT3 s3 A T3 -s4AT3 s4 A T3 -s5BT3 s5 B T3 -s6BT3 s6 B T3 -s7BT3 s7 B T3 -s8BT3 s8 B T3 diff -r 1422de181204 -r a3147e3d66e2 test-data/demo1_matrix.txt --- a/test-data/demo1_matrix.txt Wed Oct 10 05:18:42 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -var s1AT1 s2AT1 s3AT1 s4AT1 s5BT1 s6BT1 s7BT1 s8BT1 s1AT2 s2AT2 s3AT2 s4AT2 s5BT2 s6BT2 s7BT2 s8BT2 s1AT3 s2AT3 s3AT3 s4AT3 s5BT3 s6BT3 s7BT3 s8BT3 -var1 2.944804979 49.75484579 25.89657977 31.21956172 645.7733673 207.8199481 993.1398444 771.2810657 33.8574053 10.27450194 42.27358423 0.259314085 727.698667 975.6024581 501.887448 101.55269 26.00199056 42.81100563 24.51317508 4.306905516 255.7178467 844.9974405 590.486975 586.4456671 -var2 3.667579463 305.5106362 689.8621337 960.0407436 515.7640893 925.1943751 414.9315685 285.7495409 948.0050691 821.9239079 686.8984302 254.7828577 866.587021 520.530133 835.3730004 966.3284721 165.9697054 726.3462446 72.57517711 370.4785168 349.7503837 368.2889788 592.7939878 502.4401631 -var3 172.8780668 522.9435356 60.99162384 981.0661058 838.0443428 245.4660659 320.52462 402.9176733 NA NA 924.6445422 109.0789884 NA 885.9453218 821.7969874 677.6235027 NA NA 573.2824613 809.8096851 NA 989.3551375 438.6697135 99.02984484 -var4 476.6964656 877.5340214 873.7968183 260.3147842 48.1446747 49.99827418 15.0500212 2.454515607 655.4373272 158.3886794 533.7596174 75.0229305 21.36363224 7.115041484 6.141056371 43.55349275 287.4224888 833.7583586 60.74044966 63.68551611 47.30457162 33.36978463 42.5284983 32.03159121 -var5 947.6929416 453.2570661 815.904467 347.4032396 24.53297914 787.2949824 241.7110583 538.24695 506.6958296 266.0208725 69.09085633 5.974715233 337.6606509 259.2015248 849.6838592 87.20042666 426.9964488 698.8356796 339.6737985 156.684501 86.32011891 30.19974119 978.5537816 199.8844321 -var6 31.82849796 44.69044713 38.31707386 18.51302603 214.3392118 712.6254117 505.3932955 707.9672445 46.62706509 43.77479739 8.943157968 39.26259778 662.8065096 808.9399247 352.764633 205.7874917 3.766673404 5.418740715 37.59618013 24.94170082 516.9694331 599.1699534 951.1241327 531.0029822 -var7 48.58915493 9.737940091 3.070545533 13.24393267 35.71840252 38.19313066 13.71876739 37.84443501 219.4206035 529.5333088 53.1798515 720.0361187 752.6900262 884.0021377 30.21174379 639.3453975 240.6978432 287.5449343 548.0445628 269.850493 742.9348096 278.06316 486.3017402 805.5343043 -var8 680.6370973 71.8828146 981.710732 221.6863381 12.03643502 18.67598209 34.62782874 47.00794697 160.3053655 566.2693544 829.457417 499.9228264 2.015472805 30.21909563 11.46036299 13.81338086 227.484243 99.45633839 681.8675495 973.652002 41.87877455 19.80605008 43.40502059 44.99090783 -var9 635.471832 144.3056969 334.1090387 194.5727886 405.5066688 413.3236386 237.899457 996.7447354 394.5598965 519.4043987 334.3332952 866.3076014 70.92464897 689.4803042 641.9404835 453.5002735 946.3136083 557.380772 725.8870537 496.8042416 251.7627825 818.34199 427.2466856 193.6539535 -var10 255.5452475 349.8598576 617.453113 707.2373216 370.3060574 469.7181814 950.858553 16.30868366 806.6413776 533.2787143 6.329383006 474.5446616 462.0279273 635.0181573 651.8771692 230.5368272 222.9092267 933.4693531 548.8575506 930.0440301 265.5413981 108.8758062 68.08971196 51.93824899 -var11 838.9985259 745.5941668 654.6952748 235.1477422 596.3175073 844.6677532 605.1399495 432.9177807 222.7906951 873.972494 822.0492133 36.99986055 537.8279339 822.7241457 5.419705705 722.8959106 881.2207647 635.6996394 677.2407238 714.8925607 389.7490318 807.083624 436.4182718 458.9015398 -var12 892.7197353 335.7591691 536.4785811 74.7070765 576.9435273 849.4743011 401.1719511 956.6287203 40.56447695 20.80947493 1.140173277 13.15588959 30.28143383 41.51846899 25.38649045 20.38415994 793.7861762 747.996181 964.4964041 786.1129855 923.5151221 394.6684835 826.0571324 213.7310158 -var13 499.4591238 575.155097 743.3397119 27.23255924 127.2970324 640.1411168 139.5822564 642.8816939 505.788513 749.2321969 691.3797399 885.1521409 60.46414235 591.4272376 94.30606951 970.3055793 755.4305079 932.7703718 653.9442423 940.2452863 661.9597309 255.5623762 72.89694711 37.33840067 -var14 764.8635745 186.441139 403.7151201 560.2602192 833.8914349 191.505535 44.84334646 467.5189404 79.4493536 212.8491263 238.0580142 375.2379514 182.983498 880.3888158 231.1490768 121.1663277 27.39904658 41.53081082 21.7902401 28.79650625 3.695658319 39.08581264 8.728099961 29.01601807 -var15 475.61245 685.4556671 190.0615424 471.9712592 718.2379497 840.0102468 692.8335088 147.6304425 290.2309756 931.9162133 270.459451 886.5880713 50.94873104 466.1788023 44.41386812 474.7108421 618.1677591 750.4606209 328.6055748 689.4155942 765.1291671 131.1187259 181.3857814 672.8191185 -var16 782.0253269 383.6194843 448.7394755 256.0739668 916.7521481 578.2756451 584.9660264 747.03336 39.43319406 29.70533995 1.212567512 14.64999391 21.14262619 23.16415025 49.11618914 11.17864695 406.1818878 150.94278 289.5691207 56.92421419 648.0250226 198.2729531 275.0748821 312.9787398 -var17 344.1042227 263.8577179 383.9310588 71.86662402 219.609616 503.536874 831.7376651 364.8343012 502.2406499 192.9684458 478.9487927 100.6659149 914.2793285 946.6503854 815.8690205 12.28421243 133.0699957 443.0292691 853.6526512 173.9554979 966.088388 28.54399015 544.6324093 166.515741 -var18 572.1893596 434.6787245 416.4450412 996.2426946 764.5658467 928.475767 126.9497817 380.3024547 830.1193473 743.9091564 787.7162003 10.2844036 322.4987626 109.0838311 514.5731206 704.5376167 291.0819021 240.2840956 384.5981756 574.0733832 193.6266192 406.8604216 499.6458882 508.3758382 -var19 962.9268096 732.892012 873.6750374 381.8819302 885.5713062 557.1618561 30.70125314 175.5702281 373.6292957 656.0401266 855.6345053 237.0343289 909.0191129 565.845622 2.619197312 592.4598408 271.6269463 908.7534668 154.3326283 741.619006 50.89536747 258.9720716 598.5982384 696.9350951 -var20 955.6208037 760.3496062 763.0027309 137.7903097 83.07543699 13.16372919 262.883898 621.5663619 241.9705169 587.1813263 529.9999891 796.1608483 506.5263383 518.8464669 709.2211864 119.5830928 688.7399221 205.5964318 896.0570768 766.2400908 71.40153132 982.4178461 108.0644696 708.0798878 -var21 425.811513 354.9341452 568.6882922 934.7958584 93.75223061 842.7176642 672.1633276 177.1402181 866.7983996 707.8481658 360.7181698 372.89955 215.7915665 748.8049642 48.04739129 648.7814873 887.4741981 858.4963219 356.8248511 218.9601844 296.7414322 977.8897077 492.9162228 600.3325479 -var22 135.8019557 182.3707909 758.4490827 115.3496569 78.0852081 891.4781454 578.9448879 908.1745803 179.7729789 442.3747186 494.3446595 643.9812239 801.4712403 147.5694536 178.2429548 116.486032 613.752729 444.8519937 223.645388 274.7196253 177.6650684 523.4303071 881.6579313 573.8312846 -var23 670.7139034 926.3557994 375.2748184 721.6170819 512.6321228 951.8431446 192.5348307 580.5435336 964.0723839 500.7694574 820.1932216 145.6134455 414.9974438 467.6073566 159.5392785 668.5341109 758.8131326 196.5248151 301.19594 845.4847165 494.6262648 194.4975994 729.2659019 837.0103437 -var24 111.361587 132.6771996 604.796782 193.4272655 633.4844772 754.5088368 252.8815967 743.4701613 220.2874864 256.9863425 617.2620172 706.159452 112.5974307 821.2765976 250.0502724 767.4552008 569.2572494 88.66317499 952.9486913 637.4739239 149.7656874 508.7793442 571.4643925 91.89315336 -var25 937.3637162 194.4973645 293.830158 483.5274779 539.5670407 241.9616259 1851.993664 552.4165266 851.6453972 801.2501283 852.2209093 244.5855762 883.9823192 1749.980578 166.3694876 342.9511308 345.159761 915.1125012 219.7324801 701.163318 1951.407992 1683.643056 980.1456989 377.8502832 -var26 211.2818569 592.3259982 273.7089841 504.2987201 554.036957 1922.475446 107.0711858 1449.683569 323.3498772 828.5355284 790.3779344 994.458895 1450.298244 511.7879493 1311.881338 773.2419236 564.8848834 427.1103398 493.8250068 863.9994595 1488.945818 657.0463188 1937.832317 1839.029818 -var27 839.6126127 280.2699035 669.4600993 438.9562325 267.2700714 1197.047553 699.8378806 890.4637327 0.075892853 3.35103701 970.023551 322.2403265 1099.36841 74.43502979 1785.51484 1459.638754 990.5479732 622.4609113 109.2705883 195.858416 1059.939364 1169.690758 279.4441535 511.1997851 -var28 433.1795515 659.3852008 304.342599 2.420033492 602.8680774 1663.739933 1022.569927 342.8899009 396.0198013 788.9557203 944.3908309 314.6700035 782.0471554 309.1982655 420.9024191 592.3394811 912.6321664 414.073431 482.1836798 300.8122585 689.3589282 407.5987977 1970.506461 1274.174077 -var29 75.1274556 667.9528257 949.5909143 332.3614352 797.9255526 1759.324563 1697.283532 1353.092656 190.6264431 16.51271938 563.7405071 674.3045958 229.4363482 1203.567204 1431.917446 854.3388249 726.1664838 236.2903037 715.0134159 201.2570507 705.0542982 1288.135354 1282.10826 742.1465211 -var30 372.3648963 386.2979775 484.8330806 607.8727647 844.1851676 1564.698819 225.4369875 222.5468039 675.5516492 900.2904825 321.0200632 124.8706177 219.1452961 1620.109962 1443.686531 30.20502128 711.3623969 472.7569176 131.2124078 4.11041311 314.6987183 1480.330689 1335.262401 1975.43864 -var31 830.7603594 877.7867945 167.2341465 890.4469672 475.8462996 1286.985999 325.0642312 1206.54618 871.5761917 924.3084225 375.9059868 841.7447645 223.2287944 489.4455531 1276.794717 555.4166789 665.9058972 900.4792176 606.8139413 662.8645656 1453.245611 386.6570783 28.38822884 850.4592565 -var32 287.6665566 704.2399753 202.4551918 749.5138905 251.7461375 27.86278674 1587.838351 725.2323834 927.9858767 879.604207 533.5445114 684.1694546 926.6281568 164.3194326 226.4151943 1448.842914 313.5647655 54.62111634 772.0807022 659.3092555 1303.021777 1946.40263 250.5414262 1999.222444 -var33 484.1877297 242.788894 391.1647338 296.6169619 566.7181555 556.7362445 1705.383419 118.3629431 196.6473393 943.5570103 514.8669868 994.6891166 1630.677314 1743.953602 1686.234573 1025.512791 139.1387544 102.1118108 138.6877081 679.5824526 1399.967774 1964.658448 1942.234917 345.9381031 -var34 654.4772218 855.843767 710.7433264 332.3445897 822.7186717 1732.030543 1071.843154 977.4774333 555.832087 947.4166002 845.8471497 52.26234852 1759.519813 252.0201757 132.8152855 460.9241952 589.2474936 793.5101148 323.7116484 698.2504426 949.3640103 549.1089047 1268.608356 1981.901916 -var35 649.8157994 596.3701266 459.9072448 636.8172769 756.8655665 1682.670261 1356.109792 1582.682511 984.4088071 737.4489045 431.9834858 175.3068228 1640.146811 611.9998332 1107.834995 983.1653253 636.2798018 0.545485104 498.2985486 657.9607458 1574.724131 725.3778129 1287.929168 396.8659763 -var36 524.2048663 329.6938613 884.8469578 890.8421732 27.18495267 914.3060082 196.5344281 625.4772393 818.8061525 722.1835868 872.4183559 972.937521 762.369294 793.0758578 1222.96799 360.5858065 95.91319149 654.6684817 373.220399 804.0331353 1402.29363 571.7247152 1489.69958 774.8183349 -var37 678.7777667 363.5061746 834.5242973 684.8352169 760.735293 222.6371125 60.32352661 867.7113126 479.1262946 325.0865937 434.5733939 893.4304109 1606.009688 509.1794575 1282.08164 1094.820467 12.68285865 2.822794117 940.643551 315.5426037 682.6764091 1041.812735 177.9454557 848.1309378 -var38 706.0233496 934.3787643 820.7876871 72.28857763 493.9847378 247.5833014 638.2539566 1344.694015 688.708953 343.7132988 491.9668005 757.1955777 573.0037148 1605.206211 298.9895983 1269.280218 994.2564165 207.6765578 299.4243272 956.917879 771.6799955 223.0914032 1996.397524 67.29379012 -var39 490.8321846 438.2277554 656.9503593 933.3194445 868.3614729 366.8485261 1513.524538 764.5321233 871.1015601 569.8457348 323.2530982 484.6563769 368.7677212 833.4574507 1390.149132 78.84558674 483.1216159 309.9850905 657.4770171 587.362726 308.5230175 1303.562509 1352.843606 474.7223959 -var40 50.90655113 69.18390457 457.9171462 405.0173308 146.918983 1142.527384 1412.337297 850.7689924 35.11405349 23.21297831 792.7634193 387.1686795 838.6976213 1603.972482 1325.575054 1448.659011 31.93235194 135.4397238 75.06461922 543.702984 216.6262105 220.2400455 1875.783019 1121.44313 diff -r 1422de181204 -r a3147e3d66e2 test-data/demo1_variables.txt --- a/test-data/demo1_variables.txt Wed Oct 10 05:18:42 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -var withNA -var1 0 -var2 0 -var3 1 -var4 0 -var5 0 -var6 0 -var7 0 -var8 0 -var9 0 -var10 0 -var11 0 -var12 0 -var13 0 -var14 0 -var15 0 -var16 0 -var17 0 -var18 0 -var19 0 -var20 0 -var21 0 -var22 0 -var23 0 -var24 0 -var25 0 -var26 0 -var27 0 -var28 0 -var29 0 -var30 0 -var31 0 -var32 0 -var33 0 -var34 0 -var35 0 -var36 0 -var37 0 -var38 0 -var39 0 -var40 0 diff -r 1422de181204 -r a3147e3d66e2 test-data/mixmodel_demo1_variables.txt --- a/test-data/mixmodel_demo1_variables.txt Wed Oct 10 05:18:42 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -variableMetadata withNA Shapiro.pvalue.residuals Subject.Variance REML pval_time pval_trt pval_inter pvalue.timeT1...timeT2 pvalue.timeT1...timeT3 pvalue.timeT2...timeT3 pvalue.fixfactA...fixfactB pvalue..timeT1.fixfactA...timeT2.fixfactA pvalue..timeT1.fixfactA...timeT3.fixfactA pvalue..timeT1.fixfactA...timeT1.fixfactB pvalue..timeT1.fixfactA...timeT2.fixfactB pvalue..timeT1.fixfactA...timeT3.fixfactB pvalue..timeT2.fixfactA...timeT3.fixfactA pvalue..timeT2.fixfactA...timeT1.fixfactB pvalue..timeT2.fixfactA...timeT2.fixfactB pvalue..timeT2.fixfactA...timeT3.fixfactB pvalue..timeT3.fixfactA...timeT1.fixfactB pvalue..timeT3.fixfactA...timeT2.fixfactB pvalue..timeT3.fixfactA...timeT3.fixfactB pvalue..timeT1.fixfactB...timeT2.fixfactB pvalue..timeT1.fixfactB...timeT3.fixfactB pvalue..timeT2.fixfactB...timeT3.fixfactB estimate.timeT1...timeT2 estimate.timeT1...timeT3 estimate.timeT2...timeT3 estimate.fixfactA...fixfactB estimate..timeT1.fixfactA...timeT2.fixfactA estimate..timeT1.fixfactA...timeT3.fixfactA estimate..timeT1.fixfactA...timeT1.fixfactB estimate..timeT1.fixfactA...timeT2.fixfactB estimate..timeT1.fixfactA...timeT3.fixfactB estimate..timeT2.fixfactA...timeT3.fixfactA estimate..timeT2.fixfactA...timeT1.fixfactB estimate..timeT2.fixfactA...timeT2.fixfactB estimate..timeT2.fixfactA...timeT3.fixfactB estimate..timeT3.fixfactA...timeT1.fixfactB estimate..timeT3.fixfactA...timeT2.fixfactB estimate..timeT3.fixfactA...timeT3.fixfactB estimate..timeT1.fixfactB...timeT2.fixfactB estimate..timeT1.fixfactB...timeT3.fixfactB estimate..timeT2.fixfactB...timeT3.fixfactB -var1 0 0.00152990892918407 0 254.537609570589 0.989895720028428 0.000138446468356235 0.999849571595415 NA NA NA 0.000138448022388127 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 -575.690811941667 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var2 0 0.960464619805585 0 262.571810056621 0.448325032764408 0.602586916593328 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var3 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var4 0 0.0441977246388017 7213.02082711591 254.223292186154 0.983047523518005 0.0690465434631971 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var5 0 0.064287156921339 17220.7805833911 266.097162357616 0.983047523518005 0.850320276396892 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var6 0 0.0381978835199867 0 244.361560756404 0.983047523518005 0.0000168443985191467 0.877942440820325 NA NA NA 0.0000168442499891452 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 -535.434188801917 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var7 0 0.193737436286724 0 254.906518619133 0.0156061251704555 0.293289541312568 0.877942440820325 0.0118965177919926 0.0269815160377476 0.977421297378864 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -453.53785986075 -432.3569423245 21.18091753625 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var8 0 0.014695144502403 15197.0828494013 259.222125604721 1 0.0697161146068648 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var9 0 0.148540192056018 0.00000000018132043075478 259.983587153763 0.983047523518005 0.818083737709546 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var10 0 0.768501224171673 4386.50412060244 262.180681429873 0.983047523518005 0.372676427326723 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var11 0 0.711106348589254 19233.8026294981 260.769418500537 0.983047523518005 0.819711630041906 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var12 0 0.388310543095299 0 254.652163866062 0.000464327919921784 1 0.862142650192906 0.00237921060351957 0.90841262671406 0.000434791514257952 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 553.830311730375 -128.310054849999 -682.140366580375 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var13 0 0.846722054364422 0 263.195387302844 0.983047523518005 0.10284211352158 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var14 0 0.0360856422369758 0 255.35073473538 0.0804717081264756 1 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var15 0 0.0626184338173826 8153.26008240148 262.067244816087 0.983047523518005 0.592583606736207 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var16 0 0.171687740370142 10568.0720173727 237.006726048068 0.0000604559603062954 0.372676427326723 0.862142650192906 0.0000143686244382967 0.00841082627257787 0.00921196337149363 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 563.485340642249 294.939479101249 -268.545861541 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var17 0 0.746933462377266 23409.8484229528 266.135923308681 0.983047523518005 0.46568888380733 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var18 0 0.735764575145315 0 262.073544952402 0.983047523518005 0.71138248160657 0.924943894585718 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var19 0 0.194540582137284 0 268.220382686494 0.983047523518005 0.477011066880217 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var20 0 0.287706917791718 0 266.878020990071 0.983047523518005 0.286161368832769 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var21 0 0.0515625925674887 21271.8195701478 265.151387312325 0.983047523518005 0.71138248160657 0.924943894585718 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var22 0 0.607405931022872 0 263.535518930099 0.983047523518005 0.569060445862623 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var23 0 0.212615804058253 0 263.751117472845 0.983047523518005 0.644748172383697 0.924943894585718 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var24 0 0.453100802782389 29510.6568674354 260.71769299974 0.989895720028428 0.853217576536927 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var25 0 0.511477701192744 0 286.65672225126 0.983047523518005 0.286161368832769 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var26 0 0.815661522629652 0 281.509414777202 0.983047523518005 0.0690465434631971 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var27 0 0.653920548984792 0 280.838740914533 0.989895720028428 0.150203459443103 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var28 0 0.719759503524831 0 277.41708583405 0.983047523518005 0.1969140094154 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var29 0 0.725675776865143 86615.1480315815 268.196774164521 0.448325032764408 0.121472365285413 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var30 0 0.857478473660295 37081.0431925278 286.162827544403 0.983047523518005 0.253534413626269 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var31 0 0.751744017902084 0 276.362228496729 0.989895720028428 1 0.924943894585718 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var32 0 0.978108618401383 0 285.72256677923 0.983047523518005 0.306152094558791 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var33 0 0.99993316514364 124822.847216428 276.589351488647 0.144068402464455 0.121472365285413 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var34 0 0.242528626574342 0 281.137098865892 0.983047523518005 0.1969140094154 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var35 0 0.508321244841884 0 273.186257975745 0.983047523518005 0.0141111367336112 0.924943894585718 NA NA NA 0.0141114522811781 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 -603.435761166334 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var36 0 0.316164521545131 0.0000000010285854101153 268.90084474405 0.983047523518005 0.644748172383697 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var37 0 0.953719843399109 20144.5877011241 271.448160048801 0.806157065268918 0.356762591033804 0.862142650192906 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var38 0 0.428134779270157 0 285.508280238472 0.989895720028428 0.592583606736207 0.924943894585718 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var39 0 0.868929253676507 74721.1043100503 271.9525623048 0.983047523518005 0.569060445862623 0.924943894585718 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -var40 0 0.967902408294312 97407.8193511745 276.870664881354 0.983047523518005 0.121472365285413 0.877942440820325 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff -r 1422de181204 -r a3147e3d66e2 test-data/test_raw_variable_out.txt --- a/test-data/test_raw_variable_out.txt Wed Oct 10 05:18:42 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -variableMetadata var withNA Shapiro.pvalue.residuals Subject.Variance REML pval_time pval_trt pval_inter pvalue.time.T1...T2 pvalue.time.T1...T3 pvalue.time.T2...T3 pvalue.fixfact.A...B pvalue.T1.A....T2.A pvalue.T1.A....T3.A pvalue.T1.A....T1.B pvalue.T1.A....T2.B pvalue.T1.A....T3.B pvalue.T2.A....T3.A pvalue.T2.A....T1.B pvalue.T2.A....T2.B pvalue.T2.A....T3.B pvalue.T3.A....T1.B pvalue.T3.A....T2.B pvalue.T3.A....T3.B pvalue.T1.B....T2.B pvalue.T1.B....T3.B pvalue.T2.B....T3.B estimate.time.T1...T2 estimate.time.T1...T3 estimate.time.T2...T3 estimate.fixfact.A...B estimate.T1.A....T2.A estimate.T1.A....T3.A estimate.T1.A....T1.B estimate.T1.A....T2.B estimate.T1.A....T3.B estimate.T2.A....T3.A estimate.T2.A....T1.B estimate.T2.A....T2.B estimate.T2.A....T3.B estimate.T3.A....T1.B estimate.T3.A....T2.B estimate.T3.A....T3.B estimate.T1.B....T2.B estimate.T1.B....T3.B estimate.T2.B....T3.B -var1 var1 0 0.00152990892918407 0 254.537609570589 0.989895081688929 0.000142332953130264 0.999851637979215 NA NA NA 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -575.6908 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var2 var2 0 0.960464619805585 0 262.571810056621 0.443498818238816 0.602652253312485 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var3 var3 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var4 var4 0 0.0441977246388017 7213.02082711591 254.223292186154 0.983046491866558 0.0690481436570156 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var5 var5 0 0.064287156921339 17220.7805833911 266.097162357616 0.983046491866558 0.850330775367304 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var6 var6 0 0.0381978835199867 0 244.361560756404 0.983046491866558 1.69724227738755e-05 0.877939640795251 NA NA NA 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -535.4342 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var7 var7 0 0.193737436286724 0 254.906518619133 0.0157977802240783 0.293529919182109 0.877939640795251 0.012 0.028 0.977485714285714 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -453.5379 -432.3569 21.1809 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var8 var8 0 0.014695144502403 15197.0828494013 259.222125604721 1 0.0697160454979429 1 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var9 var9 0 0.148540192056018 1.8132043075478e-10 259.983587153763 0.983046491866558 0.818040560247076 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var10 var10 0 0.768501224171673 4386.50412060244 262.180681429873 0.983046491866558 0.372670267631705 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var11 var11 0 0.711106348589254 19233.8026294981 260.769418500537 0.983046491866558 0.819708219928867 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var12 var12 0 0.388310543095299 0 254.652163866062 0.000451504869487529 1 0.861541599913403 0.002 0.908 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 553.8303 -128.3101 -682.1404 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var13 var13 0 0.846722054364422 0 263.195387302844 0.983046491866558 0.102587754537138 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var14 var14 0 0.0360856422369758 0 255.35073473538 0.0804913814946417 1 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var15 var15 0 0.0626184338173826 8153.26008240148 262.067244816087 0.983046491866558 0.592722965710858 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var16 var16 0 0.171687740370142 10568.0720173727 237.006726048068 6.02686289052912e-05 0.372670267631705 0.861541599913403 0 0.008 0.01 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 563.4853 294.9395 -268.5459 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var17 var17 0 0.746933462377266 23409.8484229528 266.135923308681 0.983046491866558 0.465689496368746 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var18 var18 0 0.735764575145315 0 262.073544952402 0.983046491866558 0.711384175405778 0.924958848665671 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var19 var19 0 0.194540582137284 0 268.220382686494 0.983046491866558 0.477013011572336 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var20 var20 0 0.287706917791718 0 266.878020990071 0.983046491866558 0.285791545615653 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var21 var21 0 0.0515625925674887 21271.8195701478 265.151387312325 0.983046491866558 0.711384175405778 0.924958848665671 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var22 var22 0 0.607405931022872 0 263.535518930099 0.983046491866558 0.568870942400682 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var23 var23 0 0.212615804058253 0 263.751117472845 0.983046491866558 0.644803266722344 0.924958848665671 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var24 var24 0 0.453100802782389 29510.6568674354 260.71769299974 0.989895081688929 0.853232770669574 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var25 var25 0 0.511477701192744 0 286.65672225126 0.983046491866558 0.285791545615653 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var26 var26 0 0.815661522629652 0 281.509414777202 0.983046491866558 0.0688280391920837 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var27 var27 0 0.653920548984792 0 280.838740914533 0.989895081688929 0.14979041564372 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var28 var28 0 0.719759503524831 0 277.41708583405 0.983046491866558 0.196622531113559 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var29 var29 0 0.725675776865143 86615.1480315815 268.196774164521 0.443498818238816 0.121470044896006 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var30 var30 0 0.857478473660295 37081.0431925278 286.162827544403 0.983046491866558 0.253530786238282 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var31 var31 0 0.751744017902084 0 276.362228496729 0.989895081688929 1 0.924958848665671 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var32 var32 0 0.978108618401383 0 285.72256677923 0.983046491866558 0.306024424517496 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var33 var33 0 0.99993316514364 124822.847216428 276.589351488647 0.147948925847613 0.121470044896006 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var34 var34 0 0.242528626574342 0 281.137098865892 0.983046491866558 0.196622531113559 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var35 var35 0 0.508321244841884 0 273.186257975745 0.983046491866558 0.0141133867640431 0.924958848665671 NA NA NA 0.0146666666666667 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -603.4358 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var36 var36 0 0.316164521545126 2.61433151397349e-10 268.90084474405 0.983046491866558 0.644803266722344 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var37 var37 0 0.953719843399109 20144.5877011241 271.448160048801 0.800264575774149 0.356750278198418 0.861541599913403 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var38 var38 0 0.428134779270157 0 285.508280238472 0.989895081688929 0.592722965710858 0.924958848665671 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var39 var39 0 0.868929253676507 74721.1043100503 271.9525623048 0.983046491866558 0.568870942400682 0.924958848665671 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA -var40 var40 0 0.967902408294312 97407.8193511745 276.870664881354 0.983046491866558 0.121470044896006 0.877939640795251 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA