comparison univariate_script.R @ 3:140290de7986 draft

planemo upload for repository https://github.com/workflow4metabolomics/univariate.git commit 27bc6157f43574f038b3fb1be1f46ce4786e24b1
author ethevenot
date Sun, 30 Oct 2016 14:17:09 -0400
parents 09799fc16bc6
children
comparison
equal deleted inserted replaced
2:09799fc16bc6 3:140290de7986
2 samDF, 2 samDF,
3 varDF, 3 varDF,
4 facC, 4 facC,
5 tesC = c("ttest", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[1], 5 tesC = c("ttest", "wilcoxon", "anova", "kruskal", "pearson", "spearman")[1],
6 adjC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7], 6 adjC = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7],
7 thrN = 0.05) { 7 thrN = 0.05,
8 pdfC) {
8 9
9 10
10 ## Option 11 ## Option
11 ##---------------
12 12
13 strAsFacL <- options()$stringsAsFactors 13 strAsFacL <- options()$stringsAsFactors
14 options(stingsAsFactors = FALSE) 14 options(stingsAsFactors = FALSE)
15 options(warn = -1) 15 options(warn = -1)
16
17 ## Getting the response (either a factor or a numeric)
16 18
17 if(mode(samDF[, facC]) == "character") { 19 if(mode(samDF[, facC]) == "character") {
18 facFcVn <- factor(samDF[, facC]) 20 facFcVn <- factor(samDF[, facC])
19 facLevVc <- levels(facFcVn) 21 facLevVc <- levels(facFcVn)
20 } else 22 } else
22 24
23 cat("\nPerforming '", tesC, "'\n", sep="") 25 cat("\nPerforming '", tesC, "'\n", sep="")
24 26
25 varPfxC <- paste0(make.names(facC), "_", tesC, "_") 27 varPfxC <- paste0(make.names(facC), "_", tesC, "_")
26 28
29
27 if(tesC %in% c("ttest", "wilcoxon", "pearson", "spearman")) { 30 if(tesC %in% c("ttest", "wilcoxon", "pearson", "spearman")) {
28 31
32
29 switch(tesC, 33 switch(tesC,
30 ttest = { 34 ttest = {
31 staF <- function(y) diff(tapply(y, facFcVn, function(x) mean(x, na.rm = TRUE))) 35 staF <- function(y) diff(tapply(y, facFcVn, function(x) mean(x, na.rm = TRUE)))
32 tesF <- function(y) t.test(y ~ facFcVn)[["p.value"]] 36 tesF <- function(y) t.test(y ~ facFcVn)[["p.value"]]
33 }, 37 },
44 tesF <- function(y) cor.test(facFcVn, y, method = "spearman", use = "pairwise.complete.obs")[["p.value"]] 48 tesF <- function(y) cor.test(facFcVn, y, method = "spearman", use = "pairwise.complete.obs")[["p.value"]]
45 }) 49 })
46 50
47 staVn <- apply(datMN, 2, staF) 51 staVn <- apply(datMN, 2, staF)
48 52
49 fdrVn <- p.adjust(apply(datMN, 53 adjVn <- p.adjust(apply(datMN,
50 2, 54 2,
51 tesF), 55 tesF),
52 method = adjC) 56 method = adjC)
53 57
54 sigVn <- as.numeric(fdrVn < thrN) 58 sigVn <- as.numeric(adjVn < thrN)
55 59
56 if(tesC %in% c("ttest", "wilcoxon")) 60 if(tesC %in% c("ttest", "wilcoxon"))
57 varPfxC <- paste0(varPfxC, paste(rev(facLevVc), collapse = "."), "_") 61 varPfxC <- paste0(varPfxC, paste(rev(facLevVc), collapse = "."), "_")
58 62
59 varDF[, paste0(varPfxC, ifelse(tesC %in% c("ttest", "wilcoxon"), "dif", "cor"))] <- staVn 63 varDF[, paste0(varPfxC, ifelse(tesC %in% c("ttest", "wilcoxon"), "dif", "cor"))] <- staVn
60 64
61 varDF[, paste0(varPfxC, adjC)] <- fdrVn 65 varDF[, paste0(varPfxC, adjC)] <- adjVn
62 66
63 varDF[, paste0(varPfxC, "sig")] <- sigVn 67 varDF[, paste0(varPfxC, "sig")] <- sigVn
64 68
69 ## graphic
70
71 pdf(pdfC, onefile = TRUE)
72
73 varVi <- which(sigVn > 0)
74
75 if(tesC %in% c("ttest", "wilcoxon")) {
76
77 facVc <- as.character(facFcVn)
78 names(facVc) <- rownames(samDF)
79
80 for(varI in varVi) {
81
82 varC <- rownames(varDF)[varI]
83
84 boxF(facFcVn,
85 datMN[, varI],
86 paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")"),
87 facVc)
88
89 }
90
91 } else { ## pearson or spearman
92
93 for(varI in varVi) {
94
95 varC <- rownames(varDF)[varI]
96
97 mod <- lm(datMN[, varI] ~ facFcVn)
98
99 plot(facFcVn, datMN[, varI],
100 xlab = facC,
101 ylab = "",
102 pch = 18,
103 main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ", R2 = ", signif(summary(mod)$r.squared, 2), ")"))
104
105 abline(mod, col = "red")
106
107 }
108
109 }
110
111 dev.off()
112
113
65 } else if(tesC == "anova") { 114 } else if(tesC == "anova") {
66 115
116
67 ## getting the names of the pairwise comparisons 'class1Vclass2' 117 ## getting the names of the pairwise comparisons 'class1Vclass2'
68 prwVc <- rownames(TukeyHSD(aov(datMN[, 1] ~ facFcVn))[["facFcVn"]]) 118 prwVc <- rownames(TukeyHSD(aov(datMN[, 1] ~ facFcVn))[["facFcVn"]])
69 119
70 prwVc <- gsub("-", ".", prwVc, fixed = TRUE) ## 2016-08-05: '-' character in dataframe column names seems not to be converted to "." by write.table on ubuntu R-3.3.1 120 prwVc <- gsub("-", ".", prwVc, fixed = TRUE) ## 2016-08-05: '-' character in dataframe column names seems not to be converted to "." by write.table on ubuntu R-3.3.1
71 121
122 ## omnibus and post-hoc tests
123
72 aovMN <- t(apply(datMN, 2, function(varVn) { 124 aovMN <- t(apply(datMN, 2, function(varVn) {
73 125
74 aovMod <- aov(varVn ~ facFcVn) 126 aovMod <- aov(varVn ~ facFcVn)
75 pvaN <- summary(aovMod)[[1]][1, "Pr(>F)"] 127 pvaN <- summary(aovMod)[[1]][1, "Pr(>F)"]
76 hsdMN <- TukeyHSD(aovMod)[["facFcVn"]] 128 hsdMN <- TukeyHSD(aovMod)[["facFcVn"]]
77 c(pvaN, c(hsdMN[, c("diff", "p adj")]), as.numeric(hsdMN[, "p adj"] < thrN)) 129 c(pvaN, c(hsdMN[, c("diff", "p adj")]))
78 130
79 })) 131 }))
80 132
81 aovMN[, 1] <- p.adjust(aovMN[, 1], method = adjC) 133 difVi <- 1:length(prwVc) + 1
82 sigVn <- as.numeric(aovMN[, 1] < thrN) 134
83 aovMN <- cbind(aovMN[, 1], sigVn, aovMN[, 2:ncol(aovMN)]) 135 ## difference of the means for each pairwise comparison
84 ## aovMN[which(aovMN[, 2] < 1), (3 + length(prwVc)):ncol(aovMN)] <- NA 136
85 colnames(aovMN) <- paste0(varPfxC, 137 difMN <- aovMN[, difVi]
86 c(adjC, 138 colnames(difMN) <- paste0(varPfxC, prwVc, "_dif")
87 "sig", 139
88 paste0(prwVc, "_dif"), 140 ## correction for multiple testing
89 paste0(prwVc, "_pva"), 141
90 paste0(prwVc, "_sig"))) 142 aovMN <- aovMN[, -difVi, drop = FALSE]
91 aovMN[which(aovMN[, paste0(varPfxC, "sig")] < 1), paste0(varPfxC, c(paste0(prwVc, "_pva"), paste0(prwVc, "_sig")))] <- NA 143 aovMN <- apply(aovMN, 2, function(pvaVn) p.adjust(pvaVn, method = adjC))
92 144
93 varDF <- cbind.data.frame(varDF, as.data.frame(aovMN)) 145 ## significance coding (0 = not significant, 1 = significant)
94 146
147 adjVn <- aovMN[, 1]
148 sigVn <- as.numeric(adjVn < thrN)
149
150 aovMN <- aovMN[, -1, drop = FALSE]
151 colnames(aovMN) <- paste0(varPfxC, prwVc, "_", adjC)
152
153 aovSigMN <- aovMN < thrN
154 mode(aovSigMN) <- "numeric"
155 colnames(aovSigMN) <- paste0(varPfxC, prwVc, "_sig")
156
157 ## final aggregated table
158
159 resMN <- cbind(adjVn, sigVn, difMN, aovMN, aovSigMN)
160 colnames(resMN)[1:2] <- paste0(varPfxC, c(adjC, "sig"))
161
162 varDF <- cbind.data.frame(varDF, as.data.frame(resMN))
163
164 ## graphic
165
166 pdf(pdfC, onefile = TRUE)
167
168 for(varI in 1:nrow(varDF)) {
169
170 if(sum(aovSigMN[varI, ]) > 0) {
171
172 varC <- rownames(varDF)[varI]
173
174 boxplot(datMN[, varI] ~ facFcVn,
175 main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")"))
176
177 for(prwI in 1:length(prwVc)) {
178
179 if(aovSigMN[varI, paste0(varPfxC, prwVc[prwI], "_sig")] == 1) {
180
181 claVc <- unlist(strsplit(prwVc[prwI], ".", fixed = TRUE))
182 aovClaVl <- facFcVn %in% claVc
183 aovFc <- facFcVn[aovClaVl, drop = TRUE]
184 aovVc <- as.character(aovFc)
185 names(aovVc) <- rownames(samDF)[aovClaVl]
186 boxF(aovFc,
187 datMN[aovClaVl, varI],
188 paste0(varC, " (", adjC, " = ", signif(aovMN[varI, paste0(varPfxC, prwVc[prwI], "_", adjC)], 2), ")"),
189 aovVc)
190
191 }
192
193 }
194
195 }
196
197 }
198
199 dev.off()
200
201
95 } else if(tesC == "kruskal") { 202 } else if(tesC == "kruskal") {
96 203
97 ## getting the names of the pairwise comparisons 'class1Vclass2' 204
205 ## getting the names of the pairwise comparisons 'class1.class2'
206
98 nemMN <- posthoc.kruskal.nemenyi.test(datMN[, 1], facFcVn, "Tukey")[["p.value"]] 207 nemMN <- posthoc.kruskal.nemenyi.test(datMN[, 1], facFcVn, "Tukey")[["p.value"]]
99 nemVl <- c(lower.tri(nemMN, diag = TRUE)) 208 nemVl <- c(lower.tri(nemMN, diag = TRUE))
100 nemClaMC <- cbind(rownames(nemMN)[c(row(nemMN))][nemVl], 209 nemClaMC <- cbind(rownames(nemMN)[c(row(nemMN))][nemVl],
101 colnames(nemMN)[c(col(nemMN))][nemVl]) 210 colnames(nemMN)[c(col(nemMN))][nemVl])
102 nemNamVc <- paste0(nemClaMC[, 1], ".", nemClaMC[, 2]) 211 nemNamVc <- paste0(nemClaMC[, 1], ".", nemClaMC[, 2])
103 nemNamVc <- paste0(varPfxC, nemNamVc) 212 pfxNemVc <- paste0(varPfxC, nemNamVc)
213
214 ## omnibus and post-hoc tests
104 215
105 nemMN <- t(apply(datMN, 2, function(varVn) { 216 nemMN <- t(apply(datMN, 2, function(varVn) {
106 217
107 pvaN <- kruskal.test(varVn ~ facFcVn)[["p.value"]] 218 pvaN <- kruskal.test(varVn ~ facFcVn)[["p.value"]]
108 varNemMN <- posthoc.kruskal.nemenyi.test(varVn, facFcVn, "Tukey")[["p.value"]] 219 varNemMN <- posthoc.kruskal.nemenyi.test(varVn, facFcVn, "Tukey")[["p.value"]]
109 c(pvaN, c(varNemMN)) 220 c(pvaN, c(varNemMN))
110 221
111 })) 222 }))
112 pvaVn <- nemMN[, 1] 223
113 fdrVn <- p.adjust(pvaVn, method = adjC) 224 ## correction for multiple testing
114 sigVn <- as.numeric(fdrVn < thrN) 225
226 nemMN <- apply(nemMN, 2,
227 function(pvaVn) p.adjust(pvaVn, method = adjC))
228 adjVn <- nemMN[, 1]
229 sigVn <- as.numeric(adjVn < thrN)
115 nemMN <- nemMN[, c(FALSE, nemVl)] 230 nemMN <- nemMN[, c(FALSE, nemVl)]
116 colnames(nemMN) <- paste0(nemNamVc, "_pva") 231 colnames(nemMN) <- paste0(pfxNemVc, "_", adjC)
232
233 ## significance coding (0 = not significant, 1 = significant)
234
117 nemSigMN <- nemMN < thrN 235 nemSigMN <- nemMN < thrN
118 mode(nemSigMN) <- "numeric" 236 mode(nemSigMN) <- "numeric"
119 colnames(nemSigMN) <- paste0(nemNamVc, "_sig") 237 colnames(nemSigMN) <- paste0(pfxNemVc, "_sig")
120 nemMN[sigVn < 1, ] <- NA 238
121 nemSigMN[sigVn < 1, ] <- NA 239 ## difference of the medians for each pairwise comparison
122 240
123 difMN <- sapply(1:nrow(nemClaMC), function(prwI) { 241 difMN <- sapply(1:nrow(nemClaMC), function(prwI) {
124 prwVc <- nemClaMC[prwI, ] 242 prwVc <- nemClaMC[prwI, ]
125 prwVi <- which(facFcVn %in% prwVc) 243 prwVi <- which(facFcVn %in% prwVc)
126 prwFacFc <- factor(as.character(facFcVn)[prwVi], levels = prwVc) 244 prwFacFc <- factor(as.character(facFcVn)[prwVi], levels = prwVc)
127 apply(datMN[prwVi, ], 2, function(varVn) -diff(as.numeric(tapply(varVn, prwFacFc, function(x) median(x, na.rm = TRUE))))) 245 apply(datMN[prwVi, ], 2, function(varVn) -diff(as.numeric(tapply(varVn, prwFacFc, function(x) median(x, na.rm = TRUE)))))
128 }) 246 })
129 colnames(difMN) <- gsub("_sig", "_dif", colnames(nemSigMN)) 247 colnames(difMN) <- gsub("_sig", "_dif", colnames(nemSigMN))
130 248
131 nemMN <- cbind(fdrVn, sigVn, difMN, nemMN, nemSigMN) 249 ## final aggregated table
132 colnames(nemMN)[1:2] <- paste0(varPfxC, c(adjC, "sig")) 250
133 251 resMN <- cbind(adjVn, sigVn, difMN, nemMN, nemSigMN)
134 varDF <- cbind.data.frame(varDF, as.data.frame(nemMN)) 252 colnames(resMN)[1:2] <- paste0(varPfxC, c(adjC, "sig"))
135 253
254 varDF <- cbind.data.frame(varDF, as.data.frame(resMN))
255
256 ## graphic
257
258 pdf(pdfC, onefile = TRUE)
259
260 for(varI in 1:nrow(varDF)) {
261
262 if(sum(nemSigMN[varI, ]) > 0) {
263
264 varC <- rownames(varDF)[varI]
265
266 boxplot(datMN[, varI] ~ facFcVn,
267 main = paste0(varC, " (", adjC, " = ", signif(adjVn[varI], 2), ")"))
268
269 for(nemI in 1:length(nemNamVc)) {
270
271 if(nemSigMN[varI, paste0(varPfxC, nemNamVc[nemI], "_sig")] == 1) {
272
273 nemClaVc <- nemClaMC[nemI, ]
274 nemClaVl <- facFcVn %in% nemClaVc
275 nemFc <- facFcVn[nemClaVl, drop = TRUE]
276 nemVc <- as.character(nemFc)
277 names(nemVc) <- rownames(samDF)[nemClaVl]
278 boxF(nemFc,
279 datMN[nemClaVl, varI],
280 paste0(varC, " (", adjC, " = ", signif(nemMN[varI, paste0(varPfxC, nemNamVc[nemI], "_", adjC)], 2), ")"),
281 nemVc)
282
283 }
284
285 }
286
287 }
288
289 }
290
291 dev.off()
292
136 } 293 }
137 294
138 names(sigVn) <- rownames(varDF) 295 names(sigVn) <- rownames(varDF)
139 sigSumN <- sum(sigVn, na.rm = TRUE) 296 sigSumN <- sum(sigVn, na.rm = TRUE)
140 if(sigSumN) { 297 if(sigSumN) {
141 cat("\nThe following ", sigSumN, " variable", ifelse(sigSumN > 1, "s", ""), " (", round(sigSumN / length(sigVn) * 100), "%) ", ifelse(sigSumN > 1, "were", "was"), " found significant at the ", thrN, " level:\n", sep = "") 298 cat("\nThe following ", sigSumN, " variable", ifelse(sigSumN > 1, "s", ""), " (", round(sigSumN / length(sigVn) * 100), "%) ", ifelse(sigSumN > 1, "were", "was"), " found significant at the ", thrN, " level:\n", sep = "")
142 cat(paste(rownames(varDF)[sigVn > 0], collapse = "\n"), "\n", sep = "") 299 cat(paste(rownames(varDF)[sigVn > 0], collapse = "\n"), "\n", sep = "")
146 options(stingsAsFactors = strAsFacL) 303 options(stingsAsFactors = strAsFacL)
147 304
148 return(varDF) 305 return(varDF)
149 306
150 } 307 }
308
309
310 boxF <- function(xFc,
311 yVn,
312 maiC,
313 xVc) {
314
315 boxLs <- boxplot(yVn ~ xFc,
316 main = maiC)
317
318 outVn <- boxLs[["out"]]
319
320 if(length(outVn)) {
321
322 for(outI in 1:length(outVn)) {
323 levI <- which(levels(xFc) == xVc[names(outVn)[outI]])
324 text(levI,
325 outVn[outI],
326 labels = names(outVn)[outI],
327 pos = ifelse(levI == 2, 2, 4))
328 }
329
330 }
331
332 }