comparison tests/biosigner_tests.R @ 0:48e4be935243 draft

planemo upload for repository https://github.com/workflow4metabolomics/biosigner.git commit b8af709c9fd6ed283fc4e4249dcf692556927b2d
author ethevenot
date Wed, 27 Jul 2016 11:40:20 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:48e4be935243
1 library(RUnit)
2
3 wrapperF <- function(argVc) {
4
5
6 #### Start_of_testing_code <- function() {}
7
8
9 ##------------------------------
10 ## Initializing
11 ##------------------------------
12
13 ## options
14 ##--------
15
16 strAsFacL <- options()$stringsAsFactors
17 options(stringsAsFactors = FALSE)
18
19 ## libraries
20 ##----------
21
22 suppressMessages(library(biosigner))
23
24 if(packageVersion("biosigner") < "1.0.0")
25 stop("Please use 'biosigner' versions of 1.0.0 and above")
26 if(packageVersion("ropls") < "1.4.0")
27 stop("Please use 'ropls' versions of 1.4.0 and above")
28
29 ## constants
30 ##----------
31
32 modNamC <- "Biosigner" ## module name
33
34 topEnvC <- environment()
35 flgC <- "\n"
36
37 ## functions
38 ##----------
39
40 flgF <- function(tesC,
41 envC = topEnvC,
42 txtC = NA) { ## management of warning and error messages
43
44 tesL <- eval(parse(text = tesC), envir = envC)
45
46 if(!tesL) {
47
48 sink(NULL)
49 stpTxtC <- ifelse(is.na(txtC),
50 paste0(tesC, " is FALSE"),
51 txtC)
52
53 stop(stpTxtC,
54 call. = FALSE)
55
56 }
57
58 } ## flgF
59
60
61 ## log file
62 ##---------
63
64 sink(argVc["information"])
65
66 cat("\nStart of the '", modNamC, "' Galaxy module call: ",
67 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
68
69
70 ## arguments
71 ##----------
72
73 xMN <- t(as.matrix(read.table(argVc["dataMatrix_in"],
74 check.names = FALSE,
75 header = TRUE,
76 row.names = 1,
77 sep = "\t")))
78
79 samDF <- read.table(argVc["sampleMetadata_in"],
80 check.names = FALSE,
81 header = TRUE,
82 row.names = 1,
83 sep = "\t")
84 flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
85
86 varDF <- read.table(argVc["variableMetadata_in"],
87 check.names = FALSE,
88 header = TRUE,
89 row.names = 1,
90 sep = "\t")
91 flgF("identical(colnames(xMN), rownames(varDF))", txtC = "Variable names (or number) in the data matrix (first column) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
92
93 flgF("argVc['respC'] %in% colnames(samDF)",
94 txtC = paste0("Class argument (", argVc['respC'], ") must be either none or one of the column names (first row) of your sample metadata"))
95 respVc <- samDF[, argVc["respC"]]
96 flgF("mode(respVc) == 'character'",
97 txtC = paste0("'", argVc['respC'], "' column of sampleMetadata does not contain only characters"))
98 respFc <- factor(respVc)
99 flgF("length(levels(respFc)) == 2",
100 txtC = paste0("'", argVc['respC'], "' column of sampleMetadata does not contain only 2 types of characters (e.g., 'case' and 'control')"))
101 tierMaxC <- ifelse("tierC" %in% names(argVc), argVc["tierC"], "S")
102 pvalN <- ifelse("pvalN" %in% names(argVc), as.numeric(argVc["pvalN"]), 0.05)
103
104
105 ##------------------------------
106 ## Computation and plot
107 ##------------------------------
108
109
110 sink()
111
112 optWrnN <- options()$warn
113 options(warn = -1)
114
115 if("seedI" %in% names(argVc) && argVc["seedI"] != "0")
116 set.seed(as.integer(argVc["seedI"]))
117
118 bsnLs <- biosign(x = xMN,
119 y = respFc,
120 methodVc = ifelse("methodC" %in% names(argVc), argVc["methodC"], "all"),
121 bootI = ifelse("bootI" %in% names(argVc), as.numeric(argVc["bootI"]), 50),
122 pvalN = pvalN,
123 printL = FALSE,
124 plotL = FALSE,
125 .sinkC = argVc["information"])
126
127 if("seedI" %in% names(argVc) && argVc["seedI"] != "0")
128 set.seed(NULL)
129
130 tierMC <- bsnLs@tierMC
131
132 if(!is.null(tierMC)) {
133 plot(bsnLs,
134 tierMaxC = tierMaxC,
135 file.pdfC = argVc["figure_tier"],
136 .sinkC = argVc["information"])
137 plot(bsnLs,
138 tierMaxC = tierMaxC,
139 typeC = "boxplot",
140 file.pdfC = argVc["figure_boxplot"],
141 .sinkC = argVc["information"])
142 } else {
143 pdf(argVc["figure_tier"])
144 plot(1, bty = "n", type = "n",
145 xaxt = "n", yaxt = "n", xlab = "", ylab = "")
146 text(mean(par("usr")[1:2]), mean(par("usr")[3:4]),
147 labels = "No significant variable to display")
148 dev.off()
149 pdf(argVc["figure_boxplot"])
150 plot(1, bty = "n", type = "n",
151 xaxt = "n", yaxt = "n", xlab = "", ylab = "")
152 text(mean(par("usr")[1:2]), mean(par("usr")[3:4]),
153 labels = "No significant variable to display")
154 dev.off()
155 }
156
157
158 options(warn = optWrnN)
159
160
161 ##------------------------------
162 ## Print
163 ##------------------------------
164
165 sink(argVc["information"], append = TRUE)
166
167 tierFullVc <- c("S", LETTERS[1:5])
168 tierVc <- tierFullVc[1:which(tierFullVc == tierMaxC)]
169
170 if(sum(tierMC %in% tierVc)) {
171 cat("\nSignificant features from '", paste(tierVc, collapse = "', '"), "' tiers:\n", sep = "")
172 print(tierMC[apply(tierMC, 1, function(rowVc) sum(rowVc %in% tierVc) > 0), ,
173 drop = FALSE])
174 cat("\nAccuracy:\n")
175 print(round(getAccuracyMN(bsnLs), 3))
176 } else
177 cat("\nNo significant variable found for any classifier\n")
178
179
180 ##------------------------------
181 ## Ending
182 ##------------------------------
183
184 ## Saving
185 ##-------
186
187 if(!is.null(tierMC)) {
188 tierDF <- data.frame(tier = sapply(rownames(varDF),
189 function(varC) {
190 varTirVc <- tierMC[varC, ]
191 varTirVc <- names(varTirVc)[varTirVc %in% tierVc]
192 paste(varTirVc, collapse = "|")
193 }),
194 stringsAsFactors = FALSE)
195 colnames(tierDF) <- paste(argVc["respC"],
196 colnames(tierDF),
197 paste(tierVc, collapse = ""),
198 sep = "_")
199 varDF <- cbind.data.frame(varDF, tierDF)
200 }
201
202 ## variableMetadata
203
204 varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
205 varDF)
206 write.table(varDF,
207 file = argVc["variableMetadata_out"],
208 quote = FALSE,
209 row.names = FALSE,
210 sep = "\t")
211
212
213 ## Closing
214 ##--------
215
216 cat("\nEnd of '", modNamC, "' Galaxy module call: ",
217 as.character(Sys.time()), "\n", sep = "")
218
219 sink()
220
221 options(stringsAsFactors = strAsFacL)
222
223
224 #### End_of_testing_code <- function() {}
225
226
227 return(list(bsnLs = bsnLs))
228
229
230 rm(list = ls())
231
232
233 }
234
235
236 exaDirOutC <- "output"
237 if(!file.exists(exaDirOutC))
238 stop("Please create an 'output' subfolder into the (current) 'tests' folder")
239
240 tesArgLs <- list(sacurine_all = c(respC = "gender",
241 methodC = "all",
242 bootI = "5",
243 pvalN = "0.05",
244 seedI = "123",
245 .chkC = "checkEqualsNumeric(getAccuracyMN(outLs[['bsnLs']])['AS', 'randomforest'], 0.8534348, tolerance = 1e-7)"))
246
247 for(tesC in names(tesArgLs))
248 tesArgLs[[tesC]] <- c(tesArgLs[[tesC]],
249 dataMatrix_in = file.path(unlist(strsplit(tesC, "_"))[1], "dataMatrix.tsv"),
250 sampleMetadata_in = file.path(unlist(strsplit(tesC, "_"))[1], "sampleMetadata.tsv"),
251 variableMetadata_in = file.path(unlist(strsplit(tesC, "_"))[1], "variableMetadata.tsv"),
252 variableMetadata_out = file.path(exaDirOutC, "variableMetadata.tsv"),
253 figure_tier = file.path(exaDirOutC, "figure-tier.pdf"),
254 figure_boxplot = file.path(exaDirOutC, "figure-boxplot.pdf"),
255 information = file.path(exaDirOutC, "information.txt"))
256
257 for(tesC in names(tesArgLs)) {
258 print(tesC)
259 outLs <- wrapperF(tesArgLs[[tesC]])
260 if(".chkC" %in% names(tesArgLs[[tesC]]))
261 stopifnot(eval(parse(text = tesArgLs[[tesC]][[".chkC"]])))
262 }
263
264 message("Checks successfully completed")