Mercurial > repos > ethevenot > multivariate
comparison multivariate_wrapper.R @ 0:fafba524dca6 draft
planemo upload for repository https://github.com/workflow4metabolomics/multivariate.git commit 6596dbd39d20ee1962d9ebdd87eec04821239760
| author | ethevenot |
|---|---|
| date | Wed, 27 Jul 2016 11:22:56 -0400 |
| parents | |
| children | da272496b32d |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:fafba524dca6 |
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 | |
| 3 library(batch) ## parseCommandArgs | |
| 4 | |
| 5 ######## | |
| 6 # MAIN # | |
| 7 ######## | |
| 8 | |
| 9 argVc <- unlist(parseCommandArgs(evaluate=FALSE)) | |
| 10 | |
| 11 | |
| 12 #### Start_of_tested_code <- function() {} | |
| 13 | |
| 14 | |
| 15 ##------------------------------ | |
| 16 ## Initializing | |
| 17 ##------------------------------ | |
| 18 | |
| 19 ## options | |
| 20 ##-------- | |
| 21 | |
| 22 strAsFacL <- options()$stringsAsFactors | |
| 23 options(stringsAsFactors = FALSE) | |
| 24 | |
| 25 ## libraries | |
| 26 ##---------- | |
| 27 | |
| 28 suppressMessages(library(ropls)) | |
| 29 | |
| 30 if(packageVersion("ropls") < "1.4.0") | |
| 31 cat("\nWarning: new version of the 'ropls' package is available\n", sep="") | |
| 32 | |
| 33 ## constants | |
| 34 ##---------- | |
| 35 | |
| 36 modNamC <- "Multivariate" ## module name | |
| 37 | |
| 38 topEnvC <- environment() | |
| 39 flgC <- "\n" | |
| 40 | |
| 41 ## functions | |
| 42 ##---------- | |
| 43 | |
| 44 flgF <- function(tesC, | |
| 45 envC = topEnvC, | |
| 46 txtC = NA) { ## management of warning and error messages | |
| 47 | |
| 48 tesL <- eval(parse(text = tesC), envir = envC) | |
| 49 | |
| 50 if(!tesL) { | |
| 51 | |
| 52 sink(NULL) | |
| 53 stpTxtC <- ifelse(is.na(txtC), | |
| 54 paste0(tesC, " is FALSE"), | |
| 55 txtC) | |
| 56 | |
| 57 stop(stpTxtC, | |
| 58 call. = FALSE) | |
| 59 | |
| 60 } | |
| 61 | |
| 62 } ## flgF | |
| 63 | |
| 64 | |
| 65 ## log file | |
| 66 ##--------- | |
| 67 | |
| 68 sink(argVc["information"]) | |
| 69 | |
| 70 cat("\nStart of the '", modNamC, "' Galaxy module call: ", | |
| 71 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="") | |
| 72 | |
| 73 | |
| 74 ## arguments | |
| 75 ##---------- | |
| 76 | |
| 77 xMN <- t(as.matrix(read.table(argVc["dataMatrix_in"], | |
| 78 check.names = FALSE, | |
| 79 header = TRUE, | |
| 80 row.names = 1, | |
| 81 sep = "\t"))) | |
| 82 | |
| 83 samDF <- read.table(argVc["sampleMetadata_in"], | |
| 84 check.names = FALSE, | |
| 85 header = TRUE, | |
| 86 row.names = 1, | |
| 87 sep = "\t") | |
| 88 flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section") | |
| 89 | |
| 90 varDF <- read.table(argVc["variableMetadata_in"], | |
| 91 check.names = FALSE, | |
| 92 header = TRUE, | |
| 93 row.names = 1, | |
| 94 sep = "\t") | |
| 95 flgF("identical(colnames(xMN), rownames(varDF))", txtC = "Variable names (or number) in the data matrix (first column) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section") | |
| 96 | |
| 97 flgF("argVc['respC'] == 'none' || (argVc['respC'] %in% colnames(samDF))", | |
| 98 txtC = paste0("Y Response argument (", argVc['respC'], ") must be either none or one of the column names (first row) of your sample metadata")) | |
| 99 if(argVc["respC"] != "none") { | |
| 100 yMCN <- matrix(samDF[, argVc['respC']], ncol = 1, dimnames = list(rownames(xMN), argVc['respC'])) | |
| 101 } else | |
| 102 yMCN <- NULL | |
| 103 | |
| 104 if(argVc["testL"] == "TRUE") { | |
| 105 flgF("!is.null(yMCN)", | |
| 106 txtC = "Predictions cannot be peformed with PCA models") | |
| 107 flgF("'test.' %in% colnames(samDF)", | |
| 108 txtC = "No 'test.' column found in the sample metadata") | |
| 109 flgF("identical(sort(unique(samDF[, 'test.'])), c('no', 'yes'))", | |
| 110 txtC = "'test.' column of sample metadata must contain both 'yes' (tested samples) and 'no' (samples to be used for model training) values, and nothing else") | |
| 111 flgF("identical(sort(unique(samDF[, 'test.'])), c('no', 'yes'))", | |
| 112 txtC = "'test.' column of sample metadata must contain both 'yes' (tested samples) and 'no' (samples to be used for model training) values, and nothing else") | |
| 113 flgF("!any(is.na(yMCN[samDF[, 'test.'] == 'no', ]))", | |
| 114 txtC = "samples for model training (i.e. 'no' value in the 'test.' column) should not contain NA in the response") | |
| 115 tesVl <- samDF[, "test."] == "yes" | |
| 116 xTesMN <- xMN[tesVl, , drop = FALSE] | |
| 117 xMN <- xMN[!tesVl, , drop = FALSE] | |
| 118 yMCN <- yMCN[!tesVl, , drop = FALSE] | |
| 119 } else | |
| 120 tesVl <- NULL | |
| 121 | |
| 122 if(!('parAsColC' %in% names(argVc))) | |
| 123 argVc["parAsColC"] <- "none" | |
| 124 flgF("argVc['parAsColC'] == 'none' || argVc['parAsColC'] %in% colnames(samDF)", txtC = paste0("Sample color argument (", argVc['parAsColC'], ") must be either none or one of the column names (first row) of your sample metadata")) | |
| 125 if(argVc["parAsColC"] != "none") { | |
| 126 parAsColFcVn <- samDF[, argVc['parAsColC']] | |
| 127 if(is.character(parAsColFcVn)) | |
| 128 parAsColFcVn <- factor(parAsColFcVn) | |
| 129 } else | |
| 130 parAsColFcVn <- NA | |
| 131 | |
| 132 if(!('parMahalC' %in% names(argVc)) || argVc["parMahalC"] == "NA") { | |
| 133 if(!is.null(yMCN) && ncol(yMCN) == 1 && mode(yMCN) == "character") | |
| 134 argVc["parMahalC"] <- argVc["respC"] | |
| 135 else | |
| 136 argVc["parMahalC"] <- "none" | |
| 137 } | |
| 138 flgF("argVc['parMahalC'] == 'none' || (argVc['parMahalC'] %in% colnames(samDF))", | |
| 139 txtC = paste0("Mahalanobis argument (", argVc['parMahalC'], ") must be either 'NA', 'none' or one of the column names (first row) of your sample metadata")) | |
| 140 if(argVc["parMahalC"] == "none") { | |
| 141 parEllipsesL <- FALSE | |
| 142 } else { | |
| 143 if(is.null(yMCN)) { ## PCA case | |
| 144 flgF("mode(samDF[, argVc['parMahalC']]) == 'character'", | |
| 145 txtC = paste0("Mahalanobis argument (", argVc['parMahalC'], ") must correspond to a column of characters in your sampleMetadata")) | |
| 146 parAsColFcVn <- factor(samDF[, argVc["parMahalC"]]) | |
| 147 parEllipsesL <- TRUE | |
| 148 } else { ## (O)PLS-DA case | |
| 149 flgF("identical(as.character(argVc['respC']), as.character(argVc['parMahalC']))", | |
| 150 txtC = paste0("The Mahalanobis argument (", argVc['parMahalC'], ") must be identical to the Y response argument (", argVc['respC'], ")")) | |
| 151 parEllipsesL <- TRUE | |
| 152 } | |
| 153 } | |
| 154 | |
| 155 if(!('parLabVc' %in% names(argVc))) | |
| 156 argVc["parLabVc"] <- "none" | |
| 157 flgF("argVc['parLabVc'] == 'none' || (argVc['parLabVc'] %in% colnames(samDF))", | |
| 158 txtC = paste0("Sample labels argument (", argVc['parLabVc'], ") must be either none or one of the column names (first row) of your sample metadata")) | |
| 159 if('parLabVc' %in% names(argVc)) | |
| 160 if(argVc["parLabVc"] != "none") { | |
| 161 flgF("mode(samDF[, argVc['parLabVc']]) == 'character'", | |
| 162 txtC = paste0("The sample label argument (", argVc['parLabVc'], ") must correspond to a sample metadata column of characters (not numerics)")) | |
| 163 parLabVc <- samDF[, argVc['parLabVc']] | |
| 164 } else | |
| 165 parLabVc <- NA | |
| 166 | |
| 167 if('parPc1I' %in% names(argVc)) { | |
| 168 parCompVi <- as.numeric(c(argVc["parPc1I"], argVc["parPc2I"])) | |
| 169 } else | |
| 170 parCompVi <- c(1, 2) | |
| 171 | |
| 172 | |
| 173 ## checking | |
| 174 ##--------- | |
| 175 | |
| 176 | |
| 177 flgF("argVc['predI'] == 'NA' || argVc['orthoI'] == 'NA' || as.numeric(argVc['orthoI']) > 0 || parCompVi[2] <= as.numeric(argVc['predI'])", | |
| 178 txtC = paste0("The highest component to display (", parCompVi[2], ") must not exceed the number of predictive components of the model (", argVc['predI'], ")")) | |
| 179 | |
| 180 | |
| 181 if(argVc["orthoI"] == "NA" || argVc["orthoI"] != "0") | |
| 182 if(argVc["predI"] == "NA" || argVc["predI"] != "0") { | |
| 183 argVc["predI"] <- "1" | |
| 184 cat("\nWarning: OPLS: number of predictive components ('predI' argument) set to 1\n", sep = "") | |
| 185 } | |
| 186 | |
| 187 if(argVc["predI"] != "NA") | |
| 188 if(as.numeric(argVc["predI"]) > min(nrow(xMN), ncol(xMN))) { | |
| 189 argVc["predI"] <- as.character(min(nrow(xMN), ncol(xMN))) | |
| 190 cat("\nWarning: 'predI' set to the minimum of the dataMatrix dimensions: ", as.numeric(argVc["predI"]), "\n", sep = "") | |
| 191 } | |
| 192 | |
| 193 if("algoC" %in% names(argVc) && argVc["algoC"] == "svd" && length(which(is.na(c(xMN)))) > 0) { | |
| 194 minN <- min(c(xMN[!is.na(xMN)])) / 2 | |
| 195 cat("\nWarning: Missing values set to ", round(minN, 1), " (half minimum value) for 'svd' algorithm to be used\n", sep = "") | |
| 196 } | |
| 197 | |
| 198 | |
| 199 ##------------------------------ | |
| 200 ## Computation and plot | |
| 201 ##------------------------------ | |
| 202 | |
| 203 | |
| 204 sink() | |
| 205 | |
| 206 optWrnN <- options()$warn | |
| 207 options(warn = -1) | |
| 208 | |
| 209 ropLs <- opls(x = xMN, | |
| 210 y = yMCN, | |
| 211 predI = ifelse(argVc["predI"] == "NA", NA, as.numeric(argVc["predI"])), | |
| 212 orthoI = ifelse(argVc["orthoI"] == "NA", NA, as.numeric(argVc["orthoI"])), | |
| 213 algoC = ifelse('algoC' %in% names(argVc), argVc["algoC"], "default"), | |
| 214 crossvalI = ifelse('crossvalI' %in% names(argVc), as.numeric(argVc["crossvalI"]), 7), | |
| 215 log10L = ifelse('log10L' %in% names(argVc), as.logical(argVc["log10L"]), FALSE), | |
| 216 permI = ifelse('permI' %in% names(argVc), as.numeric(argVc["permI"]), 20), | |
| 217 scaleC = ifelse('scaleC' %in% names(argVc), argVc["scaleC"], "standard"), | |
| 218 subset = NULL, | |
| 219 printL = FALSE, | |
| 220 plotL = FALSE, | |
| 221 .sinkC = argVc['information']) | |
| 222 | |
| 223 modC <- ropLs@typeC | |
| 224 sumDF <- getSummaryDF(ropLs) | |
| 225 desMC <- ropLs@descriptionMC | |
| 226 scoreMN <- getScoreMN(ropLs) | |
| 227 loadingMN <- getLoadingMN(ropLs) | |
| 228 | |
| 229 vipVn <- coeMN <- orthoScoreMN <- orthoLoadingMN <- orthoVipVn <- NULL | |
| 230 | |
| 231 if(grepl("PLS", modC)) { | |
| 232 | |
| 233 vipVn <- getVipVn(ropLs) | |
| 234 coeMN <- coef(ropLs) | |
| 235 | |
| 236 if(grepl("OPLS", modC)) { | |
| 237 orthoScoreMN <- getScoreMN(ropLs, orthoL = TRUE) | |
| 238 orthoLoadingMN <- getLoadingMN(ropLs, orthoL = TRUE) | |
| 239 orthoVipVn <- getVipVn(ropLs, orthoL = TRUE) | |
| 240 } | |
| 241 | |
| 242 } | |
| 243 | |
| 244 ploC <- ifelse('typeC' %in% names(argVc), argVc["typeC"], "summary") | |
| 245 | |
| 246 if(sumDF[, "pre"] + sumDF[, "ort"] < 2) { | |
| 247 if(!(ploC %in% c("permutation", "overview"))) { | |
| 248 ploC <- "summary" | |
| 249 plotWarnL <- TRUE | |
| 250 } | |
| 251 } else | |
| 252 plotWarnL <- FALSE | |
| 253 | |
| 254 plot(ropLs, | |
| 255 typeVc = ploC, | |
| 256 parAsColFcVn = parAsColFcVn, | |
| 257 parCexN = ifelse('parCexN' %in% names(argVc), as.numeric(argVc["parCexN"]), 0.8), | |
| 258 parCompVi = parCompVi, | |
| 259 parEllipsesL = parEllipsesL, | |
| 260 parLabVc = parLabVc, | |
| 261 file.pdfC = argVc['figure'], | |
| 262 .sinkC = argVc['information']) | |
| 263 | |
| 264 options(warn = optWrnN) | |
| 265 | |
| 266 | |
| 267 ##------------------------------ | |
| 268 ## Print | |
| 269 ##------------------------------ | |
| 270 | |
| 271 | |
| 272 sink(argVc["information"], append = TRUE) | |
| 273 | |
| 274 if(plotWarnL) | |
| 275 cat("\nWarning: For single component models, only 'overview' (and 'permutation' in case of single response (O)PLS(-DA)) plot(s) are available\n", sep = "") | |
| 276 | |
| 277 | |
| 278 cat("\n", modC, "\n", sep = "") | |
| 279 | |
| 280 cat("\n", desMC["samples", ], | |
| 281 " samples x ", | |
| 282 desMC["X_variables", ], | |
| 283 " variables", | |
| 284 ifelse(modC != "PCA", | |
| 285 " and 1 response", | |
| 286 ""), | |
| 287 "\n", sep = "") | |
| 288 | |
| 289 cat("\n", ropLs@suppLs[["scaleC"]], " scaling of dataMatrix", | |
| 290 ifelse(modC == "PCA", | |
| 291 "", | |
| 292 paste0(" and ", | |
| 293 ifelse(mode(ropLs@suppLs[["yMCN"]]) == "character" && ropLs@suppLs[["scaleC"]] != "standard", | |
| 294 "standard scaling of ", | |
| 295 ""), | |
| 296 "response\n")), sep = "") | |
| 297 | |
| 298 if(substr(desMC["missing_values", ], 1, 1) != "0") | |
| 299 cat("\n", desMC["missing_values", ], " NAs\n", sep = "") | |
| 300 | |
| 301 if(substr(desMC["near_zero_excluded_X_variables", ], 1, 1) != "0") | |
| 302 cat("\n", desMC["near_zero_excluded_X_variables", ], | |
| 303 " excluded variables during model building (because of near zero variance)\n", sep = "") | |
| 304 | |
| 305 cat("\n") | |
| 306 | |
| 307 optDigN <- options()[["digits"]] | |
| 308 options(digits = 3) | |
| 309 print(ropLs@modelDF) | |
| 310 options(digits = optDigN) | |
| 311 | |
| 312 | |
| 313 ##------------------------------ | |
| 314 ## Ending | |
| 315 ##------------------------------ | |
| 316 | |
| 317 | |
| 318 ## Saving | |
| 319 ##------- | |
| 320 | |
| 321 | |
| 322 rspModC <- gsub("-", "", modC) | |
| 323 if(rspModC != "PCA") | |
| 324 rspModC <- paste0(make.names(argVc['respC']), "_", rspModC) | |
| 325 | |
| 326 if(sumDF[, "pre"] + sumDF[, "ort"] < 2) { | |
| 327 | |
| 328 tCompMN <- scoreMN | |
| 329 pCompMN <- loadingMN | |
| 330 | |
| 331 } else { | |
| 332 | |
| 333 if(sumDF[, "ort"] > 0) { | |
| 334 if(parCompVi[2] > sumDF[, "ort"] + 1) | |
| 335 stop("Selected orthogonal component for plotting (ordinate) exceeds the total number of orthogonal components of the model", call. = FALSE) | |
| 336 tCompMN <- cbind(scoreMN[, 1], orthoScoreMN[, parCompVi[2] - 1]) | |
| 337 pCompMN <- cbind(loadingMN[, 1], orthoLoadingMN[, parCompVi[2] - 1]) | |
| 338 colnames(pCompMN) <- colnames(tCompMN) <- c("h1", paste("o", parCompVi[2] - 1, sep = "")) | |
| 339 } else { | |
| 340 if(max(parCompVi) > sumDF[, "pre"]) | |
| 341 stop("Selected component for plotting as ordinate exceeds the total number of predictive components of the model", call. = FALSE) | |
| 342 tCompMN <- scoreMN[, parCompVi, drop = FALSE] | |
| 343 pCompMN <- loadingMN[, parCompVi, drop = FALSE] | |
| 344 } | |
| 345 | |
| 346 } | |
| 347 | |
| 348 ## x-scores and prediction | |
| 349 | |
| 350 colnames(tCompMN) <- paste0(rspModC, "_XSCOR-", colnames(tCompMN)) | |
| 351 tCompDF <- as.data.frame(tCompMN)[rownames(samDF), , drop = FALSE] | |
| 352 | |
| 353 if(modC != "PCA") { | |
| 354 | |
| 355 if(!is.null(tesVl)) { | |
| 356 tCompFulMN <- matrix(NA, | |
| 357 nrow = nrow(samDF), | |
| 358 ncol = ncol(tCompMN), | |
| 359 dimnames = list(rownames(samDF), colnames(tCompMN))) | |
| 360 mode(tCompFulMN) <- "numeric" | |
| 361 tCompFulMN[rownames(tCompMN), ] <- tCompMN | |
| 362 tCompMN <- tCompFulMN | |
| 363 | |
| 364 fitMCN <- fitted(ropLs) | |
| 365 fitFulMCN <- matrix(NA, | |
| 366 nrow = nrow(samDF), | |
| 367 ncol = 1, | |
| 368 dimnames = list(rownames(samDF), NULL)) | |
| 369 mode(fitFulMCN) <- mode(fitMCN) | |
| 370 fitFulMCN[rownames(fitMCN), ] <- fitMCN | |
| 371 yPreMCN <- predict(ropLs, newdata = as.data.frame(xTesMN)) | |
| 372 fitFulMCN[rownames(yPreMCN), ] <- yPreMCN | |
| 373 fitMCN <- fitFulMCN | |
| 374 | |
| 375 } else | |
| 376 fitMCN <- fitted(ropLs) | |
| 377 | |
| 378 colnames(fitMCN) <- paste0(rspModC, | |
| 379 "_predictions") | |
| 380 fitDF <- as.data.frame(fitMCN)[rownames(samDF), , drop = FALSE] | |
| 381 | |
| 382 tCompDF <- cbind.data.frame(tCompDF, fitDF) | |
| 383 } | |
| 384 | |
| 385 samDF <- cbind.data.frame(samDF, tCompDF) | |
| 386 | |
| 387 ## x-loadings and VIP | |
| 388 | |
| 389 colnames(pCompMN) <- paste0(rspModC, "_XLOAD-", colnames(pCompMN)) | |
| 390 if(!is.null(vipVn)) { | |
| 391 pCompMN <- cbind(pCompMN, vipVn) | |
| 392 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC, | |
| 393 "_VIP", | |
| 394 ifelse(!is.null(orthoVipVn), | |
| 395 "_pred", | |
| 396 "")) | |
| 397 if(!is.null(orthoVipVn)) { | |
| 398 pCompMN <- cbind(pCompMN, orthoVipVn) | |
| 399 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC, | |
| 400 "_VIP_ortho") | |
| 401 } | |
| 402 } | |
| 403 if(!is.null(coeMN)) { | |
| 404 pCompMN <- cbind(pCompMN, coeMN) | |
| 405 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC, "_COEFF") | |
| 406 } | |
| 407 pCompDF <- as.data.frame(pCompMN)[rownames(varDF), , drop = FALSE] | |
| 408 varDF <- cbind.data.frame(varDF, pCompDF) | |
| 409 | |
| 410 ## sampleMetadata | |
| 411 | |
| 412 samDF <- cbind.data.frame(sampleMetadata = rownames(samDF), | |
| 413 samDF) | |
| 414 write.table(samDF, | |
| 415 file = argVc["sampleMetadata_out"], | |
| 416 quote = FALSE, | |
| 417 row.names = FALSE, | |
| 418 sep = "\t") | |
| 419 | |
| 420 ## variableMetadata | |
| 421 | |
| 422 varDF <- cbind.data.frame(variableMetadata = rownames(varDF), | |
| 423 varDF) | |
| 424 write.table(varDF, | |
| 425 file = argVc["variableMetadata_out"], | |
| 426 quote = FALSE, | |
| 427 row.names = FALSE, | |
| 428 sep = "\t") | |
| 429 | |
| 430 # Output ropLs | |
| 431 if ( ! is.null(argVc['ropls_out'])) | |
| 432 save(ropLs, file = argVc['ropls_out']) | |
| 433 | |
| 434 ## Closing | |
| 435 ##-------- | |
| 436 | |
| 437 cat("\nEnd of '", modNamC, "' Galaxy module call: ", | |
| 438 as.character(Sys.time()), "\n", sep = "") | |
| 439 | |
| 440 sink() | |
| 441 | |
| 442 options(stringsAsFactors = strAsFacL) | |
| 443 | |
| 444 | |
| 445 #### End_of_tested_code <- function() {} | |
| 446 | |
| 447 | |
| 448 rm(list = ls()) |
