comparison launcher.R @ 0:067d45e6caa9 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgcca commit 00f9e92845737e05a4afb1c93043f35b7e4ea771"
author iuc
date Tue, 12 Jan 2021 10:12:04 +0000
parents
children 4e73ea176c34
comparison
equal deleted inserted replaced
-1:000000000000 0:067d45e6caa9
1 # Author: Etienne CAMENEN
2 # Date: 2020
3 # Contact: arthur.tenenhaus@centralesupelec.fr
4 # Key-words: omics, RGCCA, multi-block
5 # EDAM operation: analysis, correlation, visualisation
6 #
7 # Abstract: Performs multi-variate analysis (PCA, CCA, PLS, R/SGCCA, etc.)
8 # and produces textual and graphical outputs (e.g. variables and individuals
9 # plots).
10
11 rm(list = ls())
12 graphics.off()
13 separator <- NULL
14
15 ########## Arguments ##########
16
17 # Parse the arguments from a command line launch
18 get_args <- function() {
19 option_list <- list(
20 # File parameters
21 make_option(
22 opt_str = c("-d", "--datasets"),
23 type = "character",
24 metavar = "path list",
25 help = "List of comma-separated file paths corresponding to the
26 blocks to be analyzed (one per block and without spaces between
27 them; e.g., path/file1.txt,path/file2.txt) [required]"
28 ),
29 make_option(
30 opt_str = c("-c", "--connection"),
31 type = "character",
32 metavar = "path",
33 help = "Path of the file defining the connections between the blocks
34 [if not used, activates the superblock mode]"
35 ),
36 make_option(
37 opt_str = "--group",
38 type = "character",
39 metavar = "path",
40 help = "Path of the file coloring the individuals in the ad hoc
41 plot"
42 ),
43 make_option(
44 opt_str = c("-r", "--response"),
45 type = "integer",
46 metavar = "integer",
47 help = "Position of the response file for the supervised mode within
48 the block path list [actives the supervised mode]"
49 ),
50 make_option(
51 opt_str = "--names",
52 type = "character",
53 metavar = "character list",
54 help = "List of comma-separated block names to rename them (one per
55 block; without spaces between them) [default: the block file names]"
56 ),
57 make_option(
58 opt_str = c("-H", "--header"),
59 type = "logical",
60 action = "store_false",
61 help = "DO NOT consider the first row as the column header"
62 ),
63 make_option(
64 opt_str = "--separator",
65 type = "integer",
66 metavar = "integer",
67 default = opt[1],
68 help = "Character used to separate columns (1: tabulation,
69 2: semicolon, 3: comma) [default: %default]"
70 ),
71 # Analysis parameter
72 make_option(
73 opt_str = "--type",
74 type = "character",
75 metavar = "character",
76 default = opt[2],
77 help = "Type of analysis [default: %default] (among: rgcca, pca,
78 cca, gcca, cpca-w, hpca, maxbet-b, maxbet, maxdiff-b, maxdiff,
79 maxvar-a, maxvar-b, maxvar, niles, r-maxvar, rcon-pca, ridge-gca,
80 sabscor, ssqcor, ssqcor, ssqcov-1, ssqcov-2, ssqcov, sum-pca,
81 sumcor, sumcov-1, sumcov-2, sumcov)"
82 ),
83 make_option(
84 opt_str = "--ncomp",
85 type = "character",
86 metavar = "integer list",
87 default = opt[3],
88 help = "Number of components in the analysis for each block
89 [default: %default]. The number should be higher than 1 and lower
90 than the minimum number of variables among the blocks. It can be a
91 single values or a comma-separated list (e.g 2,2,3,2)."
92 ),
93 make_option(
94 opt_str = "--penalty",
95 type = "character",
96 metavar = "float list",
97 default = opt[4],
98 help = "For RGCCA, a regularization parameter for each block (i.e., tau)
99 [default: %default]. Tau varies from 0 (maximizing the correlation)
100 to 1 (maximizing the covariance). For SGCCA, tau is automatically
101 set to 1 and shrinkage parameter can be defined instead for
102 automatic variable selection, varying from the square root of the
103 variable number (the fewest selected variables) to 1 (all the
104 variables are included). It can be a single value or a
105 comma-separated list (e.g. 0,1,0.75,1)."
106 ),
107 make_option(
108 opt_str = "--scheme",
109 type = "integer",
110 metavar = "integer",
111 default = opt[5],
112 help = "Link (i.e. scheme) function for covariance maximization
113 (1: x, 2: x^2, 3: |x|, 4: x^4) [default: %default]. Onnly, the x
114 function ('horst scheme') penalizes structural negative correlation.
115 The x^2 function ('factorial scheme') discriminates more strongly
116 the blocks than the |x| ('centroid scheme') one."
117 ),
118 make_option(
119 opt_str = "--scale",
120 type = "logical",
121 action = "store_false",
122 help = "DO NOT scale the blocks (i.e., a data centering step is
123 always performed). Otherwise, each block is normalised and divided
124 by the squareroot of its number of variables."
125 ),
126 make_option(
127 opt_str = "--superblock",
128 type = "logical",
129 action = "store_false",
130 help = "DO NOT use a superblock (i.e. a concatenation of all the
131 blocks to visualize them all together in a consensus space). In
132 this case, all blocks are assumed to be connected or a connection
133 file could be used."
134 ),
135 # Graphical parameters
136 make_option(
137 opt_str = "--text",
138 type = "logical",
139 action = "store_false",
140 help = "DO NOT display the name of the points instead of shapes when
141 plotting"
142 ),
143 make_option(
144 opt_str = "--block",
145 type = "integer",
146 metavar = "integer",
147 default = opt[6],
148 help = "Position in the path list of the plotted block (0: the
149 superblock or, if not activated, the last one, 1: the fist one,
150 2: the 2nd, etc.)[default: the last one]"
151 ),
152 make_option(
153 opt_str = "--block_y",
154 type = "integer",
155 metavar = "integer",
156 help = "Position in the path list of the plotted block for the
157 Y-axis in the individual plot (0: the superblock or, if not
158 activated, the last one, 1: the fist one, 2: the 2nd, etc.)
159 [default: the last one]"
160 ),
161 make_option(
162 opt_str = "--compx",
163 type = "integer",
164 metavar = "integer",
165 default = opt[7],
166 help = "Component used in the X-axis for biplots and the only
167 component used for histograms [default: %default] (should not be
168 higher than the number of components of the analysis)"
169 ),
170 make_option(
171 opt_str = "--compy",
172 type = "integer",
173 metavar = "integer",
174 default = opt[8],
175 help = "Component used in the Y-axis for biplots
176 [default: %default] (should not be higher than the number of
177 components of the analysis)"
178 ),
179 make_option(
180 opt_str = "--nmark",
181 type = "integer",
182 metavar = "integer",
183 default = opt[9],
184 help = "Number maximum of top variables in ad hoc plot
185 [default: %default]"
186 ),
187 # output parameters
188 make_option(
189 opt_str = "--o1",
190 type = "character",
191 metavar = "path",
192 default = opt[10],
193 help = "Path for the individual plot [default: %default]"
194 ),
195 make_option(
196 opt_str = "--o2",
197 type = "character",
198 metavar = "path",
199 default = opt[11],
200 help = "Path for the variable plot [default: %default]"
201 ),
202 make_option(
203 opt_str = "--o3",
204 type = "character",
205 metavar = "path",
206 default = opt[12],
207 help = "Path for the top variables plot [default: %default]"
208 ),
209 make_option(
210 opt_str = "--o4",
211 type = "character",
212 metavar = "path",
213 default = opt[13],
214 help = "Path for the explained variance plot [default: %default]"
215 ),
216 make_option(
217 opt_str = "--o5",
218 type = "character",
219 metavar = "path",
220 default = opt[14],
221 help = "Path for the design plot [default: %default]"
222 ),
223 make_option(
224 opt_str = "--o6",
225 type = "character",
226 metavar = "path",
227 default = opt[15],
228 help = "Path for the individual table [default: %default]"
229 ),
230 make_option(
231 opt_str = "--o7",
232 type = "character",
233 metavar = "path",
234 default = opt[16],
235 help = "Path for the variable table [default: %default]"
236 ),
237 make_option(
238 opt_str = "--o8",
239 type = "character",
240 metavar = "path",
241 default = opt[17],
242 help = "Path for the analysis results in RData [default: %default]"
243 )
244 )
245 return(optparse::OptionParser(option_list = option_list))
246 }
247
248 char_to_list <- function(x) {
249 strsplit(gsub(" ", "", as.character(x)), ",")[[1]]
250 }
251
252 check_arg <- function(opt) {
253 # Check the validity of the arguments opt : an optionParser object
254
255 if (is.null(opt$datasets))
256 stop_rgcca(paste0("datasets is required."), exit_code = 121)
257
258 if (is.null(opt$scheme))
259 opt$scheme <- "factorial"
260 else if (!opt$scheme %in% seq(4)) {
261 stop_rgcca(
262 paste0(
263 "scheme should be comprise between 1 and 4 [by default: 2], not be equal to ",
264 opt$scheme,
265 "."
266 ),
267 exit_code = 122
268 )
269 } else {
270 schemes <- c("horst", "factorial", "centroid")
271 if (opt$scheme == 4)
272 opt$scheme <- function(x) x ^ 4
273 else
274 opt$scheme <- schemes[opt$scheme]
275 }
276
277 if (!opt$separator %in% seq(3)) {
278 stop_rgcca(
279 paste0(
280 "separator should be comprise between 1 and 3 (1: Tabulation, 2: Semicolon, 3: Comma) [by default: 2], not be equal to ",
281 opt$separator,
282 "."
283 ),
284 exit_code = 123
285 )
286 } else {
287 separators <- c("\t", ";", ",")
288 opt$separator <- separators[opt$separator]
289 }
290
291 nmark <- NULL
292 RGCCA:::check_integer("nmark", opt$nmark, min = 2)
293
294 for (x in c("ncomp", "penalty"))
295 opt[[x]] <- char_to_list(opt[[x]])
296
297 return(opt)
298 }
299
300 post_check_arg <- function(opt, rgcca) {
301 # Check the validity of the arguments after loading the blocks opt : an
302 # optionParser object blocks : a list of matrix
303 blocks <- NULL
304 for (x in c("block", "block_y")) {
305 if (!is.null(opt[[x]])) {
306 if (opt[[x]] == 0)
307 opt[[x]] <- length(rgcca$call$blocks)
308 opt[[x]] <- RGCCA:::check_blockx(x, opt[[x]], rgcca$call$blocks)
309 }
310 }
311
312 if (any(opt$ncomp == 1))
313 opt$compy <- 1
314
315 for (x in c("compx", "compy"))
316 opt[[x]] <- check_compx(x, opt[[x]], rgcca$call$ncomp, opt$block)
317
318 return(opt)
319 }
320
321 check_integer <- function(x, y = x, type = "scalar", float = FALSE, min = 1) {
322
323 if (is.null(y))
324 y <- x
325
326 if (type %in% c("matrix", "data.frame"))
327 y_temp <- y
328
329 y <- suppressWarnings(as.double(as.matrix(y)))
330
331 if (any(is.na(y)))
332 stop_rgcca(paste(x, "should not be NA."))
333
334 if (!is(y, "numeric"))
335 stop_rgcca(paste(x, "should be numeric."))
336
337 if (type == "scalar" && length(y) != 1)
338 stop_rgcca(paste(x, "should be of length 1."))
339
340 if (!float)
341 y <- as.integer(y)
342
343 if (all(y < min))
344 stop_rgcca(paste0(x, " should be higher than or equal to ", min, "."))
345
346 if (type %in% c("matrix", "data.frame"))
347 y <- matrix(
348 y,
349 dim(y_temp)[1],
350 dim(y_temp)[2],
351 dimnames = dimnames(y_temp)
352 )
353
354 if (type == "data.frame")
355 as.data.frame(y)
356
357 return(y)
358 }
359
360 load_libraries <- function(librairies) {
361 for (l in librairies) {
362 if (!(l %in% installed.packages()[, "Package"]))
363 utils::install.packages(l, repos = "cran.us.r-project.org")
364 suppressPackageStartupMessages(
365 library(
366 l,
367 character.only = TRUE,
368 warn.conflicts = FALSE,
369 quietly = TRUE
370 ))
371 }
372 }
373
374 stop_rgcca <- function(
375 message,
376 exit_code = "1",
377 call = NULL) {
378
379 base::stop(
380 structure(
381 class = c(exit_code, "simpleError", "error", "condition"),
382 list(message = message, call. = NULL)
383 ))
384 }
385
386 ########## Main ##########
387
388 # Get arguments : R packaging install, need an opt variable with associated
389 # arguments
390 opt <- list(
391 separator = 1,
392 type = "rgcca",
393 ncomp = 2,
394 penalty = 1,
395 scheme = 2,
396 block = 0,
397 compx = 1,
398 compy = 2,
399 nmark = 100,
400 o1 = "individuals.pdf",
401 o2 = "corcircle.pdf",
402 o3 = "top_variables.pdf",
403 o4 = "ave.pdf",
404 o5 = "design.pdf",
405 o6 = "individuals.tsv",
406 o7 = "variables.tsv",
407 o8 = "rgcca_result.RData",
408 datasets = paste0("inst/extdata/",
409 c("agriculture", "industry", "politic"),
410 ".tsv",
411 collapse = ",")
412 )
413
414 load_libraries(c("ggplot2", "optparse", "scales", "igraph", "MASS", "rlang", "Deriv"))
415 try(load_libraries("ggrepel"), silent = TRUE)
416
417 tryCatch(
418 opt <- check_arg(optparse::parse_args(get_args())),
419 error = function(e) {
420 if (length(grep("nextArg", e[[1]])) != 1)
421 stop_rgcca(e[[1]], exit_code = 140)
422 }, warning = function(w)
423 stop_rgcca(w[[1]], exit_code = 141)
424 )
425
426 # Load functions
427 all_funcs <- unclass(lsf.str(envir = asNamespace("RGCCA"), all = T))
428 for (i in all_funcs)
429 eval(parse(text = paste0(i, "<-RGCCA:::", i)))
430
431 # Set missing parameters by default
432 opt$header <- !("header" %in% names(opt))
433 opt$superblock <- !("superblock" %in% names(opt))
434 opt$scale <- !("scale" %in% names(opt))
435 opt$text <- !("text" %in% names(opt))
436
437 status <- 0
438 tryCatch({
439
440 blocks <- load_blocks(opt$datasets, opt$names, opt$separator)
441 group <- load_response(blocks, opt$group, opt$separator, opt$header)
442 connection <- load_connection(file = opt$connection, separator = opt$separator)
443
444 func <- quote(
445 rgcca(
446 blocks = blocks,
447 connection = connection,
448 response = opt$response,
449 superblock = opt$superblock,
450 ncomp = opt$ncomp,
451 scheme = opt$scheme,
452 scale = opt$scale,
453 type = opt$type
454 )
455 )
456 if (tolower(opt$type) %in% c("sgcca", "spca", "spls")) {
457 func[["sparsity"]] <- opt$penalty
458 }else {
459 func[["tau"]] <- opt$penalty
460 }
461
462 rgcca_out <- eval(as.call(func))
463
464 opt <- post_check_arg(opt, rgcca_out)
465
466 ########## Plot ##########
467
468 if (rgcca_out$call$ncomp[opt$block] == 1 && is.null(opt$block_y)) {
469 warning("With a number of component of 1, a second block should be chosen to perform an individual plot")
470 } else {
471 (
472 individual_plot <- plot_ind(
473 rgcca_out,
474 group,
475 opt$compx,
476 opt$compy,
477 opt$block,
478 opt$text,
479 opt$block_y,
480 "Response"
481 )
482 )
483 save_plot(opt$o1, individual_plot)
484 }
485
486 if (rgcca_out$call$ncomp[opt$block] > 1) {
487 (
488 corcircle <- plot_var_2D(
489 rgcca_out,
490 opt$compx,
491 opt$compy,
492 opt$block,
493 opt$text,
494 n_mark = opt$nmark
495 )
496 )
497 save_plot(opt$o2, corcircle)
498 }
499
500 top_variables <- plot_var_1D(
501 rgcca_out,
502 opt$compx,
503 opt$nmark,
504 opt$block,
505 type = "cor"
506 )
507 save_plot(opt$o3, top_variables)
508
509 # Average Variance Explained
510 (ave <- plot_ave(rgcca_out))
511 save_plot(opt$o4, ave)
512
513 # Creates design scheme
514 design <- function() plot_network(rgcca_out)
515 save_plot(opt$o5, design)
516
517 save_ind(rgcca_out, opt$compx, opt$compy, opt$o6)
518 save_var(rgcca_out, opt$compx, opt$compy, opt$o7)
519 save(rgcca_out, file = opt$o8)
520
521 }, error = function(e) {
522 if (class(e)[1] %in% c("simpleError", "error", "condition"))
523 status <<- 1
524 else
525 status <<- class(e)[1]
526 message(e$message)
527 })
528 quit(status = status)