Mercurial > repos > iuc > rgcca
comparison launcher.R @ 0:067d45e6caa9 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgcca commit 00f9e92845737e05a4afb1c93043f35b7e4ea771"
author | iuc |
---|---|
date | Tue, 12 Jan 2021 10:12:04 +0000 |
parents | |
children | 4e73ea176c34 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:067d45e6caa9 |
---|---|
1 # Author: Etienne CAMENEN | |
2 # Date: 2020 | |
3 # Contact: arthur.tenenhaus@centralesupelec.fr | |
4 # Key-words: omics, RGCCA, multi-block | |
5 # EDAM operation: analysis, correlation, visualisation | |
6 # | |
7 # Abstract: Performs multi-variate analysis (PCA, CCA, PLS, R/SGCCA, etc.) | |
8 # and produces textual and graphical outputs (e.g. variables and individuals | |
9 # plots). | |
10 | |
11 rm(list = ls()) | |
12 graphics.off() | |
13 separator <- NULL | |
14 | |
15 ########## Arguments ########## | |
16 | |
17 # Parse the arguments from a command line launch | |
18 get_args <- function() { | |
19 option_list <- list( | |
20 # File parameters | |
21 make_option( | |
22 opt_str = c("-d", "--datasets"), | |
23 type = "character", | |
24 metavar = "path list", | |
25 help = "List of comma-separated file paths corresponding to the | |
26 blocks to be analyzed (one per block and without spaces between | |
27 them; e.g., path/file1.txt,path/file2.txt) [required]" | |
28 ), | |
29 make_option( | |
30 opt_str = c("-c", "--connection"), | |
31 type = "character", | |
32 metavar = "path", | |
33 help = "Path of the file defining the connections between the blocks | |
34 [if not used, activates the superblock mode]" | |
35 ), | |
36 make_option( | |
37 opt_str = "--group", | |
38 type = "character", | |
39 metavar = "path", | |
40 help = "Path of the file coloring the individuals in the ad hoc | |
41 plot" | |
42 ), | |
43 make_option( | |
44 opt_str = c("-r", "--response"), | |
45 type = "integer", | |
46 metavar = "integer", | |
47 help = "Position of the response file for the supervised mode within | |
48 the block path list [actives the supervised mode]" | |
49 ), | |
50 make_option( | |
51 opt_str = "--names", | |
52 type = "character", | |
53 metavar = "character list", | |
54 help = "List of comma-separated block names to rename them (one per | |
55 block; without spaces between them) [default: the block file names]" | |
56 ), | |
57 make_option( | |
58 opt_str = c("-H", "--header"), | |
59 type = "logical", | |
60 action = "store_false", | |
61 help = "DO NOT consider the first row as the column header" | |
62 ), | |
63 make_option( | |
64 opt_str = "--separator", | |
65 type = "integer", | |
66 metavar = "integer", | |
67 default = opt[1], | |
68 help = "Character used to separate columns (1: tabulation, | |
69 2: semicolon, 3: comma) [default: %default]" | |
70 ), | |
71 # Analysis parameter | |
72 make_option( | |
73 opt_str = "--type", | |
74 type = "character", | |
75 metavar = "character", | |
76 default = opt[2], | |
77 help = "Type of analysis [default: %default] (among: rgcca, pca, | |
78 cca, gcca, cpca-w, hpca, maxbet-b, maxbet, maxdiff-b, maxdiff, | |
79 maxvar-a, maxvar-b, maxvar, niles, r-maxvar, rcon-pca, ridge-gca, | |
80 sabscor, ssqcor, ssqcor, ssqcov-1, ssqcov-2, ssqcov, sum-pca, | |
81 sumcor, sumcov-1, sumcov-2, sumcov)" | |
82 ), | |
83 make_option( | |
84 opt_str = "--ncomp", | |
85 type = "character", | |
86 metavar = "integer list", | |
87 default = opt[3], | |
88 help = "Number of components in the analysis for each block | |
89 [default: %default]. The number should be higher than 1 and lower | |
90 than the minimum number of variables among the blocks. It can be a | |
91 single values or a comma-separated list (e.g 2,2,3,2)." | |
92 ), | |
93 make_option( | |
94 opt_str = "--penalty", | |
95 type = "character", | |
96 metavar = "float list", | |
97 default = opt[4], | |
98 help = "For RGCCA, a regularization parameter for each block (i.e., tau) | |
99 [default: %default]. Tau varies from 0 (maximizing the correlation) | |
100 to 1 (maximizing the covariance). For SGCCA, tau is automatically | |
101 set to 1 and shrinkage parameter can be defined instead for | |
102 automatic variable selection, varying from the square root of the | |
103 variable number (the fewest selected variables) to 1 (all the | |
104 variables are included). It can be a single value or a | |
105 comma-separated list (e.g. 0,1,0.75,1)." | |
106 ), | |
107 make_option( | |
108 opt_str = "--scheme", | |
109 type = "integer", | |
110 metavar = "integer", | |
111 default = opt[5], | |
112 help = "Link (i.e. scheme) function for covariance maximization | |
113 (1: x, 2: x^2, 3: |x|, 4: x^4) [default: %default]. Onnly, the x | |
114 function ('horst scheme') penalizes structural negative correlation. | |
115 The x^2 function ('factorial scheme') discriminates more strongly | |
116 the blocks than the |x| ('centroid scheme') one." | |
117 ), | |
118 make_option( | |
119 opt_str = "--scale", | |
120 type = "logical", | |
121 action = "store_false", | |
122 help = "DO NOT scale the blocks (i.e., a data centering step is | |
123 always performed). Otherwise, each block is normalised and divided | |
124 by the squareroot of its number of variables." | |
125 ), | |
126 make_option( | |
127 opt_str = "--superblock", | |
128 type = "logical", | |
129 action = "store_false", | |
130 help = "DO NOT use a superblock (i.e. a concatenation of all the | |
131 blocks to visualize them all together in a consensus space). In | |
132 this case, all blocks are assumed to be connected or a connection | |
133 file could be used." | |
134 ), | |
135 # Graphical parameters | |
136 make_option( | |
137 opt_str = "--text", | |
138 type = "logical", | |
139 action = "store_false", | |
140 help = "DO NOT display the name of the points instead of shapes when | |
141 plotting" | |
142 ), | |
143 make_option( | |
144 opt_str = "--block", | |
145 type = "integer", | |
146 metavar = "integer", | |
147 default = opt[6], | |
148 help = "Position in the path list of the plotted block (0: the | |
149 superblock or, if not activated, the last one, 1: the fist one, | |
150 2: the 2nd, etc.)[default: the last one]" | |
151 ), | |
152 make_option( | |
153 opt_str = "--block_y", | |
154 type = "integer", | |
155 metavar = "integer", | |
156 help = "Position in the path list of the plotted block for the | |
157 Y-axis in the individual plot (0: the superblock or, if not | |
158 activated, the last one, 1: the fist one, 2: the 2nd, etc.) | |
159 [default: the last one]" | |
160 ), | |
161 make_option( | |
162 opt_str = "--compx", | |
163 type = "integer", | |
164 metavar = "integer", | |
165 default = opt[7], | |
166 help = "Component used in the X-axis for biplots and the only | |
167 component used for histograms [default: %default] (should not be | |
168 higher than the number of components of the analysis)" | |
169 ), | |
170 make_option( | |
171 opt_str = "--compy", | |
172 type = "integer", | |
173 metavar = "integer", | |
174 default = opt[8], | |
175 help = "Component used in the Y-axis for biplots | |
176 [default: %default] (should not be higher than the number of | |
177 components of the analysis)" | |
178 ), | |
179 make_option( | |
180 opt_str = "--nmark", | |
181 type = "integer", | |
182 metavar = "integer", | |
183 default = opt[9], | |
184 help = "Number maximum of top variables in ad hoc plot | |
185 [default: %default]" | |
186 ), | |
187 # output parameters | |
188 make_option( | |
189 opt_str = "--o1", | |
190 type = "character", | |
191 metavar = "path", | |
192 default = opt[10], | |
193 help = "Path for the individual plot [default: %default]" | |
194 ), | |
195 make_option( | |
196 opt_str = "--o2", | |
197 type = "character", | |
198 metavar = "path", | |
199 default = opt[11], | |
200 help = "Path for the variable plot [default: %default]" | |
201 ), | |
202 make_option( | |
203 opt_str = "--o3", | |
204 type = "character", | |
205 metavar = "path", | |
206 default = opt[12], | |
207 help = "Path for the top variables plot [default: %default]" | |
208 ), | |
209 make_option( | |
210 opt_str = "--o4", | |
211 type = "character", | |
212 metavar = "path", | |
213 default = opt[13], | |
214 help = "Path for the explained variance plot [default: %default]" | |
215 ), | |
216 make_option( | |
217 opt_str = "--o5", | |
218 type = "character", | |
219 metavar = "path", | |
220 default = opt[14], | |
221 help = "Path for the design plot [default: %default]" | |
222 ), | |
223 make_option( | |
224 opt_str = "--o6", | |
225 type = "character", | |
226 metavar = "path", | |
227 default = opt[15], | |
228 help = "Path for the individual table [default: %default]" | |
229 ), | |
230 make_option( | |
231 opt_str = "--o7", | |
232 type = "character", | |
233 metavar = "path", | |
234 default = opt[16], | |
235 help = "Path for the variable table [default: %default]" | |
236 ), | |
237 make_option( | |
238 opt_str = "--o8", | |
239 type = "character", | |
240 metavar = "path", | |
241 default = opt[17], | |
242 help = "Path for the analysis results in RData [default: %default]" | |
243 ) | |
244 ) | |
245 return(optparse::OptionParser(option_list = option_list)) | |
246 } | |
247 | |
248 char_to_list <- function(x) { | |
249 strsplit(gsub(" ", "", as.character(x)), ",")[[1]] | |
250 } | |
251 | |
252 check_arg <- function(opt) { | |
253 # Check the validity of the arguments opt : an optionParser object | |
254 | |
255 if (is.null(opt$datasets)) | |
256 stop_rgcca(paste0("datasets is required."), exit_code = 121) | |
257 | |
258 if (is.null(opt$scheme)) | |
259 opt$scheme <- "factorial" | |
260 else if (!opt$scheme %in% seq(4)) { | |
261 stop_rgcca( | |
262 paste0( | |
263 "scheme should be comprise between 1 and 4 [by default: 2], not be equal to ", | |
264 opt$scheme, | |
265 "." | |
266 ), | |
267 exit_code = 122 | |
268 ) | |
269 } else { | |
270 schemes <- c("horst", "factorial", "centroid") | |
271 if (opt$scheme == 4) | |
272 opt$scheme <- function(x) x ^ 4 | |
273 else | |
274 opt$scheme <- schemes[opt$scheme] | |
275 } | |
276 | |
277 if (!opt$separator %in% seq(3)) { | |
278 stop_rgcca( | |
279 paste0( | |
280 "separator should be comprise between 1 and 3 (1: Tabulation, 2: Semicolon, 3: Comma) [by default: 2], not be equal to ", | |
281 opt$separator, | |
282 "." | |
283 ), | |
284 exit_code = 123 | |
285 ) | |
286 } else { | |
287 separators <- c("\t", ";", ",") | |
288 opt$separator <- separators[opt$separator] | |
289 } | |
290 | |
291 nmark <- NULL | |
292 RGCCA:::check_integer("nmark", opt$nmark, min = 2) | |
293 | |
294 for (x in c("ncomp", "penalty")) | |
295 opt[[x]] <- char_to_list(opt[[x]]) | |
296 | |
297 return(opt) | |
298 } | |
299 | |
300 post_check_arg <- function(opt, rgcca) { | |
301 # Check the validity of the arguments after loading the blocks opt : an | |
302 # optionParser object blocks : a list of matrix | |
303 blocks <- NULL | |
304 for (x in c("block", "block_y")) { | |
305 if (!is.null(opt[[x]])) { | |
306 if (opt[[x]] == 0) | |
307 opt[[x]] <- length(rgcca$call$blocks) | |
308 opt[[x]] <- RGCCA:::check_blockx(x, opt[[x]], rgcca$call$blocks) | |
309 } | |
310 } | |
311 | |
312 if (any(opt$ncomp == 1)) | |
313 opt$compy <- 1 | |
314 | |
315 for (x in c("compx", "compy")) | |
316 opt[[x]] <- check_compx(x, opt[[x]], rgcca$call$ncomp, opt$block) | |
317 | |
318 return(opt) | |
319 } | |
320 | |
321 check_integer <- function(x, y = x, type = "scalar", float = FALSE, min = 1) { | |
322 | |
323 if (is.null(y)) | |
324 y <- x | |
325 | |
326 if (type %in% c("matrix", "data.frame")) | |
327 y_temp <- y | |
328 | |
329 y <- suppressWarnings(as.double(as.matrix(y))) | |
330 | |
331 if (any(is.na(y))) | |
332 stop_rgcca(paste(x, "should not be NA.")) | |
333 | |
334 if (!is(y, "numeric")) | |
335 stop_rgcca(paste(x, "should be numeric.")) | |
336 | |
337 if (type == "scalar" && length(y) != 1) | |
338 stop_rgcca(paste(x, "should be of length 1.")) | |
339 | |
340 if (!float) | |
341 y <- as.integer(y) | |
342 | |
343 if (all(y < min)) | |
344 stop_rgcca(paste0(x, " should be higher than or equal to ", min, ".")) | |
345 | |
346 if (type %in% c("matrix", "data.frame")) | |
347 y <- matrix( | |
348 y, | |
349 dim(y_temp)[1], | |
350 dim(y_temp)[2], | |
351 dimnames = dimnames(y_temp) | |
352 ) | |
353 | |
354 if (type == "data.frame") | |
355 as.data.frame(y) | |
356 | |
357 return(y) | |
358 } | |
359 | |
360 load_libraries <- function(librairies) { | |
361 for (l in librairies) { | |
362 if (!(l %in% installed.packages()[, "Package"])) | |
363 utils::install.packages(l, repos = "cran.us.r-project.org") | |
364 suppressPackageStartupMessages( | |
365 library( | |
366 l, | |
367 character.only = TRUE, | |
368 warn.conflicts = FALSE, | |
369 quietly = TRUE | |
370 )) | |
371 } | |
372 } | |
373 | |
374 stop_rgcca <- function( | |
375 message, | |
376 exit_code = "1", | |
377 call = NULL) { | |
378 | |
379 base::stop( | |
380 structure( | |
381 class = c(exit_code, "simpleError", "error", "condition"), | |
382 list(message = message, call. = NULL) | |
383 )) | |
384 } | |
385 | |
386 ########## Main ########## | |
387 | |
388 # Get arguments : R packaging install, need an opt variable with associated | |
389 # arguments | |
390 opt <- list( | |
391 separator = 1, | |
392 type = "rgcca", | |
393 ncomp = 2, | |
394 penalty = 1, | |
395 scheme = 2, | |
396 block = 0, | |
397 compx = 1, | |
398 compy = 2, | |
399 nmark = 100, | |
400 o1 = "individuals.pdf", | |
401 o2 = "corcircle.pdf", | |
402 o3 = "top_variables.pdf", | |
403 o4 = "ave.pdf", | |
404 o5 = "design.pdf", | |
405 o6 = "individuals.tsv", | |
406 o7 = "variables.tsv", | |
407 o8 = "rgcca_result.RData", | |
408 datasets = paste0("inst/extdata/", | |
409 c("agriculture", "industry", "politic"), | |
410 ".tsv", | |
411 collapse = ",") | |
412 ) | |
413 | |
414 load_libraries(c("ggplot2", "optparse", "scales", "igraph", "MASS", "rlang", "Deriv")) | |
415 try(load_libraries("ggrepel"), silent = TRUE) | |
416 | |
417 tryCatch( | |
418 opt <- check_arg(optparse::parse_args(get_args())), | |
419 error = function(e) { | |
420 if (length(grep("nextArg", e[[1]])) != 1) | |
421 stop_rgcca(e[[1]], exit_code = 140) | |
422 }, warning = function(w) | |
423 stop_rgcca(w[[1]], exit_code = 141) | |
424 ) | |
425 | |
426 # Load functions | |
427 all_funcs <- unclass(lsf.str(envir = asNamespace("RGCCA"), all = T)) | |
428 for (i in all_funcs) | |
429 eval(parse(text = paste0(i, "<-RGCCA:::", i))) | |
430 | |
431 # Set missing parameters by default | |
432 opt$header <- !("header" %in% names(opt)) | |
433 opt$superblock <- !("superblock" %in% names(opt)) | |
434 opt$scale <- !("scale" %in% names(opt)) | |
435 opt$text <- !("text" %in% names(opt)) | |
436 | |
437 status <- 0 | |
438 tryCatch({ | |
439 | |
440 blocks <- load_blocks(opt$datasets, opt$names, opt$separator) | |
441 group <- load_response(blocks, opt$group, opt$separator, opt$header) | |
442 connection <- load_connection(file = opt$connection, separator = opt$separator) | |
443 | |
444 func <- quote( | |
445 rgcca( | |
446 blocks = blocks, | |
447 connection = connection, | |
448 response = opt$response, | |
449 superblock = opt$superblock, | |
450 ncomp = opt$ncomp, | |
451 scheme = opt$scheme, | |
452 scale = opt$scale, | |
453 type = opt$type | |
454 ) | |
455 ) | |
456 if (tolower(opt$type) %in% c("sgcca", "spca", "spls")) { | |
457 func[["sparsity"]] <- opt$penalty | |
458 }else { | |
459 func[["tau"]] <- opt$penalty | |
460 } | |
461 | |
462 rgcca_out <- eval(as.call(func)) | |
463 | |
464 opt <- post_check_arg(opt, rgcca_out) | |
465 | |
466 ########## Plot ########## | |
467 | |
468 if (rgcca_out$call$ncomp[opt$block] == 1 && is.null(opt$block_y)) { | |
469 warning("With a number of component of 1, a second block should be chosen to perform an individual plot") | |
470 } else { | |
471 ( | |
472 individual_plot <- plot_ind( | |
473 rgcca_out, | |
474 group, | |
475 opt$compx, | |
476 opt$compy, | |
477 opt$block, | |
478 opt$text, | |
479 opt$block_y, | |
480 "Response" | |
481 ) | |
482 ) | |
483 save_plot(opt$o1, individual_plot) | |
484 } | |
485 | |
486 if (rgcca_out$call$ncomp[opt$block] > 1) { | |
487 ( | |
488 corcircle <- plot_var_2D( | |
489 rgcca_out, | |
490 opt$compx, | |
491 opt$compy, | |
492 opt$block, | |
493 opt$text, | |
494 n_mark = opt$nmark | |
495 ) | |
496 ) | |
497 save_plot(opt$o2, corcircle) | |
498 } | |
499 | |
500 top_variables <- plot_var_1D( | |
501 rgcca_out, | |
502 opt$compx, | |
503 opt$nmark, | |
504 opt$block, | |
505 type = "cor" | |
506 ) | |
507 save_plot(opt$o3, top_variables) | |
508 | |
509 # Average Variance Explained | |
510 (ave <- plot_ave(rgcca_out)) | |
511 save_plot(opt$o4, ave) | |
512 | |
513 # Creates design scheme | |
514 design <- function() plot_network(rgcca_out) | |
515 save_plot(opt$o5, design) | |
516 | |
517 save_ind(rgcca_out, opt$compx, opt$compy, opt$o6) | |
518 save_var(rgcca_out, opt$compx, opt$compy, opt$o7) | |
519 save(rgcca_out, file = opt$o8) | |
520 | |
521 }, error = function(e) { | |
522 if (class(e)[1] %in% c("simpleError", "error", "condition")) | |
523 status <<- 1 | |
524 else | |
525 status <<- class(e)[1] | |
526 message(e$message) | |
527 }) | |
528 quit(status = status) |