Mercurial > repos > iuc > rgcca
changeset 0:067d45e6caa9 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgcca commit 00f9e92845737e05a4afb1c93043f35b7e4ea771"
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/launcher.R Tue Jan 12 10:12:04 2021 +0000 @@ -0,0 +1,528 @@
# Author: Etienne CAMENEN
# Date: 2020
# Contact: arthur.tenenhaus@centralesupelec.fr
# Key-words: omics, RGCCA, multi-block
# EDAM operation: analysis, correlation, visualisation
#
# Abstract: Performs multi-variate analysis (PCA, CCA, PLS, R/SGCCA, etc.)
# and produces textual and graphical outputs (e.g. variables and individuals
# plots).

rm(list = ls())
graphics.off()
separator <- NULL

########## Arguments ##########

# Build the command-line parser.
#
# Defaults are read from the global `opt` list defined in the Main section, so
# `opt` must exist (and optparse must be installed) before this is called.
# Defaults are looked up by name rather than by position so that reordering
# `opt` cannot silently shift them.
#
# Returns an optparse OptionParser object.
get_args <- function() {
    make_option <- optparse::make_option
    option_list <- list(
        # File parameters
        make_option(
            opt_str = c("-d", "--datasets"),
            type = "character",
            metavar = "path list",
            help = "List of comma-separated file paths corresponding to the
            blocks to be analyzed (one per block and without spaces between
            them; e.g., path/file1.txt,path/file2.txt) [required]"
        ),
        make_option(
            opt_str = c("-c", "--connection"),
            type = "character",
            metavar = "path",
            help = "Path of the file defining the connections between the blocks
            [if not used, activates the superblock mode]"
        ),
        make_option(
            opt_str = "--group",
            type = "character",
            metavar = "path",
            help = "Path of the file coloring the individuals in the ad hoc
            plot"
        ),
        make_option(
            opt_str = c("-r", "--response"),
            type = "integer",
            metavar = "integer",
            help = "Position of the response file for the supervised mode within
            the block path list [actives the supervised mode]"
        ),
        make_option(
            opt_str = "--names",
            type = "character",
            metavar = "character list",
            help = "List of comma-separated block names to rename them (one per
            block; without spaces between them) [default: the block file names]"
        ),
        make_option(
            opt_str = c("-H", "--header"),
            type = "logical",
            action = "store_false",
            help = "DO NOT consider the first row as the column header"
        ),
        make_option(
            opt_str = "--separator",
            type = "integer",
            metavar = "integer",
            default = opt[["separator"]],
            help = "Character used to separate columns (1: tabulation,
            2: semicolon, 3: comma) [default: %default]"
        ),
        # Analysis parameter
        make_option(
            opt_str = "--type",
            type = "character",
            metavar = "character",
            default = opt[["type"]],
            help = "Type of analysis [default: %default] (among: rgcca, pca,
            cca, gcca, cpca-w, hpca, maxbet-b, maxbet, maxdiff-b, maxdiff,
            maxvar-a, maxvar-b, maxvar, niles, r-maxvar, rcon-pca, ridge-gca,
            sabscor, ssqcor, ssqcor, ssqcov-1, ssqcov-2, ssqcov, sum-pca,
            sumcor, sumcov-1, sumcov-2, sumcov)"
        ),
        make_option(
            opt_str = "--ncomp",
            type = "character",
            metavar = "integer list",
            default = opt[["ncomp"]],
            help = "Number of components in the analysis for each block
            [default: %default]. The number should be higher than 1 and lower
            than the minimum number of variables among the blocks. It can be a
            single values or a comma-separated list (e.g 2,2,3,2)."
        ),
        make_option(
            opt_str = "--penalty",
            type = "character",
            metavar = "float list",
            default = opt[["penalty"]],
            help = "For RGCCA, a regularization parameter for each block (i.e., tau)
            [default: %default]. Tau varies from 0 (maximizing the correlation)
            to 1 (maximizing the covariance). For SGCCA, tau is automatically
            set to 1 and shrinkage parameter can be defined instead for
            automatic variable selection, varying from the square root of the
            variable number (the fewest selected variables) to 1 (all the
            variables are included). It can be a single value or a
            comma-separated list (e.g. 0,1,0.75,1)."
        ),
        make_option(
            opt_str = "--scheme",
            type = "integer",
            metavar = "integer",
            default = opt[["scheme"]],
            help = "Link (i.e. scheme) function for covariance maximization
            (1: x, 2: x^2, 3: |x|, 4: x^4) [default: %default]. Onnly, the x
            function ('horst scheme') penalizes structural negative correlation.
            The x^2 function ('factorial scheme') discriminates more strongly
            the blocks than the |x| ('centroid scheme') one."
        ),
        make_option(
            opt_str = "--scale",
            type = "logical",
            action = "store_false",
            help = "DO NOT scale the blocks (i.e., a data centering step is
            always performed). Otherwise, each block is normalised and divided
            by the squareroot of its number of variables."
        ),
        make_option(
            opt_str = "--superblock",
            type = "logical",
            action = "store_false",
            help = "DO NOT use a superblock (i.e. a concatenation of all the
            blocks to visualize them all together in a consensus space). In
            this case, all blocks are assumed to be connected or a connection
            file could be used."
        ),
        # Graphical parameters
        make_option(
            opt_str = "--text",
            type = "logical",
            action = "store_false",
            help = "DO NOT display the name of the points instead of shapes when
            plotting"
        ),
        make_option(
            opt_str = "--block",
            type = "integer",
            metavar = "integer",
            default = opt[["block"]],
            help = "Position in the path list of the plotted block (0: the
            superblock or, if not activated, the last one, 1: the fist one,
            2: the 2nd, etc.)[default: the last one]"
        ),
        make_option(
            opt_str = "--block_y",
            type = "integer",
            metavar = "integer",
            help = "Position in the path list of the plotted block for the
            Y-axis in the individual plot (0: the superblock or, if not
            activated, the last one, 1: the fist one, 2: the 2nd, etc.)
            [default: the last one]"
        ),
        make_option(
            opt_str = "--compx",
            type = "integer",
            metavar = "integer",
            default = opt[["compx"]],
            help = "Component used in the X-axis for biplots and the only
            component used for histograms [default: %default] (should not be
            higher than the number of components of the analysis)"
        ),
        make_option(
            opt_str = "--compy",
            type = "integer",
            metavar = "integer",
            default = opt[["compy"]],
            help = "Component used in the Y-axis for biplots
            [default: %default] (should not be higher than the number of
            components of the analysis)"
        ),
        make_option(
            opt_str = "--nmark",
            type = "integer",
            metavar = "integer",
            default = opt[["nmark"]],
            help = "Number maximum of top variables in ad hoc plot
            [default: %default]"
        ),
        # output parameters
        make_option(
            opt_str = "--o1",
            type = "character",
            metavar = "path",
            default = opt[["o1"]],
            help = "Path for the individual plot [default: %default]"
        ),
        make_option(
            opt_str = "--o2",
            type = "character",
            metavar = "path",
            default = opt[["o2"]],
            help = "Path for the variable plot [default: %default]"
        ),
        make_option(
            opt_str = "--o3",
            type = "character",
            metavar = "path",
            default = opt[["o3"]],
            help = "Path for the top variables plot [default: %default]"
        ),
        make_option(
            opt_str = "--o4",
            type = "character",
            metavar = "path",
            default = opt[["o4"]],
            help = "Path for the explained variance plot [default: %default]"
        ),
        make_option(
            opt_str = "--o5",
            type = "character",
            metavar = "path",
            default = opt[["o5"]],
            help = "Path for the design plot [default: %default]"
        ),
        make_option(
            opt_str = "--o6",
            type = "character",
            metavar = "path",
            default = opt[["o6"]],
            help = "Path for the individual table [default: %default]"
        ),
        make_option(
            opt_str = "--o7",
            type = "character",
            metavar = "path",
            default = opt[["o7"]],
            help = "Path for the variable table [default: %default]"
        ),
        make_option(
            opt_str = "--o8",
            type = "character",
            metavar = "path",
            default = opt[["o8"]],
            help = "Path for the analysis results in RData [default: %default]"
        )
    )
    return(optparse::OptionParser(option_list = option_list))
}

# Split a comma-separated value into a character vector, dropping every space
# first (e.g. "2, 3" -> c("2", "3")).
char_to_list <- function(x) {
    strsplit(gsub(" ", "", as.character(x)), ",")[[1]]
}

# Validate the parsed command-line options and convert the coded ones.
#
# opt: the list returned by optparse::parse_args().
#
# Returns `opt` with:
#   - scheme mapped from its 1-4 code to "horst" / "factorial" / "centroid",
#     or to the function x^4 for code 4;
#   - separator mapped from its 1-3 code to "\t" / ";" / ",";
#   - ncomp and penalty split into character vectors.
# Raises (through stop_rgcca) exit codes 121 (datasets missing), 122 (bad
# scheme) and 123 (bad separator).
check_arg <- function(opt) {

    if (is.null(opt$datasets))
        stop_rgcca(paste0("datasets is required."), exit_code = 121)

    if (is.null(opt$scheme))
        opt$scheme <- "factorial"
    else if (!opt$scheme %in% seq(4)) {
        stop_rgcca(
            paste0(
                "scheme should be comprise between 1 and 4 [by default: 2], not be equal to ",
                opt$scheme,
                "."
            ),
            exit_code = 122
        )
    } else {
        schemes <- c("horst", "factorial", "centroid")
        if (opt$scheme == 4)
            opt$scheme <- function(x) x ^ 4
        else
            opt$scheme <- schemes[opt$scheme]
    }

    if (!opt$separator %in% seq(3)) {
        # The advertised default is 1 (tabulation), matching the `opt` list
        # defined in the Main section.
        stop_rgcca(
            paste0(
                "separator should be comprise between 1 and 3 ",
                "(1: Tabulation, 2: Semicolon, 3: Comma) ",
                "[by default: 1], not be equal to ",
                opt$separator,
                "."
            ),
            exit_code = 123
        )
    } else {
        separators <- c("\t", ";", ",")
        opt$separator <- separators[opt$separator]
    }

    RGCCA:::check_integer("nmark", opt$nmark, min = 2)

    for (x in c("ncomp", "penalty"))
        opt[[x]] <- char_to_list(opt[[x]])

    return(opt)
}

# Validate the options that can only be checked once the analysis has run.
#
# opt:   the option list returned by check_arg().
# rgcca: the fitted object returned by rgcca(); its `call$blocks` and
#        `call$ncomp` entries are read.
#
# Returns `opt` where block / block_y equal to 0 are replaced by the index of
# the last block, and compx / compy are validated with check_compx() (a
# function copied from the RGCCA namespace in the Main section).
post_check_arg <- function(opt, rgcca) {
    for (x in c("block", "block_y")) {
        if (!is.null(opt[[x]])) {
            if (opt[[x]] == 0)
                opt[[x]] <- length(rgcca$call$blocks)
            opt[[x]] <- RGCCA:::check_blockx(x, opt[[x]], rgcca$call$blocks)
        }
    }

    # With a single component there is no second axis to plot
    if (any(opt$ncomp == 1))
        opt$compy <- 1

    for (x in c("compx", "compy"))
        opt[[x]] <- check_compx(x, opt[[x]], rgcca$call$ncomp, opt$block)

    return(opt)
}

# Check that a value is numeric (an integer unless `float` is TRUE) and return
# it in the requested container.
#
# x:     name of the checked parameter, used in the error messages; it is also
#        the checked value when `y` is NULL.
# y:     value to check (defaults to `x`).
# type:  "scalar", "matrix" or "data.frame"; "scalar" requires length 1.
# float: if FALSE, the value is truncated with as.integer().
# min:   lower bound.
#
# Returns the converted value: a vector, or a matrix / data.frame with the
# dimensions and dimnames of the input for the matching `type`.
check_integer <- function(x, y = x, type = "scalar", float = FALSE, min = 1) {

    if (is.null(y))
        y <- x

    if (type %in% c("matrix", "data.frame"))
        y_temp <- y

    y <- suppressWarnings(as.double(as.matrix(y)))

    if (any(is.na(y)))
        stop_rgcca(paste(x, "should not be NA."))

    if (!is.numeric(y))
        stop_rgcca(paste(x, "should be numeric."))

    if (type == "scalar" && length(y) != 1)
        stop_rgcca(paste(x, "should be of length 1."))

    if (!float)
        y <- as.integer(y)

    # NOTE(review): this only rejects the input when *every* value is below
    # `min`; confirm that `any()` was not intended for vector input.
    if (all(y < min))
        stop_rgcca(paste0(x, " should be higher than or equal to ", min, "."))

    if (type %in% c("matrix", "data.frame"))
        y <- matrix(
            y,
            dim(y_temp)[1],
            dim(y_temp)[2],
            dimnames = dimnames(y_temp)
        )

    # The conversion result has to be stored, otherwise a matrix is returned
    if (type == "data.frame")
        y <- as.data.frame(y)

    return(y)
}

# Attach each package in `librairies`, installing it from CRAN first when it
# cannot be found.
load_libraries <- function(librairies) {
    for (l in librairies) {
        if (!requireNamespace(l, quietly = TRUE))
            # `repos` must be a full URL, scheme included
            utils::install.packages(l, repos = "https://cran.us.r-project.org")
        suppressPackageStartupMessages(
            library(
                l,
                character.only = TRUE,
                warn.conflicts = FALSE,
                quietly = TRUE
            ))
    }
}

# Raise an error condition whose first class is the wanted process exit code.
#
# message:   error message.
# exit_code: exit status, stored as the first class of the condition so that
#            the error handler of the Main section can recover it.
# call:      optional call to attach to the condition (NULL hides it).
stop_rgcca <- function(
    message,
    exit_code = "1",
    call = NULL) {

    base::stop(
        structure(
            class = c(exit_code, "simpleError", "error", "condition"),
            list(message = message, call = call)
        ))
}

########## Main ##########

# Get arguments : R packaging install, need an opt variable with associated
# arguments
opt <- list(
    separator = 1,
    type = "rgcca",
    ncomp = 2,
    penalty = 1,
    scheme = 2,
    block = 0,
    compx = 1,
    compy = 2,
    nmark = 100,
    o1 = "individuals.pdf",
    o2 = "corcircle.pdf",
    o3 = "top_variables.pdf",
    o4 = "ave.pdf",
    o5 = "design.pdf",
    o6 = "individuals.tsv",
    o7 = "variables.tsv",
    o8 = "rgcca_result.RData",
    datasets = paste0("inst/extdata/",
        c("agriculture", "industry", "politic"),
        ".tsv",
        collapse = ",")
)

load_libraries(c("ggplot2", "optparse", "scales", "igraph", "MASS", "rlang", "Deriv"))
try(load_libraries("ggrepel"), silent = TRUE)

# Parse and validate the command line. An error mentioning "nextArg" is
# ignored, in which case the defaults above are kept.
# NOTE(review): errors re-raised here are not caught by the tryCatch below, so
# the exit codes 121-123, 140 and 141 are only visible as classes of the
# uncaught error - confirm that this is intended.
tryCatch(
    opt <- check_arg(optparse::parse_args(get_args())),
    error = function(e) {
        if (!any(grepl("nextArg", conditionMessage(e))))
            stop_rgcca(conditionMessage(e), exit_code = 140)
    },
    warning = function(w)
        stop_rgcca(conditionMessage(w), exit_code = 141)
)

# Load functions: copy every function of the RGCCA namespace (internal ones
# included) into the current environment so they can be called unqualified.
rgcca_namespace <- asNamespace("RGCCA")
all_funcs <- unclass(lsf.str(envir = rgcca_namespace, all.names = TRUE))
for (i in all_funcs)
    assign(i, get(i, envir = rgcca_namespace))

# Set missing parameters by default: the "store_false" flags are only present
# in `opt` when they were given on the command line.
opt$header <- !("header" %in% names(opt))
opt$superblock <- !("superblock" %in% names(opt))
opt$scale <- !("scale" %in% names(opt))
opt$text <- !("text" %in% names(opt))

status <- tryCatch({

    blocks <- load_blocks(opt$datasets, opt$names, opt$separator)
    group <- load_response(blocks, opt$group, opt$separator, opt$header)
    connection <- load_connection(file = opt$connection, separator = opt$separator)

    func <- quote(
        rgcca(
            blocks = blocks,
            connection = connection,
            response = opt$response,
            superblock = opt$superblock,
            ncomp = opt$ncomp,
            scheme = opt$scheme,
            scale = opt$scale,
            type = opt$type
        )
    )
    # The penalty is a sparsity for the sparse methods and a tau otherwise
    if (tolower(opt$type) %in% c("sgcca", "spca", "spls")) {
        func[["sparsity"]] <- opt$penalty
    } else {
        func[["tau"]] <- opt$penalty
    }

    rgcca_out <- eval(as.call(func))

    opt <- post_check_arg(opt, rgcca_out)

    ########## Plot ##########

    if (rgcca_out$call$ncomp[opt$block] == 1 && is.null(opt$block_y)) {
        warning("With a number of component of 1, a second block should be chosen to perform an individual plot")
    } else {
        individual_plot <- plot_ind(
            rgcca_out,
            group,
            opt$compx,
            opt$compy,
            opt$block,
            opt$text,
            opt$block_y,
            "Response"
        )
        save_plot(opt$o1, individual_plot)
    }

    if (rgcca_out$call$ncomp[opt$block] > 1) {
        corcircle <- plot_var_2D(
            rgcca_out,
            opt$compx,
            opt$compy,
            opt$block,
            opt$text,
            n_mark = opt$nmark
        )
        save_plot(opt$o2, corcircle)
    }

    top_variables <- plot_var_1D(
        rgcca_out,
        opt$compx,
        opt$nmark,
        opt$block,
        type = "cor"
    )
    save_plot(opt$o3, top_variables)

    # Average Variance Explained
    ave <- plot_ave(rgcca_out)
    save_plot(opt$o4, ave)

    # Creates design scheme
    design <- function() plot_network(rgcca_out)
    save_plot(opt$o5, design)

    save_ind(rgcca_out, opt$compx, opt$compy, opt$o6)
    save_var(rgcca_out, opt$compx, opt$compy, opt$o7)
    save(rgcca_out, file = opt$o8)

    0L
}, error = function(e) {
    message(conditionMessage(e))
    # stop_rgcca() stores the exit code as the first class of the condition.
    # Any other error class is not a number and must map to a generic failure:
    # handing a non-numeric status to quit() would end the run with status 0.
    exit_code <- suppressWarnings(as.integer(class(e)[1]))
    if (is.na(exit_code)) 1L else exit_code
})
quit(status = status)
<!-- Diff header: /dev/null Thu Jan 01 00:00:00 1970 +0000 => b/macro.xml Tue Jan 12 10:12:04 2021 +0000 (hunk @@ -0,0 +1,44 @@) -->
<!-- Shared macros for the RGCCA tool: reusable tokens and test fragments. -->
<macros>

    <!-- Version string made available as @TOOL_VERSION@. -->
    <token name="@TOOL_VERSION@">3.0.0</token>

    <!-- Help sentence explaining how block positions are numbered. -->
    <token name="@BLOCK_RULES@">1 corresponds to the first block, 2 corresponds to the second one, etc. This number should not be greater than the number of blocks selected.</token>

    <!-- Help sentence stating the upper bound of a component index. -->
    <token name="@COMP_RULES@">This number should not be greater than the selected number of component (2, by default).</token>

    <!-- Test fragment selecting every output and checking each one.
         The token_* attributes provide the defaults of @PATH@ (empty),
         @COMPX@ (1) and @COMPY@ (2) used below. -->
    <xml name="output_tests" token_path="" token_compx="1" token_compy="2">
        <param name="output_selector" value="individuals,corcircle,top_variables,ave,design,individual_table,variable_table,rdata"/>
        <output name="individual_plot" file="@PATH@/individuals.pdf" ftype="pdf"/>
        <output name="top_variables" file="@PATH@/top_variables.pdf" ftype="pdf"/>
        <output name="corcircle" file="@PATH@/corcircle.pdf" ftype="pdf"/>
        <output name="ave" file="@PATH@/ave.pdf" ftype="pdf"/>
        <output name="design" file="@PATH@/design.pdf" ftype="pdf"/>
        <!-- The RData output is compared by size only (sim_size, delta of 1000). -->
        <output name="rdata" file="@PATH@/rgcca.result.RData" compare="sim_size" delta="1000" ftype="rdata"/>
        <output name="variable_table">
            <assert_contents>
                <has_n_columns n="5"/>
                <!-- Header line: the cor.axis and weight.axis columns of both components, then "block". -->
                <has_line_matching
                    expression='.*"cor.axis.@COMPX@"\s"cor.axis.@COMPY@"\s"weight.axis.@COMPX@"\s"weight.axis.@COMPY@".*\s"block"'/>
                <!-- At least one line holding four whitespace-separated decimal values. -->
                <has_line_matching
                    expression='^.+(\s\-?\d+.\d+){4}.+$'/>
            </assert_contents>
        </output>
    </xml>

    <!-- Test fragment using three blocks with a manual tau of 0.75 and scheme 4;
         it asserts that both values appear in the generated command line. -->
    <xml name="output_tests_3blocks">
        <param name="blocks" value="agriculture.tsv,industry.tsv,politic.tsv" ftype = "tsv"/>
        <section name="analyse">
            <conditional name="tau">
                <param name="bool" value="false"/>
                <param name="value" value="0.75"/>
            </conditional>
            <param name="scheme" value="4"/>
        </section>
        <assert_command>
            <has_text text="--penalty 0.75"/>
            <has_text text="--scheme 4"/>
        </assert_command>
    </xml>

</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgcca.xml Tue Jan 12 10:12:04 2021 +0000 @@ -0,0 +1,359 @@ +<tool id="rgcca" name="RGCCA" version="@TOOL_VERSION@+galaxy0"> + + <description>performs multiblock data analysis of several sets of variables (blocks) observed on the same group of individuals.</description> + + <macros> + <import>macro.xml</import> + </macros> + + <edam_topics> + <edam_topic>topic_2269</edam_topic> + </edam_topics> + + <edam_operations> + <edam_operation>operation_2945</edam_operation> + <edam_operation>operation_3465</edam_operation> + <edam_operation>operation_0337</edam_operation> + </edam_operations> + + <requirements> + <requirement type="package" version="@TOOL_VERSION@">rgccacmd</requirement> + </requirements> + + <command detect_errors="exit_code"><![CDATA[ + #set data_paths = ",".join([str(_.file_name) for _ in $blocks]) + #set data_names = ",".join([str(_.element_identifier).replace(',', '_') for _ in $blocks]) + Rscript '$__tool_directory__/launcher.R' + --datasets '${data_paths}' + --names '${data_names}' + --o1 '$individual_plot' --o2 '$corcircle' --o3 '$top_variables' --o4 '$ave' --o5 '$design' --o6 '$individual_table' --o7 '$variable_table' --o8 '$rdata' + $parse.header + --separator $parse.separator + $analyse.superblock + $analyse.scale + #if $analyse.tau.bool == 'false' + --penalty $analyse.tau.value + #else + --penalty $analyse.tau.bool + #end if + --ncomp $analyse.ncomp + --scheme $analyse.scheme + #if $analyse.method.family == '1' + --type pca + #else + --type $analyse.method.type + #end if + #if $analyse.connection + --connection $analyse.connection + #end if + #if $analyse.supervised.learning_mode == 'supervised' + --response $analyse.supervised.block_response + #end if + #if $graphic.response + --group $graphic.response + #end if + --compx $graphic.compx + --compy $graphic.compy + --nmark $graphic.nmark + $graphic.text + --block $graphic.blockx + --block_y $graphic.blocky + ]]></command> + + <inputs> + <param 
name="blocks" type="data" format="tsv,tabular,txt,csv" multiple="true" optional="false" label = "Load blocks" + help="TSV file containing a matrix with: (i) quantitative values only (decimal should be separated by '.'), (ii) the samples in lines (should be labelled in the 1rst column) and (iii) variables in columns (should have a header)."/> + + <section name="parse" title="Advanced parsing" help="By default, on tabulated files with a header."> + <param name="header" type="boolean" truevalue="" falsevalue="-H" checked="true" label="Consider the first row as header of columns" help="Used for both blocks and color files."/> + <param name="separator" type="select" display="radio" label="Column separator" help="Character used to separate the column (for all blocks, connection and color files)."> + <option value="1" selected="true">Tabulation</option> + <option value="2">Semicolon</option> + </param> + </section> + + <section name="analyse" title="Advanced analysis" + help="By default, the analysis: is a Regularised Generalised Canonical Correlation Analysis, scales the blocks, uses a superblock with a factorial scheme function, a tau equals to one and two components for each block."> + + <param name="ncomp" type="integer" label="Number of component" value="2" min="2" max="5" + help="The number of component to use in the analysis for each block (should not be greater than the minimum number of variable among the blocks)."/> + + <param name="scale" type="boolean" truevalue="" falsevalue="--scale" checked="true" label="Scale the blocks" + help="A data centering step is always performed. 
If activated, each block is normalised and divided by the square root of its number of variables."/> + + <conditional name="method"> + + <param name="family" type="select" label="Analysis method"> + <option value="1">One block</option> + <option value="2">Two blocks</option> + <option value="m" selected="true">Multiple blocks</option> + <option value="ms">Multiple blocks with superblock</option> + </param> + + <when value="2"> + <param name="type" type="select" label=" "> + <option value="pls">Partial Least Squares Regression</option> + <option value="cca">Canonical Correlation Analysis</option> + <option value="ifa">Interbattery Factor Analysis</option> + <option value="ra">Redundancy analysis</option> + </param> + </when> + + <when value="m"> + <param name="type" type="select" label=" "> + <option value="rgcca">Regularized Generalized CCA</option> + <option value="sgcca">Sparse Generalized CCA</option> + <option value="sumcor">SUM of CORrelations method</option> + <option value="ssqcor">Sum of SQuared CORrelations method</option> + <option value="sabscor">Sum of ABSolute value CORrelations method</option> + <option value="sumcov">SUM of COVariances method</option> + <option value="ssqcov">Sum of SQuared COVariances method</option> + <option value="sabscov">Sum of ABSolute value COVariances method</option> + <option value="maxbet">MAXBET</option> + <option value="maxbet-b">MAXBET-B</option> + </param> + </when> + + <when value="ms"> + <param name="type" type="select" label=" "> + <option value="gcca">Generalized CCA</option> + <option value="hpca">Hierarchical PCA</option> + <option value="mfa">Multiple Factor Analysis</option> + </param> + </when> + + <when value="1"/> + + </conditional> + + <param name="connection" optional="true" type="data" format="tsv,tabular,txt,csv" label="Load the design matrix (if superblock or supervised disabled)" + help="TSV file without header and without row names. This file describes the connections between the blocks. 
It should contain 1 (if two blocks are related) or 0 values otherwise. The columns are separated by tabulations. It is a symmetric matrix with the same dimension as the number of blocks."/> + + <param name="superblock" type="boolean" truevalue="" falsevalue="--superblock" checked="true" label="Use a superblock" + help="A block defined as the concatenation of all the other blocks. The space spanned by global components is viewed as a compromise space that integrated all the modalities and facilitates the visualization of the results and their interpretation. If disabled, all blocks are assumed to be connected or a connection file could be used."/> + + <conditional name="supervised"> + <param name="learning_mode" type="select" display="radio" label="Learning mode"> + <option value="unsupervised">Unsupervised</option> + <option value="supervised">Supervised</option> + </param> + <when value="supervised"> + <param name="block_response" type="integer" value="1" min="1" max="10" label="Use a block as response (supervised analysis)" help="@BLOCK_RULES@ By default, the first block is selected."/> + </when> + <when value="unsupervised"/> + </conditional> + + <conditional name="tau"> + <param name="bool" type="select" display="radio" label="Tau selection" + help="For RGCCA, a regularization parameter for each block (i.e., tau) [default: 1]. Tau varies from 0 (maximizing the correlation) to 1 (maximizing the covariance). 
For SGCCA, tau is automatically set to 1 and a shrinkage parameter can be defined instead for automatic variable selection, varying from the square root of the variable number (the fewest selected variables) to 1 (all the variables are included)."> + <option value="false">Manual</option> + <option value="optimal">Optimal</option> + </param> + <when value="false"> + <param name="value" type="float" label=" " value="1" min="0" max="1"/> + </when> + <when value="optimal"/> + </conditional> + + <param name="scheme" type="select" label="Scheme function" help="Link (i.e. scheme) function for covariance maximization is calculated with: the identity function (horst scheme), +the absolute values (centroid scheme), the squared values (factorial scheme). Only, the horst scheme penalizes structural +negative correlation. The factorial scheme discriminates more strongly the blocks than the centroid one."> + <option value="1">Horst : f(x)</option> + <option value="2" selected="true">Factorial : f(x)^2</option> + <option value="3">Centroid : f|x|</option> + <option value="4">Other: f(x)^4</option> + </param> + + </section> + + <section name="graphic" title="Advanced graphic" help="By default, the x-axis and y-axis are respectively the first and the second components, the number of top variables is 100 and a superblock is used."> + <param name="response" optional="true" type="data" format="tsv,tabular,txt,csv" label="Color the individual plot with a response variable" + help="A TSV file containing either: (i) an only column with a qualitative or a quantitative variable; (ii) multiple columns corresponding to a disjunctive table."/> + <param name="text" type="boolean" truevalue="" falsevalue="--text" checked="true" label="Display the names of the points (in biplots)"/> + <param name="compx" type="integer" label="Component for the X-axis" help="The component used in the X-axis for biplots and the only component used for top variable plot. 
@COMP_RULES@" value="1" min="1" max="5"/> + <param name="compy" type="integer" label="Component for the Y-axis" help="The component used in the Y-axis for biplots. @COMP_RULES@" value="2" min="1" max="5"/> + <param name="blockx" type="integer" value="0" min="0" max="10" label="Visualise this block" help="Block used in the X-axis for individual plot and the only block used for corcircle and top variable plots. @BLOCK_RULES@"/> + <param name="blocky" type="integer" value="0" min="0" max="10" label="Visualise this block for the Y-axis (in individual plot)" help="0 corresponds to the superblock (or the last block loaded), @BLOCK_RULES@ By default, the superblock is selected."/> + <param name="nmark" type="integer" label="Number of top variables" value="100" min="10" max="300"/> + </section> + + <param name="output_selector" type="select" multiple="true" label="Outputs"> + <option value="individuals" selected="true">Individual plot</option> + <option value="corcircle" selected = "true">Corcircle plot</option> + <option value="top_variables">Top variables plot</option> + <option value="ave">Averages plot</option> + <option value="design">Design plot</option> + <option value="individual_table" selected="true">Individual table</option> + <option value="variable_table" selected="true">Variable table</option> + <option value="rdata">RData file</option> + </param> + </inputs> + + <outputs> + <data name="individual_plot" label="${tool.name} on ${on_string}: individuals.pdf" format="pdf"> + <filter>"individuals" in output_selector</filter> + </data> + <data name="corcircle" label="${tool.name} on ${on_string}: corcircle.pdf" format="pdf"> + <filter>"corcircle" in output_selector</filter> + </data> + <data name="top_variables" label="${tool.name} on ${on_string}: top_variables.pdf" format="pdf"> + <filter>"top_variables" in output_selector</filter> + </data> + <data name="ave" label="${tool.name} on ${on_string}: ave.pdf" format="pdf"> + <filter>"ave" in output_selector</filter> 
+ </data> + <data name="design" label="${tool.name} on ${on_string}: design.pdf" format="pdf"> + <filter>"design" in output_selector</filter> + </data> + <data name="individual_table" label="${tool.name} on ${on_string}: individuals.tsv" format="tsv"> + <filter>"individual_table" in output_selector</filter> + </data> + <data name="variable_table" label="${tool.name} on ${on_string}: variables.tsv" format="tsv"> + <filter>"variable_table" in output_selector</filter> + </data> + <data name="rdata" label="${tool.name} on ${on_string}: rgcca.result.RData" format="rdata"> + <filter>"rdata" in output_selector</filter> + </data> + </outputs> + + <tests> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="1block"/> + <param name="blocks" value="agriculture.tsv" ftype = "tsv"/> + <output name="individual_table"> + <assert_contents> + <has_n_columns n="4"/> + <has_line_matching + expression='"agriculture.axis1"\s"agriculture.axis2"\s"superblock.axis1"\s"superblock.axis2"'/> + <has_line_matching + expression='^.+(\s\-?\d+.\d+){4}$'/> + </assert_contents> + </output> + </test> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="3blocks_connection"/> + <expand macro="output_tests_3blocks"/> + <section name="analyse"> + <param name="connection" value="connection.tsv" ftype = "tsv"/> + <param name="superblock" value="false"/> + </section> + <assert_command> + <has_text text="-connection"/> + <has_text text="--superblock"/> + </assert_command> + </test> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="3blocks_supervised"/> + <expand macro="output_tests_3blocks"/> + <section name="analyse"> + <param name="superblock" value="false"/> + <conditional name="supervised" > + <param name="learning_mode" value="supervised"/> + <param name="block_response" value="3"/> + </conditional> + </section> + <assert_command> + <has_text text="--response 3"/> + <has_text 
text="--superblock"/> + </assert_command> + </test> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="3blocks"/> + <expand macro="output_tests_3blocks"/> + </test> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="3blocks_sgcca"/> + <expand macro="output_tests_3blocks"/> + <section name="analyse"> + <conditional name="method"> + <param name="family" value="m"/> + <param name="type" value="sgcca"/> + </conditional> + </section> + <assert_command> + <has_text text="sgcca"/> + </assert_command> + </test> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="2blocks" compx="3" compy="1"/> + <param name="blocks" value="agriculture.tsv,politic.tsv"/> + <section name="analyse"> + <param name="scale" value="false"/> + <conditional name="tau"> + <param name="bool" value="false"/> + <param name="value" value="0"/> + </conditional> + <param name="scheme" value="3"/> + <param name="ncomp" value="3"/> + <conditional name="method"> + <param name="family" value="2"/> + <param name="type" value="pls"/> + </conditional> + </section> + <section name="graphic"> + <param name="response" value="political_system.tsv" ftype = "tsv"/> + <param name="text" value="false"/> + <param name="compx" value="3"/> + <param name="compy" value="1"/> + <param name="blockx" value="2"/> + <param name="blocky" value="1"/> + <param name="nmark" value="11"/> + </section> + <assert_command> + <has_text text="pls"/> + <has_text text="--group"/> + </assert_command> + </test> + + </tests> +<help> + +================================== +ABOUT +================================== + + +**Author:** +Etienne CAMENEN + + +**Contact:** +arthur.tenenhaus@centralesupelec.fr + + +**R package:** +The RGCCA package is available from the CRAN repository (https://cran.r-project.org/web/packages/RGCCA). 
+ +--------------------------------------------------- + +================================== +R/SGCCA +================================== + +A user-friendly multi-block analysis (Regularized Generalized Canonical Correlation Analysis, RGCCA) as described in [1] and [2] with all default settings predefined. The software produces figures to explore the analysis' results: individuals and variables projected on two components of the multi-block analysis, list of top variables and explained variance in the model. + +**Working example** + + | From Russett data (RGCCA package): https://github.com/rgcca-factory/RGCCA/tree/master/inst/extdata + | Use *agriculture.tsv* as a block. Add *industry.tsv* and *politic.tsv* as new blocks. *connection.tsv* can be used as a design matrix and *political_system.tsv* as a response variable, in the analysis and graphic settings respectively. + +**Documentation** + +- RGCCA: https://cran.r-project.org/web/packages/RGCCA/vignettes/vignette_RGCCA.pdf +- accepted input / output formats: https://github.com/rgcca-factory/RGCCA#input-files +<!-- - tutorial: https://github.com/BrainAndSpineInstitute/rgcca_galaxy/blob/release/0.2/README.md--> + +</help> + + <citations> + <citation type="doi">10.1007/s11336-017-9573-x</citation> + <citation type="doi">10.1007/s11336-011-9206-8</citation> + </citations> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/agriculture.tsv Tue Jan 12 10:12:04 2021 +0000 @@ -0,0 +1,48 @@ + gini farm rent +Argentina 86.3 98.2 3.52 +Australia 92.9 99.6 3.27 +Austria 74 97.4 2.46 +Belgium 58.7 85.8 4.15 +Bolivia 93.8 97.7 3.04 +Brasil 83.7 98.5 2.31 +Canada 49.7 82.9 2.1 +Chile 93.8 99.7 2.67 +Colombia 84.9 98.1 2.57 +CostaRica 88.1 99.1 1.86 +Cuba 79.2 97.8 4 +Denmark 45.8 79.3 1.5 +DominicanRepublic 79.5 98.5 3.08 +Ecuador 86.4 99.3 2.75 +Egypt 74 98.1 2.53 +Salvador 82.8 98.8 2.78 +Finland 59.9 86.3 1.22 +France 58.3 86.1 3.3 +Guatemala 86 99.7 2.89 +Greece 74.7 99.4 2.93 +Honduras 75.7 97.4 2.87 +India 52.2 86.9 3.99 +Irak 88.1 99.3 4.33 +Irland 59.8 85.9 1.25 +Italy 80.3 98 3.21 +Japan 47 81.5 1.36 +Libia 70 93 2.25 +Luxemburg 63.8 87.7 2.99 +TheNetherlands 60.5 86.2 3.99 +NewZealand 77.3 95.5 3.15 +Nicaragua 75.7 96.4 2.39 +Norway 66.9 87.5 2.14 +Panama 73.7 95 2.59 +Peru 87.5 96.9 2.61 +Philippine 56.4 88.2 3.65 +Poland 45 77.7 0 +SouthVietnam 67.1 94.6 3.04 +Spain 78 99.5 3.8 +Sweden 57.7 87.2 2.99 +Switzerland 49.8 81.5 2.99 +Taiwan 65.2 94.1 3.71 +UK 71 93.4 3.82 +USA 70.5 95.4 3.06 +Uruguay 81.7 96.6 3.58 +Venezuela 90.9 99.3 3.07 +WestGermany 67.4 93 1.9 +Yugoslavia 43.7 79.8 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/connection.tsv Tue Jan 12 10:12:04 2021 +0000 @@ -0,0 +1,3 @@ +0 1 1 +1 0 1 +1 1 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/industry.tsv Tue Jan 12 10:12:04 2021 +0000 @@ -0,0 +1,48 @@ + gnpr labo +Argentina 5.92 3.22 +Australia 7.1 2.64 +Austria 6.28 3.47 +Belgium 6.92 2.3 +Bolivia 4.19 4.28 +Brasil 5.57 4.11 +Canada 7.42 2.48 +Chile 5.19 3.4 +Colombia 5.8 4.01 +CostaRica 5.73 4.01 +Cuba 5.89 3.74 +Denmark 6.82 3.14 +DominicanRepublic 5.32 4.03 +Ecuador 5.32 3.97 +Egypt 4.89 4.16 +Salvador 5.5 4.14 +Finland 6.85 3.83 +France 6.95 3.26 +Guatemala 5.19 4.22 +Greece 5.48 3.87 +Honduras 4.92 4.19 +India 4.28 4.26 +Irak 5.27 4.39 +Irland 6.23 3.69 +Italy 6.09 3.37 +Japan 5.48 3.69 +Libia 4.5 4.32 +Luxemburg 7.09 3.14 +TheNetherlands 6.56 2.4 +NewZealand 7.14 2.77 +Nicaragua 5.54 4.22 +Norway 6.88 3.26 +Panama 5.86 3.99 +Peru 4.94 4.09 +Philippine 5.3 4.08 +Poland 6.15 4.04 +SouthVietnam 4.89 4.17 +Spain 5.54 3.91 +Sweden 7.06 2.56 +Switzerland 7.11 2.3 +Taiwan 4.88 3.91 +UK 6.91 1.61 +USA 7.76 2.3 +Uruguay 6.34 3.61 +Venezuela 6.64 3.74 +WestGermany 6.64 2.64 +Yugoslavia 5.69 4.2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/politic.tsv Tue Jan 12 10:12:04 2021 +0000 @@ -0,0 +1,48 @@ + inst ecks death demostab demoinst dictator +Argentina 0.07 4.06 5.38 0 1 0 +Australia 0.01 0 0 1 0 0 +Austria 0.03 1.61 0 0 1 0 +Belgium 0.45 2.2 0.69 1 0 0 +Bolivia 0.37 3.99 6.5 0 0 1 +Brasil 0.45 3.91 0.69 0 1 0 +Canada 0.01 3.14 0 1 0 0 +Chile 0.12 3.09 1.1 0 1 0 +Colombia 0.18 3.87 5.76 0 1 0 +CostaRica 0.18 3 3.22 0 1 0 +Cuba 0.07 4.62 7.97 0 0 1 +Denmark 0.18 0 0 1 0 0 +DominicanRepublic 0.01 1.95 3.47 0 0 1 +Ecuador 0.3 3.74 2.94 0 0 1 +Egypt 0.61 3.83 1.1 0 0 1 +Salvador 0.3 2.3 1.1 0 0 1 +Finland 0.5 1.61 0 0 1 0 +France 1 3.85 0.69 0 1 0 +Guatemala 0.25 3.83 4.06 0 0 1 +Greece 0.61 2.3 1.1 0 1 0 +Honduras 0.07 3.83 4.72 0 0 1 +India 0 4.43 2.71 1 0 0 +Irak 0.9 3.22 5.84 0 0 1 +Irland 0.12 2.3 0 1 0 0 +Italy 0.45 3.95 0.69 0 1 0 +Japan 0.55 3.14 0.69 0 1 0 +Libia 0.22 2.2 0 0 0 1 +Luxemburg 0.03 0 0 1 0 0 +TheNetherlands 0.07 1.1 0 1 0 0 +NewZealand 0.03 0 0 1 0 0 +Nicaragua 0.03 2.83 2.83 0 0 1 +Norway 0.03 0.69 0 1 0 0 +Panama 0.5 3.4 3.26 0 0 1 +Peru 0.18 3.18 3.3 0 0 1 +Philippine 0.1 2.77 5.68 0 0 1 +Poland 0 3 1.79 0 0 1 +SouthVietnam 0 3.93 6.91 0 0 1 +Spain 0 3.14 0.69 0 0 1 +Sweden 0 0 0 1 0 0 +Switzerland 0 0 0 1 0 0 +Taiwan 0 1.39 0 0 0 1 +UK 0.07 2.56 0 1 0 0 +USA 0.03 3.14 0 1 0 0 +Uruguay 0.18 0.69 0.69 1 0 0 +Venezuela 0.25 3.61 4.72 0 0 1 +WestGermany 0 1.61 0 0 1 0 +Yugoslavia 0 2.3 0 0 0 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/political_system.tsv Tue Jan 12 10:12:04 2021 +0000 @@ -0,0 +1,48 @@ + demostab demoinst dictator +Argentina 0 1 0 +Australia 1 0 0 +Austria 0 1 0 +Belgium 1 0 0 +Bolivia 0 0 1 +Brasil 0 1 0 +Canada 1 0 0 +Chile 0 1 0 +Colombia 0 1 0 +CostaRica 0 1 0 +Cuba 0 0 1 +Denmark 1 0 0 +DominicanRepublic 0 0 1 +Ecuador 0 0 1 +Egypt 0 0 1 +Salvador 0 0 1 +Finland 0 1 0 +France 0 1 0 +Guatemala 0 0 1 +Greece 0 1 0 +Honduras 0 0 1 +India 1 0 0 +Irak 0 0 1 +Irland 1 0 0 +Italy 0 1 0 +Japan 0 1 0 +Libia 0 0 1 +Luxemburg 1 0 0 +TheNetherlands 1 0 0 +NewZealand 1 0 0 +Nicaragua 0 0 1 +Norway 1 0 0 +Panama 0 0 1 +Peru 0 0 1 +Philippine 0 0 1 +Poland 0 0 1 +SouthVietnam 0 0 1 +Spain 0 0 1 +Sweden 1 0 0 +Switzerland 1 0 0 +Taiwan 0 0 1 +UK 1 0 0 +USA 1 0 0 +Uruguay 1 0 0 +Venezuela 0 0 1 +WestGermany 0 1 0 +Yugoslavia 0 0 1