Mercurial > repos > iuc > rgcca

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/launcher.R	Tue Jan 12 10:12:04 2021 +0000
@@ -0,0 +1,528 @@
+# Author: Etienne CAMENEN
+# Date: 2020
+# Contact: arthur.tenenhaus@centralesupelec.fr
+# Key-words: omics, RGCCA, multi-block
+# EDAM operation: analysis, correlation, visualisation
+#
+# Abstract: Performs multi-variate analysis (PCA, CCA, PLS, R/SGCCA, etc.)
+# and produces textual and graphical outputs (e.g. variables and individuals
+# plots).
+
+# Start from a clean state: remove every object of the current environment
+# and close all open graphics devices.
+rm(list = ls())
+graphics.off()
+# NOTE(review): `separator` is not read anywhere in the visible part of this
+# script; presumably a placeholder for code defined elsewhere -- confirm.
+separator <- NULL
+
+########## Arguments ##########
+
+# Parse the arguments from a command line launch
+get_args <- function() {
+    # Build the command-line parser of the launcher.
+    #
+    # The default values are NOT hard-coded here: they are read BY POSITION
+    # (opt[1] to opt[17]) from the `opt` list defined in the Main section,
+    # which therefore has to exist, in the same order, before this function
+    # is called.
+    #
+    # Returns an optparse OptionParser object (to be given to parse_args()).
+
+    # Bound explicitly so that the function does not depend on optparse
+    # being attached (OptionParser below is already namespace-qualified)
+    make_option <- optparse::make_option
+
+    option_list <- list(
+        # File parameters
+        make_option(
+            opt_str = c("-d", "--datasets"),
+            type = "character",
+            metavar = "path list",
+            help = "List of comma-separated file paths corresponding to the
+            blocks to be analyzed (one per block and without spaces between
+            them; e.g., path/file1.txt,path/file2.txt) [required]"
+        ),
+        make_option(
+            opt_str = c("-c", "--connection"),
+            type = "character",
+            metavar = "path",
+            help = "Path of the file defining the connections between the blocks
+            [if not used, activates the superblock mode]"
+        ),
+        make_option(
+            opt_str = "--group",
+            type = "character",
+            metavar = "path",
+            help = "Path of the file coloring the individuals in the ad hoc
+            plot"
+        ),
+        make_option(
+            opt_str = c("-r", "--response"),
+            type = "integer",
+            metavar = "integer",
+            help = "Position of the response file for the supervised mode within
+            the block path list [activates the supervised mode]"
+        ),
+        make_option(
+            opt_str = "--names",
+            type = "character",
+            metavar = "character list",
+            help = "List of comma-separated block names to rename them (one per
+            block; without spaces between them) [default: the block file names]"
+        ),
+        # Flags below use store_false and have no default: they are absent
+        # from the parsed options unless given on the command line
+        make_option(
+            opt_str = c("-H", "--header"),
+            type = "logical",
+            action = "store_false",
+            help = "DO NOT consider the first row as the column header"
+        ),
+        make_option(
+            opt_str = "--separator",
+            type = "integer",
+            metavar = "integer",
+            default = opt[1],
+            help = "Character used to separate columns (1: tabulation,
+            2: semicolon, 3: comma) [default: %default]"
+        ),
+        # Analysis parameter
+        make_option(
+            opt_str = "--type",
+            type = "character",
+            metavar = "character",
+            default = opt[2],
+            help = "Type of analysis [default: %default] (among: rgcca, pca,
+            cca, gcca, cpca-w, hpca, maxbet-b, maxbet, maxdiff-b, maxdiff,
+            maxvar-a, maxvar-b, maxvar, niles, r-maxvar, rcon-pca, ridge-gca,
+            sabscor, ssqcor, ssqcov-1, ssqcov-2, ssqcov, sum-pca,
+            sumcor, sumcov-1, sumcov-2, sumcov)"
+        ),
+        make_option(
+            opt_str = "--ncomp",
+            type = "character",
+            metavar = "integer list",
+            default = opt[3],
+            help = "Number of components in the analysis for each block
+            [default: %default]. The number should be higher than 1 and lower
+            than the minimum number of variables among the blocks. It can be a
+            single value or a comma-separated list (e.g. 2,2,3,2)."
+        ),
+        make_option(
+            opt_str = "--penalty",
+            type = "character",
+            metavar = "float list",
+            default = opt[4],
+            help = "For RGCCA, a regularization parameter for each block (i.e., tau)
+            [default: %default]. Tau varies from 0 (maximizing the correlation)
+            to 1 (maximizing the covariance). For SGCCA, tau is automatically
+            set to 1 and shrinkage parameter can be defined instead for
+            automatic variable selection, varying from the square root of the
+            variable number (the fewest selected variables) to 1 (all the
+            variables are included). It can be a single value or a
+            comma-separated list (e.g. 0,1,0.75,1)."
+        ),
+        make_option(
+            opt_str = "--scheme",
+            type = "integer",
+            metavar = "integer",
+            default = opt[5],
+            help = "Link (i.e. scheme) function for covariance maximization
+            (1: x, 2: x^2, 3: |x|, 4: x^4) [default: %default]. Only the x
+            function ('horst scheme') penalizes structural negative correlation.
+            The x^2 function ('factorial scheme') discriminates more strongly
+            the blocks than the |x| ('centroid scheme') one."
+        ),
+        make_option(
+            opt_str = "--scale",
+            type = "logical",
+            action = "store_false",
+            help = "DO NOT scale the blocks (i.e., a data centering step is
+            always performed). Otherwise, each block is normalised and divided
+            by the square root of its number of variables."
+        ),
+        make_option(
+            opt_str = "--superblock",
+            type = "logical",
+            action = "store_false",
+            help = "DO NOT use a superblock (i.e. a concatenation of all the
+            blocks to visualize them all together in a consensus space). In
+            this case, all blocks are assumed to be connected or a connection
+            file could be used."
+        ),
+        # Graphical parameters
+        make_option(
+            opt_str = "--text",
+            type = "logical",
+            action = "store_false",
+            help = "DO NOT display the name of the points instead of shapes when
+            plotting"
+        ),
+        make_option(
+            opt_str = "--block",
+            type = "integer",
+            metavar = "integer",
+            default = opt[6],
+            help = "Position in the path list of the plotted block (0: the
+            superblock or, if not activated, the last one, 1: the first one,
+            2: the 2nd, etc.) [default: the last one]"
+        ),
+        make_option(
+            opt_str = "--block_y",
+            type = "integer",
+            metavar = "integer",
+            help = "Position in the path list of the plotted block for the
+            Y-axis in the individual plot (0: the superblock or, if not
+            activated, the last one, 1: the first one, 2: the 2nd, etc.)
+            [default: the last one]"
+        ),
+        make_option(
+            opt_str = "--compx",
+            type = "integer",
+            metavar = "integer",
+            default = opt[7],
+            help = "Component used in the X-axis for biplots and the only
+            component used for histograms [default: %default] (should not be
+            higher than the number of components of the analysis)"
+        ),
+        make_option(
+            opt_str = "--compy",
+            type = "integer",
+            metavar = "integer",
+            default = opt[8],
+            help = "Component used in the Y-axis for biplots
+            [default: %default] (should not be higher than the number of
+            components of the analysis)"
+        ),
+        make_option(
+            opt_str = "--nmark",
+            type = "integer",
+            metavar = "integer",
+            default = opt[9],
+            help = "Maximum number of top variables in ad hoc plot
+            [default: %default]"
+        ),
+        # output parameters
+        make_option(
+            opt_str = "--o1",
+            type = "character",
+            metavar = "path",
+            default = opt[10],
+            help = "Path for the individual plot [default: %default]"
+        ),
+        make_option(
+            opt_str = "--o2",
+            type = "character",
+            metavar = "path",
+            default = opt[11],
+            help = "Path for the variable plot [default: %default]"
+        ),
+        make_option(
+            opt_str = "--o3",
+            type = "character",
+            metavar = "path",
+            default = opt[12],
+            help = "Path for the top variables plot [default: %default]"
+        ),
+        make_option(
+            opt_str = "--o4",
+            type = "character",
+            metavar = "path",
+            default = opt[13],
+            help = "Path for the explained variance plot [default: %default]"
+        ),
+        make_option(
+            opt_str = "--o5",
+            type = "character",
+            metavar = "path",
+            default = opt[14],
+            help = "Path for the design plot [default: %default]"
+        ),
+        make_option(
+            opt_str = "--o6",
+            type = "character",
+            metavar = "path",
+            default = opt[15],
+            help = "Path for the individual table [default: %default]"
+        ),
+        make_option(
+            opt_str = "--o7",
+            type = "character",
+            metavar = "path",
+            default = opt[16],
+            help = "Path for the variable table [default: %default]"
+        ),
+        make_option(
+            opt_str = "--o8",
+            type = "character",
+            metavar = "path",
+            default = opt[17],
+            help = "Path for the analysis results in RData [default: %default]"
+        )
+    )
+    return(optparse::OptionParser(option_list = option_list))
+}
+
+char_to_list <- function(x) {
+    # Split a comma-separated value (e.g. "2, 2,3") into a character vector
+    # ("2" "2" "3"); every space is removed before splitting.
+    # Only the first element of x is split.
+    without_spaces <- gsub(" ", "", as.character(x))
+    pieces <- strsplit(without_spaces, ",")
+    pieces[[1]]
+}
+
+check_arg <- function(opt) {
+    # Check the validity of the parsed arguments and decode the coded ones.
+    #
+    # opt : the list of options returned by optparse::parse_args()
+    #
+    # Returns opt where:
+    #   - scheme is "horst", "factorial", "centroid" or function(x) x ^ 4
+    #   - separator is the separator character itself ("\t", ";" or ",")
+    #   - ncomp and penalty are character vectors (comma-separated input split)
+    #
+    # Stops through stop_rgcca() with exit code 121 (no datasets),
+    # 122 (invalid scheme) or 123 (invalid separator).
+
+    if (is.null(opt$datasets))
+        stop_rgcca("datasets is required.", exit_code = 121)
+
+    if (is.null(opt$scheme))
+        opt$scheme <- "factorial"
+    else if (!opt$scheme %in% seq(4)) {
+        stop_rgcca(
+            paste0(
+                "scheme should be comprise between 1 and 4 [by default: 2], not be equal to ",
+                opt$scheme,
+                "."
+            ),
+            exit_code = 122
+        )
+    } else {
+        schemes <- c("horst", "factorial", "centroid")
+        # Code 4 has no named scheme: the link function itself is used
+        if (opt$scheme == 4)
+            opt$scheme <- function(x) x ^ 4
+        else
+            opt$scheme <- schemes[opt$scheme]
+    }
+
+    if (!opt$separator %in% seq(3)) {
+        # The default separator is 1 (tabulation), see the `opt` defaults
+        stop_rgcca(
+            paste0(
+                "separator should be comprise between 1 and 3 (1: Tabulation, 2: Semicolon, 3: Comma) [by default: 1], not be equal to ",
+                opt$separator,
+                "."
+            ),
+            exit_code = 123
+        )
+    } else {
+        separators <- c("\t", ";", ",")
+        opt$separator <- separators[opt$separator]
+    }
+
+    # Validation only: the returned value is deliberately not stored
+    RGCCA:::check_integer("nmark", opt$nmark, min = 2)
+
+    for (x in c("ncomp", "penalty"))
+        opt[[x]] <- char_to_list(opt[[x]])
+
+    return(opt)
+}
+
+post_check_arg <- function(opt, rgcca) {
+    # Check the arguments that can only be validated once the analysis is done.
+    #
+    # opt   : the list of parsed options
+    # rgcca : the fitted analysis; rgcca$call$blocks and rgcca$call$ncomp
+    #         are read
+    #
+    # Returns opt with block, block_y, compx and compy validated.
+    fitted_blocks <- rgcca$call$blocks
+
+    for (arg in c("block", "block_y")) {
+        position <- opt[[arg]]
+        if (is.null(position))
+            next
+        # 0 stands for the last element of the fitted blocks
+        if (position == 0)
+            position <- length(fitted_blocks)
+        opt[[arg]] <- RGCCA:::check_blockx(arg, position, fitted_blocks)
+    }
+
+    # As soon as one requested number of components equals 1, compy is set
+    # to 1
+    if (any(opt$ncomp == 1))
+        opt$compy <- 1
+
+    for (arg in c("compx", "compy"))
+        opt[[arg]] <- check_compx(arg, opt[[arg]], rgcca$call$ncomp, opt$block)
+
+    return(opt)
+}
+
+check_integer <- function(x, y = x, type = "scalar", float = FALSE, min = 1) {
+    # Check that a value is numeric and not below a minimum.
+    #
+    # x     : name of the checked argument, used in the error messages
+    # y     : value to check (x itself when y is missing or NULL)
+    # type  : "scalar" (length 1 required), "matrix" or "data.frame"
+    #         (dimensions and dimnames of y are restored on the result);
+    #         any other value returns a plain vector
+    # float : if FALSE, the value is converted with as.integer()
+    # min   : lower bound
+    #
+    # Returns the converted value, in the shape requested by `type`.
+    # Stops through stop_rgcca() when the value is invalid.
+
+    if (is.null(y))
+        y <- x
+
+    # Keep the original object to restore its dimensions afterwards
+    if (type %in% c("matrix", "data.frame"))
+        y_temp <- y
+
+    # Non-numeric content becomes NA here and is rejected just below
+    y <- suppressWarnings(as.double(as.matrix(y)))
+
+    if (any(is.na(y)))
+        stop_rgcca(paste(x, "should not be NA."))
+
+    if (!is(y, "numeric"))
+        stop_rgcca(paste(x, "should be numeric."))
+
+    if (type == "scalar" && length(y) != 1)
+        stop_rgcca(paste(x, "should be of length 1."))
+
+    if (!float)
+        y <- as.integer(y)
+
+    # NOTE(review): only fails when ALL the values are below min; confirm
+    # that any() is not what is intended for non-scalar input.
+    if (all(y < min))
+        stop_rgcca(paste0(x, " should be higher than or equal to ", min, "."))
+
+    if (type %in% c("matrix", "data.frame"))
+        y <- matrix(
+            y,
+            dim(y_temp)[1],
+            dim(y_temp)[2],
+            dimnames = dimnames(y_temp)
+        )
+
+    # The conversion result was previously discarded, so a matrix was
+    # returned even when a data.frame was requested
+    if (type == "data.frame")
+        y <- as.data.frame(y)
+
+    return(y)
+}
+
+load_libraries <- function(librairies) {
+    # Attach a set of packages, installing the missing ones from CRAN first.
+    #
+    # librairies : character vector of package names
+    #
+    # Called for its side effects (installation and attachment).
+    for (l in librairies) {
+        # system.file() returns "" when the package is not installed; this
+        # avoids rebuilding the whole installed.packages() table each time
+        if (!nzchar(system.file(package = l)))
+            # `repos` must be a full URL (scheme included) to be downloadable
+            utils::install.packages(l, repos = "https://cran.us.r-project.org")
+        suppressPackageStartupMessages(
+            library(
+                l,
+                character.only = TRUE,
+                warn.conflicts = FALSE,
+                quietly = TRUE
+        ))
+    }
+}
+
+stop_rgcca <- function(
+    message,
+    exit_code = "1",
+    call = NULL) {
+    # Raise an error whose first class carries an exit code.
+    #
+    # message   : error message
+    # exit_code : code stored (as a character string) as the first class of
+    #             the condition, where an error handler can read it back
+    # call      : call stored in the condition (NULL: no call reported)
+
+    base::stop(
+        structure(
+            # c() would coerce a numeric code anyway; made explicit
+            class = c(
+                as.character(exit_code),
+                "simpleError",
+                "error",
+                "condition"
+            ),
+            # A condition stores its call in the `call` field (the former
+            # `call.` field was only found through partial matching)
+            list(message = message, call = call)
+    ))
+}
+
+########## Main ##########
+
+# Get arguments : R packaging install, need an opt variable with associated
+# arguments
+#
+# Default values of the arguments. get_args() reads them BY POSITION
+# (opt[1] to opt[17]), so the order of the first 17 entries below must stay
+# in sync with the indices used there. The last entry (datasets) is not
+# used as a command-line default: it is only kept when the argument parsing
+# below does not replace `opt`.
+opt <- list(
+    separator = 1,
+    type = "rgcca",
+    ncomp = 2,
+    penalty = 1,
+    scheme = 2,
+    block = 0,
+    compx = 1,
+    compy = 2,
+    nmark = 100,
+    o1 = "individuals.pdf",
+    o2 = "corcircle.pdf",
+    o3 = "top_variables.pdf",
+    o4 = "ave.pdf",
+    o5 = "design.pdf",
+    o6 = "individuals.tsv",
+    o7 = "variables.tsv",
+    o8 = "rgcca_result.RData",
+    datasets = paste0("inst/extdata/",
+        c("agriculture", "industry", "politic"),
+        ".tsv",
+        collapse = ",")
+)
+
+# Attach the required packages (missing ones are installed first). The
+# loading of ggrepel is wrapped in a silent try(): a failure is ignored.
+load_libraries(c("ggplot2", "optparse", "scales", "igraph", "MASS", "rlang", "Deriv"))
+try(load_libraries("ggrepel"), silent = TRUE)
+
+# Parse and validate the command-line arguments; on success `opt` is
+# replaced by the checked options.
+# - An error whose first element (the message, for standard conditions)
+#   matches "nextArg" is ignored: `opt` then keeps the defaults above.
+# - Any other error is re-raised with exit code 140, any warning with 141.
+# NOTE(review): these re-raised conditions are thrown from inside the
+# handlers, so nothing visible here catches them -- confirm that the exit
+# codes (including 121-123 from check_arg) reach the caller as intended.
+tryCatch(
+    opt <- check_arg(optparse::parse_args(get_args())),
+    error = function(e) {
+        if (length(grep("nextArg", e[[1]])) != 1)
+            stop_rgcca(e[[1]], exit_code = 140)
+    }, warning = function(w)
+        stop_rgcca(w[[1]], exit_code = 141)
+)
+
+# Load functions: copy every function of the RGCCA namespace (exported or
+# not) into the current environment, so that they can be called without
+# the RGCCA::: prefix in the rest of the script.
+rgcca_ns <- asNamespace("RGCCA")
+all_funcs <- unclass(lsf.str(envir = rgcca_ns, all.names = TRUE))
+# assign()/get() instead of eval(parse()): no code is built from strings,
+# so function names that are not syntactic R names are handled too
+for (i in all_funcs)
+    assign(i, get(i, envir = rgcca_ns, inherits = FALSE))
+
+# Set missing parameters by default.
+# Each of these options is TRUE when its name is absent from `opt` and
+# FALSE when it is already present (whatever its current value).
+opt$header <- !is.element("header", names(opt))
+opt$superblock <- !is.element("superblock", names(opt))
+opt$scale <- !is.element("scale", names(opt))
+opt$text <- !is.element("text", names(opt))
+
+# Run the whole analysis. Any error is caught, its message is printed and
+# the script quits with a non-zero exit status.
+status <- 0
+tryCatch({
+
+    blocks <- load_blocks(opt$datasets, opt$names, opt$separator)
+    group <- load_response(blocks, opt$group, opt$separator, opt$header)
+    connection <- load_connection(file = opt$connection, separator = opt$separator)
+
+    # The call is built unevaluated so that the penalty can be added under
+    # the argument name expected by the chosen method
+    func <- quote(
+        rgcca(
+            blocks = blocks,
+            connection = connection,
+            response = opt$response,
+            superblock = opt$superblock,
+            ncomp = opt$ncomp,
+            scheme = opt$scheme,
+            scale = opt$scale,
+            type = opt$type
+        )
+    )
+    if (tolower(opt$type) %in% c("sgcca", "spca", "spls")) {
+        func[["sparsity"]] <- opt$penalty
+    }else {
+        func[["tau"]] <- opt$penalty
+    }
+
+    rgcca_out <- eval(as.call(func))
+
+    opt <- post_check_arg(opt, rgcca_out)
+
+    ########## Plot ##########
+
+    if (rgcca_out$call$ncomp[opt$block] == 1 && is.null(opt$block_y)) {
+        warning("With a number of component of 1, a second block should be chosen to perform an individual plot")
+    } else {
+        (
+            individual_plot <- plot_ind(
+                rgcca_out,
+                group,
+                opt$compx,
+                opt$compy,
+                opt$block,
+                opt$text,
+                opt$block_y,
+                "Response"
+            )
+        )
+        save_plot(opt$o1, individual_plot)
+    }
+
+    if (rgcca_out$call$ncomp[opt$block] > 1) {
+        (
+            corcircle <- plot_var_2D(
+                rgcca_out,
+                opt$compx,
+                opt$compy,
+                opt$block,
+                opt$text,
+                n_mark = opt$nmark
+            )
+        )
+        save_plot(opt$o2, corcircle)
+    }
+
+    top_variables <- plot_var_1D(
+            rgcca_out,
+            opt$compx,
+            opt$nmark,
+            opt$block,
+            type = "cor"
+        )
+    save_plot(opt$o3, top_variables)
+
+    # Average Variance Explained
+    (ave <- plot_ave(rgcca_out))
+    save_plot(opt$o4, ave)
+
+    # Creates design scheme
+    design <- function() plot_network(rgcca_out)
+    save_plot(opt$o5, design)
+
+    save_ind(rgcca_out, opt$compx, opt$compy, opt$o6)
+    save_var(rgcca_out, opt$compx, opt$compy, opt$o7)
+    save(rgcca_out, file = opt$o8)
+
+    }, error = function(e) {
+        # stop_rgcca() stores its exit code as the first class of the
+        # condition. Any other first class ("simpleError", "rlang_error",
+        # ...) is not a number and must not be used as an exit status:
+        # the generic failure status 1 is used instead.
+        exit_code <- suppressWarnings(as.integer(class(e)[1]))
+        status <<- if (is.na(exit_code)) 1L else exit_code
+        message(e$message)
+})
+quit(status = status)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macro.xml	Tue Jan 12 10:12:04 2021 +0000
@@ -0,0 +1,44 @@
+<!-- Shared tokens and test macros of the RGCCA tool (imported by rgcca.xml). -->
+<macros>
+
+    <!-- Version used for the tool version and for the package requirement -->
+    <token name="@TOOL_VERSION@">3.0.0</token>
+
+    <!-- Help sentences reused by several parameters -->
+    <token name="@BLOCK_RULES@">1 corresponds to the first block, 2 corresponds to the second one, etc. This number should not be greater than the number of blocks selected.</token>
+
+    <token name="@COMP_RULES@">This number should not be greater than the selected number of component (2, by default).</token>
+
+    <!-- Test macro: selects every output and checks each of them. @PATH@,
+         @COMPX@ and @COMPY@ are presumably filled from the token_path,
+         token_compx and token_compy attributes (overridable in the expand
+         tag); verify against the Galaxy macro documentation. -->
+    <xml name="output_tests" token_path="" token_compx="1" token_compy="2">
+        <param name="output_selector" value="individuals,corcircle,top_variables,ave,design,individual_table,variable_table,rdata"/>
+        <output name="individual_plot" file="@PATH@/individuals.pdf" ftype="pdf"/>
+        <output name="top_variables" file="@PATH@/top_variables.pdf" ftype="pdf"/>
+        <output name="corcircle" file="@PATH@/corcircle.pdf" ftype="pdf"/>
+        <output name="ave" file="@PATH@/ave.pdf" ftype="pdf"/>
+        <output name="design" file="@PATH@/design.pdf" ftype="pdf"/>
+        <output name="rdata" file="@PATH@/rgcca.result.RData" compare="sim_size" delta="1000" ftype="rdata"/>
+        <!-- The variable table is checked on its structure: 5 columns, the
+             expected header names and a line holding four numeric fields -->
+        <output name="variable_table">
+            <assert_contents>
+                <has_n_columns n="5"/>
+                <has_line_matching
+                        expression='.*"cor.axis.@COMPX@"\s"cor.axis.@COMPY@"\s"weight.axis.@COMPX@"\s"weight.axis.@COMPY@".*\s"block"'/>
+                <has_line_matching
+                        expression='^.+(\s\-?\d+.\d+){4}.+$'/>
+            </assert_contents>
+        </output>
+    </xml>
+
+    <!-- Test macro for three blocks: a manual tau of 0.75 and scheme 4,
+         both expected to show up in the generated command line -->
+    <xml name="output_tests_3blocks">
+        <param name="blocks" value="agriculture.tsv,industry.tsv,politic.tsv" ftype = "tsv"/>
+        <section name="analyse">
+            <conditional name="tau">
+                <param name="bool" value="false"/>
+                <param name="value" value="0.75"/>
+            </conditional>
+            <param name="scheme" value="4"/>
+        </section>
+        <assert_command>
+            <has_text text="--penalty 0.75"/>
+            <has_text text="--scheme 4"/>
+        </assert_command>
+    </xml>
+
+</macros>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgcca.xml	Tue Jan 12 10:12:04 2021 +0000
@@ -0,0 +1,359 @@
+<tool id="rgcca" name="RGCCA" version="@TOOL_VERSION@+galaxy0">
+
+    <description>performs multiblock data analysis of several sets of variables (blocks) observed on the same group of individuals.</description>
+
+    <macros>
+        <import>macro.xml</import>
+    </macros>
+
+    <edam_topics>
+        <edam_topic>topic_2269</edam_topic>
+    </edam_topics>
+
+    <edam_operations>
+        <edam_operation>operation_2945</edam_operation>
+        <edam_operation>operation_3465</edam_operation>
+        <edam_operation>operation_0337</edam_operation>
+    </edam_operations>
+
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">rgccacmd</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        #set data_paths = ",".join([str(_.file_name) for _ in $blocks])
+        #set data_names = ",".join([str(_.element_identifier).replace(',', '_') for _ in $blocks])
+        Rscript '$__tool_directory__/launcher.R'
+            --datasets '${data_paths}'
+            --names '${data_names}'
+            --o1 '$individual_plot' --o2 '$corcircle' --o3 '$top_variables' --o4 '$ave' --o5 '$design' --o6 '$individual_table' --o7 '$variable_table' --o8 '$rdata'
+            $parse.header
+            --separator $parse.separator
+            $analyse.superblock
+            $analyse.scale
+            #if $analyse.tau.bool == 'false'
+            --penalty $analyse.tau.value
+            #else
+            --penalty $analyse.tau.bool
+            #end if
+            --ncomp $analyse.ncomp
+            --scheme $analyse.scheme
+            #if $analyse.method.family == '1'
+            --type pca
+            #else
+            --type $analyse.method.type
+            #end if
+            #if $analyse.connection
+            --connection $analyse.connection
+            #end if
+            #if $analyse.supervised.learning_mode == 'supervised'
+            --response $analyse.supervised.block_response
+            #end if
+            #if $graphic.response
+            --group $graphic.response
+            #end if
+            --compx $graphic.compx
+            --compy $graphic.compy
+            --nmark $graphic.nmark
+            $graphic.text
+            --block $graphic.blockx
+            --block_y $graphic.blocky
+    ]]></command>
+
+    <inputs>
+        <param name="blocks" type="data" format="tsv,tabular,txt,csv" multiple="true" optional="false" label = "Load blocks"
+            help="TSV file containing a matrix with: (i) quantitative values only (decimal should be separated by '.'), (ii) the samples in lines (should be labelled in the 1rst column) and (iii) variables in columns (should have a header)."/>
+
+        <section name="parse" title="Advanced parsing" help="By default, on tabulated files with a header.">
+            <param name="header" type="boolean" truevalue="" falsevalue="-H" checked="true" label="Consider the first row as header of columns" help="Used for both blocks and color files."/>
+            <param name="separator" type="select" display="radio" label="Column separator" help="Character used to separate the column (for all blocks, connection and color files).">
+                <option value="1" selected="true">Tabulation</option>
+                <option value="2">Semicolon</option>
+            </param>
+        </section>
+
+        <section name="analyse" title="Advanced analysis"
+            help="By default, the analysis: is a Regularised Generalised Canonical Correlation Analysis, scales the blocks, uses a superblock with a factorial scheme function, a tau equals to one and two components for each block.">
+
+            <param name="ncomp" type="integer" label="Number of component" value="2" min="2" max="5"
+                help="The number of component to use in the analysis for each block (should not be greater than the minimum number of variable among the blocks)."/>
+
+            <param name="scale" type="boolean" truevalue="" falsevalue="--scale" checked="true" label="Scale the blocks"
+                help="A data centering step is always performed. If activated, each block is normalised and divided by the square root of its number of variables."/>
+
+            <conditional name="method">
+
+                <param name="family" type="select" label="Analysis method">
+                    <option value="1">One block</option>
+                    <option value="2">Two blocks</option>
+                    <option value="m" selected="true">Multiple blocks</option>
+                    <option value="ms">Multiple blocks with superblock</option>
+                </param>
+
+                <when value="2">
+                    <param name="type" type="select" label=" ">
+                        <option value="pls">Partial Least Squares Regression</option>
+                        <option value="cca">Canonical Correlation Analysis</option>
+                        <option value="ifa">Interbattery Factor Analysis</option>
+                        <option value="ra">Redundancy analysis</option>
+                    </param>
+                </when>
+
+                <when value="m">
+                    <param name="type" type="select" label=" ">
+                        <option value="rgcca">Regularized Generalized CCA</option>
+                        <option value="sgcca">Sparse Generalized CCA</option>
+                        <option value="sumcor">SUM of CORrelations method</option>
+                        <option value="ssqcor">Sum of SQuared CORrelations method</option>
+                        <option value="sabscor">Sum of ABSolute value CORrelations method</option>
+                        <option value="sumcov">SUM of COVariances method</option>
+                        <option value="ssqcov">Sum of SQuared COVariances method</option>
+                        <option value="sabscov">Sum of ABSolute value COVariances method</option>
+                        <option value="maxbet">MAXBET</option>
+                        <option value="maxbet-b">MAXBET-B</option>
+                    </param>
+                </when>
+
+                <when value="ms">
+                    <param name="type" type="select" label=" ">
+                        <option value="gcca">Generalized CCA</option>
+                        <option value="hpca">Hierarchical PCA</option>
+                        <option value="mfa">Multiple Factor Analysis</option>
+                    </param>
+                </when>
+
+                <when value="1"/>
+
+            </conditional>
+
+            <param name="connection" optional="true" type="data" format="tsv,tabular,txt,csv" label="Load the design matrix (if superblock or supervised disabled)"
+                help="TSV file without header and without row names. This file describes the connections between the blocks. It should contain 1 (if two blocks are related) or 0 values otherwise. The columns are separated by tabulations. It is a symmetric matrix with the same dimension as the number of blocks."/>
+
+            <param name="superblock" type="boolean" truevalue="" falsevalue="--superblock" checked="true" label="Use a superblock"
+                help="A block defined as the concatenation of all the other blocks. The space spanned by global components is viewed as a compromise space that integrated all the modalities and facilitates the visualization of the results and their interpretation. If disabled, all blocks are assumed to be connected or a connection file could be used."/>
+
+            <conditional name="supervised">
+                <param name="learning_mode" type="select" display="radio" label="Learning mode">
+                    <option value="unsupervised">Unsupervised</option>
+                    <option value="supervised">Supervised</option>
+                </param>
+                <when value="supervised">
+                    <param name="block_response" type="integer" value="1" min="1" max="10" label="Use a block as response (supervised analysis)" help="@BLOCK_RULES@ By default, the first block is selected."/>
+                </when>
+                <when value="unsupervised"/>
+            </conditional>
+
+            <conditional name="tau">
+                <param name="bool" type="select" display="radio" label="Tau selection"
+                    help="For RGCCA, a regularization parameter for each block (i.e., tau) [default: 1]. Tau varies from 0 (maximizing the correlation) to 1 (maximizing the covariance). For SGCCA, tau is automatically set to 1 and a shrinkage parameter can be defined instead for automatic variable selection, varying from the square root of the variable number (the fewest selected variables) to 1 (all the variables are included).">
+                    <option value="false">Manual</option>
+                    <option value="optimal">Optimal</option>
+                </param>
+                <when value="false">
+                    <param name="value" type="float" label=" " value="1" min="0" max="1"/>
+                </when>
+                <when value="optimal"/>
+            </conditional>
+
+            <param name="scheme" type="select" label="Scheme function" help="Link (i.e. scheme) function for covariance maximization is calculated with: the identity function (horst scheme),
+the absolute values (centroid scheme), the squared values (factorial scheme). Only, the horst scheme penalizes structural
+negative correlation. The factorial scheme discriminates more strongly the blocks than the centroid one.">
+                <option value="1">Horst : f(x)</option>
+                <option value="2" selected="true">Factorial : f(x)^2</option>
+                <option value="3">Centroid : f|x|</option>
+                <option value="4">Other: f(x)^4</option>
+            </param>
+
+        </section>
+
+        <section name="graphic" title="Advanced graphic" help="By default, the x-axis and y-axis are respectively the first and the second components, the number of top variables is 100 and a superblock is used.">
+            <param name="response" optional="true" type="data" format="tsv,tabular,txt,csv" label="Color the individual plot with a response variable"
+                help="A TSV file containing either: (i) an only column with a qualitative or a quantitative variable; (ii) multiple columns corresponding to a disjunctive table."/>
+            <param name="text" type="boolean" truevalue="" falsevalue="--text" checked="true" label="Display the names of the points (in biplots)"/>
+            <param name="compx" type="integer" label="Component for the X-axis" help="The component used in the X-axis for biplots and the only component used for top variable plot. @COMP_RULES@" value="1" min="1" max="5"/>
+            <param name="compy" type="integer" label="Component for the Y-axis" help="The component used in the Y-axis for biplots. @COMP_RULES@" value="2" min="1" max="5"/>
+            <param name="blockx" type="integer" value="0" min="0" max="10" label="Visualise this block" help="Block used in the X-axis for individual plot and the only block used for corcircle and top variable plots. @BLOCK_RULES@"/>
+            <param name="blocky" type="integer" value="0" min="0" max="10" label="Visualise this block for the Y-axis (in individual plot)" help="0 corresponds to the superblock (or the last block loaded), @BLOCK_RULES@ By default, the superblock is selected."/>
+            <param name="nmark" type="integer" label="Number of top variables" value="100" min="10" max="300"/>
+        </section>
+
+        <!-- Multi-select list of the result files to produce; every value below is referenced by a filter in the outputs section. -->
+        <param name="output_selector" type="select" multiple="true" label="Outputs">
+            <option value="individuals" selected="true">Individual plot</option>
+            <option value="corcircle" selected="true">Corcircle plot</option>
+            <option value="top_variables">Top variables plot</option>
+            <option value="ave">Averages plot</option>
+            <option value="design">Design plot</option>
+            <option value="individual_table" selected="true">Individual table</option>
+            <option value="variable_table" selected="true">Variable table</option>
+            <option value="rdata">RData file</option>
+        </param>
+    </inputs>
+
+    <!-- One dataset per entry of the output_selector parameter; each dataset is only created when its value is selected. -->
+    <outputs>
+        <data name="individual_plot" format="pdf" label="${tool.name} on ${on_string}: individuals.pdf">
+            <filter>"individuals" in output_selector</filter>
+        </data>
+        <data name="corcircle" format="pdf" label="${tool.name} on ${on_string}: corcircle.pdf">
+            <filter>"corcircle" in output_selector</filter>
+        </data>
+        <data name="top_variables" format="pdf" label="${tool.name} on ${on_string}: top_variables.pdf">
+            <filter>"top_variables" in output_selector</filter>
+        </data>
+        <data name="ave" format="pdf" label="${tool.name} on ${on_string}: ave.pdf">
+            <filter>"ave" in output_selector</filter>
+        </data>
+        <data name="design" format="pdf" label="${tool.name} on ${on_string}: design.pdf">
+            <filter>"design" in output_selector</filter>
+        </data>
+        <data name="individual_table" format="tsv" label="${tool.name} on ${on_string}: individuals.tsv">
+            <filter>"individual_table" in output_selector</filter>
+        </data>
+        <data name="variable_table" format="tsv" label="${tool.name} on ${on_string}: variables.tsv">
+            <filter>"variable_table" in output_selector</filter>
+        </data>
+        <data name="rdata" format="rdata" label="${tool.name} on ${on_string}: rgcca.result.RData">
+            <filter>"rdata" in output_selector</filter>
+        </data>
+    </outputs>
+
+    <tests>
+
+        <!-- Single block (agriculture.tsv): expects eight outputs. The individual table must have a four-column header and at least one row made of a row name followed by four signed decimal numbers. Dots in both regular expressions are escaped so that they only match a literal dot. -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="1block"/>
+            <param name="blocks" value="agriculture.tsv" ftype="tsv"/>
+            <output name="individual_table">
+                <assert_contents>
+                    <has_n_columns n="4"/>
+                    <has_line_matching
+                            expression='"agriculture\.axis1"\s"agriculture\.axis2"\s"superblock\.axis1"\s"superblock\.axis2"'/>
+                    <has_line_matching
+                            expression='^.+(\s\-?\d+\.\d+){4}$'/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- Three blocks with a user-supplied design matrix and the superblock switch set to false. The generated command must contain the full long-form connection option (not just a substring of it) and the superblock flag. -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="3blocks_connection"/>
+            <expand macro="output_tests_3blocks"/>
+            <section name="analyse">
+                <param name="connection" value="connection.tsv" ftype="tsv"/>
+                <param name="superblock" value="false"/>
+            </section>
+            <assert_command>
+                <has_text text="--connection"/>
+                <has_text text="--superblock"/>
+            </assert_command>
+        </test>
+
+        <!-- Supervised mode on three blocks: block 3 is declared as the response block and the superblock switch is set to false. The command must carry the response position and the superblock flag. -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="3blocks_supervised"/>
+            <expand macro="output_tests_3blocks"/>
+            <section name="analyse">
+                <param name="superblock" value="false"/>
+                <conditional name="supervised">
+                    <param name="learning_mode" value="supervised"/>
+                    <param name="block_response" value="3"/>
+                </conditional>
+            </section>
+            <assert_command>
+                <has_text text="--response 3"/>
+                <has_text text="--superblock"/>
+            </assert_command>
+        </test>
+
+        <!-- Three-block run that sets no parameter directly: inputs and expected outputs all come from the two expanded macros (their definitions are not part of this file section). -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="3blocks"/>
+            <expand macro="output_tests_3blocks"/>
+        </test>
+
+        <!-- Three-block run with the method type set to sgcca inside family "m" (presumably the multi-block family; confirm against the family select options). The generated command must mention sgcca. -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="3blocks_sgcca"/>
+            <expand macro="output_tests_3blocks"/>
+            <section name="analyse">
+                <conditional name="method">
+                    <param name="family" value="m"/>
+                    <param name="type" value="sgcca"/>
+                </conditional>
+            </section>
+            <assert_command>
+                <has_text text="sgcca"/>
+            </assert_command>
+        </test>
+
+        <!-- Two-block PLS run (agriculture and politic) overriding the analysis settings (scale, tau, scheme, ncomp) and the graphic settings (components, blocks, number of top variables). A response file is supplied for colouring, so the command must include the group option. -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="2blocks" compx="3" compy="1"/>
+            <param name="blocks" value="agriculture.tsv,politic.tsv"/>
+            <section name="analyse">
+                <param name="scale" value="false"/>
+                <conditional name="tau">
+                    <param name="bool" value="false"/>
+                    <param name="value" value="0"/>
+                </conditional>
+                <param name="scheme" value="3"/>
+                <param name="ncomp" value="3"/>
+                <conditional name="method">
+                    <param name="family" value="2"/>
+                    <param name="type" value="pls"/>
+                </conditional>
+            </section>
+            <section name="graphic">
+                <param name="response" value="political_system.tsv" ftype="tsv"/>
+                <param name="text" value="false"/>
+                <param name="compx" value="3"/>
+                <param name="compy" value="1"/>
+                <param name="blockx" value="2"/>
+                <param name="blocky" value="1"/>
+                <param name="nmark" value="11"/>
+            </section>
+            <assert_command>
+                <has_text text="pls"/>
+                <has_text text="--group"/>
+            </assert_command>
+        </test>
+
+    </tests>
+<help>
+
+==================================
+ABOUT
+==================================
+
+
+**Author:**
+Etienne CAMENEN
+
+
+**Contact:**
+arthur.tenenhaus@centralesupelec.fr
+
+
+**R package:**
+The RGCCA package is available from the CRAN repository (https://cran.r-project.org/web/packages/RGCCA).
+
+---------------------------------------------------
+
+==================================
+R/SGCCA
+==================================
+
+A user-friendly multi-block analysis tool (Regularized Generalized Canonical Correlation Analysis, RGCCA), as described in [1] and [2], with all default settings predefined. The software produces figures to explore the results of the analysis: individuals and variables projected on two components of the multi-block analysis, the list of top variables, and the explained variance in the model.
+
+**Working example**
+
+    | From Russett data (RGCCA package): https://github.com/rgcca-factory/RGCCA/tree/master/inst/extdata
+    | Use *agriculture.tsv* as a block. Add *industry.tsv* and *politic.tsv* as new blocks. *connection.tsv* can be used as a design matrix in the analysis settings, and *political_system.tsv* as a response variable in the graphic settings.
+
+**Documentation**
+
+- RGCCA: https://cran.r-project.org/web/packages/RGCCA/vignettes/vignette_RGCCA.pdf
+- accepted input / output formats: https://github.com/rgcca-factory/RGCCA#input-files
+<!-- - tutorial: https://github.com/BrainAndSpineInstitute/rgcca_galaxy/blob/release/0.2/README.md-->
+
+</help>
+
+    <!-- DOIs of the publications to cite for this tool (presumably the references [1] and [2] mentioned in the help text; confirm the order). -->
+    <citations>
+        <citation type="doi">10.1007/s11336-017-9573-x</citation>
+        <citation type="doi">10.1007/s11336-011-9206-8</citation>
+    </citations>
+
+</tool>
Binary file test-data/1block/ave.pdf has changed
Binary file test-data/1block/corcircle.pdf has changed
Binary file test-data/1block/design.pdf has changed
Binary file test-data/1block/individuals.pdf has changed
Binary file test-data/1block/rgcca.result.RData has changed
Binary file test-data/1block/top_variables.pdf has changed
Binary file test-data/2blocks/ave.pdf has changed
Binary file test-data/2blocks/corcircle.pdf has changed
Binary file test-data/2blocks/design.pdf has changed
Binary file test-data/2blocks/individuals.pdf has changed
Binary file test-data/2blocks/rgcca.result.RData has changed
Binary file test-data/2blocks/top_variables.pdf has changed
Binary file test-data/3blocks/ave.pdf has changed
Binary file test-data/3blocks/corcircle.pdf has changed
Binary file test-data/3blocks/design.pdf has changed
Binary file test-data/3blocks/individuals.pdf has changed
Binary file test-data/3blocks/rgcca.result.RData has changed
Binary file test-data/3blocks/top_variables.pdf has changed
Binary file test-data/3blocks_connection/ave.pdf has changed
Binary file test-data/3blocks_connection/corcircle.pdf has changed
Binary file test-data/3blocks_connection/design.pdf has changed
Binary file test-data/3blocks_connection/individuals.pdf has changed
Binary file test-data/3blocks_connection/rgcca.result.RData has changed
Binary file test-data/3blocks_connection/top_variables.pdf has changed
Binary file test-data/3blocks_sgcca/ave.pdf has changed
Binary file test-data/3blocks_sgcca/corcircle.pdf has changed
Binary file test-data/3blocks_sgcca/design.pdf has changed
Binary file test-data/3blocks_sgcca/individuals.pdf has changed
Binary file test-data/3blocks_sgcca/rgcca.result.RData has changed
Binary file test-data/3blocks_sgcca/top_variables.pdf has changed
Binary file test-data/3blocks_supervised/ave.pdf has changed
Binary file test-data/3blocks_supervised/corcircle.pdf has changed
Binary file test-data/3blocks_supervised/design.pdf has changed
Binary file test-data/3blocks_supervised/individuals.pdf has changed
Binary file test-data/3blocks_supervised/rgcca.result.RData has changed
Binary file test-data/3blocks_supervised/top_variables.pdf has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/agriculture.tsv	Tue Jan 12 10:12:04 2021 +0000
@@ -0,0 +1,48 @@
+	gini	farm	rent
+Argentina	86.3	98.2	3.52
+Australia	92.9	99.6	3.27
+Austria	74	97.4	2.46
+Belgium	58.7	85.8	4.15
+Bolivia	93.8	97.7	3.04
+Brasil	83.7	98.5	2.31
+Canada	49.7	82.9	2.1
+Chile	93.8	99.7	2.67
+Colombia	84.9	98.1	2.57
+CostaRica	88.1	99.1	1.86
+Cuba	79.2	97.8	4
+Denmark	45.8	79.3	1.5
+DominicanRepublic	79.5	98.5	3.08
+Ecuador	86.4	99.3	2.75
+Egypt	74	98.1	2.53
+Salvador	82.8	98.8	2.78
+Finland	59.9	86.3	1.22
+France	58.3	86.1	3.3
+Guatemala	86	99.7	2.89
+Greece	74.7	99.4	2.93
+Honduras	75.7	97.4	2.87
+India	52.2	86.9	3.99
+Irak	88.1	99.3	4.33
+Irland	59.8	85.9	1.25
+Italy	80.3	98	3.21
+Japan	47	81.5	1.36
+Libia	70	93	2.25
+Luxemburg	63.8	87.7	2.99
+TheNetherlands	60.5	86.2	3.99
+NewZealand	77.3	95.5	3.15
+Nicaragua	75.7	96.4	2.39
+Norway	66.9	87.5	2.14
+Panama	73.7	95	2.59
+Peru	87.5	96.9	2.61
+Philippine	56.4	88.2	3.65
+Poland	45	77.7	0
+SouthVietnam	67.1	94.6	3.04
+Spain	78	99.5	3.8
+Sweden	57.7	87.2	2.99
+Switzerland	49.8	81.5	2.99
+Taiwan	65.2	94.1	3.71
+UK	71	93.4	3.82
+USA	70.5	95.4	3.06
+Uruguay	81.7	96.6	3.58
+Venezuela	90.9	99.3	3.07
+WestGermany	67.4	93	1.9
+Yugoslavia	43.7	79.8	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/connection.tsv	Tue Jan 12 10:12:04 2021 +0000
@@ -0,0 +1,3 @@
+0	1	1
+1	0	1
+1	1	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/industry.tsv	Tue Jan 12 10:12:04 2021 +0000
@@ -0,0 +1,48 @@
+	gnpr	labo
+Argentina	5.92	3.22
+Australia	7.1	2.64
+Austria	6.28	3.47
+Belgium	6.92	2.3
+Bolivia	4.19	4.28
+Brasil	5.57	4.11
+Canada	7.42	2.48
+Chile	5.19	3.4
+Colombia	5.8	4.01
+CostaRica	5.73	4.01
+Cuba	5.89	3.74
+Denmark	6.82	3.14
+DominicanRepublic	5.32	4.03
+Ecuador	5.32	3.97
+Egypt	4.89	4.16
+Salvador	5.5	4.14
+Finland	6.85	3.83
+France	6.95	3.26
+Guatemala	5.19	4.22
+Greece	5.48	3.87
+Honduras	4.92	4.19
+India	4.28	4.26
+Irak	5.27	4.39
+Irland	6.23	3.69
+Italy	6.09	3.37
+Japan	5.48	3.69
+Libia	4.5	4.32
+Luxemburg	7.09	3.14
+TheNetherlands	6.56	2.4
+NewZealand	7.14	2.77
+Nicaragua	5.54	4.22
+Norway	6.88	3.26
+Panama	5.86	3.99
+Peru	4.94	4.09
+Philippine	5.3	4.08
+Poland	6.15	4.04
+SouthVietnam	4.89	4.17
+Spain	5.54	3.91
+Sweden	7.06	2.56
+Switzerland	7.11	2.3
+Taiwan	4.88	3.91
+UK	6.91	1.61
+USA	7.76	2.3
+Uruguay	6.34	3.61
+Venezuela	6.64	3.74
+WestGermany	6.64	2.64
+Yugoslavia	5.69	4.2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/politic.tsv	Tue Jan 12 10:12:04 2021 +0000
@@ -0,0 +1,48 @@
+	inst	ecks	death	demostab	demoinst	dictator
+Argentina	0.07	4.06	5.38	0	1	0
+Australia	0.01	0	0	1	0	0
+Austria	0.03	1.61	0	0	1	0
+Belgium	0.45	2.2	0.69	1	0	0
+Bolivia	0.37	3.99	6.5	0	0	1
+Brasil	0.45	3.91	0.69	0	1	0
+Canada	0.01	3.14	0	1	0	0
+Chile	0.12	3.09	1.1	0	1	0
+Colombia	0.18	3.87	5.76	0	1	0
+CostaRica	0.18	3	3.22	0	1	0
+Cuba	0.07	4.62	7.97	0	0	1
+Denmark	0.18	0	0	1	0	0
+DominicanRepublic	0.01	1.95	3.47	0	0	1
+Ecuador	0.3	3.74	2.94	0	0	1
+Egypt	0.61	3.83	1.1	0	0	1
+Salvador	0.3	2.3	1.1	0	0	1
+Finland	0.5	1.61	0	0	1	0
+France	1	3.85	0.69	0	1	0
+Guatemala	0.25	3.83	4.06	0	0	1
+Greece	0.61	2.3	1.1	0	1	0
+Honduras	0.07	3.83	4.72	0	0	1
+India	0	4.43	2.71	1	0	0
+Irak	0.9	3.22	5.84	0	0	1
+Irland	0.12	2.3	0	1	0	0
+Italy	0.45	3.95	0.69	0	1	0
+Japan	0.55	3.14	0.69	0	1	0
+Libia	0.22	2.2	0	0	0	1
+Luxemburg	0.03	0	0	1	0	0
+TheNetherlands	0.07	1.1	0	1	0	0
+NewZealand	0.03	0	0	1	0	0
+Nicaragua	0.03	2.83	2.83	0	0	1
+Norway	0.03	0.69	0	1	0	0
+Panama	0.5	3.4	3.26	0	0	1
+Peru	0.18	3.18	3.3	0	0	1
+Philippine	0.1	2.77	5.68	0	0	1
+Poland	0	3	1.79	0	0	1
+SouthVietnam	0	3.93	6.91	0	0	1
+Spain	0	3.14	0.69	0	0	1
+Sweden	0	0	0	1	0	0
+Switzerland	0	0	0	1	0	0
+Taiwan	0	1.39	0	0	0	1
+UK	0.07	2.56	0	1	0	0
+USA	0.03	3.14	0	1	0	0
+Uruguay	0.18	0.69	0.69	1	0	0
+Venezuela	0.25	3.61	4.72	0	0	1
+WestGermany	0	1.61	0	0	1	0
+Yugoslavia	0	2.3	0	0	0	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/political_system.tsv	Tue Jan 12 10:12:04 2021 +0000
@@ -0,0 +1,48 @@
+	demostab	demoinst	dictator
+Argentina	0	1	0
+Australia	1	0	0
+Austria	0	1	0
+Belgium	1	0	0
+Bolivia	0	0	1
+Brasil	0	1	0
+Canada	1	0	0
+Chile	0	1	0
+Colombia	0	1	0
+CostaRica	0	1	0
+Cuba	0	0	1
+Denmark	1	0	0
+DominicanRepublic	0	0	1
+Ecuador	0	0	1
+Egypt	0	0	1
+Salvador	0	0	1
+Finland	0	1	0
+France	0	1	0
+Guatemala	0	0	1
+Greece	0	1	0
+Honduras	0	0	1
+India	1	0	0
+Irak	0	0	1
+Irland	1	0	0
+Italy	0	1	0
+Japan	0	1	0
+Libia	0	0	1
+Luxemburg	1	0	0
+TheNetherlands	1	0	0
+NewZealand	1	0	0
+Nicaragua	0	0	1
+Norway	1	0	0
+Panama	0	0	1
+Peru	0	0	1
+Philippine	0	0	1
+Poland	0	0	1
+SouthVietnam	0	0	1
+Spain	0	0	1
+Sweden	1	0	0
+Switzerland	1	0	0
+Taiwan	0	0	1
+UK	1	0	0
+USA	1	0	0
+Uruguay	1	0	0
+Venezuela	0	0	1
+WestGermany	0	1	0
+Yugoslavia	0	0	1