view launcher.R @ 0:067d45e6caa9 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgcca commit 00f9e92845737e05a4afb1c93043f35b7e4ea771"
author iuc
date Tue, 12 Jan 2021 10:12:04 +0000
parents
children 4e73ea176c34
line wrap: on
line source

# Author: Etienne CAMENEN
# Date: 2020
# Contact: arthur.tenenhaus@centralesupelec.fr
# Key-words: omics, RGCCA, multi-block
# EDAM operation: analysis, correlation, visualisation
#
# Abstract: Performs multi-variate analysis (PCA, CCA, PLS, R/SGCCA, etc.)
# and produces textual and graphical outputs (e.g. variables and individuals
# plots).

rm(list = ls())
graphics.off()
separator <- NULL

########## Arguments ##########

# Parse the arguments from a command line launch
get_args <- function() {
    option_list <- list(
        # File parameters
        make_option(
            opt_str = c("-d", "--datasets"),
            type = "character",
            metavar = "path list",
            help = "List of comma-separated file paths corresponding to the
            blocks to be analyzed (one per block and without spaces between
            them; e.g., path/file1.txt,path/file2.txt) [required]"
        ),
        make_option(
            opt_str = c("-c", "--connection"),
            type = "character",
            metavar = "path",
            help = "Path of the file defining the connections between the blocks
            [if not used, activates the superblock mode]"
        ),
        make_option(
            opt_str = "--group",
            type = "character",
            metavar = "path",
            help = "Path of the file coloring the individuals in the ad hoc
            plot"
        ),
        make_option(
            opt_str = c("-r", "--response"),
            type = "integer",
            metavar = "integer",
            help = "Position of the response file for the supervised mode within
            the block path list [actives the supervised mode]"
        ),
        make_option(
            opt_str = "--names",
            type = "character",
            metavar = "character list",
            help = "List of comma-separated block names to rename them (one per
            block; without spaces between them) [default: the block file names]"
        ),
        make_option(
            opt_str = c("-H", "--header"),
            type = "logical",
            action = "store_false",
            help = "DO NOT consider the first row as the column header"
        ),
        make_option(
            opt_str = "--separator",
            type = "integer",
            metavar = "integer",
            default = opt[1],
            help = "Character used to separate columns (1: tabulation,
            2: semicolon, 3: comma) [default: %default]"
        ),
        # Analysis parameter
        make_option(
            opt_str = "--type",
            type = "character",
            metavar = "character",
            default = opt[2],
            help = "Type of analysis [default: %default] (among: rgcca, pca,
            cca, gcca, cpca-w, hpca, maxbet-b, maxbet, maxdiff-b, maxdiff,
            maxvar-a, maxvar-b, maxvar, niles, r-maxvar, rcon-pca, ridge-gca,
            sabscor, ssqcor, ssqcor, ssqcov-1, ssqcov-2, ssqcov, sum-pca,
            sumcor, sumcov-1, sumcov-2, sumcov)"
        ),
        make_option(
            opt_str = "--ncomp",
            type = "character",
            metavar = "integer list",
            default = opt[3],
            help = "Number of components in the analysis for each block
            [default: %default]. The number should be higher than 1 and lower
            than the minimum number of variables among the blocks. It can be a
            single values or a comma-separated list (e.g 2,2,3,2)."
        ),
        make_option(
            opt_str = "--penalty",
            type = "character",
            metavar = "float list",
            default = opt[4],
            help = "For RGCCA, a regularization parameter for each block (i.e., tau)
            [default: %default]. Tau varies from 0 (maximizing the correlation)
            to 1 (maximizing the covariance). For SGCCA, tau is automatically
            set to 1 and shrinkage parameter can be defined instead for
            automatic variable selection, varying from the square root of the
            variable number (the fewest selected variables) to 1 (all the
            variables are included). It can be a single value or a
            comma-separated list (e.g. 0,1,0.75,1)."
        ),
        make_option(
            opt_str = "--scheme",
            type = "integer",
            metavar = "integer",
            default = opt[5],
            help = "Link (i.e. scheme) function for covariance maximization
            (1: x, 2: x^2, 3: |x|, 4: x^4) [default: %default]. Onnly, the x
            function ('horst scheme') penalizes structural negative correlation.
            The x^2 function ('factorial scheme') discriminates more strongly
            the blocks than the |x| ('centroid scheme') one."
        ),
        make_option(
            opt_str = "--scale",
            type = "logical",
            action = "store_false",
            help = "DO NOT scale the blocks (i.e., a data centering step is
            always performed). Otherwise, each block is normalised and divided
            by the squareroot of its number of variables."
        ),
        make_option(
            opt_str = "--superblock",
            type = "logical",
            action = "store_false",
            help = "DO NOT use a superblock (i.e. a concatenation of all the
            blocks to visualize them all together in a consensus space). In
            this case, all blocks are assumed to be connected or a connection
            file could be used."
        ),
        # Graphical parameters
        make_option(
            opt_str = "--text",
            type = "logical",
            action = "store_false",
            help = "DO NOT display the name of the points instead of shapes when
            plotting"
        ),
        make_option(
            opt_str = "--block",
            type = "integer",
            metavar = "integer",
            default = opt[6],
            help = "Position in the path list of the plotted block (0: the
            superblock or, if not activated, the last one, 1: the fist one,
            2: the 2nd, etc.)[default: the last one]"
        ),
        make_option(
            opt_str = "--block_y",
            type = "integer",
            metavar = "integer",
            help = "Position in the path list of the plotted block for the
            Y-axis in the individual plot (0: the superblock or, if not
            activated, the last one, 1: the fist one, 2: the 2nd, etc.)
            [default: the last one]"
        ),
        make_option(
            opt_str = "--compx",
            type = "integer",
            metavar = "integer",
            default = opt[7],
            help = "Component used in the X-axis for biplots and the only
            component used for histograms [default: %default] (should not be
            higher than the number of components of the analysis)"
        ),
        make_option(
            opt_str = "--compy",
            type = "integer",
            metavar = "integer",
            default = opt[8],
            help = "Component used in the Y-axis for biplots
            [default: %default] (should not be higher than the number of
            components of the analysis)"
        ),
        make_option(
            opt_str = "--nmark",
            type = "integer",
            metavar = "integer",
            default = opt[9],
            help = "Number maximum of top variables in ad hoc plot
            [default: %default]"
        ),
        # output parameters
        make_option(
            opt_str = "--o1",
            type = "character",
            metavar = "path",
            default = opt[10],
            help = "Path for the individual plot [default: %default]"
        ),
        make_option(
            opt_str = "--o2",
            type = "character",
            metavar = "path",
            default = opt[11],
            help = "Path for the variable plot [default: %default]"
        ),
        make_option(
            opt_str = "--o3",
            type = "character",
            metavar = "path",
            default = opt[12],
            help = "Path for the top variables plot [default: %default]"
        ),
        make_option(
            opt_str = "--o4",
            type = "character",
            metavar = "path",
            default = opt[13],
            help = "Path for the explained variance plot [default: %default]"
        ),
        make_option(
            opt_str = "--o5",
            type = "character",
            metavar = "path",
            default = opt[14],
            help = "Path for the design plot [default: %default]"
        ),
        make_option(
            opt_str = "--o6",
            type = "character",
            metavar = "path",
            default = opt[15],
            help = "Path for the individual table [default: %default]"
        ),
        make_option(
            opt_str = "--o7",
            type = "character",
            metavar = "path",
            default = opt[16],
            help = "Path for the variable table [default: %default]"
        ),
        make_option(
            opt_str = "--o8",
            type = "character",
            metavar = "path",
            default = opt[17],
            help = "Path for the analysis results in RData [default: %default]"
        )
    )
    return(optparse::OptionParser(option_list = option_list))
}

char_to_list <- function(x) {
    strsplit(gsub(" ", "", as.character(x)), ",")[[1]]
}

check_arg <- function(opt) {
    # Check the validity of the arguments opt : an optionParser object

    if (is.null(opt$datasets))
        stop_rgcca(paste0("datasets is required."), exit_code = 121)

    if (is.null(opt$scheme))
        opt$scheme <- "factorial"
    else if (!opt$scheme %in% seq(4)) {
        stop_rgcca(
            paste0(
                "scheme should be comprise between 1 and 4 [by default: 2], not be equal to ",
                opt$scheme,
                "."
            ),
            exit_code = 122
        )
    } else {
        schemes <- c("horst", "factorial", "centroid")
        if (opt$scheme == 4)
            opt$scheme <- function(x) x ^ 4
        else
            opt$scheme <- schemes[opt$scheme]
    }

    if (!opt$separator %in% seq(3)) {
        stop_rgcca(
            paste0(
                "separator should be comprise between 1 and 3 (1: Tabulation, 2: Semicolon, 3: Comma) [by default: 2], not be equal to ",
                opt$separator,
                "."
            ),
            exit_code = 123
        )
    } else {
        separators <- c("\t", ";", ",")
        opt$separator <- separators[opt$separator]
    }

    nmark <- NULL
    RGCCA:::check_integer("nmark", opt$nmark, min = 2)

    for (x in c("ncomp", "penalty"))
        opt[[x]] <- char_to_list(opt[[x]])

    return(opt)
}

post_check_arg <- function(opt, rgcca) {
# Check the validity of the arguments after loading the blocks opt : an
# optionParser object blocks : a list of matrix
    blocks <- NULL
    for (x in c("block", "block_y")) {
        if (!is.null(opt[[x]])) {
            if (opt[[x]] == 0)
                opt[[x]] <- length(rgcca$call$blocks)
            opt[[x]] <- RGCCA:::check_blockx(x, opt[[x]], rgcca$call$blocks)
        }
    }

    if (any(opt$ncomp == 1))
        opt$compy <- 1

    for (x in c("compx", "compy"))
        opt[[x]] <- check_compx(x, opt[[x]], rgcca$call$ncomp, opt$block)

    return(opt)
}

check_integer <- function(x, y = x, type = "scalar", float = FALSE, min = 1) {

    if (is.null(y))
        y <- x

    if (type %in% c("matrix", "data.frame"))
        y_temp <- y

    y <- suppressWarnings(as.double(as.matrix(y)))

    if (any(is.na(y)))
        stop_rgcca(paste(x, "should not be NA."))

    if (!is(y, "numeric"))
        stop_rgcca(paste(x, "should be numeric."))

    if (type == "scalar" && length(y) != 1)
        stop_rgcca(paste(x, "should be of length 1."))

    if (!float)
        y <- as.integer(y)

    if (all(y < min))
        stop_rgcca(paste0(x, " should be higher than or equal to ", min, "."))

    if (type %in% c("matrix", "data.frame"))
        y <- matrix(
            y,
            dim(y_temp)[1],
            dim(y_temp)[2],
            dimnames = dimnames(y_temp)
        )

    if (type == "data.frame")
        as.data.frame(y)

    return(y)
}

load_libraries <- function(librairies) {
    for (l in librairies) {
        if (!(l %in% installed.packages()[, "Package"]))
            utils::install.packages(l, repos = "cran.us.r-project.org")
        suppressPackageStartupMessages(
            library(
                l,
                character.only = TRUE,
                warn.conflicts = FALSE,
                quietly = TRUE
        ))
    }
}

stop_rgcca <- function(
    message,
    exit_code = "1",
    call = NULL) {

    base::stop(
        structure(
            class = c(exit_code, "simpleError", "error", "condition"),
            list(message = message, call. = NULL)
    ))
 }

########## Main ##########

# Get arguments : R packaging install, need an opt variable with associated
# arguments
opt <- list(
    separator = 1,
    type = "rgcca",
    ncomp = 2,
    penalty = 1,
    scheme = 2,
    block = 0,
    compx = 1,
    compy = 2,
    nmark = 100,
    o1 = "individuals.pdf",
    o2 = "corcircle.pdf",
    o3 = "top_variables.pdf",
    o4 = "ave.pdf",
    o5 = "design.pdf",
    o6 = "individuals.tsv",
    o7 = "variables.tsv",
    o8 = "rgcca_result.RData",
    datasets = paste0("inst/extdata/",
        c("agriculture", "industry", "politic"),
        ".tsv",
        collapse = ",")
)

load_libraries(c("ggplot2", "optparse", "scales", "igraph", "MASS", "rlang", "Deriv"))
try(load_libraries("ggrepel"), silent = TRUE)

tryCatch(
    opt <- check_arg(optparse::parse_args(get_args())),
    error = function(e) {
        if (length(grep("nextArg", e[[1]])) != 1)
            stop_rgcca(e[[1]], exit_code = 140)
    }, warning = function(w)
        stop_rgcca(w[[1]], exit_code = 141)
)

# Load functions
all_funcs <- unclass(lsf.str(envir = asNamespace("RGCCA"), all = T))
for (i in all_funcs)
    eval(parse(text = paste0(i, "<-RGCCA:::", i)))

# Set missing parameters by default
opt$header <- !("header" %in% names(opt))
opt$superblock <- !("superblock" %in% names(opt))
opt$scale <- !("scale" %in% names(opt))
opt$text <- !("text" %in% names(opt))

status <- 0
tryCatch({

    blocks <- load_blocks(opt$datasets, opt$names, opt$separator)
    group <- load_response(blocks, opt$group, opt$separator, opt$header)
    connection <- load_connection(file = opt$connection, separator = opt$separator)

    func <- quote(
        rgcca(
            blocks = blocks,
            connection = connection,
            response = opt$response,
            superblock = opt$superblock,
            ncomp = opt$ncomp,
            scheme = opt$scheme,
            scale = opt$scale,
            type = opt$type
        )
    )
    if (tolower(opt$type) %in% c("sgcca", "spca", "spls")) {
        func[["sparsity"]] <- opt$penalty
    }else {
        func[["tau"]] <- opt$penalty
    }

    rgcca_out <- eval(as.call(func))

    opt <- post_check_arg(opt, rgcca_out)

    ########## Plot ##########

    if (rgcca_out$call$ncomp[opt$block] == 1 && is.null(opt$block_y)) {
        warning("With a number of component of 1, a second block should be chosen to perform an individual plot")
    } else {
        (
            individual_plot <- plot_ind(
                rgcca_out,
                group,
                opt$compx,
                opt$compy,
                opt$block,
                opt$text,
                opt$block_y,
                "Response"
            )
        )
        save_plot(opt$o1, individual_plot)
    }

    if (rgcca_out$call$ncomp[opt$block] > 1) {
        (
            corcircle <- plot_var_2D(
                rgcca_out,
                opt$compx,
                opt$compy,
                opt$block,
                opt$text,
                n_mark = opt$nmark
            )
        )
        save_plot(opt$o2, corcircle)
    }

    top_variables <- plot_var_1D(
            rgcca_out,
            opt$compx,
            opt$nmark,
            opt$block,
            type = "cor"
        )
    save_plot(opt$o3, top_variables)

    # Average Variance Explained
    (ave <- plot_ave(rgcca_out))
    save_plot(opt$o4, ave)

    # Creates design scheme
    design <- function() plot_network(rgcca_out)
    save_plot(opt$o5, design)

    save_ind(rgcca_out, opt$compx, opt$compy, opt$o6)
    save_var(rgcca_out, opt$compx, opt$compy, opt$o7)
    save(rgcca_out, file = opt$o8)

    }, error = function(e) {
        if (class(e)[1] %in% c("simpleError", "error", "condition"))
            status <<- 1
        else
            status <<- class(e)[1]
        message(e$message)
})
quit(status = status)