changeset 9:58aa18e1fe14 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_high_dimension_visualization commit 7343be2d3b1b8cb1ba0c4c55767b60dbce8f8b22
author artbio
date Thu, 07 Nov 2024 22:43:01 +0000
parents fe6f76030168
children
files high_dim_visu.R high_dim_visu.xml
diffstat 2 files changed, 396 insertions(+), 368 deletions(-) [+]
line wrap: on
line diff
--- a/high_dim_visu.R	Wed Nov 29 17:28:18 2023 +0000
+++ b/high_dim_visu.R	Thu Nov 07 22:43:01 2024 +0000
@@ -1,8 +1,9 @@
-options(show.error.messages = FALSE,
-  error = function() {
-    cat(geterrmessage(), file = stderr())
-    q("no", 1, FALSE)
-  }
+options(
+    show.error.messages = FALSE,
+    error = function() {
+        cat(geterrmessage(), file = stderr())
+        q("no", 1, FALSE)
+    }
 )
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 warnings()
@@ -19,397 +20,421 @@
 library(Polychrome)
 
 option_list <- list(
-  make_option(
-    "--data",
-    default = NA,
-    type = "character",
-    help = "Input file that contains expression value to visualise"
-  ),
-  make_option(
-    "--labels",
-    default = FALSE,
-    type = "logical",
-    help = "add labels in scatter plots [default : '%default' ]"
-  ),
-  make_option(
-    "--factor",
-    default = "",
-    type = "character",
-    help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]"
-  ),
-  make_option(
-    "--visu_choice",
-    default = "PCA",
-    type = "character",
-    help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]"
-  ),
-  make_option(
-    "--Rtsne_seed",
-    default = 42,
-    type = "integer",
-    help = "Seed value for reproducibility [default : '%default' ]"
-  ),
-  make_option(
-    "--Rtsne_dims",
-    default = 2,
-    type = "integer",
-    help = "Output dimensionality [default : '%default' ]"
-  ),
-  make_option(
-    "--Rtsne_initial_dims",
-    default = 50,
-    type = "integer",
-    help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]"
-  ),
-  make_option(
-    "--Rtsne_perplexity",
-    default = 5.0,
-    type = "numeric",
-    help = "perplexity [default : '%default' ]"
-  ),
-  make_option(
-    "--Rtsne_theta",
-    default = 1.0,
-    type = "numeric",
-    help = "theta [default : '%default' ]"
-  ),
-  make_option(
-    "--Rtsne_max_iter",
-    default = 1000,
-    type = "integer",
-    help = "max_iter [default : '%default' ]"
-  ),
-  make_option(
-    "--Rtsne_pca",
-    default = TRUE,
-    type = "logical",
-    help = "Whether an initial PCA step should be performed [default : '%default' ]"
-  ),
-  make_option(
-    "--Rtsne_pca_center",
-    default = TRUE,
-    type = "logical",
-    help = "Should data be centered before pca is applied? [default : '%default' ]"
-  ),
-  make_option(
-    "--Rtsne_pca_scale",
-    default = FALSE,
-    type = "logical",
-    help = "Should data be scaled before pca is applied? [default : '%default' ]"
-  ),
-  make_option(
-    "--Rtsne_normalize",
-    default = TRUE,
-    type = "logical",
-    help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]"
-  ),
-  make_option(
-    "--Rtsne_exaggeration_factor",
-    default = 12.0,
-    type = "numeric",
-    help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]"
-  ),
-  make_option(
-    "--PCA_npc",
-    default = 5,
-    type = "integer",
-    help = "number of dimensions kept in the results [default : '%default' ]"
-  ),
-  make_option(
-    "--item_size",
-    default = 1,
-    type = "numeric",
-    help = "Size of points/labels in PCA [default : '%default' ]"
-  ),
-  make_option(
-    "--x_axis",
-    default = 1,
-    type = "integer",
-    help = "PC to plot in the x axis [default : '%default' ]"
-  ),
-  make_option(
-    "--y_axis",
-    default = 2,
-    type = "integer",
-    help = "PC to plot in the y axis [default : '%default' ]"
-  ),
-  make_option(
-    "--HCPC_ncluster",
-    default = -1,
-    type = "numeric",
-    help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]"
-  ),
-  make_option(
-    "--HCPC_npc",
-    default = 5,
-    type = "integer",
-    help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]"
-  ),
-  make_option(
-    "--HCPC_metric",
-    default = "euclidean",
-    type = "character",
-    help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]"
-  ),
-  make_option(
-    "--HCPC_method",
-    default = "ward",
-    type = "character",
-    help = "Clustering method between 'ward','average','single', 'complete', 'weighted'  [default :'%default']"
-  ),
-  make_option(
-    "--pdf_out",
-    default = "out.pdf",
-    type = "character",
-    help = "pdf of plots [default : '%default' ]"
-  ),
-  make_option(
-    "--HCPC_consol",
-    default = "TRUE",
-    type = "logical",
-    help = "If TRUE, a k-means consolidation is performed [default :'%default']"
-  ),
-  make_option(
-    "--HCPC_itermax",
-    default = "10",
-    type = "integer",
-    help = "The maximum number of iterations for the consolidation [default :'%default']"
-  ),
-  make_option(
-    "--HCPC_min",
-    default = "3",
-    type = "integer",
-    help = "The least possible number of clusters suggested [default :'%default']"
-  ),
-  make_option(
-    "--HCPC_max",
-    default = -1,
-    type = "integer",
-    help = "The higher possible number of clusters suggested [default :'%default']"
-  ),
-  make_option(
-    "--HCPC_clusterCA",
-    default = "rows",
-    type = "character",
-    help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']"
-  ),
-  make_option(
-    "--HCPC_kk",
-    default = Inf,
-    type = "numeric",
-    help = "The maximum number of iterations for the consolidation [default :'%default']"
-  ),
-  make_option(
-    "--HCPC_mutual_info",
-    default = "",
-    type = "character",
-    help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']"
-  ),
-  make_option(
-    "--HCPC_cell_clust",
-    default = "",
-    type = "character",
-    help = "Lists cells in the clusters generated by HCPC clustering. 2-column table (cell identifiers/clusters) [default :'%default']"
-  ),
-  make_option(
-    "--HCPC_contributions",
-    default = "",
-    type = "character",
-    help = "Table of variables (genes) most contributing to HCPC clustering [default :'%default']"
-  )
+    make_option(
+        "--data",
+        default = NA,
+        type = "character",
+        help = "Input file that contains expression value to visualise"
+    ),
+    make_option(
+        "--labels",
+        default = FALSE,
+        type = "logical",
+        help = "add labels in scatter plots [default : '%default' ]"
+    ),
+    make_option(
+        "--factor",
+        default = "",
+        type = "character",
+        help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]"
+    ),
+    make_option(
+        "--visu_choice",
+        default = "PCA",
+        type = "character",
+        help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]"
+    ),
+    make_option(
+        "--Rtsne_seed",
+        default = 42,
+        type = "integer",
+        help = "Seed value for reproducibility [default : '%default' ]"
+    ),
+    make_option(
+        "--Rtsne_dims",
+        default = 2,
+        type = "integer",
+        help = "Output dimensionality [default : '%default' ]"
+    ),
+    make_option(
+        "--Rtsne_initial_dims",
+        default = 50,
+        type = "integer",
+        help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]"
+    ),
+    make_option(
+        "--Rtsne_perplexity",
+        default = 5.0,
+        type = "numeric",
+        help = "perplexity [default : '%default' ]"
+    ),
+    make_option(
+        "--Rtsne_theta",
+        default = 1.0,
+        type = "numeric",
+        help = "theta [default : '%default' ]"
+    ),
+    make_option(
+        "--Rtsne_max_iter",
+        default = 1000,
+        type = "integer",
+        help = "max_iter [default : '%default' ]"
+    ),
+    make_option(
+        "--Rtsne_pca",
+        default = TRUE,
+        type = "logical",
+        help = "Whether an initial PCA step should be performed [default : '%default' ]"
+    ),
+    make_option(
+        "--Rtsne_pca_center",
+        default = TRUE,
+        type = "logical",
+        help = "Should data be centered before pca is applied? [default : '%default' ]"
+    ),
+    make_option(
+        "--Rtsne_pca_scale",
+        default = FALSE,
+        type = "logical",
+        help = "Should data be scaled before pca is applied? [default : '%default' ]"
+    ),
+    make_option(
+        "--Rtsne_normalize",
+        default = TRUE,
+        type = "logical",
+        help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]"
+    ),
+    make_option(
+        "--Rtsne_exaggeration_factor",
+        default = 12.0,
+        type = "numeric",
+        help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]"
+    ),
+    make_option(
+        "--PCA_npc",
+        default = 5,
+        type = "integer",
+        help = "number of dimensions kept in the results [default : '%default' ]"
+    ),
+    make_option(
+        "--item_size",
+        default = 1,
+        type = "numeric",
+        help = "Size of points/labels in PCA [default : '%default' ]"
+    ),
+    make_option(
+        "--x_axis",
+        default = 1,
+        type = "integer",
+        help = "PC to plot in the x axis [default : '%default' ]"
+    ),
+    make_option(
+        "--y_axis",
+        default = 2,
+        type = "integer",
+        help = "PC to plot in the y axis [default : '%default' ]"
+    ),
+    make_option(
+        "--HCPC_ncluster",
+        default = -1,
+        type = "numeric",
+        help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]"
+    ),
+    make_option(
+        "--HCPC_npc",
+        default = 5,
+        type = "integer",
+        help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]"
+    ),
+    make_option(
+        "--HCPC_metric",
+        default = "euclidean",
+        type = "character",
+        help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]"
+    ),
+    make_option(
+        "--HCPC_method",
+        default = "ward",
+        type = "character",
+        help = "Clustering method between 'ward','average','single', 'complete', 'weighted'  [default :'%default']"
+    ),
+    make_option(
+        "--pdf_out",
+        default = "out.pdf",
+        type = "character",
+        help = "pdf of plots [default : '%default' ]"
+    ),
+    make_option(
+        "--HCPC_consol",
+        default = "TRUE",
+        type = "logical",
+        help = "If TRUE, a k-means consolidation is performed [default :'%default']"
+    ),
+    make_option(
+        "--HCPC_itermax",
+        default = "10",
+        type = "integer",
+        help = "The maximum number of iterations for the consolidation [default :'%default']"
+    ),
+    make_option(
+        "--HCPC_min",
+        default = "3",
+        type = "integer",
+        help = "The least possible number of clusters suggested [default :'%default']"
+    ),
+    make_option(
+        "--HCPC_max",
+        default = -1,
+        type = "integer",
+        help = "The higher possible number of clusters suggested [default :'%default']"
+    ),
+    make_option(
+        "--HCPC_clusterCA",
+        default = "rows",
+        type = "character",
+        help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']"
+    ),
+    make_option(
+        "--HCPC_kk",
+        default = Inf,
+        type = "numeric",
+        help = "The maximum number of iterations for the consolidation [default :'%default']"
+    ),
+    make_option(
+        "--HCPC_mutual_info",
+        default = "",
+        type = "character",
+        help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']"
+    ),
+    make_option(
+        "--HCPC_cell_clust",
+        default = "",
+        type = "character",
+        help = "Lists cells in the clusters generated by HCPC clustering. 2-column table (cell identifiers/clusters) [default :'%default']"
+    ),
+    make_option(
+        "--HCPC_contributions",
+        default = "",
+        type = "character",
+        help = "Table of variables (genes) most contributing to HCPC clustering [default :'%default']"
+    )
 )
 
 opt <- parse_args(OptionParser(option_list = option_list),
-                  args = commandArgs(trailingOnly = TRUE))
+    args = commandArgs(trailingOnly = TRUE)
+)
 
 if (opt$HCPC_max == -1) {
-  opt$HCPC_max <- NULL
+    opt$HCPC_max <- NULL
 }
 if (opt$HCPC_kk == -1) {
-  opt$HCPC_kk <- Inf
+    opt$HCPC_kk <- Inf
 }
 
 #### We treat data once, at the beginning of the script ####
 data <- read.delim(
-  opt$data,
-  check.names = FALSE,
-  header = TRUE,
-  row.names = 1,
-  sep = "\t"
+    opt$data,
+    check.names = FALSE,
+    header = TRUE,
+    row.names = 1,
+    sep = "\t"
 )
 # we transpose immediately, because this is the common data structure
 data <- as.data.frame(t(data))
 
 # we treat the factor for usage in 3 methods
 if (opt$factor != "") {
-  contrasting_factor <- read.delim(opt$factor, header = TRUE)
-  rownames(contrasting_factor) <- contrasting_factor[, 1]
-  # we pick only the relevant values of the contrasting factor
-  contrasting_factor <- contrasting_factor[rownames(data), ]
-  sup <- colnames(contrasting_factor)[2]
-  if (!is.numeric(contrasting_factor[, 2])) {
-    contrasting_factor[, 2] <- as.factor(contrasting_factor[, 2])
-  }
+    contrasting_factor <- read.delim(opt$factor, header = TRUE)
+    rownames(contrasting_factor) <- contrasting_factor[, 1]
+    # we pick only the relevant values of the contrasting factor
+    contrasting_factor <- contrasting_factor[rownames(data), ]
+    sup <- colnames(contrasting_factor)[2]
+    if (!is.numeric(contrasting_factor[, 2])) {
+        contrasting_factor[, 2] <- as.factor(contrasting_factor[, 2])
+    }
 }
 
 ######### make PCA with FactoMineR #################
 if (opt$visu_choice == "PCA") {
-  if (opt$labels) {
-    labels <- "ind"
-  } else {
-    labels <- "none"
-  }
-  pdf(opt$pdf_out)
-  if (opt$factor != "") {
-    data <- cbind(data, contrasting_factor[, 2])
-    colnames(data)[length(data)] <- sup
-    if (is.numeric(contrasting_factor[, 2])) {
-      res_pca <- PCA(X = data, quanti.sup = sup, graph = FALSE)
-      pca_plot <- plot(res_pca, habillage = sup, label = labels,
-                       title = "PCA graph of cells", cex = opt$item_size,
-                       axes = c(opt$x_axis, opt$y_axis))
+    if (opt$labels) {
+        labels <- "ind"
     } else {
-      res_pca <- PCA(X = data, quali.sup = sup, graph = FALSE)
-      pca_plot <- plot(res_pca, habillage = sup, label = labels,
-                       title = "PCA graph of cells", cex = opt$item_size,
-                       axes = c(opt$x_axis, opt$y_axis))
+        labels <- "none"
     }
-  } else {
-    res_pca <- PCA(X = data, graph = FALSE)
-    pca_plot <- plot(res_pca, label = labels,
-                     title = "PCA graph of cells", cex = opt$item_size,
-                     axes = c(opt$x_axis, opt$y_axis), col.ind = "deepskyblue4")
-  }
-  print(pca_plot)
-  dev.off()
+    pdf(opt$pdf_out)
+    if (opt$factor != "") {
+        data <- cbind(data, contrasting_factor[, 2])
+        colnames(data)[length(data)] <- sup
+        if (is.numeric(contrasting_factor[, 2])) {
+            res_pca <- PCA(X = data, quanti.sup = sup, graph = FALSE)
+            pca_plot <- plot(res_pca,
+                habillage = sup, label = labels,
+                title = "PCA graph of cells", cex = opt$item_size,
+                axes = c(opt$x_axis, opt$y_axis)
+            )
+        } else {
+            res_pca <- PCA(X = data, quali.sup = sup, graph = FALSE)
+            pca_plot <- plot(res_pca,
+                habillage = sup, label = labels,
+                title = "PCA graph of cells", cex = opt$item_size,
+                axes = c(opt$x_axis, opt$y_axis)
+            )
+        }
+    } else {
+        res_pca <- PCA(X = data, graph = FALSE)
+        pca_plot <- plot(res_pca,
+            label = labels,
+            title = "PCA graph of cells", cex = opt$item_size,
+            axes = c(opt$x_axis, opt$y_axis), col.ind = "deepskyblue4"
+        )
+    }
+    print(pca_plot)
+    dev.off()
 }
 
 ########### make HCPC with FactoMineR ##########
 if (opt$visu_choice == "HCPC") {
-  pdf(opt$pdf_out)
-  # HCPC starts with a PCA
-  pca <- PCA(X = data, ncp = opt$HCPC_npc, graph = FALSE)
-  pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA
-  # Hierarchical Clustering On Principal Components Followed By Kmean Clustering
-  res_hcpc <- HCPC(pca,
-                   nb.clust = opt$HCPC_ncluster,
-                   metric = opt$HCPC_metric,
-                   method = opt$HCPC_method,
-                   graph = FALSE,
-                   consol = opt$HCPC_consol,
-                   iter.max = opt$HCPC_itermax,
-                   min = opt$HCPC_min,
-                   max = opt$HCPC_max,
-                   cluster.CA = opt$HCPC_clusterCA,
-                   kk = opt$HCPC_kk)
-  # HCPC plots
-  dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2
-  plot(res_hcpc, choice = "tree")
-  plot(res_hcpc, choice = "bar")
-  if (opt$labels == FALSE) {
-    plot(res_hcpc, choice = "3D.map", ind.names = FALSE)
-    plot(res_hcpc, choice = "map", label = "none")
-  } else {
-    plot(res_hcpc, choice = "3D.map")
-    plot(res_hcpc, choice = "map")
-  }
-  ## Normalized Mutual Information
-  if (opt$factor != "") {
-    sink(opt$HCPC_mutual_info)
-    cat("Relationship between input factor and its levels and the HCPC clusters")
-    res <- external_validation(true_labels = as.numeric(contrasting_factor[, 2]),
-                               clusters = as.numeric(res_hcpc$data.clust$clust),
-                               summary_stats = TRUE)
-    sink()
-  }
-  dev.off()
+    pdf(opt$pdf_out)
+    # HCPC starts with a PCA
+    pca <- PCA(X = data, ncp = opt$HCPC_npc, graph = FALSE)
+    pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA
+    # Hierarchical Clustering On Principal Components Followed By Kmean Clustering
+    res_hcpc <- HCPC(pca,
+        nb.clust = opt$HCPC_ncluster,
+        metric = opt$HCPC_metric,
+        method = opt$HCPC_method,
+        graph = FALSE,
+        consol = opt$HCPC_consol,
+        iter.max = opt$HCPC_itermax,
+        min = opt$HCPC_min,
+        max = opt$HCPC_max,
+        cluster.CA = opt$HCPC_clusterCA,
+        kk = opt$HCPC_kk
+    )
+    # HCPC plots
+    dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2
+    plot(res_hcpc, choice = "tree")
+    plot(res_hcpc, choice = "bar")
+    if (opt$labels == FALSE) {
+        plot(res_hcpc, choice = "3D.map", ind.names = FALSE)
+        plot(res_hcpc, choice = "map", label = "none")
+    } else {
+        plot(res_hcpc, choice = "3D.map")
+        plot(res_hcpc, choice = "map")
+    }
+    ## Normalized Mutual Information
+    if (opt$factor != "") {
+        sink(opt$HCPC_mutual_info)
+        cat("Relationship between input factor and its levels and the HCPC clusters")
+        res <- external_validation(
+            true_labels = as.numeric(contrasting_factor[, 2]),
+            clusters = as.numeric(res_hcpc$data.clust$clust),
+            summary_stats = TRUE
+        )
+        sink()
+    }
+    dev.off()
 
-  res_clustering <- data.frame(Cell = rownames(res_hcpc$data.clust),
-                               Cluster = res_hcpc$data.clust$clust)
-  # Description of cluster by most contributing variables / gene expressions
-  # first transform list of vectors in a list of dataframes
-  extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame)
-  # second, transfer rownames (genes) to column in the dataframe, before rbinding
-  extract_description_w_genes <- Map(cbind,
-                                     extract_description,
-                                     genes = lapply(extract_description, rownames))
-  # Then collapse all dataframes with cluster_id in 1st column using {data.table} rbindlist()
-  cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id")
-  cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # swap columns
-  cluster_description <- cluster_description[order(cluster_description[[2]],
-                                                   cluster_description[[8]]), ] # sort by cluster then by pval
-  # Finally, output cluster description data frame
-  write.table(cluster_description,
-              file = opt$HCPC_contributions,
-              sep = "\t",
-              quote = FALSE,
-              col.names = TRUE,
-              row.names = FALSE)
+    res_clustering <- data.frame(
+        Cell = rownames(res_hcpc$data.clust),
+        Cluster = res_hcpc$data.clust$clust
+    )
+    # Description of cluster by most contributing variables / gene expressions
+    # first transform list of vectors in a list of dataframes
+    extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame)
+    # second, transfer rownames (genes) to column in the dataframe, before rbinding
+    extract_description_w_genes <- Map(cbind,
+        extract_description,
+        genes = lapply(extract_description, rownames)
+    )
+    # Then collapse all dataframes with cluster_id in 1st column using {data.table} rbindlist()
+    cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id")
+    cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # swap columns
+    cluster_description <- cluster_description[order(
+        cluster_description[[2]],
+        cluster_description[[8]]
+    ), ] # sort by cluster then by pval
+    # Finally, output cluster description data frame
+    write.table(cluster_description,
+        file = opt$HCPC_contributions,
+        sep = "\t",
+        quote = FALSE,
+        col.names = TRUE,
+        row.names = FALSE
+    )
 
-  ## Return cluster table to user
-  write.table(res_clustering,
-              file = opt$HCPC_cell_clust,
-              sep = "\t",
-              quote = FALSE,
-              col.names = TRUE,
-              row.names = FALSE)
+    ## Return cluster table to user
+    write.table(res_clustering,
+        file = opt$HCPC_cell_clust,
+        sep = "\t",
+        quote = FALSE,
+        col.names = TRUE,
+        row.names = FALSE
+    )
 }
 ################  t-SNE ####################
 if (opt$visu_choice == "tSNE") {
-  set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility
-  tsne_out <- Rtsne(data,
-                    dims = opt$Rtsne_dims,
-                    initial_dims = opt$Rtsne_initial_dims,
-                    perplexity = opt$Rtsne_perplexity,
-                    theta = opt$Rtsne_theta,
-                    max_iter = opt$Rtsne_max_iter,
-                    pca = opt$Rtsne_pca,
-                    pca_center = opt$Rtsne_pca_center,
-                    pca_scale = opt$Rtsne_pca_scale,
-                    normalize = opt$Rtsne_normalize,
-                    exaggeration_factor = opt$Rtsne_exaggeration_factor)
-  embedding <- as.data.frame(tsne_out$Y[, 1:2])
-  embedding$Class <- as.factor(rownames(data))
-  gg_legend <- theme(legend.position = "right")
-  pointcolor <- "#E70000"
-  pointsize <- opt$item_size * 1.5
-  the_theme <- theme(
-    panel.background = element_rect(fill = "gray100", colour = "#6D9EC1",
-                                    size = 2, linetype = "solid"),
-    panel.grid.major = element_line(size = 0.5, linetype = "solid",
-                                    colour = "#6D9EC1"),
-    panel.grid.minor = element_line(size = 0.25, linetype = "solid",
-                                    colour = "darkslategray3")
-  )
-  if (opt$factor == "") {
-    p <- ggplot(embedding, aes(x = V1, y = V2)) +
-      geom_point(size = pointsize * 0.25, color = pointcolor) +
-      gg_legend +
-      xlab("t-SNE 1") +
-      ylab("t-SNE 2") +
-      ggtitle("t-SNE") +
-      the_theme +
-      if (opt$labels) {
-        geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = pointsize, color = pointcolor)
-      }
-  } else {
-    if (is.numeric(contrasting_factor[, 2])) {
-      embedding$factor <- contrasting_factor[, 2]
+    set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility
+    tsne_out <- Rtsne(data,
+        dims = opt$Rtsne_dims,
+        initial_dims = opt$Rtsne_initial_dims,
+        perplexity = opt$Rtsne_perplexity,
+        theta = opt$Rtsne_theta,
+        max_iter = opt$Rtsne_max_iter,
+        pca = opt$Rtsne_pca,
+        pca_center = opt$Rtsne_pca_center,
+        pca_scale = opt$Rtsne_pca_scale,
+        normalize = opt$Rtsne_normalize,
+        exaggeration_factor = opt$Rtsne_exaggeration_factor
+    )
+    embedding <- as.data.frame(tsne_out$Y[, 1:2])
+    embedding$Class <- as.factor(rownames(data))
+    gg_legend <- theme(legend.position = "right")
+    pointcolor <- "#E70000"
+    pointsize <- opt$item_size * 1.5
+    the_theme <- theme(
+        panel.background = element_rect(
+            fill = "gray100", colour = "#6D9EC1",
+            size = 2, linetype = "solid"
+        ),
+        panel.grid.major = element_line(
+            size = 0.5, linetype = "solid",
+            colour = "#6D9EC1"
+        ),
+        panel.grid.minor = element_line(
+            size = 0.25, linetype = "solid",
+            colour = "darkslategray3"
+        )
+    )
+    if (opt$factor == "") {
+        p <- ggplot(embedding, aes(x = V1, y = V2)) +
+            geom_point(size = pointsize * 0.25, color = pointcolor) +
+            gg_legend +
+            xlab("t-SNE 1") +
+            ylab("t-SNE 2") +
+            ggtitle("t-SNE") +
+            the_theme +
+            if (opt$labels) {
+                geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = pointsize, color = pointcolor)
+            }
     } else {
-      embedding$factor <- as.factor(contrasting_factor[, 2])
+        if (is.numeric(contrasting_factor[, 2])) {
+            embedding$factor <- contrasting_factor[, 2]
+        } else {
+            embedding$factor <- as.factor(contrasting_factor[, 2])
+        }
+        p <- ggplot(embedding, aes(x = V1, y = V2, color = factor)) +
+            geom_point(size = pointsize * 0.25) +
+            gg_legend +
+            xlab("t-SNE 1") +
+            ylab("t-SNE 2") +
+            ggtitle("t-SNE") +
+            the_theme +
+            if (opt$labels) {
+                geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = pointsize)
+            }
     }
-    p <- ggplot(embedding, aes(x = V1, y = V2, color = factor)) +
-      geom_point(size = pointsize * 0.25) +
-      gg_legend +
-      xlab("t-SNE 1") +
-      ylab("t-SNE 2") +
-      ggtitle("t-SNE") +
-      the_theme +
-      if (opt$labels) {
-        geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = pointsize)
-      }
-  }
-  pdf(opt$pdf_out)
-  print(p)
-  dev.off()
+    pdf(opt$pdf_out)
+    print(p)
+    dev.off()
 }
--- a/high_dim_visu.xml	Wed Nov 29 17:28:18 2023 +0000
+++ b/high_dim_visu.xml	Thu Nov 07 22:43:01 2024 +0000
@@ -1,5 +1,8 @@
-<tool id="high_dimensions_visualisation" name="Generate PCA, tSNE and HCPC" version="4.3+galaxy0" profile="20.01">
+<tool id="high_dimensions_visualisation" name="Generate PCA, tSNE and HCPC" version="4.3+galaxy1" profile="20.01">
     <description>from highly dimensional expression data</description>
+    <xrefs>
+        <xref type="bio.tools">galaxy_single_cell_suite</xref>
+    </xrefs>
     <requirements>
         <requirement type="package" version="1.7.3=r43hc72bb7e_2">r-optparse</requirement>
         <requirement type="package" version="2.9=r43h57805ef_0">r-factominer</requirement>
@@ -236,7 +239,7 @@
             <param name="visu_choice" value="PCA" />
             <param name="factor_choice" value="No" />
             <param name="item_size" value="0.5" />
-            <output name="pdf_out" file="pca.1.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="pca.1.pdf" ftype="pdf" compare="sim_size" delta="1000"/>
         </test>
         <!-- test PCA PC2 vs PC3 -->
         <test expect_num_outputs="1">
@@ -246,7 +249,7 @@
             <param name="factor_choice" value="No" />
             <param name="x_axis" value="2" />
             <param name="y_axis" value="3" />
-            <output name="pdf_out" file="pca.2.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="pca.2.pdf" ftype="pdf" compare="sim_size" delta="1000"/>
         </test>
         <!-- test factor contrasting on PCA -->
         <test expect_num_outputs="1">
@@ -255,7 +258,7 @@
             <param name="visu_choice" value="PCA" />
             <param name="factor_choice" value="Yes" />
             <param name="factor" value="factor.tsv" ftype="txt"/>
-            <output name="pdf_out" file="pca.3.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="pca.3.pdf" ftype="pdf" compare="sim_size" delta="1000"/>
         </test>
         <!-- test numerical factor contrasting on PCA -->
         <test expect_num_outputs="1">