Mercurial > repos > artbio > gsc_high_dimensions_visualisation
comparison high_dim_visu.R @ 9:58aa18e1fe14 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_high_dimension_visualization commit 7343be2d3b1b8cb1ba0c4c55767b60dbce8f8b22
| author | artbio |
|---|---|
| date | Thu, 07 Nov 2024 22:43:01 +0000 |
| parents | fe6f76030168 |
| children |
comparison
equal
deleted
inserted
replaced
| 8:fe6f76030168 | 9:58aa18e1fe14 |
|---|---|
| 1 options(show.error.messages = FALSE, | 1 options( |
| 2 error = function() { | 2 show.error.messages = FALSE, |
| 3 cat(geterrmessage(), file = stderr()) | 3 error = function() { |
| 4 q("no", 1, FALSE) | 4 cat(geterrmessage(), file = stderr()) |
| 5 } | 5 q("no", 1, FALSE) |
| 6 } | |
| 6 ) | 7 ) |
| 7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 8 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
| 8 warnings() | 9 warnings() |
| 9 | 10 |
| 10 library(optparse) | 11 library(optparse) |
| 17 library(ClusterR) | 18 library(ClusterR) |
| 18 library(data.table) | 19 library(data.table) |
| 19 library(Polychrome) | 20 library(Polychrome) |
| 20 | 21 |
| 21 option_list <- list( | 22 option_list <- list( |
| 22 make_option( | 23 make_option( |
| 23 "--data", | 24 "--data", |
| 24 default = NA, | 25 default = NA, |
| 25 type = "character", | 26 type = "character", |
| 26 help = "Input file that contains expression value to visualise" | 27 help = "Input file that contains expression value to visualise" |
| 27 ), | 28 ), |
| 28 make_option( | 29 make_option( |
| 29 "--labels", | 30 "--labels", |
| 30 default = FALSE, | 31 default = FALSE, |
| 31 type = "logical", | 32 type = "logical", |
| 32 help = "add labels in scatter plots [default : '%default' ]" | 33 help = "add labels in scatter plots [default : '%default' ]" |
| 33 ), | 34 ), |
| 34 make_option( | 35 make_option( |
| 35 "--factor", | 36 "--factor", |
| 36 default = "", | 37 default = "", |
| 37 type = "character", | 38 type = "character", |
| 38 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]" | 39 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]" |
| 39 ), | 40 ), |
| 40 make_option( | 41 make_option( |
| 41 "--visu_choice", | 42 "--visu_choice", |
| 42 default = "PCA", | 43 default = "PCA", |
| 43 type = "character", | 44 type = "character", |
| 44 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" | 45 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" |
| 45 ), | 46 ), |
| 46 make_option( | 47 make_option( |
| 47 "--Rtsne_seed", | 48 "--Rtsne_seed", |
| 48 default = 42, | 49 default = 42, |
| 49 type = "integer", | 50 type = "integer", |
| 50 help = "Seed value for reproducibility [default : '%default' ]" | 51 help = "Seed value for reproducibility [default : '%default' ]" |
| 51 ), | 52 ), |
| 52 make_option( | 53 make_option( |
| 53 "--Rtsne_dims", | 54 "--Rtsne_dims", |
| 54 default = 2, | 55 default = 2, |
| 55 type = "integer", | 56 type = "integer", |
| 56 help = "Output dimensionality [default : '%default' ]" | 57 help = "Output dimensionality [default : '%default' ]" |
| 57 ), | 58 ), |
| 58 make_option( | 59 make_option( |
| 59 "--Rtsne_initial_dims", | 60 "--Rtsne_initial_dims", |
| 60 default = 50, | 61 default = 50, |
| 61 type = "integer", | 62 type = "integer", |
| 62 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]" | 63 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]" |
| 63 ), | 64 ), |
| 64 make_option( | 65 make_option( |
| 65 "--Rtsne_perplexity", | 66 "--Rtsne_perplexity", |
| 66 default = 5.0, | 67 default = 5.0, |
| 67 type = "numeric", | 68 type = "numeric", |
| 68 help = "perplexity [default : '%default' ]" | 69 help = "perplexity [default : '%default' ]" |
| 69 ), | 70 ), |
| 70 make_option( | 71 make_option( |
| 71 "--Rtsne_theta", | 72 "--Rtsne_theta", |
| 72 default = 1.0, | 73 default = 1.0, |
| 73 type = "numeric", | 74 type = "numeric", |
| 74 help = "theta [default : '%default' ]" | 75 help = "theta [default : '%default' ]" |
| 75 ), | 76 ), |
| 76 make_option( | 77 make_option( |
| 77 "--Rtsne_max_iter", | 78 "--Rtsne_max_iter", |
| 78 default = 1000, | 79 default = 1000, |
| 79 type = "integer", | 80 type = "integer", |
| 80 help = "max_iter [default : '%default' ]" | 81 help = "max_iter [default : '%default' ]" |
| 81 ), | 82 ), |
| 82 make_option( | 83 make_option( |
| 83 "--Rtsne_pca", | 84 "--Rtsne_pca", |
| 84 default = TRUE, | 85 default = TRUE, |
| 85 type = "logical", | 86 type = "logical", |
| 86 help = "Whether an initial PCA step should be performed [default : '%default' ]" | 87 help = "Whether an initial PCA step should be performed [default : '%default' ]" |
| 87 ), | 88 ), |
| 88 make_option( | 89 make_option( |
| 89 "--Rtsne_pca_center", | 90 "--Rtsne_pca_center", |
| 90 default = TRUE, | 91 default = TRUE, |
| 91 type = "logical", | 92 type = "logical", |
| 92 help = "Should data be centered before pca is applied? [default : '%default' ]" | 93 help = "Should data be centered before pca is applied? [default : '%default' ]" |
| 93 ), | 94 ), |
| 94 make_option( | 95 make_option( |
| 95 "--Rtsne_pca_scale", | 96 "--Rtsne_pca_scale", |
| 96 default = FALSE, | 97 default = FALSE, |
| 97 type = "logical", | 98 type = "logical", |
| 98 help = "Should data be scaled before pca is applied? [default : '%default' ]" | 99 help = "Should data be scaled before pca is applied? [default : '%default' ]" |
| 99 ), | 100 ), |
| 100 make_option( | 101 make_option( |
| 101 "--Rtsne_normalize", | 102 "--Rtsne_normalize", |
| 102 default = TRUE, | 103 default = TRUE, |
| 103 type = "logical", | 104 type = "logical", |
| 104 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]" | 105 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]" |
| 105 ), | 106 ), |
| 106 make_option( | 107 make_option( |
| 107 "--Rtsne_exaggeration_factor", | 108 "--Rtsne_exaggeration_factor", |
| 108 default = 12.0, | 109 default = 12.0, |
| 109 type = "numeric", | 110 type = "numeric", |
| 110 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" | 111 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" |
| 111 ), | 112 ), |
| 112 make_option( | 113 make_option( |
| 113 "--PCA_npc", | 114 "--PCA_npc", |
| 114 default = 5, | 115 default = 5, |
| 115 type = "integer", | 116 type = "integer", |
| 116 help = "number of dimensions kept in the results [default : '%default' ]" | 117 help = "number of dimensions kept in the results [default : '%default' ]" |
| 117 ), | 118 ), |
| 118 make_option( | 119 make_option( |
| 119 "--item_size", | 120 "--item_size", |
| 120 default = 1, | 121 default = 1, |
| 121 type = "numeric", | 122 type = "numeric", |
| 122 help = "Size of points/labels in PCA [default : '%default' ]" | 123 help = "Size of points/labels in PCA [default : '%default' ]" |
| 123 ), | 124 ), |
| 124 make_option( | 125 make_option( |
| 125 "--x_axis", | 126 "--x_axis", |
| 126 default = 1, | 127 default = 1, |
| 127 type = "integer", | 128 type = "integer", |
| 128 help = "PC to plot in the x axis [default : '%default' ]" | 129 help = "PC to plot in the x axis [default : '%default' ]" |
| 129 ), | 130 ), |
| 130 make_option( | 131 make_option( |
| 131 "--y_axis", | 132 "--y_axis", |
| 132 default = 2, | 133 default = 2, |
| 133 type = "integer", | 134 type = "integer", |
| 134 help = "PC to plot in the y axis [default : '%default' ]" | 135 help = "PC to plot in the y axis [default : '%default' ]" |
| 135 ), | 136 ), |
| 136 make_option( | 137 make_option( |
| 137 "--HCPC_ncluster", | 138 "--HCPC_ncluster", |
| 138 default = -1, | 139 default = -1, |
| 139 type = "numeric", | 140 type = "numeric", |
| 140 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" | 141 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" |
| 141 ), | 142 ), |
| 142 make_option( | 143 make_option( |
| 143 "--HCPC_npc", | 144 "--HCPC_npc", |
| 144 default = 5, | 145 default = 5, |
| 145 type = "integer", | 146 type = "integer", |
| 146 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" | 147 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" |
| 147 ), | 148 ), |
| 148 make_option( | 149 make_option( |
| 149 "--HCPC_metric", | 150 "--HCPC_metric", |
| 150 default = "euclidean", | 151 default = "euclidean", |
| 151 type = "character", | 152 type = "character", |
| 152 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" | 153 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" |
| 153 ), | 154 ), |
| 154 make_option( | 155 make_option( |
| 155 "--HCPC_method", | 156 "--HCPC_method", |
| 156 default = "ward", | 157 default = "ward", |
| 157 type = "character", | 158 type = "character", |
| 158 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']" | 159 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']" |
| 159 ), | 160 ), |
| 160 make_option( | 161 make_option( |
| 161 "--pdf_out", | 162 "--pdf_out", |
| 162 default = "out.pdf", | 163 default = "out.pdf", |
| 163 type = "character", | 164 type = "character", |
| 164 help = "pdf of plots [default : '%default' ]" | 165 help = "pdf of plots [default : '%default' ]" |
| 165 ), | 166 ), |
| 166 make_option( | 167 make_option( |
| 167 "--HCPC_consol", | 168 "--HCPC_consol", |
| 168 default = "TRUE", | 169 default = "TRUE", |
| 169 type = "logical", | 170 type = "logical", |
| 170 help = "If TRUE, a k-means consolidation is performed [default :'%default']" | 171 help = "If TRUE, a k-means consolidation is performed [default :'%default']" |
| 171 ), | 172 ), |
| 172 make_option( | 173 make_option( |
| 173 "--HCPC_itermax", | 174 "--HCPC_itermax", |
| 174 default = "10", | 175 default = "10", |
| 175 type = "integer", | 176 type = "integer", |
| 176 help = "The maximum number of iterations for the consolidation [default :'%default']" | 177 help = "The maximum number of iterations for the consolidation [default :'%default']" |
| 177 ), | 178 ), |
| 178 make_option( | 179 make_option( |
| 179 "--HCPC_min", | 180 "--HCPC_min", |
| 180 default = "3", | 181 default = "3", |
| 181 type = "integer", | 182 type = "integer", |
| 182 help = "The least possible number of clusters suggested [default :'%default']" | 183 help = "The least possible number of clusters suggested [default :'%default']" |
| 183 ), | 184 ), |
| 184 make_option( | 185 make_option( |
| 185 "--HCPC_max", | 186 "--HCPC_max", |
| 186 default = -1, | 187 default = -1, |
| 187 type = "integer", | 188 type = "integer", |
| 188 help = "The higher possible number of clusters suggested [default :'%default']" | 189 help = "The higher possible number of clusters suggested [default :'%default']" |
| 189 ), | 190 ), |
| 190 make_option( | 191 make_option( |
| 191 "--HCPC_clusterCA", | 192 "--HCPC_clusterCA", |
| 192 default = "rows", | 193 default = "rows", |
| 193 type = "character", | 194 type = "character", |
| 194 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" | 195 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" |
| 195 ), | 196 ), |
| 196 make_option( | 197 make_option( |
| 197 "--HCPC_kk", | 198 "--HCPC_kk", |
| 198 default = Inf, | 199 default = Inf, |
| 199 type = "numeric", | 200 type = "numeric", |
| 200 help = "The maximum number of iterations for the consolidation [default :'%default']" | 201 help = "The maximum number of iterations for the consolidation [default :'%default']" |
| 201 ), | 202 ), |
| 202 make_option( | 203 make_option( |
| 203 "--HCPC_mutual_info", | 204 "--HCPC_mutual_info", |
| 204 default = "", | 205 default = "", |
| 205 type = "character", | 206 type = "character", |
| 206 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" | 207 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" |
| 207 ), | 208 ), |
| 208 make_option( | 209 make_option( |
| 209 "--HCPC_cell_clust", | 210 "--HCPC_cell_clust", |
| 210 default = "", | 211 default = "", |
| 211 type = "character", | 212 type = "character", |
| 212 help = "Lists cells in the clusters generated by HCPC clustering. 2-column table (cell identifiers/clusters) [default :'%default']" | 213 help = "Lists cells in the clusters generated by HCPC clustering. 2-column table (cell identifiers/clusters) [default :'%default']" |
| 213 ), | 214 ), |
| 214 make_option( | 215 make_option( |
| 215 "--HCPC_contributions", | 216 "--HCPC_contributions", |
| 216 default = "", | 217 default = "", |
| 217 type = "character", | 218 type = "character", |
| 218 help = "Table of variables (genes) most contributing to HCPC clustering [default :'%default']" | 219 help = "Table of variables (genes) most contributing to HCPC clustering [default :'%default']" |
| 219 ) | 220 ) |
| 220 ) | 221 ) |
| 221 | 222 |
| 222 opt <- parse_args(OptionParser(option_list = option_list), | 223 opt <- parse_args(OptionParser(option_list = option_list), |
| 223 args = commandArgs(trailingOnly = TRUE)) | 224 args = commandArgs(trailingOnly = TRUE) |
| 225 ) | |
| 224 | 226 |
| 225 if (opt$HCPC_max == -1) { | 227 if (opt$HCPC_max == -1) { |
| 226 opt$HCPC_max <- NULL | 228 opt$HCPC_max <- NULL |
| 227 } | 229 } |
| 228 if (opt$HCPC_kk == -1) { | 230 if (opt$HCPC_kk == -1) { |
| 229 opt$HCPC_kk <- Inf | 231 opt$HCPC_kk <- Inf |
| 230 } | 232 } |
| 231 | 233 |
| 232 #### We treat data once, at the beginning of the script #### | 234 #### We treat data once, at the beginning of the script #### |
| 233 data <- read.delim( | 235 data <- read.delim( |
| 234 opt$data, | 236 opt$data, |
| 235 check.names = FALSE, | 237 check.names = FALSE, |
| 236 header = TRUE, | 238 header = TRUE, |
| 237 row.names = 1, | 239 row.names = 1, |
| 238 sep = "\t" | 240 sep = "\t" |
| 239 ) | 241 ) |
| 240 # we transpose immediately, because this is the common data structure | 242 # we transpose immediately, because this is the common data structure |
| 241 data <- as.data.frame(t(data)) | 243 data <- as.data.frame(t(data)) |
| 242 | 244 |
| 243 # we treat the factor for usage in 3 methods | 245 # we treat the factor for usage in 3 methods |
| 244 if (opt$factor != "") { | 246 if (opt$factor != "") { |
| 245 contrasting_factor <- read.delim(opt$factor, header = TRUE) | 247 contrasting_factor <- read.delim(opt$factor, header = TRUE) |
| 246 rownames(contrasting_factor) <- contrasting_factor[, 1] | 248 rownames(contrasting_factor) <- contrasting_factor[, 1] |
| 247 # we pick only the relevant values of the contrasting factor | 249 # we pick only the relevant values of the contrasting factor |
| 248 contrasting_factor <- contrasting_factor[rownames(data), ] | 250 contrasting_factor <- contrasting_factor[rownames(data), ] |
| 249 sup <- colnames(contrasting_factor)[2] | 251 sup <- colnames(contrasting_factor)[2] |
| 250 if (!is.numeric(contrasting_factor[, 2])) { | 252 if (!is.numeric(contrasting_factor[, 2])) { |
| 251 contrasting_factor[, 2] <- as.factor(contrasting_factor[, 2]) | 253 contrasting_factor[, 2] <- as.factor(contrasting_factor[, 2]) |
| 252 } | 254 } |
| 253 } | 255 } |
| 254 | 256 |
| 255 ######### make PCA with FactoMineR ################# | 257 ######### make PCA with FactoMineR ################# |
| 256 if (opt$visu_choice == "PCA") { | 258 if (opt$visu_choice == "PCA") { |
| 257 if (opt$labels) { | 259 if (opt$labels) { |
| 258 labels <- "ind" | 260 labels <- "ind" |
| 259 } else { | |
| 260 labels <- "none" | |
| 261 } | |
| 262 pdf(opt$pdf_out) | |
| 263 if (opt$factor != "") { | |
| 264 data <- cbind(data, contrasting_factor[, 2]) | |
| 265 colnames(data)[length(data)] <- sup | |
| 266 if (is.numeric(contrasting_factor[, 2])) { | |
| 267 res_pca <- PCA(X = data, quanti.sup = sup, graph = FALSE) | |
| 268 pca_plot <- plot(res_pca, habillage = sup, label = labels, | |
| 269 title = "PCA graph of cells", cex = opt$item_size, | |
| 270 axes = c(opt$x_axis, opt$y_axis)) | |
| 271 } else { | 261 } else { |
| 272 res_pca <- PCA(X = data, quali.sup = sup, graph = FALSE) | 262 labels <- "none" |
| 273 pca_plot <- plot(res_pca, habillage = sup, label = labels, | 263 } |
| 274 title = "PCA graph of cells", cex = opt$item_size, | 264 pdf(opt$pdf_out) |
| 275 axes = c(opt$x_axis, opt$y_axis)) | 265 if (opt$factor != "") { |
| 276 } | 266 data <- cbind(data, contrasting_factor[, 2]) |
| 277 } else { | 267 colnames(data)[length(data)] <- sup |
| 278 res_pca <- PCA(X = data, graph = FALSE) | 268 if (is.numeric(contrasting_factor[, 2])) { |
| 279 pca_plot <- plot(res_pca, label = labels, | 269 res_pca <- PCA(X = data, quanti.sup = sup, graph = FALSE) |
| 280 title = "PCA graph of cells", cex = opt$item_size, | 270 pca_plot <- plot(res_pca, |
| 281 axes = c(opt$x_axis, opt$y_axis), col.ind = "deepskyblue4") | 271 habillage = sup, label = labels, |
| 282 } | 272 title = "PCA graph of cells", cex = opt$item_size, |
| 283 print(pca_plot) | 273 axes = c(opt$x_axis, opt$y_axis) |
| 284 dev.off() | 274 ) |
| 275 } else { | |
| 276 res_pca <- PCA(X = data, quali.sup = sup, graph = FALSE) | |
| 277 pca_plot <- plot(res_pca, | |
| 278 habillage = sup, label = labels, | |
| 279 title = "PCA graph of cells", cex = opt$item_size, | |
| 280 axes = c(opt$x_axis, opt$y_axis) | |
| 281 ) | |
| 282 } | |
| 283 } else { | |
| 284 res_pca <- PCA(X = data, graph = FALSE) | |
| 285 pca_plot <- plot(res_pca, | |
| 286 label = labels, | |
| 287 title = "PCA graph of cells", cex = opt$item_size, | |
| 288 axes = c(opt$x_axis, opt$y_axis), col.ind = "deepskyblue4" | |
| 289 ) | |
| 290 } | |
| 291 print(pca_plot) | |
| 292 dev.off() | |
| 285 } | 293 } |
| 286 | 294 |
| 287 ########### make HCPC with FactoMineR ########## | 295 ########### make HCPC with FactoMineR ########## |
| 288 if (opt$visu_choice == "HCPC") { | 296 if (opt$visu_choice == "HCPC") { |
| 289 pdf(opt$pdf_out) | 297 pdf(opt$pdf_out) |
| 290 # HCPC starts with a PCA | 298 # HCPC starts with a PCA |
| 291 pca <- PCA(X = data, ncp = opt$HCPC_npc, graph = FALSE) | 299 pca <- PCA(X = data, ncp = opt$HCPC_npc, graph = FALSE) |
| 292 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA | 300 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA |
| 293 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering | 301 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering |
| 294 res_hcpc <- HCPC(pca, | 302 res_hcpc <- HCPC(pca, |
| 295 nb.clust = opt$HCPC_ncluster, | 303 nb.clust = opt$HCPC_ncluster, |
| 296 metric = opt$HCPC_metric, | 304 metric = opt$HCPC_metric, |
| 297 method = opt$HCPC_method, | 305 method = opt$HCPC_method, |
| 298 graph = FALSE, | 306 graph = FALSE, |
| 299 consol = opt$HCPC_consol, | 307 consol = opt$HCPC_consol, |
| 300 iter.max = opt$HCPC_itermax, | 308 iter.max = opt$HCPC_itermax, |
| 301 min = opt$HCPC_min, | 309 min = opt$HCPC_min, |
| 302 max = opt$HCPC_max, | 310 max = opt$HCPC_max, |
| 303 cluster.CA = opt$HCPC_clusterCA, | 311 cluster.CA = opt$HCPC_clusterCA, |
| 304 kk = opt$HCPC_kk) | 312 kk = opt$HCPC_kk |
| 305 # HCPC plots | 313 ) |
| 306 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2 | 314 # HCPC plots |
| 307 plot(res_hcpc, choice = "tree") | 315 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2 |
| 308 plot(res_hcpc, choice = "bar") | 316 plot(res_hcpc, choice = "tree") |
| 309 if (opt$labels == FALSE) { | 317 plot(res_hcpc, choice = "bar") |
| 310 plot(res_hcpc, choice = "3D.map", ind.names = FALSE) | 318 if (opt$labels == FALSE) { |
| 311 plot(res_hcpc, choice = "map", label = "none") | 319 plot(res_hcpc, choice = "3D.map", ind.names = FALSE) |
| 312 } else { | 320 plot(res_hcpc, choice = "map", label = "none") |
| 313 plot(res_hcpc, choice = "3D.map") | 321 } else { |
| 314 plot(res_hcpc, choice = "map") | 322 plot(res_hcpc, choice = "3D.map") |
| 315 } | 323 plot(res_hcpc, choice = "map") |
| 316 ## Normalized Mutual Information | 324 } |
| 317 if (opt$factor != "") { | 325 ## Normalized Mutual Information |
| 318 sink(opt$HCPC_mutual_info) | 326 if (opt$factor != "") { |
| 319 cat("Relationship between input factor and its levels and the HCPC clusters") | 327 sink(opt$HCPC_mutual_info) |
| 320 res <- external_validation(true_labels = as.numeric(contrasting_factor[, 2]), | 328 cat("Relationship between input factor and its levels and the HCPC clusters") |
| 321 clusters = as.numeric(res_hcpc$data.clust$clust), | 329 res <- external_validation( |
| 322 summary_stats = TRUE) | 330 true_labels = as.numeric(contrasting_factor[, 2]), |
| 323 sink() | 331 clusters = as.numeric(res_hcpc$data.clust$clust), |
| 324 } | 332 summary_stats = TRUE |
| 325 dev.off() | 333 ) |
| 326 | 334 sink() |
| 327 res_clustering <- data.frame(Cell = rownames(res_hcpc$data.clust), | 335 } |
| 328 Cluster = res_hcpc$data.clust$clust) | 336 dev.off() |
| 329 # Description of cluster by most contributing variables / gene expressions | 337 |
| 330 # first transform list of vectors in a list of dataframes | 338 res_clustering <- data.frame( |
| 331 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame) | 339 Cell = rownames(res_hcpc$data.clust), |
| 332 # second, transfer rownames (genes) to column in the dataframe, before rbinding | 340 Cluster = res_hcpc$data.clust$clust |
| 333 extract_description_w_genes <- Map(cbind, | 341 ) |
| 334 extract_description, | 342 # Description of cluster by most contributing variables / gene expressions |
| 335 genes = lapply(extract_description, rownames)) | 343 # first transform list of vectors in a list of dataframes |
| 336 # Then collapse all dataframes with cluster_id in 1st column using {data.table} rbindlist() | 344 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame) |
| 337 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") | 345 # second, transfer rownames (genes) to column in the dataframe, before rbinding |
| 338 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # swap columns | 346 extract_description_w_genes <- Map(cbind, |
| 339 cluster_description <- cluster_description[order(cluster_description[[2]], | 347 extract_description, |
| 340 cluster_description[[8]]), ] # sort by cluster then by pval | 348 genes = lapply(extract_description, rownames) |
| 341 # Finally, output cluster description data frame | 349 ) |
| 342 write.table(cluster_description, | 350 # Then collapse all dataframes with cluster_id in 1st column using {data.table} rbindlist() |
| 343 file = opt$HCPC_contributions, | 351 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") |
| 344 sep = "\t", | 352 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # swap columns |
| 345 quote = FALSE, | 353 cluster_description <- cluster_description[order( |
| 346 col.names = TRUE, | 354 cluster_description[[2]], |
| 347 row.names = FALSE) | 355 cluster_description[[8]] |
| 348 | 356 ), ] # sort by cluster then by pval |
| 349 ## Return cluster table to user | 357 # Finally, output cluster description data frame |
| 350 write.table(res_clustering, | 358 write.table(cluster_description, |
| 351 file = opt$HCPC_cell_clust, | 359 file = opt$HCPC_contributions, |
| 352 sep = "\t", | 360 sep = "\t", |
| 353 quote = FALSE, | 361 quote = FALSE, |
| 354 col.names = TRUE, | 362 col.names = TRUE, |
| 355 row.names = FALSE) | 363 row.names = FALSE |
| 364 ) | |
| 365 | |
| 366 ## Return cluster table to user | |
| 367 write.table(res_clustering, | |
| 368 file = opt$HCPC_cell_clust, | |
| 369 sep = "\t", | |
| 370 quote = FALSE, | |
| 371 col.names = TRUE, | |
| 372 row.names = FALSE | |
| 373 ) | |
| 356 } | 374 } |
| 357 ################ t-SNE #################### | 375 ################ t-SNE #################### |
| 358 if (opt$visu_choice == "tSNE") { | 376 if (opt$visu_choice == "tSNE") { |
| 359 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility | 377 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility |
| 360 tsne_out <- Rtsne(data, | 378 tsne_out <- Rtsne(data, |
| 361 dims = opt$Rtsne_dims, | 379 dims = opt$Rtsne_dims, |
| 362 initial_dims = opt$Rtsne_initial_dims, | 380 initial_dims = opt$Rtsne_initial_dims, |
| 363 perplexity = opt$Rtsne_perplexity, | 381 perplexity = opt$Rtsne_perplexity, |
| 364 theta = opt$Rtsne_theta, | 382 theta = opt$Rtsne_theta, |
| 365 max_iter = opt$Rtsne_max_iter, | 383 max_iter = opt$Rtsne_max_iter, |
| 366 pca = opt$Rtsne_pca, | 384 pca = opt$Rtsne_pca, |
| 367 pca_center = opt$Rtsne_pca_center, | 385 pca_center = opt$Rtsne_pca_center, |
| 368 pca_scale = opt$Rtsne_pca_scale, | 386 pca_scale = opt$Rtsne_pca_scale, |
| 369 normalize = opt$Rtsne_normalize, | 387 normalize = opt$Rtsne_normalize, |
| 370 exaggeration_factor = opt$Rtsne_exaggeration_factor) | 388 exaggeration_factor = opt$Rtsne_exaggeration_factor |
| 371 embedding <- as.data.frame(tsne_out$Y[, 1:2]) | 389 ) |
| 372 embedding$Class <- as.factor(rownames(data)) | 390 embedding <- as.data.frame(tsne_out$Y[, 1:2]) |
| 373 gg_legend <- theme(legend.position = "right") | 391 embedding$Class <- as.factor(rownames(data)) |
| 374 pointcolor <- "#E70000" | 392 gg_legend <- theme(legend.position = "right") |
| 375 pointsize <- opt$item_size * 1.5 | 393 pointcolor <- "#E70000" |
| 376 the_theme <- theme( | 394 pointsize <- opt$item_size * 1.5 |
| 377 panel.background = element_rect(fill = "gray100", colour = "#6D9EC1", | 395 the_theme <- theme( |
| 378 size = 2, linetype = "solid"), | 396 panel.background = element_rect( |
| 379 panel.grid.major = element_line(size = 0.5, linetype = "solid", | 397 fill = "gray100", colour = "#6D9EC1", |
| 380 colour = "#6D9EC1"), | 398 size = 2, linetype = "solid" |
| 381 panel.grid.minor = element_line(size = 0.25, linetype = "solid", | 399 ), |
| 382 colour = "darkslategray3") | 400 panel.grid.major = element_line( |
| 383 ) | 401 size = 0.5, linetype = "solid", |
| 384 if (opt$factor == "") { | 402 colour = "#6D9EC1" |
| 385 p <- ggplot(embedding, aes(x = V1, y = V2)) + | 403 ), |
| 386 geom_point(size = pointsize * 0.25, color = pointcolor) + | 404 panel.grid.minor = element_line( |
| 387 gg_legend + | 405 size = 0.25, linetype = "solid", |
| 388 xlab("t-SNE 1") + | 406 colour = "darkslategray3" |
| 389 ylab("t-SNE 2") + | 407 ) |
| 390 ggtitle("t-SNE") + | 408 ) |
| 391 the_theme + | 409 if (opt$factor == "") { |
| 392 if (opt$labels) { | 410 p <- ggplot(embedding, aes(x = V1, y = V2)) + |
| 393 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = pointsize, color = pointcolor) | 411 geom_point(size = pointsize * 0.25, color = pointcolor) + |
| 394 } | 412 gg_legend + |
| 395 } else { | 413 xlab("t-SNE 1") + |
| 396 if (is.numeric(contrasting_factor[, 2])) { | 414 ylab("t-SNE 2") + |
| 397 embedding$factor <- contrasting_factor[, 2] | 415 ggtitle("t-SNE") + |
| 416 the_theme + | |
| 417 if (opt$labels) { | |
| 418 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = pointsize, color = pointcolor) | |
| 419 } | |
| 398 } else { | 420 } else { |
| 399 embedding$factor <- as.factor(contrasting_factor[, 2]) | 421 if (is.numeric(contrasting_factor[, 2])) { |
| 400 } | 422 embedding$factor <- contrasting_factor[, 2] |
| 401 p <- ggplot(embedding, aes(x = V1, y = V2, color = factor)) + | 423 } else { |
| 402 geom_point(size = pointsize * 0.25) + | 424 embedding$factor <- as.factor(contrasting_factor[, 2]) |
| 403 gg_legend + | 425 } |
| 404 xlab("t-SNE 1") + | 426 p <- ggplot(embedding, aes(x = V1, y = V2, color = factor)) + |
| 405 ylab("t-SNE 2") + | 427 geom_point(size = pointsize * 0.25) + |
| 406 ggtitle("t-SNE") + | 428 gg_legend + |
| 407 the_theme + | 429 xlab("t-SNE 1") + |
| 408 if (opt$labels) { | 430 ylab("t-SNE 2") + |
| 409 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = pointsize) | 431 ggtitle("t-SNE") + |
| 410 } | 432 the_theme + |
| 411 } | 433 if (opt$labels) { |
| 412 pdf(opt$pdf_out) | 434 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = pointsize) |
| 413 print(p) | 435 } |
| 414 dev.off() | 436 } |
| 415 } | 437 pdf(opt$pdf_out) |
| 438 print(p) | |
| 439 dev.off() | |
| 440 } |
