comparison high_dim_visu.R @ 8:fe6f76030168 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_high_dimension_visualization commit a3dc683410fc240f428c8fbee3c63aa9965fbf38
author artbio
date Wed, 29 Nov 2023 17:28:18 +0000
parents 18a1dc4aec4a
children 58aa18e1fe14
comparison
equal deleted inserted replaced
7:18a1dc4aec4a 8:fe6f76030168
1 options(show.error.messages = FALSE, 1 options(show.error.messages = FALSE,
2 error = function() { 2 error = function() {
3 cat(geterrmessage(), file = stderr()) 3 cat(geterrmessage(), file = stderr())
4 q("no", 1, FALSE) 4 q("no", 1, FALSE)
5 } 5 }
6 ) 6 )
7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
8 warnings() 8 warnings()
9
9 library(optparse) 10 library(optparse)
10 library(FactoMineR) 11 library(FactoMineR)
11 library(factoextra) 12 library(factoextra)
12 library(Rtsne) 13 library(Rtsne)
13 library(ggplot2) 14 library(ggplot2)
15 library(RColorBrewer) 16 library(RColorBrewer)
16 library(ClusterR) 17 library(ClusterR)
17 library(data.table) 18 library(data.table)
18 library(Polychrome) 19 library(Polychrome)
19 20
20 # Arguments
21 option_list <- list( 21 option_list <- list(
22 make_option( 22 make_option(
23 "--data", 23 "--data",
24 default = NA, 24 default = NA,
25 type = "character", 25 type = "character",
26 help = "Input file that contains expression value to visualise" 26 help = "Input file that contains expression value to visualise"
27 ), 27 ),
28 make_option( 28 make_option(
29 "--sep",
30 default = "\t",
31 type = "character",
32 help = "File separator [default : '%default' ]"
33 ),
34 make_option(
35 "--colnames",
36 default = TRUE,
37 type = "logical",
38 help = "Consider first line as header ? [default : '%default' ]"
39 ),
40 make_option(
41 "--out",
42 default = "res.tab",
43 type = "character",
44 help = "Output name [default : '%default' ]"
45 ),
46 make_option(
47 "--labels", 29 "--labels",
48 default = FALSE, 30 default = FALSE,
49 type = "logical", 31 type = "logical",
50 help = "add labels in scatter plots [default : '%default' ]" 32 help = "add labels in scatter plots [default : '%default' ]"
51 ), 33 ),
60 default = "PCA", 42 default = "PCA",
61 type = "character", 43 type = "character",
62 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" 44 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]"
63 ), 45 ),
64 make_option( 46 make_option(
65 "--table_coordinates",
66 default = "",
67 type = "character",
68 help = "Table with plot coordinates [default : '%default' ]"
69 ),
70 make_option(
71 "--Rtsne_seed", 47 "--Rtsne_seed",
72 default = 42, 48 default = 42,
73 type = "integer", 49 type = "integer",
74 help = "Seed value for reproducibility [default : '%default' ]" 50 help = "Seed value for reproducibility [default : '%default' ]"
75 ), 51 ),
113 "--Rtsne_pca_center", 89 "--Rtsne_pca_center",
114 default = TRUE, 90 default = TRUE,
115 type = "logical", 91 type = "logical",
116 help = "Should data be centered before pca is applied? [default : '%default' ]" 92 help = "Should data be centered before pca is applied? [default : '%default' ]"
117 ), 93 ),
118 make_option( 94 make_option(
119 "--Rtsne_pca_scale", 95 "--Rtsne_pca_scale",
120 default = FALSE, 96 default = FALSE,
121 type = "logical", 97 type = "logical",
122 help = "Should data be scaled before pca is applied? [default : '%default' ]" 98 help = "Should data be scaled before pca is applied? [default : '%default' ]"
123 ), 99 ),
131 "--Rtsne_exaggeration_factor", 107 "--Rtsne_exaggeration_factor",
132 default = 12.0, 108 default = 12.0,
133 type = "numeric", 109 type = "numeric",
134 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" 110 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]"
135 ), 111 ),
136 make_option( 112 make_option(
137 "--PCA_npc", 113 "--PCA_npc",
138 default = 5, 114 default = 5,
139 type = "integer", 115 type = "integer",
140 help = "number of dimensions kept in the results [default : '%default' ]" 116 help = "number of dimensions kept in the results [default : '%default' ]"
141 ), 117 ),
142 make_option( 118 make_option(
143 "--PCA_x_axis", 119 "--item_size",
144 default = 1, 120 default = 1,
121 type = "numeric",
122 help = "Size of points/labels in PCA [default : '%default' ]"
123 ),
124 make_option(
125 "--x_axis",
126 default = 1,
145 type = "integer", 127 type = "integer",
146 help = "PC to plot in the x axis [default : '%default' ]" 128 help = "PC to plot in the x axis [default : '%default' ]"
147 ), 129 ),
148 make_option( 130 make_option(
149 "--PCA_y_axis", 131 "--y_axis",
150 default = 2, 132 default = 2,
151 type = "integer", 133 type = "integer",
152 help = "PC to plot in the y axis [default : '%default' ]" 134 help = "PC to plot in the y axis [default : '%default' ]"
153 ), 135 ),
154 make_option( 136 make_option(
155 "--HCPC_ncluster", 137 "--HCPC_ncluster",
156 default = -1, 138 default = -1,
157 type = "numeric", 139 type = "numeric",
158 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" 140 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]"
159 ), 141 ),
160 make_option( 142 make_option(
161 "--HCPC_npc", 143 "--HCPC_npc",
162 default = 5, 144 default = 5,
163 type = "integer", 145 type = "integer",
164 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" 146 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]"
165 ), 147 ),
197 "--HCPC_min", 179 "--HCPC_min",
198 default = "3", 180 default = "3",
199 type = "integer", 181 type = "integer",
200 help = "The least possible number of clusters suggested [default :'%default']" 182 help = "The least possible number of clusters suggested [default :'%default']"
201 ), 183 ),
202 make_option( 184 make_option(
203 "--HCPC_max", 185 "--HCPC_max",
204 default = -1, 186 default = -1,
205 type = "integer", 187 type = "integer",
206 help = "The higher possible number of clusters suggested [default :'%default']" 188 help = "The higher possible number of clusters suggested [default :'%default']"
207 ), 189 ),
208 make_option( 190 make_option(
209 "--HCPC_clusterCA", 191 "--HCPC_clusterCA",
210 default = "rows", 192 default = "rows",
211 type = "character", 193 type = "character",
212 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" 194 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']"
213 ), 195 ),
216 default = Inf, 198 default = Inf,
217 type = "numeric", 199 type = "numeric",
218 help = "The maximum number of iterations for the consolidation [default :'%default']" 200 help = "The maximum number of iterations for the consolidation [default :'%default']"
219 ), 201 ),
220 make_option( 202 make_option(
221 "--HCPC_clust",
222 default = "",
223 type = "character",
224 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']"
225 ),
226 make_option(
227 "--HCPC_mutual_info", 203 "--HCPC_mutual_info",
228 default = "", 204 default = "",
229 type = "character", 205 type = "character",
230 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" 206 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']"
231 ), 207 ),
232 make_option( 208 make_option(
233 "--HCPC_cluster_description", 209 "--HCPC_cell_clust",
234 default = "", 210 default = "",
235 type = "character", 211 type = "character",
236 help = "Output file with variables most contributing to clustering [default :'%default']" 212 help = "Lists cells in the clusters generated by HCPC clustering. 2-column table (cell identifiers/clusters) [default :'%default']"
213 ),
214 make_option(
215 "--HCPC_contributions",
216 default = "",
217 type = "character",
218 help = "Table of variables (genes) most contributing to HCPC clustering [default :'%default']"
237 ) 219 )
238 ) 220 )
239 221
240 opt <- parse_args(OptionParser(option_list = option_list), 222 opt <- parse_args(OptionParser(option_list = option_list),
241 args = commandArgs(trailingOnly = TRUE)) 223 args = commandArgs(trailingOnly = TRUE))
242 224
243 if (opt$sep == "tab") {
244 opt$sep <- "\t"
245 }
246 if (opt$sep == "comma") {
247 opt$sep <- ","
248 }
249 if (opt$HCPC_max == -1) { 225 if (opt$HCPC_max == -1) {
250 opt$HCPC_max <- NULL 226 opt$HCPC_max <- NULL
251 } 227 }
252 if (opt$HCPC_kk == -1) { 228 if (opt$HCPC_kk == -1) {
253 opt$HCPC_kk <- Inf 229 opt$HCPC_kk <- Inf
254 } 230 }
255 231
256 ##Add legend to plot() 232 #### We treat data once, at the beginning of the script ####
257 legend_col <- function(col, lev) {
258
259 opar <- par
260
261 n <- length(col)
262
263 bx <- par("usr")
264
265 box_cx <- c(bx[2] + (bx[2] - bx[1]) / 1000,
266 bx[2] + (bx[2] - bx[1]) / 1000 + (bx[2] - bx[1]) / 50)
267 box_cy <- c(bx[3], bx[3])
268 box_sy <- (bx[4] - bx[3]) / n
269
270 xx <- rep(box_cx, each = 2)
271
272 par(xpd = TRUE)
273 for (i in 1:n) {
274 yy <- c(box_cy[1] + (box_sy * (i - 1)),
275 box_cy[1] + (box_sy * (i)),
276 box_cy[1] + (box_sy * (i)),
277 box_cy[1] + (box_sy * (i - 1)))
278 polygon(xx, yy, col = col[i], border = col[i])
279 }
280 par(new = TRUE)
281 plot(0, 0, type = "n",
282 ylim = c(min(lev), max(lev)),
283 yaxt = "n", ylab = "",
284 xaxt = "n", xlab = "",
285 frame.plot = FALSE)
286 axis(side = 4, las = 2, tick = FALSE, line = .25)
287 par <- opar
288 }
289
290 data <- read.delim( 233 data <- read.delim(
291 opt$data, 234 opt$data,
292 check.names = FALSE, 235 check.names = FALSE,
293 header = opt$colnames, 236 header = TRUE,
294 row.names = 1, 237 row.names = 1,
295 sep = opt$sep 238 sep = "\t"
296 ) 239 )
297 240 # we transpose immediately, because this is the common data structure
298 # Contrasting factor and its colors 241 data <- as.data.frame(t(data))
242
243 # we treat the factor for usage in 3 methods
299 if (opt$factor != "") { 244 if (opt$factor != "") {
300 contrasting_factor <- read.delim( 245 contrasting_factor <- read.delim(opt$factor, header = TRUE)
301 opt$factor,
302 header = TRUE
303 )
304 rownames(contrasting_factor) <- contrasting_factor[, 1] 246 rownames(contrasting_factor) <- contrasting_factor[, 1]
305 contrasting_factor <- contrasting_factor[colnames(data), ] 247 # we pick only the relevant values of the contrasting factor
306 colnames(contrasting_factor) <- c("id", "factor") 248 contrasting_factor <- contrasting_factor[rownames(data), ]
307 if (is.numeric(contrasting_factor$factor)) { 249 sup <- colnames(contrasting_factor)[2]
308 factor_cols <- rev(brewer.pal(n = 11, name = "RdYlGn"))[contrasting_factor$factor] 250 if (!is.numeric(contrasting_factor[, 2])) {
251 contrasting_factor[, 2] <- as.factor(contrasting_factor[, 2])
252 }
253 }
254
255 ######### make PCA with FactoMineR #################
256 if (opt$visu_choice == "PCA") {
257 if (opt$labels) {
258 labels <- "ind"
309 } else { 259 } else {
310 contrasting_factor$factor <- as.factor(contrasting_factor$factor) 260 labels <- "none"
311 if (nlevels(contrasting_factor$factor) == 2) { 261 }
312 colors_labels <- c("#E41A1C", "#377EB8") 262 pdf(opt$pdf_out)
263 if (opt$factor != "") {
264 data <- cbind(data, contrasting_factor[, 2])
265 colnames(data)[length(data)] <- sup
266 if (is.numeric(contrasting_factor[, 2])) {
267 res_pca <- PCA(X = data, quanti.sup = sup, graph = FALSE)
268 pca_plot <- plot(res_pca, habillage = sup, label = labels,
269 title = "PCA graph of cells", cex = opt$item_size,
270 axes = c(opt$x_axis, opt$y_axis))
313 } else { 271 } else {
314 set.seed(567629) 272 res_pca <- PCA(X = data, quali.sup = sup, graph = FALSE)
315 colors_labels <- createPalette(nlevels(contrasting_factor$factor), c("#5A5156", "#E4E1E3", "#F6222E")) 273 pca_plot <- plot(res_pca, habillage = sup, label = labels,
316 names(colors_labels) <- NULL 274 title = "PCA graph of cells", cex = opt$item_size,
275 axes = c(opt$x_axis, opt$y_axis))
317 } 276 }
318 factor_colors <- 277 } else {
319 with(contrasting_factor, 278 res_pca <- PCA(X = data, graph = FALSE)
320 data.frame(factor = levels(contrasting_factor$factor), 279 pca_plot <- plot(res_pca, label = labels,
321 color = I(colors_labels) 280 title = "PCA graph of cells", cex = opt$item_size,
322 ) 281 axes = c(opt$x_axis, opt$y_axis), col.ind = "deepskyblue4")
323 ) 282 }
324 factor_cols <- factor_colors$color[match(contrasting_factor$factor, 283 print(pca_plot)
325 factor_colors$factor)] 284 dev.off()
326 } 285 }
327 } else { 286
328 factor_cols <- rep("deepskyblue4", length(rownames(data))) 287 ########### make HCPC with FactoMineR ##########
329 } 288 if (opt$visu_choice == "HCPC") {
330 289 pdf(opt$pdf_out)
290 # HCPC starts with a PCA
291 pca <- PCA(X = data, ncp = opt$HCPC_npc, graph = FALSE)
292 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA
293 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering
294 res_hcpc <- HCPC(pca,
295 nb.clust = opt$HCPC_ncluster,
296 metric = opt$HCPC_metric,
297 method = opt$HCPC_method,
298 graph = FALSE,
299 consol = opt$HCPC_consol,
300 iter.max = opt$HCPC_itermax,
301 min = opt$HCPC_min,
302 max = opt$HCPC_max,
303 cluster.CA = opt$HCPC_clusterCA,
304 kk = opt$HCPC_kk)
305 # HCPC plots
306 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2
307 plot(res_hcpc, choice = "tree")
308 plot(res_hcpc, choice = "bar")
309 if (opt$labels == FALSE) {
310 plot(res_hcpc, choice = "3D.map", ind.names = FALSE)
311 plot(res_hcpc, choice = "map", label = "none")
312 } else {
313 plot(res_hcpc, choice = "3D.map")
314 plot(res_hcpc, choice = "map")
315 }
316 ## Normalized Mutual Information
317 if (opt$factor != "") {
318 sink(opt$HCPC_mutual_info)
319 cat("Relationship between input factor and its levels and the HCPC clusters")
320 res <- external_validation(true_labels = as.numeric(contrasting_factor[, 2]),
321 clusters = as.numeric(res_hcpc$data.clust$clust),
322 summary_stats = TRUE)
323 sink()
324 }
325 dev.off()
326
327 res_clustering <- data.frame(Cell = rownames(res_hcpc$data.clust),
328 Cluster = res_hcpc$data.clust$clust)
329 # Description of cluster by most contributing variables / gene expressions
330 # first transform list of vectors in a list of dataframes
331 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame)
332 # second, transfer rownames (genes) to column in the dataframe, before rbinding
333 extract_description_w_genes <- Map(cbind,
334 extract_description,
335 genes = lapply(extract_description, rownames))
336 # Then collapse all dataframes with cluster_id in 1st column using {data.table} rbindlist()
337 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id")
338 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # swap columns
339 cluster_description <- cluster_description[order(cluster_description[[2]],
340 cluster_description[[8]]), ] # sort by cluster then by pval
341 # Finally, output cluster description data frame
342 write.table(cluster_description,
343 file = opt$HCPC_contributions,
344 sep = "\t",
345 quote = FALSE,
346 col.names = TRUE,
347 row.names = FALSE)
348
349 ## Return cluster table to user
350 write.table(res_clustering,
351 file = opt$HCPC_cell_clust,
352 sep = "\t",
353 quote = FALSE,
354 col.names = TRUE,
355 row.names = FALSE)
356 }
331 ################ t-SNE #################### 357 ################ t-SNE ####################
332 if (opt$visu_choice == "tSNE") { 358 if (opt$visu_choice == "tSNE") {
333 # filter and transpose df for tsne and pca
334 tdf <- t(data)
335 # make tsne and plot results
336 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility 359 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility
337 360 tsne_out <- Rtsne(data,
338 tsne_out <- Rtsne(tdf,
339 dims = opt$Rtsne_dims, 361 dims = opt$Rtsne_dims,
340 initial_dims = opt$Rtsne_initial_dims, 362 initial_dims = opt$Rtsne_initial_dims,
341 perplexity = opt$Rtsne_perplexity, 363 perplexity = opt$Rtsne_perplexity,
342 theta = opt$Rtsne_theta, 364 theta = opt$Rtsne_theta,
343 max_iter = opt$Rtsne_max_iter, 365 max_iter = opt$Rtsne_max_iter,
344 pca = opt$Rtsne_pca, 366 pca = opt$Rtsne_pca,
345 pca_center = opt$Rtsne_pca_center, 367 pca_center = opt$Rtsne_pca_center,
346 pca_scale = opt$Rtsne_pca_scale, 368 pca_scale = opt$Rtsne_pca_scale,
347 normalize = opt$Rtsne_normalize, 369 normalize = opt$Rtsne_normalize,
348 exaggeration_factor = opt$Rtsne_exaggeration_factor) 370 exaggeration_factor = opt$Rtsne_exaggeration_factor)
349
350 embedding <- as.data.frame(tsne_out$Y[, 1:2]) 371 embedding <- as.data.frame(tsne_out$Y[, 1:2])
351 embedding$Class <- as.factor(rownames(tdf)) 372 embedding$Class <- as.factor(rownames(data))
352 gg_legend <- theme(legend.position = "right") 373 gg_legend <- theme(legend.position = "right")
374 pointcolor <- "#E70000"
375 pointsize <- opt$item_size * 1.5
376 the_theme <- theme(
377 panel.background = element_rect(fill = "gray100", colour = "#6D9EC1",
378 size = 2, linetype = "solid"),
379 panel.grid.major = element_line(size = 0.5, linetype = "solid",
380 colour = "#6D9EC1"),
381 panel.grid.minor = element_line(size = 0.25, linetype = "solid",
382 colour = "darkslategray3")
383 )
353 if (opt$factor == "") { 384 if (opt$factor == "") {
354 ggplot(embedding, aes(x = V1, y = V2)) + 385 p <- ggplot(embedding, aes(x = V1, y = V2)) +
355 geom_point(size = 1, color = "deepskyblue4") + 386 geom_point(size = pointsize * 0.25, color = pointcolor) +
356 gg_legend + 387 gg_legend +
357 xlab("t-SNE 1") + 388 xlab("t-SNE 1") +
358 ylab("t-SNE 2") + 389 ylab("t-SNE 2") +
359 ggtitle("t-SNE") + 390 ggtitle("t-SNE") +
391 the_theme +
360 if (opt$labels) { 392 if (opt$labels) {
361 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = 1.5, color = "deepskyblue4") 393 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = pointsize, color = pointcolor)
362 } 394 }
395 } else {
396 if (is.numeric(contrasting_factor[, 2])) {
397 embedding$factor <- contrasting_factor[, 2]
363 } else { 398 } else {
364 if (is.numeric(contrasting_factor$factor)) { 399 embedding$factor <- as.factor(contrasting_factor[, 2])
365 embedding$factor <- contrasting_factor$factor
366 } else {
367 embedding$factor <- as.factor(contrasting_factor$factor)
368 } 400 }
369 401 p <- ggplot(embedding, aes(x = V1, y = V2, color = factor)) +
370 ggplot(embedding, aes(x = V1, y = V2, color = factor)) + 402 geom_point(size = pointsize * 0.25) +
371 geom_point(size = 1) +
372 gg_legend + 403 gg_legend +
373 xlab("t-SNE 1") + 404 xlab("t-SNE 1") +
374 ylab("t-SNE 2") + 405 ylab("t-SNE 2") +
375 ggtitle("t-SNE") + 406 ggtitle("t-SNE") +
407 the_theme +
376 if (opt$labels) { 408 if (opt$labels) {
377 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = 1.5) 409 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = pointsize)
378 } 410 }
379 } 411 }
380 ggsave(file = opt$pdf_out, device = "pdf")
381
382 #save coordinates table
383 if (opt$table_coordinates != "") {
384 coord_table <- cbind(rownames(tdf), round(as.data.frame(tsne_out$Y), 6))
385 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$Rtsne_dims)))
386 }
387 }
388
389
390 ######### make PCA with FactoMineR #################
391 if (opt$visu_choice == "PCA") {
392 pca <- PCA(t(data), ncp = opt$PCA_npc, graph = FALSE)
393 pdf(opt$pdf_out) 412 pdf(opt$pdf_out)
394 if (opt$labels == FALSE) { 413 print(p)
395 plot(pca, axes = c(opt$PCA_x_axis, opt$PCA_y_axis), label = "none", col.ind = factor_cols) 414 dev.off()
396 } else { 415 }
397 plot(pca, axes = c(opt$PCA_x_axis, opt$PCA_y_axis), cex = 0.2, col.ind = factor_cols)
398 }
399 if (opt$factor != "") {
400 if (is.factor(contrasting_factor$factor)) {
401 legend(x = "topright",
402 legend = as.character(factor_colors$factor),
403 col = factor_colors$color, pch = 16, bty = "n", xjust = 1, cex = 0.7)
404 } else {
405 legend_col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE))
406 }
407 }
408 dev.off()
409
410 #save coordinates table
411 if (opt$table_coordinates != "") {
412 coord_table <- cbind(rownames(pca$ind$coord), round(as.data.frame(pca$ind$coord), 6))
413 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$PCA_npc)))
414 }
415
416 }
417
418 ########### make HCPC with FactoMineR ##########
419 if (opt$visu_choice == "HCPC") {
420
421 # HCPC starts with a PCA
422 pca <- PCA(
423 t(data),
424 ncp = opt$HCPC_npc,
425 graph = FALSE,
426 )
427
428 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA
429
430 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering
431 res_hcpc <- HCPC(pca,
432 nb.clust = opt$HCPC_ncluster, metric = opt$HCPC_metric, method = opt$HCPC_method,
433 graph = FALSE, consol = opt$HCPC_consol, iter.max = opt$HCPC_itermax, min = opt$HCPC_min, max = opt$HCPC_max,
434 cluster.CA = opt$HCPC_clusterCA, kk = opt$HCPC_kk)
435 # HCPC plots
436 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2
437 pdf(opt$pdf_out)
438 plot(res_hcpc, choice = "tree")
439 plot(res_hcpc, choice = "bar")
440 plot(res_hcpc, choice = "3D.map")
441 if (opt$labels == FALSE) {
442 plot(res_hcpc, choice = "map", label = "none")
443 } else {
444 plot(res_hcpc, choice = "map")
445 }
446
447 # user contrasts on the pca
448 if (opt$factor != "") {
449 plot(pca, label = "none", col.ind = factor_cols)
450 if (is.factor(contrasting_factor$factor)) {
451 legend(x = "topright",
452 legend = as.character(factor_colors$factor),
453 col = factor_colors$color, pch = 16, bty = "n", xjust = 1, cex = 0.7)
454
455 ## Normalized Mutual Information
456 sink(opt$HCPC_mutual_info)
457 res <- external_validation(
458 true_labels = as.numeric(contrasting_factor$factor),
459 clusters = as.numeric(res_hcpc$data.clust$clust),
460 summary_stats = TRUE
461 )
462 sink()
463
464 } else {
465 legend_col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE))
466 }
467 }
468
469 dev.off()
470
471 if (opt$table_coordinates != "") {
472 coord_table <- cbind(Cell = rownames(res_hcpc$call$X),
473 round(as.data.frame(res_hcpc$call$X[, -length(res_hcpc$call$X)]), 6),
474 as.data.frame(res_hcpc$call$X[, length(res_hcpc$call$X)])
475 )
476 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$HCPC_npc)), "Cluster")
477 }
478
479 if (opt$HCPC_clust != "") {
480 res_clustering <- data.frame(Cell = rownames(res_hcpc$data.clust),
481 Cluster = res_hcpc$data.clust$clust)
482
483 }
484
485 # Description of cluster by most contributing variables / gene expressions
486
487 # first transform list of vectors in a list of dataframes
488 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame)
489 # second, transfer rownames (genes) to column in the dataframe, before rbinding
490 extract_description_w_genes <- Map(cbind,
491 extract_description,
492 genes = lapply(extract_description, rownames)
493 )
494 # Then use data.table to collapse all generated dataframe, with the cluster id in first column
495 # using the {data.table} rbindlist function
496 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id")
497 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # reorganize columns
498
499
500 # Finally, output cluster description data frame
501 write.table(
502 cluster_description,
503 file = opt$HCPC_cluster_description,
504 sep = "\t",
505 quote = FALSE,
506 col.names = TRUE,
507 row.names = FALSE
508 )
509
510 }
511
512 ## Return coordinates file to user
513
514 if (opt$table_coordinates != "") {
515 write.table(
516 coord_table,
517 file = opt$table_coordinates,
518 sep = "\t",
519 quote = FALSE,
520 col.names = TRUE,
521 row.names = FALSE
522 )
523 }
524
525
526 if (opt$HCPC_clust != "") {
527 write.table(
528 res_clustering,
529 file = opt$HCPC_clust,
530 sep = "\t",
531 quote = FALSE,
532 col.names = TRUE,
533 row.names = FALSE
534 )
535 }