Mercurial > repos > artbio > gsc_high_dimensions_visualisation
comparison high_dim_visu.R @ 8:fe6f76030168 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_high_dimension_visualization commit a3dc683410fc240f428c8fbee3c63aa9965fbf38
author | artbio |
---|---|
date | Wed, 29 Nov 2023 17:28:18 +0000 |
parents | 18a1dc4aec4a |
children | 58aa18e1fe14 |
comparison
equal
deleted
inserted
replaced
7:18a1dc4aec4a | 8:fe6f76030168 |
---|---|
1 options(show.error.messages = FALSE, | 1 options(show.error.messages = FALSE, |
2 error = function() { | 2 error = function() { |
3 cat(geterrmessage(), file = stderr()) | 3 cat(geterrmessage(), file = stderr()) |
4 q("no", 1, FALSE) | 4 q("no", 1, FALSE) |
5 } | 5 } |
6 ) | 6 ) |
7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
8 warnings() | 8 warnings() |
9 | |
9 library(optparse) | 10 library(optparse) |
10 library(FactoMineR) | 11 library(FactoMineR) |
11 library(factoextra) | 12 library(factoextra) |
12 library(Rtsne) | 13 library(Rtsne) |
13 library(ggplot2) | 14 library(ggplot2) |
15 library(RColorBrewer) | 16 library(RColorBrewer) |
16 library(ClusterR) | 17 library(ClusterR) |
17 library(data.table) | 18 library(data.table) |
18 library(Polychrome) | 19 library(Polychrome) |
19 | 20 |
20 # Arguments | |
21 option_list <- list( | 21 option_list <- list( |
22 make_option( | 22 make_option( |
23 "--data", | 23 "--data", |
24 default = NA, | 24 default = NA, |
25 type = "character", | 25 type = "character", |
26 help = "Input file that contains expression value to visualise" | 26 help = "Input file that contains expression value to visualise" |
27 ), | 27 ), |
28 make_option( | 28 make_option( |
29 "--sep", | |
30 default = "\t", | |
31 type = "character", | |
32 help = "File separator [default : '%default' ]" | |
33 ), | |
34 make_option( | |
35 "--colnames", | |
36 default = TRUE, | |
37 type = "logical", | |
38 help = "Consider first line as header ? [default : '%default' ]" | |
39 ), | |
40 make_option( | |
41 "--out", | |
42 default = "res.tab", | |
43 type = "character", | |
44 help = "Output name [default : '%default' ]" | |
45 ), | |
46 make_option( | |
47 "--labels", | 29 "--labels", |
48 default = FALSE, | 30 default = FALSE, |
49 type = "logical", | 31 type = "logical", |
50 help = "add labels in scatter plots [default : '%default' ]" | 32 help = "add labels in scatter plots [default : '%default' ]" |
51 ), | 33 ), |
60 default = "PCA", | 42 default = "PCA", |
61 type = "character", | 43 type = "character", |
62 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" | 44 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" |
63 ), | 45 ), |
64 make_option( | 46 make_option( |
65 "--table_coordinates", | |
66 default = "", | |
67 type = "character", | |
68 help = "Table with plot coordinates [default : '%default' ]" | |
69 ), | |
70 make_option( | |
71 "--Rtsne_seed", | 47 "--Rtsne_seed", |
72 default = 42, | 48 default = 42, |
73 type = "integer", | 49 type = "integer", |
74 help = "Seed value for reproducibility [default : '%default' ]" | 50 help = "Seed value for reproducibility [default : '%default' ]" |
75 ), | 51 ), |
113 "--Rtsne_pca_center", | 89 "--Rtsne_pca_center", |
114 default = TRUE, | 90 default = TRUE, |
115 type = "logical", | 91 type = "logical", |
116 help = "Should data be centered before pca is applied? [default : '%default' ]" | 92 help = "Should data be centered before pca is applied? [default : '%default' ]" |
117 ), | 93 ), |
118 make_option( | 94 make_option( |
119 "--Rtsne_pca_scale", | 95 "--Rtsne_pca_scale", |
120 default = FALSE, | 96 default = FALSE, |
121 type = "logical", | 97 type = "logical", |
122 help = "Should data be scaled before pca is applied? [default : '%default' ]" | 98 help = "Should data be scaled before pca is applied? [default : '%default' ]" |
123 ), | 99 ), |
131 "--Rtsne_exaggeration_factor", | 107 "--Rtsne_exaggeration_factor", |
132 default = 12.0, | 108 default = 12.0, |
133 type = "numeric", | 109 type = "numeric", |
134 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" | 110 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" |
135 ), | 111 ), |
136 make_option( | 112 make_option( |
137 "--PCA_npc", | 113 "--PCA_npc", |
138 default = 5, | 114 default = 5, |
139 type = "integer", | 115 type = "integer", |
140 help = "number of dimensions kept in the results [default : '%default' ]" | 116 help = "number of dimensions kept in the results [default : '%default' ]" |
141 ), | 117 ), |
142 make_option( | 118 make_option( |
143 "--PCA_x_axis", | 119 "--item_size", |
144 default = 1, | 120 default = 1, |
121 type = "numeric", | |
122 help = "Size of points/labels in PCA [default : '%default' ]" | |
123 ), | |
124 make_option( | |
125 "--x_axis", | |
126 default = 1, | |
145 type = "integer", | 127 type = "integer", |
146 help = "PC to plot in the x axis [default : '%default' ]" | 128 help = "PC to plot in the x axis [default : '%default' ]" |
147 ), | 129 ), |
148 make_option( | 130 make_option( |
149 "--PCA_y_axis", | 131 "--y_axis", |
150 default = 2, | 132 default = 2, |
151 type = "integer", | 133 type = "integer", |
152 help = "PC to plot in the y axis [default : '%default' ]" | 134 help = "PC to plot in the y axis [default : '%default' ]" |
153 ), | 135 ), |
154 make_option( | 136 make_option( |
155 "--HCPC_ncluster", | 137 "--HCPC_ncluster", |
156 default = -1, | 138 default = -1, |
157 type = "numeric", | 139 type = "numeric", |
158 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" | 140 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" |
159 ), | 141 ), |
160 make_option( | 142 make_option( |
161 "--HCPC_npc", | 143 "--HCPC_npc", |
162 default = 5, | 144 default = 5, |
163 type = "integer", | 145 type = "integer", |
164 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" | 146 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" |
165 ), | 147 ), |
197 "--HCPC_min", | 179 "--HCPC_min", |
198 default = "3", | 180 default = "3", |
199 type = "integer", | 181 type = "integer", |
200 help = "The least possible number of clusters suggested [default :'%default']" | 182 help = "The least possible number of clusters suggested [default :'%default']" |
201 ), | 183 ), |
202 make_option( | 184 make_option( |
203 "--HCPC_max", | 185 "--HCPC_max", |
204 default = -1, | 186 default = -1, |
205 type = "integer", | 187 type = "integer", |
206 help = "The higher possible number of clusters suggested [default :'%default']" | 188 help = "The higher possible number of clusters suggested [default :'%default']" |
207 ), | 189 ), |
208 make_option( | 190 make_option( |
209 "--HCPC_clusterCA", | 191 "--HCPC_clusterCA", |
210 default = "rows", | 192 default = "rows", |
211 type = "character", | 193 type = "character", |
212 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" | 194 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" |
213 ), | 195 ), |
216 default = Inf, | 198 default = Inf, |
217 type = "numeric", | 199 type = "numeric", |
218 help = "The maximum number of iterations for the consolidation [default :'%default']" | 200 help = "The maximum number of iterations for the consolidation [default :'%default']" |
219 ), | 201 ), |
220 make_option( | 202 make_option( |
221 "--HCPC_clust", | |
222 default = "", | |
223 type = "character", | |
224 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" | |
225 ), | |
226 make_option( | |
227 "--HCPC_mutual_info", | 203 "--HCPC_mutual_info", |
228 default = "", | 204 default = "", |
229 type = "character", | 205 type = "character", |
230 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" | 206 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" |
231 ), | 207 ), |
232 make_option( | 208 make_option( |
233 "--HCPC_cluster_description", | 209 "--HCPC_cell_clust", |
234 default = "", | 210 default = "", |
235 type = "character", | 211 type = "character", |
236 help = "Output file with variables most contributing to clustering [default :'%default']" | 212 help = "Lists cells in the clusters generated by HCPC clustering. 2-column table (cell identifiers/clusters) [default :'%default']" |
213 ), | |
214 make_option( | |
215 "--HCPC_contributions", | |
216 default = "", | |
217 type = "character", | |
218 help = "Table of variables (genes) most contributing to HCPC clustering [default :'%default']" | |
237 ) | 219 ) |
238 ) | 220 ) |
239 | 221 |
240 opt <- parse_args(OptionParser(option_list = option_list), | 222 opt <- parse_args(OptionParser(option_list = option_list), |
241 args = commandArgs(trailingOnly = TRUE)) | 223 args = commandArgs(trailingOnly = TRUE)) |
242 | 224 |
243 if (opt$sep == "tab") { | |
244 opt$sep <- "\t" | |
245 } | |
246 if (opt$sep == "comma") { | |
247 opt$sep <- "," | |
248 } | |
249 if (opt$HCPC_max == -1) { | 225 if (opt$HCPC_max == -1) { |
250 opt$HCPC_max <- NULL | 226 opt$HCPC_max <- NULL |
251 } | 227 } |
252 if (opt$HCPC_kk == -1) { | 228 if (opt$HCPC_kk == -1) { |
253 opt$HCPC_kk <- Inf | 229 opt$HCPC_kk <- Inf |
254 } | 230 } |
255 | 231 |
256 ##Add legend to plot() | 232 #### We treat data once, at the beginning of the script #### |
257 legend_col <- function(col, lev) { | |
258 | |
259 opar <- par | |
260 | |
261 n <- length(col) | |
262 | |
263 bx <- par("usr") | |
264 | |
265 box_cx <- c(bx[2] + (bx[2] - bx[1]) / 1000, | |
266 bx[2] + (bx[2] - bx[1]) / 1000 + (bx[2] - bx[1]) / 50) | |
267 box_cy <- c(bx[3], bx[3]) | |
268 box_sy <- (bx[4] - bx[3]) / n | |
269 | |
270 xx <- rep(box_cx, each = 2) | |
271 | |
272 par(xpd = TRUE) | |
273 for (i in 1:n) { | |
274 yy <- c(box_cy[1] + (box_sy * (i - 1)), | |
275 box_cy[1] + (box_sy * (i)), | |
276 box_cy[1] + (box_sy * (i)), | |
277 box_cy[1] + (box_sy * (i - 1))) | |
278 polygon(xx, yy, col = col[i], border = col[i]) | |
279 } | |
280 par(new = TRUE) | |
281 plot(0, 0, type = "n", | |
282 ylim = c(min(lev), max(lev)), | |
283 yaxt = "n", ylab = "", | |
284 xaxt = "n", xlab = "", | |
285 frame.plot = FALSE) | |
286 axis(side = 4, las = 2, tick = FALSE, line = .25) | |
287 par <- opar | |
288 } | |
289 | |
290 data <- read.delim( | 233 data <- read.delim( |
291 opt$data, | 234 opt$data, |
292 check.names = FALSE, | 235 check.names = FALSE, |
293 header = opt$colnames, | 236 header = TRUE, |
294 row.names = 1, | 237 row.names = 1, |
295 sep = opt$sep | 238 sep = "\t" |
296 ) | 239 ) |
297 | 240 # we transpose immediately, because this is the common data structure |
298 # Contrasting factor and its colors | 241 data <- as.data.frame(t(data)) |
242 | |
243 # we treat the factor for usage in 3 methods | |
299 if (opt$factor != "") { | 244 if (opt$factor != "") { |
300 contrasting_factor <- read.delim( | 245 contrasting_factor <- read.delim(opt$factor, header = TRUE) |
301 opt$factor, | |
302 header = TRUE | |
303 ) | |
304 rownames(contrasting_factor) <- contrasting_factor[, 1] | 246 rownames(contrasting_factor) <- contrasting_factor[, 1] |
305 contrasting_factor <- contrasting_factor[colnames(data), ] | 247 # we pick only the relevant values of the contrasting factor |
306 colnames(contrasting_factor) <- c("id", "factor") | 248 contrasting_factor <- contrasting_factor[rownames(data), ] |
307 if (is.numeric(contrasting_factor$factor)) { | 249 sup <- colnames(contrasting_factor)[2] |
308 factor_cols <- rev(brewer.pal(n = 11, name = "RdYlGn"))[contrasting_factor$factor] | 250 if (!is.numeric(contrasting_factor[, 2])) { |
251 contrasting_factor[, 2] <- as.factor(contrasting_factor[, 2]) | |
252 } | |
253 } | |
254 | |
255 ######### make PCA with FactoMineR ################# | |
256 if (opt$visu_choice == "PCA") { | |
257 if (opt$labels) { | |
258 labels <- "ind" | |
309 } else { | 259 } else { |
310 contrasting_factor$factor <- as.factor(contrasting_factor$factor) | 260 labels <- "none" |
311 if (nlevels(contrasting_factor$factor) == 2) { | 261 } |
312 colors_labels <- c("#E41A1C", "#377EB8") | 262 pdf(opt$pdf_out) |
263 if (opt$factor != "") { | |
264 data <- cbind(data, contrasting_factor[, 2]) | |
265 colnames(data)[length(data)] <- sup | |
266 if (is.numeric(contrasting_factor[, 2])) { | |
267 res_pca <- PCA(X = data, quanti.sup = sup, graph = FALSE) | |
268 pca_plot <- plot(res_pca, habillage = sup, label = labels, | |
269 title = "PCA graph of cells", cex = opt$item_size, | |
270 axes = c(opt$x_axis, opt$y_axis)) | |
313 } else { | 271 } else { |
314 set.seed(567629) | 272 res_pca <- PCA(X = data, quali.sup = sup, graph = FALSE) |
315 colors_labels <- createPalette(nlevels(contrasting_factor$factor), c("#5A5156", "#E4E1E3", "#F6222E")) | 273 pca_plot <- plot(res_pca, habillage = sup, label = labels, |
316 names(colors_labels) <- NULL | 274 title = "PCA graph of cells", cex = opt$item_size, |
275 axes = c(opt$x_axis, opt$y_axis)) | |
317 } | 276 } |
318 factor_colors <- | 277 } else { |
319 with(contrasting_factor, | 278 res_pca <- PCA(X = data, graph = FALSE) |
320 data.frame(factor = levels(contrasting_factor$factor), | 279 pca_plot <- plot(res_pca, label = labels, |
321 color = I(colors_labels) | 280 title = "PCA graph of cells", cex = opt$item_size, |
322 ) | 281 axes = c(opt$x_axis, opt$y_axis), col.ind = "deepskyblue4") |
323 ) | 282 } |
324 factor_cols <- factor_colors$color[match(contrasting_factor$factor, | 283 print(pca_plot) |
325 factor_colors$factor)] | 284 dev.off() |
326 } | 285 } |
327 } else { | 286 |
328 factor_cols <- rep("deepskyblue4", length(rownames(data))) | 287 ########### make HCPC with FactoMineR ########## |
329 } | 288 if (opt$visu_choice == "HCPC") { |
330 | 289 pdf(opt$pdf_out) |
290 # HCPC starts with a PCA | |
291 pca <- PCA(X = data, ncp = opt$HCPC_npc, graph = FALSE) | |
292 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA | |
293 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering | |
294 res_hcpc <- HCPC(pca, | |
295 nb.clust = opt$HCPC_ncluster, | |
296 metric = opt$HCPC_metric, | |
297 method = opt$HCPC_method, | |
298 graph = FALSE, | |
299 consol = opt$HCPC_consol, | |
300 iter.max = opt$HCPC_itermax, | |
301 min = opt$HCPC_min, | |
302 max = opt$HCPC_max, | |
303 cluster.CA = opt$HCPC_clusterCA, | |
304 kk = opt$HCPC_kk) | |
305 # HCPC plots | |
306 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2 | |
307 plot(res_hcpc, choice = "tree") | |
308 plot(res_hcpc, choice = "bar") | |
309 if (opt$labels == FALSE) { | |
310 plot(res_hcpc, choice = "3D.map", ind.names = FALSE) | |
311 plot(res_hcpc, choice = "map", label = "none") | |
312 } else { | |
313 plot(res_hcpc, choice = "3D.map") | |
314 plot(res_hcpc, choice = "map") | |
315 } | |
316 ## Normalized Mutual Information | |
317 if (opt$factor != "") { | |
318 sink(opt$HCPC_mutual_info) | |
319 cat("Relationship between input factor and its levels and the HCPC clusters") | |
320 res <- external_validation(true_labels = as.numeric(contrasting_factor[, 2]), | |
321 clusters = as.numeric(res_hcpc$data.clust$clust), | |
322 summary_stats = TRUE) | |
323 sink() | |
324 } | |
325 dev.off() | |
326 | |
327 res_clustering <- data.frame(Cell = rownames(res_hcpc$data.clust), | |
328 Cluster = res_hcpc$data.clust$clust) | |
329 # Description of cluster by most contributing variables / gene expressions | |
330 # first transform list of vectors in a list of dataframes | |
331 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame) | |
332 # second, transfer rownames (genes) to column in the dataframe, before rbinding | |
333 extract_description_w_genes <- Map(cbind, | |
334 extract_description, | |
335 genes = lapply(extract_description, rownames)) | |
336 # Then collapse all dataframes with cluster_id in 1st column using {data.table} rbindlist() | |
337 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") | |
338 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # swap columns | |
339 cluster_description <- cluster_description[order(cluster_description[[2]], | |
340 cluster_description[[8]]), ] # sort by cluster then by pval | |
341 # Finally, output cluster description data frame | |
342 write.table(cluster_description, | |
343 file = opt$HCPC_contributions, | |
344 sep = "\t", | |
345 quote = FALSE, | |
346 col.names = TRUE, | |
347 row.names = FALSE) | |
348 | |
349 ## Return cluster table to user | |
350 write.table(res_clustering, | |
351 file = opt$HCPC_cell_clust, | |
352 sep = "\t", | |
353 quote = FALSE, | |
354 col.names = TRUE, | |
355 row.names = FALSE) | |
356 } | |
331 ################ t-SNE #################### | 357 ################ t-SNE #################### |
332 if (opt$visu_choice == "tSNE") { | 358 if (opt$visu_choice == "tSNE") { |
333 # filter and transpose df for tsne and pca | |
334 tdf <- t(data) | |
335 # make tsne and plot results | |
336 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility | 359 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility |
337 | 360 tsne_out <- Rtsne(data, |
338 tsne_out <- Rtsne(tdf, | |
339 dims = opt$Rtsne_dims, | 361 dims = opt$Rtsne_dims, |
340 initial_dims = opt$Rtsne_initial_dims, | 362 initial_dims = opt$Rtsne_initial_dims, |
341 perplexity = opt$Rtsne_perplexity, | 363 perplexity = opt$Rtsne_perplexity, |
342 theta = opt$Rtsne_theta, | 364 theta = opt$Rtsne_theta, |
343 max_iter = opt$Rtsne_max_iter, | 365 max_iter = opt$Rtsne_max_iter, |
344 pca = opt$Rtsne_pca, | 366 pca = opt$Rtsne_pca, |
345 pca_center = opt$Rtsne_pca_center, | 367 pca_center = opt$Rtsne_pca_center, |
346 pca_scale = opt$Rtsne_pca_scale, | 368 pca_scale = opt$Rtsne_pca_scale, |
347 normalize = opt$Rtsne_normalize, | 369 normalize = opt$Rtsne_normalize, |
348 exaggeration_factor = opt$Rtsne_exaggeration_factor) | 370 exaggeration_factor = opt$Rtsne_exaggeration_factor) |
349 | |
350 embedding <- as.data.frame(tsne_out$Y[, 1:2]) | 371 embedding <- as.data.frame(tsne_out$Y[, 1:2]) |
351 embedding$Class <- as.factor(rownames(tdf)) | 372 embedding$Class <- as.factor(rownames(data)) |
352 gg_legend <- theme(legend.position = "right") | 373 gg_legend <- theme(legend.position = "right") |
374 pointcolor <- "#E70000" | |
375 pointsize <- opt$item_size * 1.5 | |
376 the_theme <- theme( | |
377 panel.background = element_rect(fill = "gray100", colour = "#6D9EC1", | |
378 size = 2, linetype = "solid"), | |
379 panel.grid.major = element_line(size = 0.5, linetype = "solid", | |
380 colour = "#6D9EC1"), | |
381 panel.grid.minor = element_line(size = 0.25, linetype = "solid", | |
382 colour = "darkslategray3") | |
383 ) | |
353 if (opt$factor == "") { | 384 if (opt$factor == "") { |
354 ggplot(embedding, aes(x = V1, y = V2)) + | 385 p <- ggplot(embedding, aes(x = V1, y = V2)) + |
355 geom_point(size = 1, color = "deepskyblue4") + | 386 geom_point(size = pointsize * 0.25, color = pointcolor) + |
356 gg_legend + | 387 gg_legend + |
357 xlab("t-SNE 1") + | 388 xlab("t-SNE 1") + |
358 ylab("t-SNE 2") + | 389 ylab("t-SNE 2") + |
359 ggtitle("t-SNE") + | 390 ggtitle("t-SNE") + |
391 the_theme + | |
360 if (opt$labels) { | 392 if (opt$labels) { |
361 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = 1.5, color = "deepskyblue4") | 393 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = pointsize, color = pointcolor) |
362 } | 394 } |
395 } else { | |
396 if (is.numeric(contrasting_factor[, 2])) { | |
397 embedding$factor <- contrasting_factor[, 2] | |
363 } else { | 398 } else { |
364 if (is.numeric(contrasting_factor$factor)) { | 399 embedding$factor <- as.factor(contrasting_factor[, 2]) |
365 embedding$factor <- contrasting_factor$factor | |
366 } else { | |
367 embedding$factor <- as.factor(contrasting_factor$factor) | |
368 } | 400 } |
369 | 401 p <- ggplot(embedding, aes(x = V1, y = V2, color = factor)) + |
370 ggplot(embedding, aes(x = V1, y = V2, color = factor)) + | 402 geom_point(size = pointsize * 0.25) + |
371 geom_point(size = 1) + | |
372 gg_legend + | 403 gg_legend + |
373 xlab("t-SNE 1") + | 404 xlab("t-SNE 1") + |
374 ylab("t-SNE 2") + | 405 ylab("t-SNE 2") + |
375 ggtitle("t-SNE") + | 406 ggtitle("t-SNE") + |
407 the_theme + | |
376 if (opt$labels) { | 408 if (opt$labels) { |
377 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = 1.5) | 409 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = pointsize) |
378 } | 410 } |
379 } | 411 } |
380 ggsave(file = opt$pdf_out, device = "pdf") | |
381 | |
382 #save coordinates table | |
383 if (opt$table_coordinates != "") { | |
384 coord_table <- cbind(rownames(tdf), round(as.data.frame(tsne_out$Y), 6)) | |
385 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$Rtsne_dims))) | |
386 } | |
387 } | |
388 | |
389 | |
390 ######### make PCA with FactoMineR ################# | |
391 if (opt$visu_choice == "PCA") { | |
392 pca <- PCA(t(data), ncp = opt$PCA_npc, graph = FALSE) | |
393 pdf(opt$pdf_out) | 412 pdf(opt$pdf_out) |
394 if (opt$labels == FALSE) { | 413 print(p) |
395 plot(pca, axes = c(opt$PCA_x_axis, opt$PCA_y_axis), label = "none", col.ind = factor_cols) | 414 dev.off() |
396 } else { | 415 } |
397 plot(pca, axes = c(opt$PCA_x_axis, opt$PCA_y_axis), cex = 0.2, col.ind = factor_cols) | |
398 } | |
399 if (opt$factor != "") { | |
400 if (is.factor(contrasting_factor$factor)) { | |
401 legend(x = "topright", | |
402 legend = as.character(factor_colors$factor), | |
403 col = factor_colors$color, pch = 16, bty = "n", xjust = 1, cex = 0.7) | |
404 } else { | |
405 legend_col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) | |
406 } | |
407 } | |
408 dev.off() | |
409 | |
410 #save coordinates table | |
411 if (opt$table_coordinates != "") { | |
412 coord_table <- cbind(rownames(pca$ind$coord), round(as.data.frame(pca$ind$coord), 6)) | |
413 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$PCA_npc))) | |
414 } | |
415 | |
416 } | |
417 | |
418 ########### make HCPC with FactoMineR ########## | |
419 if (opt$visu_choice == "HCPC") { | |
420 | |
421 # HCPC starts with a PCA | |
422 pca <- PCA( | |
423 t(data), | |
424 ncp = opt$HCPC_npc, | |
425 graph = FALSE, | |
426 ) | |
427 | |
428 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA | |
429 | |
430 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering | |
431 res_hcpc <- HCPC(pca, | |
432 nb.clust = opt$HCPC_ncluster, metric = opt$HCPC_metric, method = opt$HCPC_method, | |
433 graph = FALSE, consol = opt$HCPC_consol, iter.max = opt$HCPC_itermax, min = opt$HCPC_min, max = opt$HCPC_max, | |
434 cluster.CA = opt$HCPC_clusterCA, kk = opt$HCPC_kk) | |
435 # HCPC plots | |
436 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2 | |
437 pdf(opt$pdf_out) | |
438 plot(res_hcpc, choice = "tree") | |
439 plot(res_hcpc, choice = "bar") | |
440 plot(res_hcpc, choice = "3D.map") | |
441 if (opt$labels == FALSE) { | |
442 plot(res_hcpc, choice = "map", label = "none") | |
443 } else { | |
444 plot(res_hcpc, choice = "map") | |
445 } | |
446 | |
447 # user contrasts on the pca | |
448 if (opt$factor != "") { | |
449 plot(pca, label = "none", col.ind = factor_cols) | |
450 if (is.factor(contrasting_factor$factor)) { | |
451 legend(x = "topright", | |
452 legend = as.character(factor_colors$factor), | |
453 col = factor_colors$color, pch = 16, bty = "n", xjust = 1, cex = 0.7) | |
454 | |
455 ## Normalized Mutual Information | |
456 sink(opt$HCPC_mutual_info) | |
457 res <- external_validation( | |
458 true_labels = as.numeric(contrasting_factor$factor), | |
459 clusters = as.numeric(res_hcpc$data.clust$clust), | |
460 summary_stats = TRUE | |
461 ) | |
462 sink() | |
463 | |
464 } else { | |
465 legend_col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) | |
466 } | |
467 } | |
468 | |
469 dev.off() | |
470 | |
471 if (opt$table_coordinates != "") { | |
472 coord_table <- cbind(Cell = rownames(res_hcpc$call$X), | |
473 round(as.data.frame(res_hcpc$call$X[, -length(res_hcpc$call$X)]), 6), | |
474 as.data.frame(res_hcpc$call$X[, length(res_hcpc$call$X)]) | |
475 ) | |
476 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$HCPC_npc)), "Cluster") | |
477 } | |
478 | |
479 if (opt$HCPC_clust != "") { | |
480 res_clustering <- data.frame(Cell = rownames(res_hcpc$data.clust), | |
481 Cluster = res_hcpc$data.clust$clust) | |
482 | |
483 } | |
484 | |
485 # Description of cluster by most contributing variables / gene expressions | |
486 | |
487 # first transform list of vectors in a list of dataframes | |
488 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame) | |
489 # second, transfer rownames (genes) to column in the dataframe, before rbinding | |
490 extract_description_w_genes <- Map(cbind, | |
491 extract_description, | |
492 genes = lapply(extract_description, rownames) | |
493 ) | |
494 # Then use data.table to collapse all generated dataframe, with the cluster id in first column | |
495 # using the {data.table} rbindlist function | |
496 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") | |
497 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # reorganize columns | |
498 | |
499 | |
500 # Finally, output cluster description data frame | |
501 write.table( | |
502 cluster_description, | |
503 file = opt$HCPC_cluster_description, | |
504 sep = "\t", | |
505 quote = FALSE, | |
506 col.names = TRUE, | |
507 row.names = FALSE | |
508 ) | |
509 | |
510 } | |
511 | |
512 ## Return coordinates file to user | |
513 | |
514 if (opt$table_coordinates != "") { | |
515 write.table( | |
516 coord_table, | |
517 file = opt$table_coordinates, | |
518 sep = "\t", | |
519 quote = FALSE, | |
520 col.names = TRUE, | |
521 row.names = FALSE | |
522 ) | |
523 } | |
524 | |
525 | |
526 if (opt$HCPC_clust != "") { | |
527 write.table( | |
528 res_clustering, | |
529 file = opt$HCPC_clust, | |
530 sep = "\t", | |
531 quote = FALSE, | |
532 col.names = TRUE, | |
533 row.names = FALSE | |
534 ) | |
535 } |