comparison high_dim_visu.R @ 7:18a1dc4aec4a draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_high_dimension_visualization commit ef87a68f9a33f8418699d97627eb5f49a5e2c4a6
author artbio
date Sun, 21 May 2023 16:26:23 +0000
parents 19bef589f876
children fe6f76030168
comparison
equal deleted inserted replaced
6:19bef589f876 7:18a1dc4aec4a
1 # load packages that are provided in the conda env 1 options(show.error.messages = FALSE,
2 options( show.error.messages=F, 2 error = function() {
3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 3 cat(geterrmessage(), file = stderr())
4 q("no", 1, FALSE)
5 }
6 )
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
5 warnings() 8 warnings()
6 library(optparse) 9 library(optparse)
7 library(FactoMineR) 10 library(FactoMineR)
8 library(factoextra) 11 library(factoextra)
10 library(ggplot2) 13 library(ggplot2)
11 library(ggfortify) 14 library(ggfortify)
12 library(RColorBrewer) 15 library(RColorBrewer)
13 library(ClusterR) 16 library(ClusterR)
14 library(data.table) 17 library(data.table)
18 library(Polychrome)
15 19
16 # Arguments 20 # Arguments
17 option_list = list( 21 option_list <- list(
18 make_option( 22 make_option(
19 "--data", 23 "--data",
20 default = NA, 24 default = NA,
21 type = 'character', 25 type = "character",
22 help = "Input file that contains expression value to visualise" 26 help = "Input file that contains expression value to visualise"
23 ), 27 ),
24 make_option( 28 make_option(
25 "--sep", 29 "--sep",
26 default = '\t', 30 default = "\t",
27 type = 'character', 31 type = "character",
28 help = "File separator [default : '%default' ]" 32 help = "File separator [default : '%default' ]"
29 ), 33 ),
30 make_option( 34 make_option(
31 "--colnames", 35 "--colnames",
32 default = TRUE, 36 default = TRUE,
33 type = 'logical', 37 type = "logical",
34 help = "Consider first line as header ? [default : '%default' ]" 38 help = "Consider first line as header ? [default : '%default' ]"
35 ), 39 ),
36 make_option( 40 make_option(
37 "--out", 41 "--out",
38 default = "res.tab", 42 default = "res.tab",
39 type = 'character', 43 type = "character",
40 help = "Output name [default : '%default' ]" 44 help = "Output name [default : '%default' ]"
41 ), 45 ),
42 make_option( 46 make_option(
43 "--labels", 47 "--labels",
44 default = FALSE, 48 default = FALSE,
45 type = 'logical', 49 type = "logical",
46 help = "add labels in scatter plots [default : '%default' ]" 50 help = "add labels in scatter plots [default : '%default' ]"
47 ), 51 ),
48 make_option( 52 make_option(
49 "--factor", 53 "--factor",
50 default = '', 54 default = "",
51 type = 'character', 55 type = "character",
52 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]" 56 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]"
53 ), 57 ),
54 make_option( 58 make_option(
55 "--visu_choice", 59 "--visu_choice",
56 default = 'PCA', 60 default = "PCA",
57 type = 'character', 61 type = "character",
58 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" 62 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]"
59 ), 63 ),
60 make_option( 64 make_option(
61 "--table_coordinates", 65 "--table_coordinates",
62 default = '', 66 default = "",
63 type = 'character', 67 type = "character",
64 help = "Table with plot coordinates [default : '%default' ]" 68 help = "Table with plot coordinates [default : '%default' ]"
65 ), 69 ),
66 make_option( 70 make_option(
67 "--Rtsne_seed", 71 "--Rtsne_seed",
68 default = 42, 72 default = 42,
69 type = 'integer', 73 type = "integer",
70 help = "Seed value for reproducibility [default : '%default' ]" 74 help = "Seed value for reproducibility [default : '%default' ]"
71 ), 75 ),
72 make_option( 76 make_option(
73 "--Rtsne_dims", 77 "--Rtsne_dims",
74 default = 2, 78 default = 2,
75 type = 'integer', 79 type = "integer",
76 help = "Output dimensionality [default : '%default' ]" 80 help = "Output dimensionality [default : '%default' ]"
77 ), 81 ),
78 make_option( 82 make_option(
79 "--Rtsne_initial_dims", 83 "--Rtsne_initial_dims",
80 default = 50, 84 default = 50,
81 type = 'integer', 85 type = "integer",
82 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]" 86 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]"
83 ), 87 ),
84 make_option( 88 make_option(
85 "--Rtsne_perplexity", 89 "--Rtsne_perplexity",
86 default = 5.0, 90 default = 5.0,
87 type = 'numeric', 91 type = "numeric",
88 help = "perplexity [default : '%default' ]" 92 help = "perplexity [default : '%default' ]"
89 ), 93 ),
90 make_option( 94 make_option(
91 "--Rtsne_theta", 95 "--Rtsne_theta",
92 default = 1.0, 96 default = 1.0,
93 type = 'numeric', 97 type = "numeric",
94 help = "theta [default : '%default' ]" 98 help = "theta [default : '%default' ]"
95 ), 99 ),
96 make_option( 100 make_option(
97 "--Rtsne_max_iter", 101 "--Rtsne_max_iter",
98 default = 1000, 102 default = 1000,
99 type = 'integer', 103 type = "integer",
100 help = "max_iter [default : '%default' ]" 104 help = "max_iter [default : '%default' ]"
101 ), 105 ),
102 make_option( 106 make_option(
103 "--Rtsne_pca", 107 "--Rtsne_pca",
104 default = TRUE, 108 default = TRUE,
105 type = 'logical', 109 type = "logical",
106 help = "Whether an initial PCA step should be performed [default : '%default' ]" 110 help = "Whether an initial PCA step should be performed [default : '%default' ]"
107 ), 111 ),
108 make_option( 112 make_option(
109 "--Rtsne_pca_center", 113 "--Rtsne_pca_center",
110 default = TRUE, 114 default = TRUE,
111 type = 'logical', 115 type = "logical",
112 help = "Should data be centered before pca is applied? [default : '%default' ]" 116 help = "Should data be centered before pca is applied? [default : '%default' ]"
113 ), 117 ),
114 make_option( 118 make_option(
115 "--Rtsne_pca_scale", 119 "--Rtsne_pca_scale",
116 default = FALSE, 120 default = FALSE,
117 type = 'logical', 121 type = "logical",
118 help = "Should data be scaled before pca is applied? [default : '%default' ]" 122 help = "Should data be scaled before pca is applied? [default : '%default' ]"
119 ), 123 ),
120 make_option( 124 make_option(
121 "--Rtsne_normalize", 125 "--Rtsne_normalize",
122 default = TRUE, 126 default = TRUE,
123 type = 'logical', 127 type = "logical",
124 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]" 128 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]"
125 ), 129 ),
126 make_option( 130 make_option(
127 "--Rtsne_exaggeration_factor", 131 "--Rtsne_exaggeration_factor",
128 default = 12.0, 132 default = 12.0,
129 type = 'numeric', 133 type = "numeric",
130 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" 134 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]"
131 ), 135 ),
132 make_option( 136 make_option(
133 "--PCA_npc", 137 "--PCA_npc",
134 default = 5, 138 default = 5,
135 type = 'integer', 139 type = "integer",
136 help = "number of dimensions kept in the results [default : '%default' ]" 140 help = "number of dimensions kept in the results [default : '%default' ]"
137 ), 141 ),
138 make_option( 142 make_option(
139 "--PCA_x_axis", 143 "--PCA_x_axis",
140 default = 1, 144 default = 1,
141 type = 'integer', 145 type = "integer",
142 help = "PC to plot in the x axis [default : '%default' ]" 146 help = "PC to plot in the x axis [default : '%default' ]"
143 ), 147 ),
144 make_option( 148 make_option(
145 "--PCA_y_axis", 149 "--PCA_y_axis",
146 default = 2, 150 default = 2,
147 type = 'integer', 151 type = "integer",
148 help = "PC to plot in the y axis [default : '%default' ]" 152 help = "PC to plot in the y axis [default : '%default' ]"
149 ), 153 ),
150 make_option( 154 make_option(
151 "--HCPC_ncluster", 155 "--HCPC_ncluster",
152 default = -1, 156 default = -1,
153 type = 'numeric', 157 type = "numeric",
154 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" 158 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]"
155 ), 159 ),
156 make_option( 160 make_option(
157 "--HCPC_npc", 161 "--HCPC_npc",
158 default = 5, 162 default = 5,
159 type = 'integer', 163 type = "integer",
160 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" 164 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]"
161 ), 165 ),
162 make_option( 166 make_option(
163 "--HCPC_metric", 167 "--HCPC_metric",
164 default = 'euclidean', 168 default = "euclidean",
165 type = 'character', 169 type = "character",
166 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" 170 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]"
167 ), 171 ),
168 make_option( 172 make_option(
169 "--HCPC_method", 173 "--HCPC_method",
170 default = 'ward', 174 default = "ward",
171 type = 'character', 175 type = "character",
172 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']" 176 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']"
173 ), 177 ),
174 make_option( 178 make_option(
175 "--pdf_out", 179 "--pdf_out",
176 default = "out.pdf", 180 default = "out.pdf",
177 type = 'character', 181 type = "character",
178 help = "pdf of plots [default : '%default' ]" 182 help = "pdf of plots [default : '%default' ]"
179 ), 183 ),
180 make_option( 184 make_option(
181 "--HCPC_consol", 185 "--HCPC_consol",
182 default = 'TRUE', 186 default = "TRUE",
183 type = 'logical', 187 type = "logical",
184 help = "If TRUE, a k-means consolidation is performed [default :'%default']" 188 help = "If TRUE, a k-means consolidation is performed [default :'%default']"
185 ), 189 ),
186 make_option( 190 make_option(
187 "--HCPC_itermax", 191 "--HCPC_itermax",
188 default = '10', 192 default = "10",
189 type = 'integer', 193 type = "integer",
190 help = "The maximum number of iterations for the consolidation [default :'%default']" 194 help = "The maximum number of iterations for the consolidation [default :'%default']"
191 ), 195 ),
192 make_option( 196 make_option(
193 "--HCPC_min", 197 "--HCPC_min",
194 default = '3', 198 default = "3",
195 type = 'integer', 199 type = "integer",
196 help = "The least possible number of clusters suggested [default :'%default']" 200 help = "The least possible number of clusters suggested [default :'%default']"
197 ), 201 ),
198 make_option( 202 make_option(
199 "--HCPC_max", 203 "--HCPC_max",
200 default = -1, 204 default = -1,
201 type = 'integer', 205 type = "integer",
202 help = "The higher possible number of clusters suggested [default :'%default']" 206 help = "The higher possible number of clusters suggested [default :'%default']"
203 ), 207 ),
204 make_option( 208 make_option(
205 "--HCPC_clusterCA", 209 "--HCPC_clusterCA",
206 default = 'rows', 210 default = "rows",
207 type = 'character', 211 type = "character",
208 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" 212 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']"
209 ), 213 ),
210 make_option( 214 make_option(
211 "--HCPC_kk", 215 "--HCPC_kk",
212 default = Inf, 216 default = Inf,
213 type = 'numeric', 217 type = "numeric",
214 help = "The maximum number of iterations for the consolidation [default :'%default']" 218 help = "The maximum number of iterations for the consolidation [default :'%default']"
215 ), 219 ),
216 make_option( 220 make_option(
217 "--HCPC_clust", 221 "--HCPC_clust",
218 default = "", 222 default = "",
219 type = 'character', 223 type = "character",
220 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" 224 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']"
221 ), 225 ),
222 make_option( 226 make_option(
223 "--HCPC_mutual_info", 227 "--HCPC_mutual_info",
224 default = "", 228 default = "",
231 type = "character", 235 type = "character",
232 help = "Output file with variables most contributing to clustering [default :'%default']" 236 help = "Output file with variables most contributing to clustering [default :'%default']"
233 ) 237 )
234 ) 238 )
235 239
236 opt = parse_args(OptionParser(option_list = option_list), 240 opt <- parse_args(OptionParser(option_list = option_list),
237 args = commandArgs(trailingOnly = TRUE)) 241 args = commandArgs(trailingOnly = TRUE))
238 242
239 if (opt$sep == "tab") {opt$sep <- "\t"} 243 if (opt$sep == "tab") {
240 if (opt$sep == "comma") {opt$sep <- ","} 244 opt$sep <- "\t"
241 if(opt$HCPC_max == -1) {opt$HCPC_max <- NULL} 245 }
242 if(opt$HCPC_kk == -1) {opt$HCPC_kk <- Inf} 246 if (opt$sep == "comma") {
247 opt$sep <- ","
248 }
249 if (opt$HCPC_max == -1) {
250 opt$HCPC_max <- NULL
251 }
252 if (opt$HCPC_kk == -1) {
253 opt$HCPC_kk <- Inf
254 }
243 255
244 ##Add legend to plot() 256 ##Add legend to plot()
245 legend.col <- function(col, lev){ 257 legend_col <- function(col, lev) {
246 258
247 opar <- par 259 opar <- par
248 260
249 n <- length(col) 261 n <- length(col)
250 262
251 bx <- par("usr") 263 bx <- par("usr")
252 264
253 box.cx <- c(bx[2] + (bx[2] - bx[1]) / 1000, 265 box_cx <- c(bx[2] + (bx[2] - bx[1]) / 1000,
254 bx[2] + (bx[2] - bx[1]) / 1000 + (bx[2] - bx[1]) / 50) 266 bx[2] + (bx[2] - bx[1]) / 1000 + (bx[2] - bx[1]) / 50)
255 box.cy <- c(bx[3], bx[3]) 267 box_cy <- c(bx[3], bx[3])
256 box.sy <- (bx[4] - bx[3]) / n 268 box_sy <- (bx[4] - bx[3]) / n
257 269
258 xx <- rep(box.cx, each = 2) 270 xx <- rep(box_cx, each = 2)
259 271
260 par(xpd = TRUE) 272 par(xpd = TRUE)
261 for(i in 1:n){ 273 for (i in 1:n) {
262 274 yy <- c(box_cy[1] + (box_sy * (i - 1)),
263 yy <- c(box.cy[1] + (box.sy * (i - 1)), 275 box_cy[1] + (box_sy * (i)),
264 box.cy[1] + (box.sy * (i)), 276 box_cy[1] + (box_sy * (i)),
265 box.cy[1] + (box.sy * (i)), 277 box_cy[1] + (box_sy * (i - 1)))
266 box.cy[1] + (box.sy * (i - 1))) 278 polygon(xx, yy, col = col[i], border = col[i])
267 polygon(xx, yy, col = col[i], border = col[i])
268
269 } 279 }
270 par(new = TRUE) 280 par(new = TRUE)
271 plot(0, 0, type = "n", 281 plot(0, 0, type = "n",
272 ylim = c(min(lev), max(lev)), 282 ylim = c(min(lev), max(lev)),
273 yaxt = "n", ylab = "", 283 yaxt = "n", ylab = "",
275 frame.plot = FALSE) 285 frame.plot = FALSE)
276 axis(side = 4, las = 2, tick = FALSE, line = .25) 286 axis(side = 4, las = 2, tick = FALSE, line = .25)
277 par <- opar 287 par <- opar
278 } 288 }
279 289
280 290 data <- read.delim(
281 data = read.delim(
282 opt$data, 291 opt$data,
283 check.names = FALSE, 292 check.names = FALSE,
284 header = opt$colnames, 293 header = opt$colnames,
285 row.names = 1, 294 row.names = 1,
286 sep = opt$sep 295 sep = opt$sep
287 ) 296 )
288 297
289 # Contrasting factor and its colors 298 # Contrasting factor and its colors
290 if (opt$factor != '') { 299 if (opt$factor != "") {
291 contrasting_factor <- read.delim( 300 contrasting_factor <- read.delim(
292 opt$factor, 301 opt$factor,
293 header = TRUE 302 header = TRUE
294 ) 303 )
295 rownames(contrasting_factor) <- contrasting_factor[,1] 304 rownames(contrasting_factor) <- contrasting_factor[, 1]
296 contrasting_factor <- contrasting_factor[colnames(data),] 305 contrasting_factor <- contrasting_factor[colnames(data), ]
297 colnames(contrasting_factor) <- c("id","factor") 306 colnames(contrasting_factor) <- c("id", "factor")
298 if(is.numeric(contrasting_factor$factor)){ 307 if (is.numeric(contrasting_factor$factor)) {
299 factor_cols <- rev(brewer.pal(n = 11, name = "RdYlGn"))[contrasting_factor$factor] 308 factor_cols <- rev(brewer.pal(n = 11, name = "RdYlGn"))[contrasting_factor$factor]
300 } else { 309 } else {
301 contrasting_factor$factor <- as.factor(contrasting_factor$factor) 310 contrasting_factor$factor <- as.factor(contrasting_factor$factor)
302 if(nlevels(contrasting_factor$factor) == 2){ 311 if (nlevels(contrasting_factor$factor) == 2) {
303 colors_labels <- c("#E41A1C", "#377EB8") 312 colors_labels <- c("#E41A1C", "#377EB8")
304 } else { 313 } else {
305 colors_labels <- brewer.pal(nlevels(contrasting_factor$factor), name = 'Paired') 314 set.seed(567629)
315 colors_labels <- createPalette(nlevels(contrasting_factor$factor), c("#5A5156", "#E4E1E3", "#F6222E"))
316 names(colors_labels) <- NULL
306 } 317 }
307 factorColors <- 318 factor_colors <-
308 with(contrasting_factor, 319 with(contrasting_factor,
309 data.frame(factor = levels(contrasting_factor$factor), 320 data.frame(factor = levels(contrasting_factor$factor),
310 color = I(colors_labels) 321 color = I(colors_labels)
311 ) 322 )
312 ) 323 )
313 factor_cols <- factorColors$color[match(contrasting_factor$factor, 324 factor_cols <- factor_colors$color[match(contrasting_factor$factor,
314 factorColors$factor)] 325 factor_colors$factor)]
315 } 326 }
316 } else { 327 } else {
317 factor_cols <- rep("deepskyblue4", length(rownames(data))) 328 factor_cols <- rep("deepskyblue4", length(rownames(data)))
318 } 329 }
319 330
320 ################ t-SNE #################### 331 ################ t-SNE ####################
321 if (opt$visu_choice == 'tSNE') { 332 if (opt$visu_choice == "tSNE") {
322 # filter and transpose df for tsne and pca 333 # filter and transpose df for tsne and pca
323 tdf = t(data) 334 tdf <- t(data)
324 # make tsne and plot results 335 # make tsne and plot results
325 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility 336 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility
326 337
327 tsne_out <- Rtsne(tdf, 338 tsne_out <- Rtsne(tdf,
328 dims = opt$Rtsne_dims, 339 dims = opt$Rtsne_dims,
329 initial_dims = opt$Rtsne_initial_dims, 340 initial_dims = opt$Rtsne_initial_dims,
330 perplexity = opt$Rtsne_perplexity , 341 perplexity = opt$Rtsne_perplexity,
331 theta = opt$Rtsne_theta, 342 theta = opt$Rtsne_theta,
332 max_iter = opt$Rtsne_max_iter, 343 max_iter = opt$Rtsne_max_iter,
333 pca = opt$Rtsne_pca, 344 pca = opt$Rtsne_pca,
334 pca_center = opt$Rtsne_pca_center, 345 pca_center = opt$Rtsne_pca_center,
335 pca_scale = opt$Rtsne_pca_scale, 346 pca_scale = opt$Rtsne_pca_scale,
336 normalize = opt$Rtsne_normalize, 347 normalize = opt$Rtsne_normalize,
337 exaggeration_factor=opt$Rtsne_exaggeration_factor) 348 exaggeration_factor = opt$Rtsne_exaggeration_factor)
338 349
339 embedding <- as.data.frame(tsne_out$Y[,1:2]) 350 embedding <- as.data.frame(tsne_out$Y[, 1:2])
340 embedding$Class <- as.factor(rownames(tdf)) 351 embedding$Class <- as.factor(rownames(tdf))
341 gg_legend = theme(legend.position="right") 352 gg_legend <- theme(legend.position = "right")
342 if (opt$factor == '') { 353 if (opt$factor == "") {
343 ggplot(embedding, aes(x=V1, y=V2)) + 354 ggplot(embedding, aes(x = V1, y = V2)) +
344 geom_point(size=1, color='deepskyblue4') + 355 geom_point(size = 1, color = "deepskyblue4") +
345 gg_legend + 356 gg_legend +
346 xlab("t-SNE 1") + 357 xlab("t-SNE 1") +
347 ylab("t-SNE 2") + 358 ylab("t-SNE 2") +
348 ggtitle('t-SNE') + 359 ggtitle("t-SNE") +
349 if (opt$labels) { 360 if (opt$labels) {
350 geom_text(aes(label=Class),hjust=-0.2, vjust=-0.5, size=1.5, color='deepskyblue4') 361 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = 1.5, color = "deepskyblue4")
351 } 362 }
352 } else { 363 } else {
353 if(is.numeric(contrasting_factor$factor)){ 364 if (is.numeric(contrasting_factor$factor)) {
354 embedding$factor <- contrasting_factor$factor 365 embedding$factor <- contrasting_factor$factor
355 } else { 366 } else {
356 embedding$factor <- as.factor(contrasting_factor$factor) 367 embedding$factor <- as.factor(contrasting_factor$factor)
357 } 368 }
358 369
359 ggplot(embedding, aes(x=V1, y=V2, color=factor)) + 370 ggplot(embedding, aes(x = V1, y = V2, color = factor)) +
360 geom_point(size=1) + #, color=factor_cols 371 geom_point(size = 1) +
361 gg_legend + 372 gg_legend +
362 xlab("t-SNE 1") + 373 xlab("t-SNE 1") +
363 ylab("t-SNE 2") + 374 ylab("t-SNE 2") +
364 ggtitle('t-SNE') + 375 ggtitle("t-SNE") +
365 if (opt$labels) { 376 if (opt$labels) {
366 geom_text(aes(label=Class, colour=factor),hjust=-0.2, vjust=-0.5, size=1.5) 377 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = 1.5)
367 } 378 }
368 } 379 }
369 ggsave(file=opt$pdf_out, device="pdf") 380 ggsave(file = opt$pdf_out, device = "pdf")
370 381
371 #save coordinates table 382 #save coordinates table
372 if(opt$table_coordinates != ''){ 383 if (opt$table_coordinates != "") {
373 coord_table <- cbind(rownames(tdf), round(as.data.frame(tsne_out$Y), 6)) 384 coord_table <- cbind(rownames(tdf), round(as.data.frame(tsne_out$Y), 6))
374 colnames(coord_table)=c("Cells",paste0("DIM",(1:opt$Rtsne_dims))) 385 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$Rtsne_dims)))
375 } 386 }
376 } 387 }
377 388
378 389
379 ######### make PCA with FactoMineR ################# 390 ######### make PCA with FactoMineR #################
380 if (opt$visu_choice == 'PCA') { 391 if (opt$visu_choice == "PCA") {
381 pca <- PCA(t(data), ncp=opt$PCA_npc, graph=FALSE) 392 pca <- PCA(t(data), ncp = opt$PCA_npc, graph = FALSE)
382 pdf(opt$pdf_out) 393 pdf(opt$pdf_out)
383 if (opt$labels == FALSE) { 394 if (opt$labels == FALSE) {
384 plot(pca, axes = c(opt$PCA_x_axis,opt$PCA_y_axis), label="none" , col.ind = factor_cols) 395 plot(pca, axes = c(opt$PCA_x_axis, opt$PCA_y_axis), label = "none", col.ind = factor_cols)
385 } else { 396 } else {
386 plot(pca, axes = c(opt$PCA_x_axis,opt$PCA_y_axis), cex=0.2 , col.ind = factor_cols) 397 plot(pca, axes = c(opt$PCA_x_axis, opt$PCA_y_axis), cex = 0.2, col.ind = factor_cols)
387 } 398 }
388 if (opt$factor != '') { 399 if (opt$factor != "") {
389 if(is.factor(contrasting_factor$factor)) { 400 if (is.factor(contrasting_factor$factor)) {
390 legend(x = 'topright', 401 legend(x = "topright",
391 legend = as.character(factorColors$factor), 402 legend = as.character(factor_colors$factor),
392 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) 403 col = factor_colors$color, pch = 16, bty = "n", xjust = 1, cex = 0.7)
393 } else { 404 } else {
394 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) 405 legend_col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE))
395 } 406 }
396 } 407 }
397 dev.off() 408 dev.off()
398 409
399 #save coordinates table 410 #save coordinates table
400 if(opt$table_coordinates != ''){ 411 if (opt$table_coordinates != "") {
401 coord_table <- cbind(rownames(pca$ind$coord), round(as.data.frame(pca$ind$coord), 6)) 412 coord_table <- cbind(rownames(pca$ind$coord), round(as.data.frame(pca$ind$coord), 6))
402 colnames(coord_table)=c("Cells",paste0("DIM",(1:opt$PCA_npc))) 413 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$PCA_npc)))
403 } 414 }
404 415
405 } 416 }
406 417
407 ########### make HCPC with FactoMineR ########## 418 ########### make HCPC with FactoMineR ##########
408 if (opt$visu_choice == 'HCPC') { 419 if (opt$visu_choice == "HCPC") {
409 420
410 # HCPC starts with a PCA 421 # HCPC starts with a PCA
411 pca <- PCA( 422 pca <- PCA(
412 t(data), 423 t(data),
413 ncp = opt$HCPC_npc, 424 ncp = opt$HCPC_npc,
414 graph = FALSE, 425 graph = FALSE,
415 ) 426 )
416 427
417 PCA_IndCoord = as.data.frame(pca$ind$coord) # coordinates of observations in PCA 428 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA
418 429
419 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering 430 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering
420 res.hcpc <- HCPC(pca, 431 res_hcpc <- HCPC(pca,
421 nb.clust=opt$HCPC_ncluster, metric=opt$HCPC_metric, method=opt$HCPC_method, 432 nb.clust = opt$HCPC_ncluster, metric = opt$HCPC_metric, method = opt$HCPC_method,
422 graph=F,consol=opt$HCPC_consol,iter.max=opt$HCPC_itermax,min=opt$HCPC_min,max=opt$HCPC_max, 433 graph = FALSE, consol = opt$HCPC_consol, iter.max = opt$HCPC_itermax, min = opt$HCPC_min, max = opt$HCPC_max,
423 cluster.CA=opt$HCPC_clusterCA,kk=opt$HCPC_kk) 434 cluster.CA = opt$HCPC_clusterCA, kk = opt$HCPC_kk)
424 # HCPC plots 435 # HCPC plots
425 dims <- head(as.data.frame(res.hcpc$call$t$res$eig),2) # dims variances in column 2 436 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2
426 pdf(opt$pdf_out) 437 pdf(opt$pdf_out)
427 plot(res.hcpc, choice="tree") 438 plot(res_hcpc, choice = "tree")
428 plot(res.hcpc, choice="bar") 439 plot(res_hcpc, choice = "bar")
429 plot(res.hcpc, choice="3D.map") 440 plot(res_hcpc, choice = "3D.map")
430 if (opt$labels == FALSE) { 441 if (opt$labels == FALSE) {
431 plot(res.hcpc, choice="map", label="none") 442 plot(res_hcpc, choice = "map", label = "none")
432 } else { 443 } else {
433 plot(res.hcpc, choice="map") 444 plot(res_hcpc, choice = "map")
434 } 445 }
435 446
436 # user contrasts on the pca 447 # user contrasts on the pca
437 if (opt$factor != '') { 448 if (opt$factor != "") {
438 plot(pca, label="none", col.ind = factor_cols) 449 plot(pca, label = "none", col.ind = factor_cols)
439 if(is.factor(contrasting_factor$factor)) { 450 if (is.factor(contrasting_factor$factor)) {
440 legend(x = 'topright', 451 legend(x = "topright",
441 legend = as.character(factorColors$factor), 452 legend = as.character(factor_colors$factor),
442 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) 453 col = factor_colors$color, pch = 16, bty = "n", xjust = 1, cex = 0.7)
443 454
444 ## Normalized Mutual Information 455 ## Normalized Mutual Information
445 sink(opt$HCPC_mutual_info) 456 sink(opt$HCPC_mutual_info)
446 res <- external_validation( 457 res <- external_validation(
447 true_labels = as.numeric(contrasting_factor$factor), 458 true_labels = as.numeric(contrasting_factor$factor),
448 clusters = as.numeric(res.hcpc$data.clust$clust), 459 clusters = as.numeric(res_hcpc$data.clust$clust),
449 summary_stats = TRUE 460 summary_stats = TRUE
450 ) 461 )
451 sink() 462 sink()
452 463
453 } else { 464 } else {
454 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) 465 legend_col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE))
455 } 466 }
456 } 467 }
457
458 ## Clusters to which individual observations belong # used ?
459 # Clust <- data.frame(Cluster = res.hcpc$data.clust[, (nrow(data) + 1)],
460 # Observation = rownames(res.hcpc$data.clust))
461 # metadata <- data.frame(Observation=colnames(data), row.names=colnames(data))
462 # metadata = merge(y = metadata,
463 # x = Clust,
464 # by = "Observation")
465
466 # unclear utility
467 # ObsNumberPerCluster = as.data.frame(table(metadata$Cluster))
468 # colnames(ObsNumberPerCluster) = c("Cluster", "ObsNumber")
469 #
470 # ## Silhouette Plot # not used
471 # hc.cut = hcut(PCA_IndCoord,
472 # k = nlevels(metadata$Cluster),
473 # hc_method = "ward.D2")
474 #
475 # Sil = fviz_silhouette(hc.cut)
476 # sil1 = as.data.frame(Sil$data)
477 468
478 dev.off() 469 dev.off()
479 470
480 if(opt$table_coordinates != ''){ 471 if (opt$table_coordinates != "") {
481 coord_table <- cbind(Cell=rownames(res.hcpc$call$X), 472 coord_table <- cbind(Cell = rownames(res_hcpc$call$X),
482 round(as.data.frame(res.hcpc$call$X[, -length(res.hcpc$call$X)]), 6), 473 round(as.data.frame(res_hcpc$call$X[, -length(res_hcpc$call$X)]), 6),
483 as.data.frame(res.hcpc$call$X[, length(res.hcpc$call$X)]) 474 as.data.frame(res_hcpc$call$X[, length(res_hcpc$call$X)])
484 ) 475 )
485 colnames(coord_table)=c("Cells",paste0("DIM",(1:opt$HCPC_npc)),"Cluster") 476 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$HCPC_npc)), "Cluster")
486 } 477 }
487 478
488 if(opt$HCPC_clust != ""){ 479 if (opt$HCPC_clust != "") {
489 res_clustering <- data.frame(Cell = rownames(res.hcpc$data.clust), 480 res_clustering <- data.frame(Cell = rownames(res_hcpc$data.clust),
490 Cluster = res.hcpc$data.clust$clust) 481 Cluster = res_hcpc$data.clust$clust)
491 482
492 } 483 }
493 484
494 # Description of cluster by most contributing variables / gene expressions 485 # Description of cluster by most contributing variables / gene expressions
495 486
496 # first transform list of vectors in a list of dataframes 487 # first transform list of vectors in a list of dataframes
497 extract_description <- lapply(res.hcpc$desc.var$quanti, as.data.frame) 488 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame)
498 # second, transfer rownames (genes) to column in the dataframe, before rbinding 489 # second, transfer rownames (genes) to column in the dataframe, before rbinding
499 extract_description_w_genes <- Map(cbind, 490 extract_description_w_genes <- Map(cbind,
500 extract_description, 491 extract_description,
501 genes= lapply(extract_description, rownames) 492 genes = lapply(extract_description, rownames)
502 ) 493 )
503 # Then use data.table to collapse all generated dataframe, with the cluster id in first column 494 # Then use data.table to collapse all generated dataframe, with the cluster id in first column
504 # using the {data.table} rbindlist function 495 # using the {data.table} rbindlist function
505 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") 496 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id")
506 cluster_description = cluster_description[ ,c(8, 1, 2, 3,4,5,6,7)] # reorganize columns 497 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # reorganize columns
507 498
508 499
509 # Finally, output cluster description data frame 500 # Finally, output cluster description data frame
510 write.table( 501 write.table(
511 cluster_description, 502 cluster_description,
512 file = opt$HCPC_cluster_description, 503 file = opt$HCPC_cluster_description,
513 sep = "\t", 504 sep = "\t",
514 quote = F, 505 quote = FALSE,
515 col.names = T, 506 col.names = TRUE,
516 row.names = F 507 row.names = FALSE
517 ) 508 )
518 509
519 } 510 }
520 511
521 ## Return coordinates file to user 512 ## Return coordinates file to user
522 513
523 if(opt$table_coordinates != ''){ 514 if (opt$table_coordinates != "") {
524 write.table( 515 write.table(
525 coord_table, 516 coord_table,
526 file = opt$table_coordinates, 517 file = opt$table_coordinates,
527 sep = "\t", 518 sep = "\t",
528 quote = F, 519 quote = FALSE,
529 col.names = T, 520 col.names = TRUE,
530 row.names = F 521 row.names = FALSE
531 ) 522 )
532 } 523 }
533 524
534 525
535 if(opt$HCPC_clust != ""){ 526 if (opt$HCPC_clust != "") {
536 write.table( 527 write.table(
537 res_clustering, 528 res_clustering,
538 file = opt$HCPC_clust, 529 file = opt$HCPC_clust,
539 sep = "\t", 530 sep = "\t",
540 quote = F, 531 quote = FALSE,
541 col.names = T, 532 col.names = TRUE,
542 row.names = F 533 row.names = FALSE
543 ) 534 )
544 } 535 }