comparison high_dim_visu.R @ 9:58aa18e1fe14 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_high_dimension_visualization commit 7343be2d3b1b8cb1ba0c4c55767b60dbce8f8b22
author artbio
date Thu, 07 Nov 2024 22:43:01 +0000
parents fe6f76030168
children
comparison
equal deleted inserted replaced
8:fe6f76030168 9:58aa18e1fe14
1 options(show.error.messages = FALSE, 1 options(
2 error = function() { 2 show.error.messages = FALSE,
3 cat(geterrmessage(), file = stderr()) 3 error = function() {
4 q("no", 1, FALSE) 4 cat(geterrmessage(), file = stderr())
5 } 5 q("no", 1, FALSE)
6 }
6 ) 7 )
7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 8 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
8 warnings() 9 warnings()
9 10
10 library(optparse) 11 library(optparse)
17 library(ClusterR) 18 library(ClusterR)
18 library(data.table) 19 library(data.table)
19 library(Polychrome) 20 library(Polychrome)
20 21
21 option_list <- list( 22 option_list <- list(
22 make_option( 23 make_option(
23 "--data", 24 "--data",
24 default = NA, 25 default = NA,
25 type = "character", 26 type = "character",
26 help = "Input file that contains expression value to visualise" 27 help = "Input file that contains expression value to visualise"
27 ), 28 ),
28 make_option( 29 make_option(
29 "--labels", 30 "--labels",
30 default = FALSE, 31 default = FALSE,
31 type = "logical", 32 type = "logical",
32 help = "add labels in scatter plots [default : '%default' ]" 33 help = "add labels in scatter plots [default : '%default' ]"
33 ), 34 ),
34 make_option( 35 make_option(
35 "--factor", 36 "--factor",
36 default = "", 37 default = "",
37 type = "character", 38 type = "character",
38 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]" 39 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]"
39 ), 40 ),
40 make_option( 41 make_option(
41 "--visu_choice", 42 "--visu_choice",
42 default = "PCA", 43 default = "PCA",
43 type = "character", 44 type = "character",
44 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" 45 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]"
45 ), 46 ),
46 make_option( 47 make_option(
47 "--Rtsne_seed", 48 "--Rtsne_seed",
48 default = 42, 49 default = 42,
49 type = "integer", 50 type = "integer",
50 help = "Seed value for reproducibility [default : '%default' ]" 51 help = "Seed value for reproducibility [default : '%default' ]"
51 ), 52 ),
52 make_option( 53 make_option(
53 "--Rtsne_dims", 54 "--Rtsne_dims",
54 default = 2, 55 default = 2,
55 type = "integer", 56 type = "integer",
56 help = "Output dimensionality [default : '%default' ]" 57 help = "Output dimensionality [default : '%default' ]"
57 ), 58 ),
58 make_option( 59 make_option(
59 "--Rtsne_initial_dims", 60 "--Rtsne_initial_dims",
60 default = 50, 61 default = 50,
61 type = "integer", 62 type = "integer",
62 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]" 63 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]"
63 ), 64 ),
64 make_option( 65 make_option(
65 "--Rtsne_perplexity", 66 "--Rtsne_perplexity",
66 default = 5.0, 67 default = 5.0,
67 type = "numeric", 68 type = "numeric",
68 help = "perplexity [default : '%default' ]" 69 help = "perplexity [default : '%default' ]"
69 ), 70 ),
70 make_option( 71 make_option(
71 "--Rtsne_theta", 72 "--Rtsne_theta",
72 default = 1.0, 73 default = 1.0,
73 type = "numeric", 74 type = "numeric",
74 help = "theta [default : '%default' ]" 75 help = "theta [default : '%default' ]"
75 ), 76 ),
76 make_option( 77 make_option(
77 "--Rtsne_max_iter", 78 "--Rtsne_max_iter",
78 default = 1000, 79 default = 1000,
79 type = "integer", 80 type = "integer",
80 help = "max_iter [default : '%default' ]" 81 help = "max_iter [default : '%default' ]"
81 ), 82 ),
82 make_option( 83 make_option(
83 "--Rtsne_pca", 84 "--Rtsne_pca",
84 default = TRUE, 85 default = TRUE,
85 type = "logical", 86 type = "logical",
86 help = "Whether an initial PCA step should be performed [default : '%default' ]" 87 help = "Whether an initial PCA step should be performed [default : '%default' ]"
87 ), 88 ),
88 make_option( 89 make_option(
89 "--Rtsne_pca_center", 90 "--Rtsne_pca_center",
90 default = TRUE, 91 default = TRUE,
91 type = "logical", 92 type = "logical",
92 help = "Should data be centered before pca is applied? [default : '%default' ]" 93 help = "Should data be centered before pca is applied? [default : '%default' ]"
93 ), 94 ),
94 make_option( 95 make_option(
95 "--Rtsne_pca_scale", 96 "--Rtsne_pca_scale",
96 default = FALSE, 97 default = FALSE,
97 type = "logical", 98 type = "logical",
98 help = "Should data be scaled before pca is applied? [default : '%default' ]" 99 help = "Should data be scaled before pca is applied? [default : '%default' ]"
99 ), 100 ),
100 make_option( 101 make_option(
101 "--Rtsne_normalize", 102 "--Rtsne_normalize",
102 default = TRUE, 103 default = TRUE,
103 type = "logical", 104 type = "logical",
104 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]" 105 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]"
105 ), 106 ),
106 make_option( 107 make_option(
107 "--Rtsne_exaggeration_factor", 108 "--Rtsne_exaggeration_factor",
108 default = 12.0, 109 default = 12.0,
109 type = "numeric", 110 type = "numeric",
110 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" 111 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]"
111 ), 112 ),
112 make_option( 113 make_option(
113 "--PCA_npc", 114 "--PCA_npc",
114 default = 5, 115 default = 5,
115 type = "integer", 116 type = "integer",
116 help = "number of dimensions kept in the results [default : '%default' ]" 117 help = "number of dimensions kept in the results [default : '%default' ]"
117 ), 118 ),
118 make_option( 119 make_option(
119 "--item_size", 120 "--item_size",
120 default = 1, 121 default = 1,
121 type = "numeric", 122 type = "numeric",
122 help = "Size of points/labels in PCA [default : '%default' ]" 123 help = "Size of points/labels in PCA [default : '%default' ]"
123 ), 124 ),
124 make_option( 125 make_option(
125 "--x_axis", 126 "--x_axis",
126 default = 1, 127 default = 1,
127 type = "integer", 128 type = "integer",
128 help = "PC to plot in the x axis [default : '%default' ]" 129 help = "PC to plot in the x axis [default : '%default' ]"
129 ), 130 ),
130 make_option( 131 make_option(
131 "--y_axis", 132 "--y_axis",
132 default = 2, 133 default = 2,
133 type = "integer", 134 type = "integer",
134 help = "PC to plot in the y axis [default : '%default' ]" 135 help = "PC to plot in the y axis [default : '%default' ]"
135 ), 136 ),
136 make_option( 137 make_option(
137 "--HCPC_ncluster", 138 "--HCPC_ncluster",
138 default = -1, 139 default = -1,
139 type = "numeric", 140 type = "numeric",
140 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" 141 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]"
141 ), 142 ),
142 make_option( 143 make_option(
143 "--HCPC_npc", 144 "--HCPC_npc",
144 default = 5, 145 default = 5,
145 type = "integer", 146 type = "integer",
146 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" 147 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]"
147 ), 148 ),
148 make_option( 149 make_option(
149 "--HCPC_metric", 150 "--HCPC_metric",
150 default = "euclidean", 151 default = "euclidean",
151 type = "character", 152 type = "character",
152 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" 153 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]"
153 ), 154 ),
154 make_option( 155 make_option(
155 "--HCPC_method", 156 "--HCPC_method",
156 default = "ward", 157 default = "ward",
157 type = "character", 158 type = "character",
158 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']" 159 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']"
159 ), 160 ),
160 make_option( 161 make_option(
161 "--pdf_out", 162 "--pdf_out",
162 default = "out.pdf", 163 default = "out.pdf",
163 type = "character", 164 type = "character",
164 help = "pdf of plots [default : '%default' ]" 165 help = "pdf of plots [default : '%default' ]"
165 ), 166 ),
166 make_option( 167 make_option(
167 "--HCPC_consol", 168 "--HCPC_consol",
168 default = "TRUE", 169 default = "TRUE",
169 type = "logical", 170 type = "logical",
170 help = "If TRUE, a k-means consolidation is performed [default :'%default']" 171 help = "If TRUE, a k-means consolidation is performed [default :'%default']"
171 ), 172 ),
172 make_option( 173 make_option(
173 "--HCPC_itermax", 174 "--HCPC_itermax",
174 default = "10", 175 default = "10",
175 type = "integer", 176 type = "integer",
176 help = "The maximum number of iterations for the consolidation [default :'%default']" 177 help = "The maximum number of iterations for the consolidation [default :'%default']"
177 ), 178 ),
178 make_option( 179 make_option(
179 "--HCPC_min", 180 "--HCPC_min",
180 default = "3", 181 default = "3",
181 type = "integer", 182 type = "integer",
182 help = "The least possible number of clusters suggested [default :'%default']" 183 help = "The least possible number of clusters suggested [default :'%default']"
183 ), 184 ),
184 make_option( 185 make_option(
185 "--HCPC_max", 186 "--HCPC_max",
186 default = -1, 187 default = -1,
187 type = "integer", 188 type = "integer",
188 help = "The higher possible number of clusters suggested [default :'%default']" 189 help = "The higher possible number of clusters suggested [default :'%default']"
189 ), 190 ),
190 make_option( 191 make_option(
191 "--HCPC_clusterCA", 192 "--HCPC_clusterCA",
192 default = "rows", 193 default = "rows",
193 type = "character", 194 type = "character",
194 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" 195 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']"
195 ), 196 ),
196 make_option( 197 make_option(
197 "--HCPC_kk", 198 "--HCPC_kk",
198 default = Inf, 199 default = Inf,
199 type = "numeric", 200 type = "numeric",
200 help = "The maximum number of iterations for the consolidation [default :'%default']" 201 help = "The maximum number of iterations for the consolidation [default :'%default']"
201 ), 202 ),
202 make_option( 203 make_option(
203 "--HCPC_mutual_info", 204 "--HCPC_mutual_info",
204 default = "", 205 default = "",
205 type = "character", 206 type = "character",
206 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" 207 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']"
207 ), 208 ),
208 make_option( 209 make_option(
209 "--HCPC_cell_clust", 210 "--HCPC_cell_clust",
210 default = "", 211 default = "",
211 type = "character", 212 type = "character",
212 help = "Lists cells in the clusters generated by HCPC clustering. 2-column table (cell identifiers/clusters) [default :'%default']" 213 help = "Lists cells in the clusters generated by HCPC clustering. 2-column table (cell identifiers/clusters) [default :'%default']"
213 ), 214 ),
214 make_option( 215 make_option(
215 "--HCPC_contributions", 216 "--HCPC_contributions",
216 default = "", 217 default = "",
217 type = "character", 218 type = "character",
218 help = "Table of variables (genes) most contributing to HCPC clustering [default :'%default']" 219 help = "Table of variables (genes) most contributing to HCPC clustering [default :'%default']"
219 ) 220 )
220 ) 221 )
221 222
222 opt <- parse_args(OptionParser(option_list = option_list), 223 opt <- parse_args(OptionParser(option_list = option_list),
223 args = commandArgs(trailingOnly = TRUE)) 224 args = commandArgs(trailingOnly = TRUE)
225 )
224 226
225 if (opt$HCPC_max == -1) { 227 if (opt$HCPC_max == -1) {
226 opt$HCPC_max <- NULL 228 opt$HCPC_max <- NULL
227 } 229 }
228 if (opt$HCPC_kk == -1) { 230 if (opt$HCPC_kk == -1) {
229 opt$HCPC_kk <- Inf 231 opt$HCPC_kk <- Inf
230 } 232 }
231 233
232 #### We treat data once, at the beginning of the script #### 234 #### We treat data once, at the beginning of the script ####
233 data <- read.delim( 235 data <- read.delim(
234 opt$data, 236 opt$data,
235 check.names = FALSE, 237 check.names = FALSE,
236 header = TRUE, 238 header = TRUE,
237 row.names = 1, 239 row.names = 1,
238 sep = "\t" 240 sep = "\t"
239 ) 241 )
240 # we transpose immediately, because this is the common data structure 242 # we transpose immediately, because this is the common data structure
241 data <- as.data.frame(t(data)) 243 data <- as.data.frame(t(data))
242 244
243 # we treat the factor for usage in 3 methods 245 # we treat the factor for usage in 3 methods
244 if (opt$factor != "") { 246 if (opt$factor != "") {
245 contrasting_factor <- read.delim(opt$factor, header = TRUE) 247 contrasting_factor <- read.delim(opt$factor, header = TRUE)
246 rownames(contrasting_factor) <- contrasting_factor[, 1] 248 rownames(contrasting_factor) <- contrasting_factor[, 1]
247 # we pick only the relevant values of the contrasting factor 249 # we pick only the relevant values of the contrasting factor
248 contrasting_factor <- contrasting_factor[rownames(data), ] 250 contrasting_factor <- contrasting_factor[rownames(data), ]
249 sup <- colnames(contrasting_factor)[2] 251 sup <- colnames(contrasting_factor)[2]
250 if (!is.numeric(contrasting_factor[, 2])) { 252 if (!is.numeric(contrasting_factor[, 2])) {
251 contrasting_factor[, 2] <- as.factor(contrasting_factor[, 2]) 253 contrasting_factor[, 2] <- as.factor(contrasting_factor[, 2])
252 } 254 }
253 } 255 }
254 256
255 ######### make PCA with FactoMineR ################# 257 ######### make PCA with FactoMineR #################
256 if (opt$visu_choice == "PCA") { 258 if (opt$visu_choice == "PCA") {
257 if (opt$labels) { 259 if (opt$labels) {
258 labels <- "ind" 260 labels <- "ind"
259 } else {
260 labels <- "none"
261 }
262 pdf(opt$pdf_out)
263 if (opt$factor != "") {
264 data <- cbind(data, contrasting_factor[, 2])
265 colnames(data)[length(data)] <- sup
266 if (is.numeric(contrasting_factor[, 2])) {
267 res_pca <- PCA(X = data, quanti.sup = sup, graph = FALSE)
268 pca_plot <- plot(res_pca, habillage = sup, label = labels,
269 title = "PCA graph of cells", cex = opt$item_size,
270 axes = c(opt$x_axis, opt$y_axis))
271 } else { 261 } else {
272 res_pca <- PCA(X = data, quali.sup = sup, graph = FALSE) 262 labels <- "none"
273 pca_plot <- plot(res_pca, habillage = sup, label = labels, 263 }
274 title = "PCA graph of cells", cex = opt$item_size, 264 pdf(opt$pdf_out)
275 axes = c(opt$x_axis, opt$y_axis)) 265 if (opt$factor != "") {
276 } 266 data <- cbind(data, contrasting_factor[, 2])
277 } else { 267 colnames(data)[length(data)] <- sup
278 res_pca <- PCA(X = data, graph = FALSE) 268 if (is.numeric(contrasting_factor[, 2])) {
279 pca_plot <- plot(res_pca, label = labels, 269 res_pca <- PCA(X = data, quanti.sup = sup, graph = FALSE)
280 title = "PCA graph of cells", cex = opt$item_size, 270 pca_plot <- plot(res_pca,
281 axes = c(opt$x_axis, opt$y_axis), col.ind = "deepskyblue4") 271 habillage = sup, label = labels,
282 } 272 title = "PCA graph of cells", cex = opt$item_size,
283 print(pca_plot) 273 axes = c(opt$x_axis, opt$y_axis)
284 dev.off() 274 )
275 } else {
276 res_pca <- PCA(X = data, quali.sup = sup, graph = FALSE)
277 pca_plot <- plot(res_pca,
278 habillage = sup, label = labels,
279 title = "PCA graph of cells", cex = opt$item_size,
280 axes = c(opt$x_axis, opt$y_axis)
281 )
282 }
283 } else {
284 res_pca <- PCA(X = data, graph = FALSE)
285 pca_plot <- plot(res_pca,
286 label = labels,
287 title = "PCA graph of cells", cex = opt$item_size,
288 axes = c(opt$x_axis, opt$y_axis), col.ind = "deepskyblue4"
289 )
290 }
291 print(pca_plot)
292 dev.off()
285 } 293 }
286 294
287 ########### make HCPC with FactoMineR ########## 295 ########### make HCPC with FactoMineR ##########
288 if (opt$visu_choice == "HCPC") { 296 if (opt$visu_choice == "HCPC") {
289 pdf(opt$pdf_out) 297 pdf(opt$pdf_out)
290 # HCPC starts with a PCA 298 # HCPC starts with a PCA
291 pca <- PCA(X = data, ncp = opt$HCPC_npc, graph = FALSE) 299 pca <- PCA(X = data, ncp = opt$HCPC_npc, graph = FALSE)
292 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA 300 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA
293 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering 301 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering
294 res_hcpc <- HCPC(pca, 302 res_hcpc <- HCPC(pca,
295 nb.clust = opt$HCPC_ncluster, 303 nb.clust = opt$HCPC_ncluster,
296 metric = opt$HCPC_metric, 304 metric = opt$HCPC_metric,
297 method = opt$HCPC_method, 305 method = opt$HCPC_method,
298 graph = FALSE, 306 graph = FALSE,
299 consol = opt$HCPC_consol, 307 consol = opt$HCPC_consol,
300 iter.max = opt$HCPC_itermax, 308 iter.max = opt$HCPC_itermax,
301 min = opt$HCPC_min, 309 min = opt$HCPC_min,
302 max = opt$HCPC_max, 310 max = opt$HCPC_max,
303 cluster.CA = opt$HCPC_clusterCA, 311 cluster.CA = opt$HCPC_clusterCA,
304 kk = opt$HCPC_kk) 312 kk = opt$HCPC_kk
305 # HCPC plots 313 )
306 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2 314 # HCPC plots
307 plot(res_hcpc, choice = "tree") 315 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2
308 plot(res_hcpc, choice = "bar") 316 plot(res_hcpc, choice = "tree")
309 if (opt$labels == FALSE) { 317 plot(res_hcpc, choice = "bar")
310 plot(res_hcpc, choice = "3D.map", ind.names = FALSE) 318 if (opt$labels == FALSE) {
311 plot(res_hcpc, choice = "map", label = "none") 319 plot(res_hcpc, choice = "3D.map", ind.names = FALSE)
312 } else { 320 plot(res_hcpc, choice = "map", label = "none")
313 plot(res_hcpc, choice = "3D.map") 321 } else {
314 plot(res_hcpc, choice = "map") 322 plot(res_hcpc, choice = "3D.map")
315 } 323 plot(res_hcpc, choice = "map")
316 ## Normalized Mutual Information 324 }
317 if (opt$factor != "") { 325 ## Normalized Mutual Information
318 sink(opt$HCPC_mutual_info) 326 if (opt$factor != "") {
319 cat("Relationship between input factor and its levels and the HCPC clusters") 327 sink(opt$HCPC_mutual_info)
320 res <- external_validation(true_labels = as.numeric(contrasting_factor[, 2]), 328 cat("Relationship between input factor and its levels and the HCPC clusters")
321 clusters = as.numeric(res_hcpc$data.clust$clust), 329 res <- external_validation(
322 summary_stats = TRUE) 330 true_labels = as.numeric(contrasting_factor[, 2]),
323 sink() 331 clusters = as.numeric(res_hcpc$data.clust$clust),
324 } 332 summary_stats = TRUE
325 dev.off() 333 )
326 334 sink()
327 res_clustering <- data.frame(Cell = rownames(res_hcpc$data.clust), 335 }
328 Cluster = res_hcpc$data.clust$clust) 336 dev.off()
329 # Description of cluster by most contributing variables / gene expressions 337
330 # first transform list of vectors in a list of dataframes 338 res_clustering <- data.frame(
331 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame) 339 Cell = rownames(res_hcpc$data.clust),
332 # second, transfer rownames (genes) to column in the dataframe, before rbinding 340 Cluster = res_hcpc$data.clust$clust
333 extract_description_w_genes <- Map(cbind, 341 )
334 extract_description, 342 # Description of cluster by most contributing variables / gene expressions
335 genes = lapply(extract_description, rownames)) 343 # first transform list of vectors in a list of dataframes
336 # Then collapse all dataframes with cluster_id in 1st column using {data.table} rbindlist() 344 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame)
337 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") 345 # second, transfer rownames (genes) to column in the dataframe, before rbinding
338 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # swap columns 346 extract_description_w_genes <- Map(cbind,
339 cluster_description <- cluster_description[order(cluster_description[[2]], 347 extract_description,
340 cluster_description[[8]]), ] # sort by cluster then by pval 348 genes = lapply(extract_description, rownames)
341 # Finally, output cluster description data frame 349 )
342 write.table(cluster_description, 350 # Then collapse all dataframes with cluster_id in 1st column using {data.table} rbindlist()
343 file = opt$HCPC_contributions, 351 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id")
344 sep = "\t", 352 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # swap columns
345 quote = FALSE, 353 cluster_description <- cluster_description[order(
346 col.names = TRUE, 354 cluster_description[[2]],
347 row.names = FALSE) 355 cluster_description[[8]]
348 356 ), ] # sort by cluster then by pval
349 ## Return cluster table to user 357 # Finally, output cluster description data frame
350 write.table(res_clustering, 358 write.table(cluster_description,
351 file = opt$HCPC_cell_clust, 359 file = opt$HCPC_contributions,
352 sep = "\t", 360 sep = "\t",
353 quote = FALSE, 361 quote = FALSE,
354 col.names = TRUE, 362 col.names = TRUE,
355 row.names = FALSE) 363 row.names = FALSE
364 )
365
366 ## Return cluster table to user
367 write.table(res_clustering,
368 file = opt$HCPC_cell_clust,
369 sep = "\t",
370 quote = FALSE,
371 col.names = TRUE,
372 row.names = FALSE
373 )
356 } 374 }
357 ################ t-SNE #################### 375 ################ t-SNE ####################
358 if (opt$visu_choice == "tSNE") { 376 if (opt$visu_choice == "tSNE") {
359 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility 377 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility
360 tsne_out <- Rtsne(data, 378 tsne_out <- Rtsne(data,
361 dims = opt$Rtsne_dims, 379 dims = opt$Rtsne_dims,
362 initial_dims = opt$Rtsne_initial_dims, 380 initial_dims = opt$Rtsne_initial_dims,
363 perplexity = opt$Rtsne_perplexity, 381 perplexity = opt$Rtsne_perplexity,
364 theta = opt$Rtsne_theta, 382 theta = opt$Rtsne_theta,
365 max_iter = opt$Rtsne_max_iter, 383 max_iter = opt$Rtsne_max_iter,
366 pca = opt$Rtsne_pca, 384 pca = opt$Rtsne_pca,
367 pca_center = opt$Rtsne_pca_center, 385 pca_center = opt$Rtsne_pca_center,
368 pca_scale = opt$Rtsne_pca_scale, 386 pca_scale = opt$Rtsne_pca_scale,
369 normalize = opt$Rtsne_normalize, 387 normalize = opt$Rtsne_normalize,
370 exaggeration_factor = opt$Rtsne_exaggeration_factor) 388 exaggeration_factor = opt$Rtsne_exaggeration_factor
371 embedding <- as.data.frame(tsne_out$Y[, 1:2]) 389 )
372 embedding$Class <- as.factor(rownames(data)) 390 embedding <- as.data.frame(tsne_out$Y[, 1:2])
373 gg_legend <- theme(legend.position = "right") 391 embedding$Class <- as.factor(rownames(data))
374 pointcolor <- "#E70000" 392 gg_legend <- theme(legend.position = "right")
375 pointsize <- opt$item_size * 1.5 393 pointcolor <- "#E70000"
376 the_theme <- theme( 394 pointsize <- opt$item_size * 1.5
377 panel.background = element_rect(fill = "gray100", colour = "#6D9EC1", 395 the_theme <- theme(
378 size = 2, linetype = "solid"), 396 panel.background = element_rect(
379 panel.grid.major = element_line(size = 0.5, linetype = "solid", 397 fill = "gray100", colour = "#6D9EC1",
380 colour = "#6D9EC1"), 398 size = 2, linetype = "solid"
381 panel.grid.minor = element_line(size = 0.25, linetype = "solid", 399 ),
382 colour = "darkslategray3") 400 panel.grid.major = element_line(
383 ) 401 size = 0.5, linetype = "solid",
384 if (opt$factor == "") { 402 colour = "#6D9EC1"
385 p <- ggplot(embedding, aes(x = V1, y = V2)) + 403 ),
386 geom_point(size = pointsize * 0.25, color = pointcolor) + 404 panel.grid.minor = element_line(
387 gg_legend + 405 size = 0.25, linetype = "solid",
388 xlab("t-SNE 1") + 406 colour = "darkslategray3"
389 ylab("t-SNE 2") + 407 )
390 ggtitle("t-SNE") + 408 )
391 the_theme + 409 if (opt$factor == "") {
392 if (opt$labels) { 410 p <- ggplot(embedding, aes(x = V1, y = V2)) +
393 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = pointsize, color = pointcolor) 411 geom_point(size = pointsize * 0.25, color = pointcolor) +
394 } 412 gg_legend +
395 } else { 413 xlab("t-SNE 1") +
396 if (is.numeric(contrasting_factor[, 2])) { 414 ylab("t-SNE 2") +
397 embedding$factor <- contrasting_factor[, 2] 415 ggtitle("t-SNE") +
416 the_theme +
417 if (opt$labels) {
418 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = pointsize, color = pointcolor)
419 }
398 } else { 420 } else {
399 embedding$factor <- as.factor(contrasting_factor[, 2]) 421 if (is.numeric(contrasting_factor[, 2])) {
400 } 422 embedding$factor <- contrasting_factor[, 2]
401 p <- ggplot(embedding, aes(x = V1, y = V2, color = factor)) + 423 } else {
402 geom_point(size = pointsize * 0.25) + 424 embedding$factor <- as.factor(contrasting_factor[, 2])
403 gg_legend + 425 }
404 xlab("t-SNE 1") + 426 p <- ggplot(embedding, aes(x = V1, y = V2, color = factor)) +
405 ylab("t-SNE 2") + 427 geom_point(size = pointsize * 0.25) +
406 ggtitle("t-SNE") + 428 gg_legend +
407 the_theme + 429 xlab("t-SNE 1") +
408 if (opt$labels) { 430 ylab("t-SNE 2") +
409 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = pointsize) 431 ggtitle("t-SNE") +
410 } 432 the_theme +
411 } 433 if (opt$labels) {
412 pdf(opt$pdf_out) 434 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = pointsize)
413 print(p) 435 }
414 dev.off() 436 }
415 } 437 pdf(opt$pdf_out)
438 print(p)
439 dev.off()
440 }