Mercurial > repos > artbio > gsc_high_dimensions_visualisation
comparison high_dim_visu.R @ 9:58aa18e1fe14 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_high_dimension_visualization commit 7343be2d3b1b8cb1ba0c4c55767b60dbce8f8b22
author | artbio |
---|---|
date | Thu, 07 Nov 2024 22:43:01 +0000 |
parents | fe6f76030168 |
children |
comparison
equal
deleted
inserted
replaced
8:fe6f76030168 | 9:58aa18e1fe14 |
---|---|
1 options(show.error.messages = FALSE, | 1 options( |
2 error = function() { | 2 show.error.messages = FALSE, |
3 cat(geterrmessage(), file = stderr()) | 3 error = function() { |
4 q("no", 1, FALSE) | 4 cat(geterrmessage(), file = stderr()) |
5 } | 5 q("no", 1, FALSE) |
6 } | |
6 ) | 7 ) |
7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 8 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
8 warnings() | 9 warnings() |
9 | 10 |
10 library(optparse) | 11 library(optparse) |
17 library(ClusterR) | 18 library(ClusterR) |
18 library(data.table) | 19 library(data.table) |
19 library(Polychrome) | 20 library(Polychrome) |
20 | 21 |
21 option_list <- list( | 22 option_list <- list( |
22 make_option( | 23 make_option( |
23 "--data", | 24 "--data", |
24 default = NA, | 25 default = NA, |
25 type = "character", | 26 type = "character", |
26 help = "Input file that contains expression value to visualise" | 27 help = "Input file that contains expression value to visualise" |
27 ), | 28 ), |
28 make_option( | 29 make_option( |
29 "--labels", | 30 "--labels", |
30 default = FALSE, | 31 default = FALSE, |
31 type = "logical", | 32 type = "logical", |
32 help = "add labels in scatter plots [default : '%default' ]" | 33 help = "add labels in scatter plots [default : '%default' ]" |
33 ), | 34 ), |
34 make_option( | 35 make_option( |
35 "--factor", | 36 "--factor", |
36 default = "", | 37 default = "", |
37 type = "character", | 38 type = "character", |
38 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]" | 39 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]" |
39 ), | 40 ), |
40 make_option( | 41 make_option( |
41 "--visu_choice", | 42 "--visu_choice", |
42 default = "PCA", | 43 default = "PCA", |
43 type = "character", | 44 type = "character", |
44 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" | 45 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" |
45 ), | 46 ), |
46 make_option( | 47 make_option( |
47 "--Rtsne_seed", | 48 "--Rtsne_seed", |
48 default = 42, | 49 default = 42, |
49 type = "integer", | 50 type = "integer", |
50 help = "Seed value for reproducibility [default : '%default' ]" | 51 help = "Seed value for reproducibility [default : '%default' ]" |
51 ), | 52 ), |
52 make_option( | 53 make_option( |
53 "--Rtsne_dims", | 54 "--Rtsne_dims", |
54 default = 2, | 55 default = 2, |
55 type = "integer", | 56 type = "integer", |
56 help = "Output dimensionality [default : '%default' ]" | 57 help = "Output dimensionality [default : '%default' ]" |
57 ), | 58 ), |
58 make_option( | 59 make_option( |
59 "--Rtsne_initial_dims", | 60 "--Rtsne_initial_dims", |
60 default = 50, | 61 default = 50, |
61 type = "integer", | 62 type = "integer", |
62 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]" | 63 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]" |
63 ), | 64 ), |
64 make_option( | 65 make_option( |
65 "--Rtsne_perplexity", | 66 "--Rtsne_perplexity", |
66 default = 5.0, | 67 default = 5.0, |
67 type = "numeric", | 68 type = "numeric", |
68 help = "perplexity [default : '%default' ]" | 69 help = "perplexity [default : '%default' ]" |
69 ), | 70 ), |
70 make_option( | 71 make_option( |
71 "--Rtsne_theta", | 72 "--Rtsne_theta", |
72 default = 1.0, | 73 default = 1.0, |
73 type = "numeric", | 74 type = "numeric", |
74 help = "theta [default : '%default' ]" | 75 help = "theta [default : '%default' ]" |
75 ), | 76 ), |
76 make_option( | 77 make_option( |
77 "--Rtsne_max_iter", | 78 "--Rtsne_max_iter", |
78 default = 1000, | 79 default = 1000, |
79 type = "integer", | 80 type = "integer", |
80 help = "max_iter [default : '%default' ]" | 81 help = "max_iter [default : '%default' ]" |
81 ), | 82 ), |
82 make_option( | 83 make_option( |
83 "--Rtsne_pca", | 84 "--Rtsne_pca", |
84 default = TRUE, | 85 default = TRUE, |
85 type = "logical", | 86 type = "logical", |
86 help = "Whether an initial PCA step should be performed [default : '%default' ]" | 87 help = "Whether an initial PCA step should be performed [default : '%default' ]" |
87 ), | 88 ), |
88 make_option( | 89 make_option( |
89 "--Rtsne_pca_center", | 90 "--Rtsne_pca_center", |
90 default = TRUE, | 91 default = TRUE, |
91 type = "logical", | 92 type = "logical", |
92 help = "Should data be centered before pca is applied? [default : '%default' ]" | 93 help = "Should data be centered before pca is applied? [default : '%default' ]" |
93 ), | 94 ), |
94 make_option( | 95 make_option( |
95 "--Rtsne_pca_scale", | 96 "--Rtsne_pca_scale", |
96 default = FALSE, | 97 default = FALSE, |
97 type = "logical", | 98 type = "logical", |
98 help = "Should data be scaled before pca is applied? [default : '%default' ]" | 99 help = "Should data be scaled before pca is applied? [default : '%default' ]" |
99 ), | 100 ), |
100 make_option( | 101 make_option( |
101 "--Rtsne_normalize", | 102 "--Rtsne_normalize", |
102 default = TRUE, | 103 default = TRUE, |
103 type = "logical", | 104 type = "logical", |
104 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]" | 105 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]" |
105 ), | 106 ), |
106 make_option( | 107 make_option( |
107 "--Rtsne_exaggeration_factor", | 108 "--Rtsne_exaggeration_factor", |
108 default = 12.0, | 109 default = 12.0, |
109 type = "numeric", | 110 type = "numeric", |
110 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" | 111 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" |
111 ), | 112 ), |
112 make_option( | 113 make_option( |
113 "--PCA_npc", | 114 "--PCA_npc", |
114 default = 5, | 115 default = 5, |
115 type = "integer", | 116 type = "integer", |
116 help = "number of dimensions kept in the results [default : '%default' ]" | 117 help = "number of dimensions kept in the results [default : '%default' ]" |
117 ), | 118 ), |
118 make_option( | 119 make_option( |
119 "--item_size", | 120 "--item_size", |
120 default = 1, | 121 default = 1, |
121 type = "numeric", | 122 type = "numeric", |
122 help = "Size of points/labels in PCA [default : '%default' ]" | 123 help = "Size of points/labels in PCA [default : '%default' ]" |
123 ), | 124 ), |
124 make_option( | 125 make_option( |
125 "--x_axis", | 126 "--x_axis", |
126 default = 1, | 127 default = 1, |
127 type = "integer", | 128 type = "integer", |
128 help = "PC to plot in the x axis [default : '%default' ]" | 129 help = "PC to plot in the x axis [default : '%default' ]" |
129 ), | 130 ), |
130 make_option( | 131 make_option( |
131 "--y_axis", | 132 "--y_axis", |
132 default = 2, | 133 default = 2, |
133 type = "integer", | 134 type = "integer", |
134 help = "PC to plot in the y axis [default : '%default' ]" | 135 help = "PC to plot in the y axis [default : '%default' ]" |
135 ), | 136 ), |
136 make_option( | 137 make_option( |
137 "--HCPC_ncluster", | 138 "--HCPC_ncluster", |
138 default = -1, | 139 default = -1, |
139 type = "numeric", | 140 type = "numeric", |
140 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" | 141 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" |
141 ), | 142 ), |
142 make_option( | 143 make_option( |
143 "--HCPC_npc", | 144 "--HCPC_npc", |
144 default = 5, | 145 default = 5, |
145 type = "integer", | 146 type = "integer", |
146 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" | 147 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" |
147 ), | 148 ), |
148 make_option( | 149 make_option( |
149 "--HCPC_metric", | 150 "--HCPC_metric", |
150 default = "euclidean", | 151 default = "euclidean", |
151 type = "character", | 152 type = "character", |
152 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" | 153 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" |
153 ), | 154 ), |
154 make_option( | 155 make_option( |
155 "--HCPC_method", | 156 "--HCPC_method", |
156 default = "ward", | 157 default = "ward", |
157 type = "character", | 158 type = "character", |
158 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']" | 159 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']" |
159 ), | 160 ), |
160 make_option( | 161 make_option( |
161 "--pdf_out", | 162 "--pdf_out", |
162 default = "out.pdf", | 163 default = "out.pdf", |
163 type = "character", | 164 type = "character", |
164 help = "pdf of plots [default : '%default' ]" | 165 help = "pdf of plots [default : '%default' ]" |
165 ), | 166 ), |
166 make_option( | 167 make_option( |
167 "--HCPC_consol", | 168 "--HCPC_consol", |
168 default = "TRUE", | 169 default = "TRUE", |
169 type = "logical", | 170 type = "logical", |
170 help = "If TRUE, a k-means consolidation is performed [default :'%default']" | 171 help = "If TRUE, a k-means consolidation is performed [default :'%default']" |
171 ), | 172 ), |
172 make_option( | 173 make_option( |
173 "--HCPC_itermax", | 174 "--HCPC_itermax", |
174 default = "10", | 175 default = "10", |
175 type = "integer", | 176 type = "integer", |
176 help = "The maximum number of iterations for the consolidation [default :'%default']" | 177 help = "The maximum number of iterations for the consolidation [default :'%default']" |
177 ), | 178 ), |
178 make_option( | 179 make_option( |
179 "--HCPC_min", | 180 "--HCPC_min", |
180 default = "3", | 181 default = "3", |
181 type = "integer", | 182 type = "integer", |
182 help = "The least possible number of clusters suggested [default :'%default']" | 183 help = "The least possible number of clusters suggested [default :'%default']" |
183 ), | 184 ), |
184 make_option( | 185 make_option( |
185 "--HCPC_max", | 186 "--HCPC_max", |
186 default = -1, | 187 default = -1, |
187 type = "integer", | 188 type = "integer", |
188 help = "The higher possible number of clusters suggested [default :'%default']" | 189 help = "The higher possible number of clusters suggested [default :'%default']" |
189 ), | 190 ), |
190 make_option( | 191 make_option( |
191 "--HCPC_clusterCA", | 192 "--HCPC_clusterCA", |
192 default = "rows", | 193 default = "rows", |
193 type = "character", | 194 type = "character", |
194 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" | 195 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" |
195 ), | 196 ), |
196 make_option( | 197 make_option( |
197 "--HCPC_kk", | 198 "--HCPC_kk", |
198 default = Inf, | 199 default = Inf, |
199 type = "numeric", | 200 type = "numeric", |
200 help = "The maximum number of iterations for the consolidation [default :'%default']" | 201 help = "The maximum number of iterations for the consolidation [default :'%default']" |
201 ), | 202 ), |
202 make_option( | 203 make_option( |
203 "--HCPC_mutual_info", | 204 "--HCPC_mutual_info", |
204 default = "", | 205 default = "", |
205 type = "character", | 206 type = "character", |
206 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" | 207 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" |
207 ), | 208 ), |
208 make_option( | 209 make_option( |
209 "--HCPC_cell_clust", | 210 "--HCPC_cell_clust", |
210 default = "", | 211 default = "", |
211 type = "character", | 212 type = "character", |
212 help = "Lists cells in the clusters generated by HCPC clustering. 2-column table (cell identifiers/clusters) [default :'%default']" | 213 help = "Lists cells in the clusters generated by HCPC clustering. 2-column table (cell identifiers/clusters) [default :'%default']" |
213 ), | 214 ), |
214 make_option( | 215 make_option( |
215 "--HCPC_contributions", | 216 "--HCPC_contributions", |
216 default = "", | 217 default = "", |
217 type = "character", | 218 type = "character", |
218 help = "Table of variables (genes) most contributing to HCPC clustering [default :'%default']" | 219 help = "Table of variables (genes) most contributing to HCPC clustering [default :'%default']" |
219 ) | 220 ) |
220 ) | 221 ) |
221 | 222 |
222 opt <- parse_args(OptionParser(option_list = option_list), | 223 opt <- parse_args(OptionParser(option_list = option_list), |
223 args = commandArgs(trailingOnly = TRUE)) | 224 args = commandArgs(trailingOnly = TRUE) |
225 ) | |
224 | 226 |
225 if (opt$HCPC_max == -1) { | 227 if (opt$HCPC_max == -1) { |
226 opt$HCPC_max <- NULL | 228 opt$HCPC_max <- NULL |
227 } | 229 } |
228 if (opt$HCPC_kk == -1) { | 230 if (opt$HCPC_kk == -1) { |
229 opt$HCPC_kk <- Inf | 231 opt$HCPC_kk <- Inf |
230 } | 232 } |
231 | 233 |
232 #### We treat data once, at the beginning of the script #### | 234 #### We treat data once, at the beginning of the script #### |
233 data <- read.delim( | 235 data <- read.delim( |
234 opt$data, | 236 opt$data, |
235 check.names = FALSE, | 237 check.names = FALSE, |
236 header = TRUE, | 238 header = TRUE, |
237 row.names = 1, | 239 row.names = 1, |
238 sep = "\t" | 240 sep = "\t" |
239 ) | 241 ) |
240 # we transpose immediately, because this is the common data structure | 242 # we transpose immediately, because this is the common data structure |
241 data <- as.data.frame(t(data)) | 243 data <- as.data.frame(t(data)) |
242 | 244 |
243 # we treat the factor for usage in 3 methods | 245 # we treat the factor for usage in 3 methods |
244 if (opt$factor != "") { | 246 if (opt$factor != "") { |
245 contrasting_factor <- read.delim(opt$factor, header = TRUE) | 247 contrasting_factor <- read.delim(opt$factor, header = TRUE) |
246 rownames(contrasting_factor) <- contrasting_factor[, 1] | 248 rownames(contrasting_factor) <- contrasting_factor[, 1] |
247 # we pick only the relevant values of the contrasting factor | 249 # we pick only the relevant values of the contrasting factor |
248 contrasting_factor <- contrasting_factor[rownames(data), ] | 250 contrasting_factor <- contrasting_factor[rownames(data), ] |
249 sup <- colnames(contrasting_factor)[2] | 251 sup <- colnames(contrasting_factor)[2] |
250 if (!is.numeric(contrasting_factor[, 2])) { | 252 if (!is.numeric(contrasting_factor[, 2])) { |
251 contrasting_factor[, 2] <- as.factor(contrasting_factor[, 2]) | 253 contrasting_factor[, 2] <- as.factor(contrasting_factor[, 2]) |
252 } | 254 } |
253 } | 255 } |
254 | 256 |
255 ######### make PCA with FactoMineR ################# | 257 ######### make PCA with FactoMineR ################# |
256 if (opt$visu_choice == "PCA") { | 258 if (opt$visu_choice == "PCA") { |
257 if (opt$labels) { | 259 if (opt$labels) { |
258 labels <- "ind" | 260 labels <- "ind" |
259 } else { | |
260 labels <- "none" | |
261 } | |
262 pdf(opt$pdf_out) | |
263 if (opt$factor != "") { | |
264 data <- cbind(data, contrasting_factor[, 2]) | |
265 colnames(data)[length(data)] <- sup | |
266 if (is.numeric(contrasting_factor[, 2])) { | |
267 res_pca <- PCA(X = data, quanti.sup = sup, graph = FALSE) | |
268 pca_plot <- plot(res_pca, habillage = sup, label = labels, | |
269 title = "PCA graph of cells", cex = opt$item_size, | |
270 axes = c(opt$x_axis, opt$y_axis)) | |
271 } else { | 261 } else { |
272 res_pca <- PCA(X = data, quali.sup = sup, graph = FALSE) | 262 labels <- "none" |
273 pca_plot <- plot(res_pca, habillage = sup, label = labels, | 263 } |
274 title = "PCA graph of cells", cex = opt$item_size, | 264 pdf(opt$pdf_out) |
275 axes = c(opt$x_axis, opt$y_axis)) | 265 if (opt$factor != "") { |
276 } | 266 data <- cbind(data, contrasting_factor[, 2]) |
277 } else { | 267 colnames(data)[length(data)] <- sup |
278 res_pca <- PCA(X = data, graph = FALSE) | 268 if (is.numeric(contrasting_factor[, 2])) { |
279 pca_plot <- plot(res_pca, label = labels, | 269 res_pca <- PCA(X = data, quanti.sup = sup, graph = FALSE) |
280 title = "PCA graph of cells", cex = opt$item_size, | 270 pca_plot <- plot(res_pca, |
281 axes = c(opt$x_axis, opt$y_axis), col.ind = "deepskyblue4") | 271 habillage = sup, label = labels, |
282 } | 272 title = "PCA graph of cells", cex = opt$item_size, |
283 print(pca_plot) | 273 axes = c(opt$x_axis, opt$y_axis) |
284 dev.off() | 274 ) |
275 } else { | |
276 res_pca <- PCA(X = data, quali.sup = sup, graph = FALSE) | |
277 pca_plot <- plot(res_pca, | |
278 habillage = sup, label = labels, | |
279 title = "PCA graph of cells", cex = opt$item_size, | |
280 axes = c(opt$x_axis, opt$y_axis) | |
281 ) | |
282 } | |
283 } else { | |
284 res_pca <- PCA(X = data, graph = FALSE) | |
285 pca_plot <- plot(res_pca, | |
286 label = labels, | |
287 title = "PCA graph of cells", cex = opt$item_size, | |
288 axes = c(opt$x_axis, opt$y_axis), col.ind = "deepskyblue4" | |
289 ) | |
290 } | |
291 print(pca_plot) | |
292 dev.off() | |
285 } | 293 } |
286 | 294 |
287 ########### make HCPC with FactoMineR ########## | 295 ########### make HCPC with FactoMineR ########## |
288 if (opt$visu_choice == "HCPC") { | 296 if (opt$visu_choice == "HCPC") { |
289 pdf(opt$pdf_out) | 297 pdf(opt$pdf_out) |
290 # HCPC starts with a PCA | 298 # HCPC starts with a PCA |
291 pca <- PCA(X = data, ncp = opt$HCPC_npc, graph = FALSE) | 299 pca <- PCA(X = data, ncp = opt$HCPC_npc, graph = FALSE) |
292 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA | 300 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA |
293 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering | 301 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering |
294 res_hcpc <- HCPC(pca, | 302 res_hcpc <- HCPC(pca, |
295 nb.clust = opt$HCPC_ncluster, | 303 nb.clust = opt$HCPC_ncluster, |
296 metric = opt$HCPC_metric, | 304 metric = opt$HCPC_metric, |
297 method = opt$HCPC_method, | 305 method = opt$HCPC_method, |
298 graph = FALSE, | 306 graph = FALSE, |
299 consol = opt$HCPC_consol, | 307 consol = opt$HCPC_consol, |
300 iter.max = opt$HCPC_itermax, | 308 iter.max = opt$HCPC_itermax, |
301 min = opt$HCPC_min, | 309 min = opt$HCPC_min, |
302 max = opt$HCPC_max, | 310 max = opt$HCPC_max, |
303 cluster.CA = opt$HCPC_clusterCA, | 311 cluster.CA = opt$HCPC_clusterCA, |
304 kk = opt$HCPC_kk) | 312 kk = opt$HCPC_kk |
305 # HCPC plots | 313 ) |
306 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2 | 314 # HCPC plots |
307 plot(res_hcpc, choice = "tree") | 315 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2 |
308 plot(res_hcpc, choice = "bar") | 316 plot(res_hcpc, choice = "tree") |
309 if (opt$labels == FALSE) { | 317 plot(res_hcpc, choice = "bar") |
310 plot(res_hcpc, choice = "3D.map", ind.names = FALSE) | 318 if (opt$labels == FALSE) { |
311 plot(res_hcpc, choice = "map", label = "none") | 319 plot(res_hcpc, choice = "3D.map", ind.names = FALSE) |
312 } else { | 320 plot(res_hcpc, choice = "map", label = "none") |
313 plot(res_hcpc, choice = "3D.map") | 321 } else { |
314 plot(res_hcpc, choice = "map") | 322 plot(res_hcpc, choice = "3D.map") |
315 } | 323 plot(res_hcpc, choice = "map") |
316 ## Normalized Mutual Information | 324 } |
317 if (opt$factor != "") { | 325 ## Normalized Mutual Information |
318 sink(opt$HCPC_mutual_info) | 326 if (opt$factor != "") { |
319 cat("Relationship between input factor and its levels and the HCPC clusters") | 327 sink(opt$HCPC_mutual_info) |
320 res <- external_validation(true_labels = as.numeric(contrasting_factor[, 2]), | 328 cat("Relationship between input factor and its levels and the HCPC clusters") |
321 clusters = as.numeric(res_hcpc$data.clust$clust), | 329 res <- external_validation( |
322 summary_stats = TRUE) | 330 true_labels = as.numeric(contrasting_factor[, 2]), |
323 sink() | 331 clusters = as.numeric(res_hcpc$data.clust$clust), |
324 } | 332 summary_stats = TRUE |
325 dev.off() | 333 ) |
326 | 334 sink() |
327 res_clustering <- data.frame(Cell = rownames(res_hcpc$data.clust), | 335 } |
328 Cluster = res_hcpc$data.clust$clust) | 336 dev.off() |
329 # Description of cluster by most contributing variables / gene expressions | 337 |
330 # first transform list of vectors in a list of dataframes | 338 res_clustering <- data.frame( |
331 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame) | 339 Cell = rownames(res_hcpc$data.clust), |
332 # second, transfer rownames (genes) to column in the dataframe, before rbinding | 340 Cluster = res_hcpc$data.clust$clust |
333 extract_description_w_genes <- Map(cbind, | 341 ) |
334 extract_description, | 342 # Description of cluster by most contributing variables / gene expressions |
335 genes = lapply(extract_description, rownames)) | 343 # first transform list of vectors in a list of dataframes |
336 # Then collapse all dataframes with cluster_id in 1st column using {data.table} rbindlist() | 344 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame) |
337 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") | 345 # second, transfer rownames (genes) to column in the dataframe, before rbinding |
338 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # swap columns | 346 extract_description_w_genes <- Map(cbind, |
339 cluster_description <- cluster_description[order(cluster_description[[2]], | 347 extract_description, |
340 cluster_description[[8]]), ] # sort by cluster then by pval | 348 genes = lapply(extract_description, rownames) |
341 # Finally, output cluster description data frame | 349 ) |
342 write.table(cluster_description, | 350 # Then collapse all dataframes with cluster_id in 1st column using {data.table} rbindlist() |
343 file = opt$HCPC_contributions, | 351 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") |
344 sep = "\t", | 352 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # swap columns |
345 quote = FALSE, | 353 cluster_description <- cluster_description[order( |
346 col.names = TRUE, | 354 cluster_description[[2]], |
347 row.names = FALSE) | 355 cluster_description[[8]] |
348 | 356 ), ] # sort by cluster then by pval |
349 ## Return cluster table to user | 357 # Finally, output cluster description data frame |
350 write.table(res_clustering, | 358 write.table(cluster_description, |
351 file = opt$HCPC_cell_clust, | 359 file = opt$HCPC_contributions, |
352 sep = "\t", | 360 sep = "\t", |
353 quote = FALSE, | 361 quote = FALSE, |
354 col.names = TRUE, | 362 col.names = TRUE, |
355 row.names = FALSE) | 363 row.names = FALSE |
364 ) | |
365 | |
366 ## Return cluster table to user | |
367 write.table(res_clustering, | |
368 file = opt$HCPC_cell_clust, | |
369 sep = "\t", | |
370 quote = FALSE, | |
371 col.names = TRUE, | |
372 row.names = FALSE | |
373 ) | |
356 } | 374 } |
357 ################ t-SNE #################### | 375 ################ t-SNE #################### |
358 if (opt$visu_choice == "tSNE") { | 376 if (opt$visu_choice == "tSNE") { |
359 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility | 377 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility |
360 tsne_out <- Rtsne(data, | 378 tsne_out <- Rtsne(data, |
361 dims = opt$Rtsne_dims, | 379 dims = opt$Rtsne_dims, |
362 initial_dims = opt$Rtsne_initial_dims, | 380 initial_dims = opt$Rtsne_initial_dims, |
363 perplexity = opt$Rtsne_perplexity, | 381 perplexity = opt$Rtsne_perplexity, |
364 theta = opt$Rtsne_theta, | 382 theta = opt$Rtsne_theta, |
365 max_iter = opt$Rtsne_max_iter, | 383 max_iter = opt$Rtsne_max_iter, |
366 pca = opt$Rtsne_pca, | 384 pca = opt$Rtsne_pca, |
367 pca_center = opt$Rtsne_pca_center, | 385 pca_center = opt$Rtsne_pca_center, |
368 pca_scale = opt$Rtsne_pca_scale, | 386 pca_scale = opt$Rtsne_pca_scale, |
369 normalize = opt$Rtsne_normalize, | 387 normalize = opt$Rtsne_normalize, |
370 exaggeration_factor = opt$Rtsne_exaggeration_factor) | 388 exaggeration_factor = opt$Rtsne_exaggeration_factor |
371 embedding <- as.data.frame(tsne_out$Y[, 1:2]) | 389 ) |
372 embedding$Class <- as.factor(rownames(data)) | 390 embedding <- as.data.frame(tsne_out$Y[, 1:2]) |
373 gg_legend <- theme(legend.position = "right") | 391 embedding$Class <- as.factor(rownames(data)) |
374 pointcolor <- "#E70000" | 392 gg_legend <- theme(legend.position = "right") |
375 pointsize <- opt$item_size * 1.5 | 393 pointcolor <- "#E70000" |
376 the_theme <- theme( | 394 pointsize <- opt$item_size * 1.5 |
377 panel.background = element_rect(fill = "gray100", colour = "#6D9EC1", | 395 the_theme <- theme( |
378 size = 2, linetype = "solid"), | 396 panel.background = element_rect( |
379 panel.grid.major = element_line(size = 0.5, linetype = "solid", | 397 fill = "gray100", colour = "#6D9EC1", |
380 colour = "#6D9EC1"), | 398 size = 2, linetype = "solid" |
381 panel.grid.minor = element_line(size = 0.25, linetype = "solid", | 399 ), |
382 colour = "darkslategray3") | 400 panel.grid.major = element_line( |
383 ) | 401 size = 0.5, linetype = "solid", |
384 if (opt$factor == "") { | 402 colour = "#6D9EC1" |
385 p <- ggplot(embedding, aes(x = V1, y = V2)) + | 403 ), |
386 geom_point(size = pointsize * 0.25, color = pointcolor) + | 404 panel.grid.minor = element_line( |
387 gg_legend + | 405 size = 0.25, linetype = "solid", |
388 xlab("t-SNE 1") + | 406 colour = "darkslategray3" |
389 ylab("t-SNE 2") + | 407 ) |
390 ggtitle("t-SNE") + | 408 ) |
391 the_theme + | 409 if (opt$factor == "") { |
392 if (opt$labels) { | 410 p <- ggplot(embedding, aes(x = V1, y = V2)) + |
393 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = pointsize, color = pointcolor) | 411 geom_point(size = pointsize * 0.25, color = pointcolor) + |
394 } | 412 gg_legend + |
395 } else { | 413 xlab("t-SNE 1") + |
396 if (is.numeric(contrasting_factor[, 2])) { | 414 ylab("t-SNE 2") + |
397 embedding$factor <- contrasting_factor[, 2] | 415 ggtitle("t-SNE") + |
416 the_theme + | |
417 if (opt$labels) { | |
418 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = pointsize, color = pointcolor) | |
419 } | |
398 } else { | 420 } else { |
399 embedding$factor <- as.factor(contrasting_factor[, 2]) | 421 if (is.numeric(contrasting_factor[, 2])) { |
400 } | 422 embedding$factor <- contrasting_factor[, 2] |
401 p <- ggplot(embedding, aes(x = V1, y = V2, color = factor)) + | 423 } else { |
402 geom_point(size = pointsize * 0.25) + | 424 embedding$factor <- as.factor(contrasting_factor[, 2]) |
403 gg_legend + | 425 } |
404 xlab("t-SNE 1") + | 426 p <- ggplot(embedding, aes(x = V1, y = V2, color = factor)) + |
405 ylab("t-SNE 2") + | 427 geom_point(size = pointsize * 0.25) + |
406 ggtitle("t-SNE") + | 428 gg_legend + |
407 the_theme + | 429 xlab("t-SNE 1") + |
408 if (opt$labels) { | 430 ylab("t-SNE 2") + |
409 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = pointsize) | 431 ggtitle("t-SNE") + |
410 } | 432 the_theme + |
411 } | 433 if (opt$labels) { |
412 pdf(opt$pdf_out) | 434 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = pointsize) |
413 print(p) | 435 } |
414 dev.off() | 436 } |
415 } | 437 pdf(opt$pdf_out) |
438 print(p) | |
439 dev.off() | |
440 } |