Mercurial > repos > artbio > gsc_high_dimensions_visualisation
comparison high_dim_visu.R @ 7:18a1dc4aec4a draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_high_dimension_visualization commit ef87a68f9a33f8418699d97627eb5f49a5e2c4a6
author | artbio |
---|---|
date | Sun, 21 May 2023 16:26:23 +0000 |
parents | 19bef589f876 |
children | fe6f76030168 |
comparison
equal
deleted
inserted
replaced
6:19bef589f876 | 7:18a1dc4aec4a |
---|---|
1 # load packages that are provided in the conda env | 1 options(show.error.messages = FALSE, |
2 options( show.error.messages=F, | 2 error = function() { |
3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 3 cat(geterrmessage(), file = stderr()) |
4 q("no", 1, FALSE) | |
5 } | |
6 ) | |
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
5 warnings() | 8 warnings() |
6 library(optparse) | 9 library(optparse) |
7 library(FactoMineR) | 10 library(FactoMineR) |
8 library(factoextra) | 11 library(factoextra) |
10 library(ggplot2) | 13 library(ggplot2) |
11 library(ggfortify) | 14 library(ggfortify) |
12 library(RColorBrewer) | 15 library(RColorBrewer) |
13 library(ClusterR) | 16 library(ClusterR) |
14 library(data.table) | 17 library(data.table) |
18 library(Polychrome) | |
15 | 19 |
16 # Arguments | 20 # Arguments |
17 option_list = list( | 21 option_list <- list( |
18 make_option( | 22 make_option( |
19 "--data", | 23 "--data", |
20 default = NA, | 24 default = NA, |
21 type = 'character', | 25 type = "character", |
22 help = "Input file that contains expression value to visualise" | 26 help = "Input file that contains expression value to visualise" |
23 ), | 27 ), |
24 make_option( | 28 make_option( |
25 "--sep", | 29 "--sep", |
26 default = '\t', | 30 default = "\t", |
27 type = 'character', | 31 type = "character", |
28 help = "File separator [default : '%default' ]" | 32 help = "File separator [default : '%default' ]" |
29 ), | 33 ), |
30 make_option( | 34 make_option( |
31 "--colnames", | 35 "--colnames", |
32 default = TRUE, | 36 default = TRUE, |
33 type = 'logical', | 37 type = "logical", |
34 help = "Consider first line as header ? [default : '%default' ]" | 38 help = "Consider first line as header ? [default : '%default' ]" |
35 ), | 39 ), |
36 make_option( | 40 make_option( |
37 "--out", | 41 "--out", |
38 default = "res.tab", | 42 default = "res.tab", |
39 type = 'character', | 43 type = "character", |
40 help = "Output name [default : '%default' ]" | 44 help = "Output name [default : '%default' ]" |
41 ), | 45 ), |
42 make_option( | 46 make_option( |
43 "--labels", | 47 "--labels", |
44 default = FALSE, | 48 default = FALSE, |
45 type = 'logical', | 49 type = "logical", |
46 help = "add labels in scatter plots [default : '%default' ]" | 50 help = "add labels in scatter plots [default : '%default' ]" |
47 ), | 51 ), |
48 make_option( | 52 make_option( |
49 "--factor", | 53 "--factor", |
50 default = '', | 54 default = "", |
51 type = 'character', | 55 type = "character", |
52 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]" | 56 help = "A two column table that specifies factor levels for contrasting data [default : '%default' ]" |
53 ), | 57 ), |
54 make_option( | 58 make_option( |
55 "--visu_choice", | 59 "--visu_choice", |
56 default = 'PCA', | 60 default = "PCA", |
57 type = 'character', | 61 type = "character", |
58 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" | 62 help = "visualisation method ('PCA', 'tSNE', 'HCPC') [default : '%default' ]" |
59 ), | 63 ), |
60 make_option( | 64 make_option( |
61 "--table_coordinates", | 65 "--table_coordinates", |
62 default = '', | 66 default = "", |
63 type = 'character', | 67 type = "character", |
64 help = "Table with plot coordinates [default : '%default' ]" | 68 help = "Table with plot coordinates [default : '%default' ]" |
65 ), | 69 ), |
66 make_option( | 70 make_option( |
67 "--Rtsne_seed", | 71 "--Rtsne_seed", |
68 default = 42, | 72 default = 42, |
69 type = 'integer', | 73 type = "integer", |
70 help = "Seed value for reproducibility [default : '%default' ]" | 74 help = "Seed value for reproducibility [default : '%default' ]" |
71 ), | 75 ), |
72 make_option( | 76 make_option( |
73 "--Rtsne_dims", | 77 "--Rtsne_dims", |
74 default = 2, | 78 default = 2, |
75 type = 'integer', | 79 type = "integer", |
76 help = "Output dimensionality [default : '%default' ]" | 80 help = "Output dimensionality [default : '%default' ]" |
77 ), | 81 ), |
78 make_option( | 82 make_option( |
79 "--Rtsne_initial_dims", | 83 "--Rtsne_initial_dims", |
80 default = 50, | 84 default = 50, |
81 type = 'integer', | 85 type = "integer", |
82 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]" | 86 help = "The number of dimensions that should be retained in the initial PCA step [default : '%default' ]" |
83 ), | 87 ), |
84 make_option( | 88 make_option( |
85 "--Rtsne_perplexity", | 89 "--Rtsne_perplexity", |
86 default = 5.0, | 90 default = 5.0, |
87 type = 'numeric', | 91 type = "numeric", |
88 help = "perplexity [default : '%default' ]" | 92 help = "perplexity [default : '%default' ]" |
89 ), | 93 ), |
90 make_option( | 94 make_option( |
91 "--Rtsne_theta", | 95 "--Rtsne_theta", |
92 default = 1.0, | 96 default = 1.0, |
93 type = 'numeric', | 97 type = "numeric", |
94 help = "theta [default : '%default' ]" | 98 help = "theta [default : '%default' ]" |
95 ), | 99 ), |
96 make_option( | 100 make_option( |
97 "--Rtsne_max_iter", | 101 "--Rtsne_max_iter", |
98 default = 1000, | 102 default = 1000, |
99 type = 'integer', | 103 type = "integer", |
100 help = "max_iter [default : '%default' ]" | 104 help = "max_iter [default : '%default' ]" |
101 ), | 105 ), |
102 make_option( | 106 make_option( |
103 "--Rtsne_pca", | 107 "--Rtsne_pca", |
104 default = TRUE, | 108 default = TRUE, |
105 type = 'logical', | 109 type = "logical", |
106 help = "Whether an initial PCA step should be performed [default : '%default' ]" | 110 help = "Whether an initial PCA step should be performed [default : '%default' ]" |
107 ), | 111 ), |
108 make_option( | 112 make_option( |
109 "--Rtsne_pca_center", | 113 "--Rtsne_pca_center", |
110 default = TRUE, | 114 default = TRUE, |
111 type = 'logical', | 115 type = "logical", |
112 help = "Should data be centered before pca is applied? [default : '%default' ]" | 116 help = "Should data be centered before pca is applied? [default : '%default' ]" |
113 ), | 117 ), |
114 make_option( | 118 make_option( |
115 "--Rtsne_pca_scale", | 119 "--Rtsne_pca_scale", |
116 default = FALSE, | 120 default = FALSE, |
117 type = 'logical', | 121 type = "logical", |
118 help = "Should data be scaled before pca is applied? [default : '%default' ]" | 122 help = "Should data be scaled before pca is applied? [default : '%default' ]" |
119 ), | 123 ), |
120 make_option( | 124 make_option( |
121 "--Rtsne_normalize", | 125 "--Rtsne_normalize", |
122 default = TRUE, | 126 default = TRUE, |
123 type = 'logical', | 127 type = "logical", |
124 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]" | 128 help = "Should data be normalized internally prior to distance calculations? [default : '%default' ]" |
125 ), | 129 ), |
126 make_option( | 130 make_option( |
127 "--Rtsne_exaggeration_factor", | 131 "--Rtsne_exaggeration_factor", |
128 default = 12.0, | 132 default = 12.0, |
129 type = 'numeric', | 133 type = "numeric", |
130 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" | 134 help = " Exaggeration factor used to multiply the P matrix in the first part of the optimization [default : '%default' ]" |
131 ), | 135 ), |
132 make_option( | 136 make_option( |
133 "--PCA_npc", | 137 "--PCA_npc", |
134 default = 5, | 138 default = 5, |
135 type = 'integer', | 139 type = "integer", |
136 help = "number of dimensions kept in the results [default : '%default' ]" | 140 help = "number of dimensions kept in the results [default : '%default' ]" |
137 ), | 141 ), |
138 make_option( | 142 make_option( |
139 "--PCA_x_axis", | 143 "--PCA_x_axis", |
140 default = 1, | 144 default = 1, |
141 type = 'integer', | 145 type = "integer", |
142 help = "PC to plot in the x axis [default : '%default' ]" | 146 help = "PC to plot in the x axis [default : '%default' ]" |
143 ), | 147 ), |
144 make_option( | 148 make_option( |
145 "--PCA_y_axis", | 149 "--PCA_y_axis", |
146 default = 2, | 150 default = 2, |
147 type = 'integer', | 151 type = "integer", |
148 help = "PC to plot in the y axis [default : '%default' ]" | 152 help = "PC to plot in the y axis [default : '%default' ]" |
149 ), | 153 ), |
150 make_option( | 154 make_option( |
151 "--HCPC_ncluster", | 155 "--HCPC_ncluster", |
152 default = -1, | 156 default = -1, |
153 type = 'numeric', | 157 type = "numeric", |
154 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" | 158 help = "nb.clust, number of clusters to consider in the hierarchical clustering. [default : -1 let HCPC to optimize the number]" |
155 ), | 159 ), |
156 make_option( | 160 make_option( |
157 "--HCPC_npc", | 161 "--HCPC_npc", |
158 default = 5, | 162 default = 5, |
159 type = 'integer', | 163 type = "integer", |
160 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" | 164 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" |
161 ), | 165 ), |
162 make_option( | 166 make_option( |
163 "--HCPC_metric", | 167 "--HCPC_metric", |
164 default = 'euclidean', | 168 default = "euclidean", |
165 type = 'character', | 169 type = "character", |
166 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" | 170 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" |
167 ), | 171 ), |
168 make_option( | 172 make_option( |
169 "--HCPC_method", | 173 "--HCPC_method", |
170 default = 'ward', | 174 default = "ward", |
171 type = 'character', | 175 type = "character", |
172 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']" | 176 help = "Clustering method between 'ward','average','single', 'complete', 'weighted' [default :'%default']" |
173 ), | 177 ), |
174 make_option( | 178 make_option( |
175 "--pdf_out", | 179 "--pdf_out", |
176 default = "out.pdf", | 180 default = "out.pdf", |
177 type = 'character', | 181 type = "character", |
178 help = "pdf of plots [default : '%default' ]" | 182 help = "pdf of plots [default : '%default' ]" |
179 ), | 183 ), |
180 make_option( | 184 make_option( |
181 "--HCPC_consol", | 185 "--HCPC_consol", |
182 default = 'TRUE', | 186 default = "TRUE", |
183 type = 'logical', | 187 type = "logical", |
184 help = "If TRUE, a k-means consolidation is performed [default :'%default']" | 188 help = "If TRUE, a k-means consolidation is performed [default :'%default']" |
185 ), | 189 ), |
186 make_option( | 190 make_option( |
187 "--HCPC_itermax", | 191 "--HCPC_itermax", |
188 default = '10', | 192 default = "10", |
189 type = 'integer', | 193 type = "integer", |
190 help = "The maximum number of iterations for the consolidation [default :'%default']" | 194 help = "The maximum number of iterations for the consolidation [default :'%default']" |
191 ), | 195 ), |
192 make_option( | 196 make_option( |
193 "--HCPC_min", | 197 "--HCPC_min", |
194 default = '3', | 198 default = "3", |
195 type = 'integer', | 199 type = "integer", |
196 help = "The least possible number of clusters suggested [default :'%default']" | 200 help = "The least possible number of clusters suggested [default :'%default']" |
197 ), | 201 ), |
198 make_option( | 202 make_option( |
199 "--HCPC_max", | 203 "--HCPC_max", |
200 default = -1, | 204 default = -1, |
201 type = 'integer', | 205 type = "integer", |
202 help = "The higher possible number of clusters suggested [default :'%default']" | 206 help = "The higher possible number of clusters suggested [default :'%default']" |
203 ), | 207 ), |
204 make_option( | 208 make_option( |
205 "--HCPC_clusterCA", | 209 "--HCPC_clusterCA", |
206 default = 'rows', | 210 default = "rows", |
207 type = 'character', | 211 type = "character", |
208 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" | 212 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" |
209 ), | 213 ), |
210 make_option( | 214 make_option( |
211 "--HCPC_kk", | 215 "--HCPC_kk", |
212 default = Inf, | 216 default = Inf, |
213 type = 'numeric', | 217 type = "numeric", |
214 help = "The maximum number of iterations for the consolidation [default :'%default']" | 218 help = "The maximum number of iterations for the consolidation [default :'%default']" |
215 ), | 219 ), |
216 make_option( | 220 make_option( |
217 "--HCPC_clust", | 221 "--HCPC_clust", |
218 default = "", | 222 default = "", |
219 type = 'character', | 223 type = "character", |
220 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" | 224 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" |
221 ), | 225 ), |
222 make_option( | 226 make_option( |
223 "--HCPC_mutual_info", | 227 "--HCPC_mutual_info", |
224 default = "", | 228 default = "", |
231 type = "character", | 235 type = "character", |
232 help = "Output file with variables most contributing to clustering [default :'%default']" | 236 help = "Output file with variables most contributing to clustering [default :'%default']" |
233 ) | 237 ) |
234 ) | 238 ) |
235 | 239 |
236 opt = parse_args(OptionParser(option_list = option_list), | 240 opt <- parse_args(OptionParser(option_list = option_list), |
237 args = commandArgs(trailingOnly = TRUE)) | 241 args = commandArgs(trailingOnly = TRUE)) |
238 | 242 |
239 if (opt$sep == "tab") {opt$sep <- "\t"} | 243 if (opt$sep == "tab") { |
240 if (opt$sep == "comma") {opt$sep <- ","} | 244 opt$sep <- "\t" |
241 if(opt$HCPC_max == -1) {opt$HCPC_max <- NULL} | 245 } |
242 if(opt$HCPC_kk == -1) {opt$HCPC_kk <- Inf} | 246 if (opt$sep == "comma") { |
247 opt$sep <- "," | |
248 } | |
249 if (opt$HCPC_max == -1) { | |
250 opt$HCPC_max <- NULL | |
251 } | |
252 if (opt$HCPC_kk == -1) { | |
253 opt$HCPC_kk <- Inf | |
254 } | |
243 | 255 |
244 ##Add legend to plot() | 256 ##Add legend to plot() |
245 legend.col <- function(col, lev){ | 257 legend_col <- function(col, lev) { |
246 | 258 |
247 opar <- par | 259 opar <- par |
248 | 260 |
249 n <- length(col) | 261 n <- length(col) |
250 | 262 |
251 bx <- par("usr") | 263 bx <- par("usr") |
252 | 264 |
253 box.cx <- c(bx[2] + (bx[2] - bx[1]) / 1000, | 265 box_cx <- c(bx[2] + (bx[2] - bx[1]) / 1000, |
254 bx[2] + (bx[2] - bx[1]) / 1000 + (bx[2] - bx[1]) / 50) | 266 bx[2] + (bx[2] - bx[1]) / 1000 + (bx[2] - bx[1]) / 50) |
255 box.cy <- c(bx[3], bx[3]) | 267 box_cy <- c(bx[3], bx[3]) |
256 box.sy <- (bx[4] - bx[3]) / n | 268 box_sy <- (bx[4] - bx[3]) / n |
257 | 269 |
258 xx <- rep(box.cx, each = 2) | 270 xx <- rep(box_cx, each = 2) |
259 | 271 |
260 par(xpd = TRUE) | 272 par(xpd = TRUE) |
261 for(i in 1:n){ | 273 for (i in 1:n) { |
262 | 274 yy <- c(box_cy[1] + (box_sy * (i - 1)), |
263 yy <- c(box.cy[1] + (box.sy * (i - 1)), | 275 box_cy[1] + (box_sy * (i)), |
264 box.cy[1] + (box.sy * (i)), | 276 box_cy[1] + (box_sy * (i)), |
265 box.cy[1] + (box.sy * (i)), | 277 box_cy[1] + (box_sy * (i - 1))) |
266 box.cy[1] + (box.sy * (i - 1))) | 278 polygon(xx, yy, col = col[i], border = col[i]) |
267 polygon(xx, yy, col = col[i], border = col[i]) | |
268 | |
269 } | 279 } |
270 par(new = TRUE) | 280 par(new = TRUE) |
271 plot(0, 0, type = "n", | 281 plot(0, 0, type = "n", |
272 ylim = c(min(lev), max(lev)), | 282 ylim = c(min(lev), max(lev)), |
273 yaxt = "n", ylab = "", | 283 yaxt = "n", ylab = "", |
275 frame.plot = FALSE) | 285 frame.plot = FALSE) |
276 axis(side = 4, las = 2, tick = FALSE, line = .25) | 286 axis(side = 4, las = 2, tick = FALSE, line = .25) |
277 par <- opar | 287 par <- opar |
278 } | 288 } |
279 | 289 |
280 | 290 data <- read.delim( |
281 data = read.delim( | |
282 opt$data, | 291 opt$data, |
283 check.names = FALSE, | 292 check.names = FALSE, |
284 header = opt$colnames, | 293 header = opt$colnames, |
285 row.names = 1, | 294 row.names = 1, |
286 sep = opt$sep | 295 sep = opt$sep |
287 ) | 296 ) |
288 | 297 |
289 # Contrasting factor and its colors | 298 # Contrasting factor and its colors |
290 if (opt$factor != '') { | 299 if (opt$factor != "") { |
291 contrasting_factor <- read.delim( | 300 contrasting_factor <- read.delim( |
292 opt$factor, | 301 opt$factor, |
293 header = TRUE | 302 header = TRUE |
294 ) | 303 ) |
295 rownames(contrasting_factor) <- contrasting_factor[,1] | 304 rownames(contrasting_factor) <- contrasting_factor[, 1] |
296 contrasting_factor <- contrasting_factor[colnames(data),] | 305 contrasting_factor <- contrasting_factor[colnames(data), ] |
297 colnames(contrasting_factor) <- c("id","factor") | 306 colnames(contrasting_factor) <- c("id", "factor") |
298 if(is.numeric(contrasting_factor$factor)){ | 307 if (is.numeric(contrasting_factor$factor)) { |
299 factor_cols <- rev(brewer.pal(n = 11, name = "RdYlGn"))[contrasting_factor$factor] | 308 factor_cols <- rev(brewer.pal(n = 11, name = "RdYlGn"))[contrasting_factor$factor] |
300 } else { | 309 } else { |
301 contrasting_factor$factor <- as.factor(contrasting_factor$factor) | 310 contrasting_factor$factor <- as.factor(contrasting_factor$factor) |
302 if(nlevels(contrasting_factor$factor) == 2){ | 311 if (nlevels(contrasting_factor$factor) == 2) { |
303 colors_labels <- c("#E41A1C", "#377EB8") | 312 colors_labels <- c("#E41A1C", "#377EB8") |
304 } else { | 313 } else { |
305 colors_labels <- brewer.pal(nlevels(contrasting_factor$factor), name = 'Paired') | 314 set.seed(567629) |
315 colors_labels <- createPalette(nlevels(contrasting_factor$factor), c("#5A5156", "#E4E1E3", "#F6222E")) | |
316 names(colors_labels) <- NULL | |
306 } | 317 } |
307 factorColors <- | 318 factor_colors <- |
308 with(contrasting_factor, | 319 with(contrasting_factor, |
309 data.frame(factor = levels(contrasting_factor$factor), | 320 data.frame(factor = levels(contrasting_factor$factor), |
310 color = I(colors_labels) | 321 color = I(colors_labels) |
311 ) | 322 ) |
312 ) | 323 ) |
313 factor_cols <- factorColors$color[match(contrasting_factor$factor, | 324 factor_cols <- factor_colors$color[match(contrasting_factor$factor, |
314 factorColors$factor)] | 325 factor_colors$factor)] |
315 } | 326 } |
316 } else { | 327 } else { |
317 factor_cols <- rep("deepskyblue4", length(rownames(data))) | 328 factor_cols <- rep("deepskyblue4", length(rownames(data))) |
318 } | 329 } |
319 | 330 |
320 ################ t-SNE #################### | 331 ################ t-SNE #################### |
321 if (opt$visu_choice == 'tSNE') { | 332 if (opt$visu_choice == "tSNE") { |
322 # filter and transpose df for tsne and pca | 333 # filter and transpose df for tsne and pca |
323 tdf = t(data) | 334 tdf <- t(data) |
324 # make tsne and plot results | 335 # make tsne and plot results |
325 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility | 336 set.seed(opt$Rtsne_seed) ## Sets seed for reproducibility |
326 | 337 |
327 tsne_out <- Rtsne(tdf, | 338 tsne_out <- Rtsne(tdf, |
328 dims = opt$Rtsne_dims, | 339 dims = opt$Rtsne_dims, |
329 initial_dims = opt$Rtsne_initial_dims, | 340 initial_dims = opt$Rtsne_initial_dims, |
330 perplexity = opt$Rtsne_perplexity , | 341 perplexity = opt$Rtsne_perplexity, |
331 theta = opt$Rtsne_theta, | 342 theta = opt$Rtsne_theta, |
332 max_iter = opt$Rtsne_max_iter, | 343 max_iter = opt$Rtsne_max_iter, |
333 pca = opt$Rtsne_pca, | 344 pca = opt$Rtsne_pca, |
334 pca_center = opt$Rtsne_pca_center, | 345 pca_center = opt$Rtsne_pca_center, |
335 pca_scale = opt$Rtsne_pca_scale, | 346 pca_scale = opt$Rtsne_pca_scale, |
336 normalize = opt$Rtsne_normalize, | 347 normalize = opt$Rtsne_normalize, |
337 exaggeration_factor=opt$Rtsne_exaggeration_factor) | 348 exaggeration_factor = opt$Rtsne_exaggeration_factor) |
338 | 349 |
339 embedding <- as.data.frame(tsne_out$Y[,1:2]) | 350 embedding <- as.data.frame(tsne_out$Y[, 1:2]) |
340 embedding$Class <- as.factor(rownames(tdf)) | 351 embedding$Class <- as.factor(rownames(tdf)) |
341 gg_legend = theme(legend.position="right") | 352 gg_legend <- theme(legend.position = "right") |
342 if (opt$factor == '') { | 353 if (opt$factor == "") { |
343 ggplot(embedding, aes(x=V1, y=V2)) + | 354 ggplot(embedding, aes(x = V1, y = V2)) + |
344 geom_point(size=1, color='deepskyblue4') + | 355 geom_point(size = 1, color = "deepskyblue4") + |
345 gg_legend + | 356 gg_legend + |
346 xlab("t-SNE 1") + | 357 xlab("t-SNE 1") + |
347 ylab("t-SNE 2") + | 358 ylab("t-SNE 2") + |
348 ggtitle('t-SNE') + | 359 ggtitle("t-SNE") + |
349 if (opt$labels) { | 360 if (opt$labels) { |
350 geom_text(aes(label=Class),hjust=-0.2, vjust=-0.5, size=1.5, color='deepskyblue4') | 361 geom_text(aes(label = Class), hjust = -0.2, vjust = -0.5, size = 1.5, color = "deepskyblue4") |
351 } | 362 } |
352 } else { | 363 } else { |
353 if(is.numeric(contrasting_factor$factor)){ | 364 if (is.numeric(contrasting_factor$factor)) { |
354 embedding$factor <- contrasting_factor$factor | 365 embedding$factor <- contrasting_factor$factor |
355 } else { | 366 } else { |
356 embedding$factor <- as.factor(contrasting_factor$factor) | 367 embedding$factor <- as.factor(contrasting_factor$factor) |
357 } | 368 } |
358 | 369 |
359 ggplot(embedding, aes(x=V1, y=V2, color=factor)) + | 370 ggplot(embedding, aes(x = V1, y = V2, color = factor)) + |
360 geom_point(size=1) + #, color=factor_cols | 371 geom_point(size = 1) + |
361 gg_legend + | 372 gg_legend + |
362 xlab("t-SNE 1") + | 373 xlab("t-SNE 1") + |
363 ylab("t-SNE 2") + | 374 ylab("t-SNE 2") + |
364 ggtitle('t-SNE') + | 375 ggtitle("t-SNE") + |
365 if (opt$labels) { | 376 if (opt$labels) { |
366 geom_text(aes(label=Class, colour=factor),hjust=-0.2, vjust=-0.5, size=1.5) | 377 geom_text(aes(label = Class, colour = factor), hjust = -0.2, vjust = -0.5, size = 1.5) |
367 } | 378 } |
368 } | 379 } |
369 ggsave(file=opt$pdf_out, device="pdf") | 380 ggsave(file = opt$pdf_out, device = "pdf") |
370 | 381 |
371 #save coordinates table | 382 #save coordinates table |
372 if(opt$table_coordinates != ''){ | 383 if (opt$table_coordinates != "") { |
373 coord_table <- cbind(rownames(tdf), round(as.data.frame(tsne_out$Y), 6)) | 384 coord_table <- cbind(rownames(tdf), round(as.data.frame(tsne_out$Y), 6)) |
374 colnames(coord_table)=c("Cells",paste0("DIM",(1:opt$Rtsne_dims))) | 385 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$Rtsne_dims))) |
375 } | 386 } |
376 } | 387 } |
377 | 388 |
378 | 389 |
379 ######### make PCA with FactoMineR ################# | 390 ######### make PCA with FactoMineR ################# |
380 if (opt$visu_choice == 'PCA') { | 391 if (opt$visu_choice == "PCA") { |
381 pca <- PCA(t(data), ncp=opt$PCA_npc, graph=FALSE) | 392 pca <- PCA(t(data), ncp = opt$PCA_npc, graph = FALSE) |
382 pdf(opt$pdf_out) | 393 pdf(opt$pdf_out) |
383 if (opt$labels == FALSE) { | 394 if (opt$labels == FALSE) { |
384 plot(pca, axes = c(opt$PCA_x_axis,opt$PCA_y_axis), label="none" , col.ind = factor_cols) | 395 plot(pca, axes = c(opt$PCA_x_axis, opt$PCA_y_axis), label = "none", col.ind = factor_cols) |
385 } else { | 396 } else { |
386 plot(pca, axes = c(opt$PCA_x_axis,opt$PCA_y_axis), cex=0.2 , col.ind = factor_cols) | 397 plot(pca, axes = c(opt$PCA_x_axis, opt$PCA_y_axis), cex = 0.2, col.ind = factor_cols) |
387 } | 398 } |
388 if (opt$factor != '') { | 399 if (opt$factor != "") { |
389 if(is.factor(contrasting_factor$factor)) { | 400 if (is.factor(contrasting_factor$factor)) { |
390 legend(x = 'topright', | 401 legend(x = "topright", |
391 legend = as.character(factorColors$factor), | 402 legend = as.character(factor_colors$factor), |
392 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) | 403 col = factor_colors$color, pch = 16, bty = "n", xjust = 1, cex = 0.7) |
393 } else { | 404 } else { |
394 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) | 405 legend_col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) |
395 } | 406 } |
396 } | 407 } |
397 dev.off() | 408 dev.off() |
398 | 409 |
399 #save coordinates table | 410 #save coordinates table |
400 if(opt$table_coordinates != ''){ | 411 if (opt$table_coordinates != "") { |
401 coord_table <- cbind(rownames(pca$ind$coord), round(as.data.frame(pca$ind$coord), 6)) | 412 coord_table <- cbind(rownames(pca$ind$coord), round(as.data.frame(pca$ind$coord), 6)) |
402 colnames(coord_table)=c("Cells",paste0("DIM",(1:opt$PCA_npc))) | 413 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$PCA_npc))) |
403 } | 414 } |
404 | 415 |
405 } | 416 } |
406 | 417 |
407 ########### make HCPC with FactoMineR ########## | 418 ########### make HCPC with FactoMineR ########## |
408 if (opt$visu_choice == 'HCPC') { | 419 if (opt$visu_choice == "HCPC") { |
409 | 420 |
410 # HCPC starts with a PCA | 421 # HCPC starts with a PCA |
411 pca <- PCA( | 422 pca <- PCA( |
412 t(data), | 423 t(data), |
413 ncp = opt$HCPC_npc, | 424 ncp = opt$HCPC_npc, |
414 graph = FALSE, | 425 graph = FALSE, |
415 ) | 426 ) |
416 | 427 |
417 PCA_IndCoord = as.data.frame(pca$ind$coord) # coordinates of observations in PCA | 428 pca_ind_coord <- as.data.frame(pca$ind$coord) # coordinates of observations in PCA |
418 | 429 |
419 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering | 430 # Hierarchical Clustering On Principal Components Followed By Kmean Clustering |
420 res.hcpc <- HCPC(pca, | 431 res_hcpc <- HCPC(pca, |
421 nb.clust=opt$HCPC_ncluster, metric=opt$HCPC_metric, method=opt$HCPC_method, | 432 nb.clust = opt$HCPC_ncluster, metric = opt$HCPC_metric, method = opt$HCPC_method, |
422 graph=F,consol=opt$HCPC_consol,iter.max=opt$HCPC_itermax,min=opt$HCPC_min,max=opt$HCPC_max, | 433 graph = FALSE, consol = opt$HCPC_consol, iter.max = opt$HCPC_itermax, min = opt$HCPC_min, max = opt$HCPC_max, |
423 cluster.CA=opt$HCPC_clusterCA,kk=opt$HCPC_kk) | 434 cluster.CA = opt$HCPC_clusterCA, kk = opt$HCPC_kk) |
424 # HCPC plots | 435 # HCPC plots |
425 dims <- head(as.data.frame(res.hcpc$call$t$res$eig),2) # dims variances in column 2 | 436 dims <- head(as.data.frame(res_hcpc$call$t$res$eig), 2) # dims variances in column 2 |
426 pdf(opt$pdf_out) | 437 pdf(opt$pdf_out) |
427 plot(res.hcpc, choice="tree") | 438 plot(res_hcpc, choice = "tree") |
428 plot(res.hcpc, choice="bar") | 439 plot(res_hcpc, choice = "bar") |
429 plot(res.hcpc, choice="3D.map") | 440 plot(res_hcpc, choice = "3D.map") |
430 if (opt$labels == FALSE) { | 441 if (opt$labels == FALSE) { |
431 plot(res.hcpc, choice="map", label="none") | 442 plot(res_hcpc, choice = "map", label = "none") |
432 } else { | 443 } else { |
433 plot(res.hcpc, choice="map") | 444 plot(res_hcpc, choice = "map") |
434 } | 445 } |
435 | 446 |
436 # user contrasts on the pca | 447 # user contrasts on the pca |
437 if (opt$factor != '') { | 448 if (opt$factor != "") { |
438 plot(pca, label="none", col.ind = factor_cols) | 449 plot(pca, label = "none", col.ind = factor_cols) |
439 if(is.factor(contrasting_factor$factor)) { | 450 if (is.factor(contrasting_factor$factor)) { |
440 legend(x = 'topright', | 451 legend(x = "topright", |
441 legend = as.character(factorColors$factor), | 452 legend = as.character(factor_colors$factor), |
442 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) | 453 col = factor_colors$color, pch = 16, bty = "n", xjust = 1, cex = 0.7) |
443 | 454 |
444 ## Normalized Mutual Information | 455 ## Normalized Mutual Information |
445 sink(opt$HCPC_mutual_info) | 456 sink(opt$HCPC_mutual_info) |
446 res <- external_validation( | 457 res <- external_validation( |
447 true_labels = as.numeric(contrasting_factor$factor), | 458 true_labels = as.numeric(contrasting_factor$factor), |
448 clusters = as.numeric(res.hcpc$data.clust$clust), | 459 clusters = as.numeric(res_hcpc$data.clust$clust), |
449 summary_stats = TRUE | 460 summary_stats = TRUE |
450 ) | 461 ) |
451 sink() | 462 sink() |
452 | 463 |
453 } else { | 464 } else { |
454 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) | 465 legend_col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) |
455 } | 466 } |
456 } | 467 } |
457 | |
458 ## Clusters to which individual observations belong # used ? | |
459 # Clust <- data.frame(Cluster = res.hcpc$data.clust[, (nrow(data) + 1)], | |
460 # Observation = rownames(res.hcpc$data.clust)) | |
461 # metadata <- data.frame(Observation=colnames(data), row.names=colnames(data)) | |
462 # metadata = merge(y = metadata, | |
463 # x = Clust, | |
464 # by = "Observation") | |
465 | |
466 # unclear utility | |
467 # ObsNumberPerCluster = as.data.frame(table(metadata$Cluster)) | |
468 # colnames(ObsNumberPerCluster) = c("Cluster", "ObsNumber") | |
469 # | |
470 # ## Silhouette Plot # not used | |
471 # hc.cut = hcut(PCA_IndCoord, | |
472 # k = nlevels(metadata$Cluster), | |
473 # hc_method = "ward.D2") | |
474 # | |
475 # Sil = fviz_silhouette(hc.cut) | |
476 # sil1 = as.data.frame(Sil$data) | |
477 | 468 |
478 dev.off() | 469 dev.off() |
479 | 470 |
480 if(opt$table_coordinates != ''){ | 471 if (opt$table_coordinates != "") { |
481 coord_table <- cbind(Cell=rownames(res.hcpc$call$X), | 472 coord_table <- cbind(Cell = rownames(res_hcpc$call$X), |
482 round(as.data.frame(res.hcpc$call$X[, -length(res.hcpc$call$X)]), 6), | 473 round(as.data.frame(res_hcpc$call$X[, -length(res_hcpc$call$X)]), 6), |
483 as.data.frame(res.hcpc$call$X[, length(res.hcpc$call$X)]) | 474 as.data.frame(res_hcpc$call$X[, length(res_hcpc$call$X)]) |
484 ) | 475 ) |
485 colnames(coord_table)=c("Cells",paste0("DIM",(1:opt$HCPC_npc)),"Cluster") | 476 colnames(coord_table) <- c("Cells", paste0("DIM", (1:opt$HCPC_npc)), "Cluster") |
486 } | 477 } |
487 | 478 |
488 if(opt$HCPC_clust != ""){ | 479 if (opt$HCPC_clust != "") { |
489 res_clustering <- data.frame(Cell = rownames(res.hcpc$data.clust), | 480 res_clustering <- data.frame(Cell = rownames(res_hcpc$data.clust), |
490 Cluster = res.hcpc$data.clust$clust) | 481 Cluster = res_hcpc$data.clust$clust) |
491 | 482 |
492 } | 483 } |
493 | 484 |
494 # Description of cluster by most contributing variables / gene expressions | 485 # Description of cluster by most contributing variables / gene expressions |
495 | 486 |
496 # first transform list of vectors in a list of dataframes | 487 # first transform list of vectors in a list of dataframes |
497 extract_description <- lapply(res.hcpc$desc.var$quanti, as.data.frame) | 488 extract_description <- lapply(res_hcpc$desc.var$quanti, as.data.frame) |
498 # second, transfer rownames (genes) to column in the dataframe, before rbinding | 489 # second, transfer rownames (genes) to column in the dataframe, before rbinding |
499 extract_description_w_genes <- Map(cbind, | 490 extract_description_w_genes <- Map(cbind, |
500 extract_description, | 491 extract_description, |
501 genes= lapply(extract_description, rownames) | 492 genes = lapply(extract_description, rownames) |
502 ) | 493 ) |
503 # Then use data.table to collapse all generated dataframe, with the cluster id in first column | 494 # Then use data.table to collapse all generated dataframe, with the cluster id in first column |
504 # using the {data.table} rbindlist function | 495 # using the {data.table} rbindlist function |
505 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") | 496 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") |
506 cluster_description = cluster_description[ ,c(8, 1, 2, 3,4,5,6,7)] # reorganize columns | 497 cluster_description <- cluster_description[, c(8, 1, 2, 3, 4, 5, 6, 7)] # reorganize columns |
507 | 498 |
508 | 499 |
509 # Finally, output cluster description data frame | 500 # Finally, output cluster description data frame |
510 write.table( | 501 write.table( |
511 cluster_description, | 502 cluster_description, |
512 file = opt$HCPC_cluster_description, | 503 file = opt$HCPC_cluster_description, |
513 sep = "\t", | 504 sep = "\t", |
514 quote = F, | 505 quote = FALSE, |
515 col.names = T, | 506 col.names = TRUE, |
516 row.names = F | 507 row.names = FALSE |
517 ) | 508 ) |
518 | 509 |
519 } | 510 } |
520 | 511 |
521 ## Return coordinates file to user | 512 ## Return coordinates file to user |
522 | 513 |
523 if(opt$table_coordinates != ''){ | 514 if (opt$table_coordinates != "") { |
524 write.table( | 515 write.table( |
525 coord_table, | 516 coord_table, |
526 file = opt$table_coordinates, | 517 file = opt$table_coordinates, |
527 sep = "\t", | 518 sep = "\t", |
528 quote = F, | 519 quote = FALSE, |
529 col.names = T, | 520 col.names = TRUE, |
530 row.names = F | 521 row.names = FALSE |
531 ) | 522 ) |
532 } | 523 } |
533 | 524 |
534 | 525 |
535 if(opt$HCPC_clust != ""){ | 526 if (opt$HCPC_clust != "") { |
536 write.table( | 527 write.table( |
537 res_clustering, | 528 res_clustering, |
538 file = opt$HCPC_clust, | 529 file = opt$HCPC_clust, |
539 sep = "\t", | 530 sep = "\t", |
540 quote = F, | 531 quote = FALSE, |
541 col.names = T, | 532 col.names = TRUE, |
542 row.names = F | 533 row.names = FALSE |
543 ) | 534 ) |
544 } | 535 } |