Mercurial > repos > artbio > gsc_high_dimensions_visualisation
comparison high_dim_visu.R @ 5:569334568afa draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_high_dimension_visualization commit 1b98c85982a2a9f9df4b318f672b9b68cff66a93"
author | artbio |
---|---|
date | Mon, 02 Sep 2019 04:39:20 -0400 |
parents | 8e17c31c536a |
children | 19bef589f876 |
comparison
equal
deleted
inserted
replaced
4:8e17c31c536a | 5:569334568afa |
---|---|
1 # load packages that are provided in the conda env | 1 # load packages that are provided in the conda env |
2 options( show.error.messages=F, | 2 options( show.error.messages=F, |
3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) |
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
5 requiredPackages = c('optparse', 'Rtsne', 'ggplot2', 'ggfortify') | |
6 warnings() | 5 warnings() |
7 library(optparse) | 6 library(optparse) |
8 library(FactoMineR) | 7 library(FactoMineR) |
9 library(factoextra) | 8 library(factoextra) |
10 library(Rtsne) | 9 library(Rtsne) |
11 library(ggplot2) | 10 library(ggplot2) |
12 library(ggfortify) | 11 library(ggfortify) |
13 library(RColorBrewer) | 12 library(RColorBrewer) |
14 library(ClusterR) | 13 library(ClusterR) |
14 library(data.table) | |
15 | 15 |
16 # Arguments | 16 # Arguments |
17 option_list = list( | 17 option_list = list( |
18 make_option( | 18 make_option( |
19 "--data", | 19 "--data", |
159 type = 'integer', | 159 type = 'integer', |
160 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" | 160 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" |
161 ), | 161 ), |
162 make_option( | 162 make_option( |
163 "--HCPC_metric", | 163 "--HCPC_metric", |
164 default = 'euclidian', | 164 default = 'euclidean', |
165 type = 'character', | 165 type = 'character', |
166 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidian' or 'manhattan' [default : '%default' ]" | 166 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" |
167 ), | 167 ), |
168 make_option( | 168 make_option( |
169 "--HCPC_method", | 169 "--HCPC_method", |
170 default = 'ward', | 170 default = 'ward', |
171 type = 'character', | 171 type = 'character', |
207 type = 'character', | 207 type = 'character', |
208 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" | 208 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" |
209 ), | 209 ), |
210 make_option( | 210 make_option( |
211 "--HCPC_kk", | 211 "--HCPC_kk", |
212 default = -1, | 212 default = Inf, |
213 type = 'numeric', | 213 type = 'numeric', |
214 help = "The maximum number of iterations for the consolidation [default :'%default']" | 214 help = "The maximum number of iterations for the consolidation [default :'%default']" |
215 ), | 215 ), |
216 make_option( | 216 make_option( |
217 "--HCPC_clust", | 217 "--HCPC_clust", |
218 default = "", | 218 default = "", |
219 type = 'character', | 219 type = 'character', |
220 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" | 220 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" |
221 ), | 221 ), |
222 make_option( | 222 make_option( |
223 "--mutual_info", | 223 "--HCPC_mutual_info", |
224 default = "", | 224 default = "", |
225 type = "character", | 225 type = "character", |
226 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" | 226 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" |
227 ), | |
228 make_option( | |
229 "--HCPC_cluster_description", | |
230 default = "", | |
231 type = "character", | |
232 help = "Output file with variables most contributing to clustering [default :'%default']" | |
227 ) | 233 ) |
228 ) | 234 ) |
229 | 235 |
230 opt = parse_args(OptionParser(option_list = option_list), | 236 opt = parse_args(OptionParser(option_list = option_list), |
231 args = commandArgs(trailingOnly = TRUE)) | 237 args = commandArgs(trailingOnly = TRUE)) |
434 legend(x = 'topright', | 440 legend(x = 'topright', |
435 legend = as.character(factorColors$factor), | 441 legend = as.character(factorColors$factor), |
436 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) | 442 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) |
437 | 443 |
438 ## Normalized Mutual Information | 444 ## Normalized Mutual Information |
439 sink(opt$mutual_info) | 445 sink(opt$HCPC_mutual_info) |
440 res <- external_validation( | 446 res <- external_validation( |
441 true_labels = as.numeric(contrasting_factor$factor), | 447 true_labels = as.numeric(contrasting_factor$factor), |
442 clusters = as.numeric(res.hcpc$data.clust$clust), | 448 clusters = as.numeric(res.hcpc$data.clust$clust), |
443 summary_stats = TRUE | 449 summary_stats = TRUE |
444 ) | 450 ) |
446 | 452 |
447 } else { | 453 } else { |
448 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) | 454 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) |
449 } | 455 } |
450 } | 456 } |
457 | |
451 ## Clusters to which individual observations belong # used ? | 458 ## Clusters to which individual observations belong # used ? |
452 # Clust <- data.frame(Cluster = res.hcpc$data.clust[, (nrow(data) + 1)], | 459 # Clust <- data.frame(Cluster = res.hcpc$data.clust[, (nrow(data) + 1)], |
453 # Observation = rownames(res.hcpc$data.clust)) | 460 # Observation = rownames(res.hcpc$data.clust)) |
454 # metadata <- data.frame(Observation=colnames(data), row.names=colnames(data)) | 461 # metadata <- data.frame(Observation=colnames(data), row.names=colnames(data)) |
455 # metadata = merge(y = metadata, | 462 # metadata = merge(y = metadata, |
482 res_clustering <- data.frame(Cell = rownames(res.hcpc$data.clust), | 489 res_clustering <- data.frame(Cell = rownames(res.hcpc$data.clust), |
483 Cluster = res.hcpc$data.clust$clust) | 490 Cluster = res.hcpc$data.clust$clust) |
484 | 491 |
485 } | 492 } |
486 | 493 |
494 # Description of cluster by most contributing variables / gene expressions | |
495 | |
496 # first transform list of vectors in a list of dataframes | |
497 extract_description <- lapply(res.hcpc$desc.var$quanti, as.data.frame) | |
498 # second, transfer rownames (genes) to column in the dataframe, before rbinding | |
499 extract_description_w_genes <- Map(cbind, | |
500 extract_description, | |
501 genes= lapply(extract_description, rownames) | |
502 ) | |
503 # Then use data.table to collapse all generated dataframe, with the cluster id in first column | |
504 # using the {data.table} rbindlist function | |
505 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") | |
506 cluster_description = cluster_description[ ,c(8, 1, 2, 3,4,5,6,7)] # reorganize columns | |
507 | |
508 | |
509 # Finally, output cluster description data frame | |
510 write.table( | |
511 cluster_description, | |
512 file = opt$HCPC_cluster_description, | |
513 sep = "\t", | |
514 quote = F, | |
515 col.names = T, | |
516 row.names = F | |
517 ) | |
518 | |
487 } | 519 } |
488 | 520 |
489 ## Return coordinates file to user | 521 ## Return coordinates file to user |
490 | 522 |
491 if(opt$table_coordinates != ''){ | 523 if(opt$table_coordinates != ''){ |
507 sep = "\t", | 539 sep = "\t", |
508 quote = F, | 540 quote = F, |
509 col.names = T, | 541 col.names = T, |
510 row.names = F | 542 row.names = F |
511 ) | 543 ) |
512 } | 544 } |
513 | |
514 | |
515 | |
516 | |
517 | |
518 | |
519 | |
520 | |
521 | |
522 | |
523 |