comparison high_dim_visu.R @ 5:569334568afa draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_high_dimension_visualization commit 1b98c85982a2a9f9df4b318f672b9b68cff66a93"
author artbio
date Mon, 02 Sep 2019 04:39:20 -0400
parents 8e17c31c536a
children 19bef589f876
comparison
equal deleted inserted replaced
4:8e17c31c536a 5:569334568afa
1 # load packages that are provided in the conda env 1 # load packages that are provided in the conda env
2 options( show.error.messages=F, 2 options( show.error.messages=F,
3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
5 requiredPackages = c('optparse', 'Rtsne', 'ggplot2', 'ggfortify')
6 warnings() 5 warnings()
7 library(optparse) 6 library(optparse)
8 library(FactoMineR) 7 library(FactoMineR)
9 library(factoextra) 8 library(factoextra)
10 library(Rtsne) 9 library(Rtsne)
11 library(ggplot2) 10 library(ggplot2)
12 library(ggfortify) 11 library(ggfortify)
13 library(RColorBrewer) 12 library(RColorBrewer)
14 library(ClusterR) 13 library(ClusterR)
14 library(data.table)
15 15
16 # Arguments 16 # Arguments
17 option_list = list( 17 option_list = list(
18 make_option( 18 make_option(
19 "--data", 19 "--data",
159 type = 'integer', 159 type = 'integer',
160 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]" 160 help = "npc, number of dimensions which are kept for HCPC analysis [default : '%default' ]"
161 ), 161 ),
162 make_option( 162 make_option(
163 "--HCPC_metric", 163 "--HCPC_metric",
164 default = 'euclidian', 164 default = 'euclidean',
165 type = 'character', 165 type = 'character',
166 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidian' or 'manhattan' [default : '%default' ]" 166 help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]"
167 ), 167 ),
168 make_option( 168 make_option(
169 "--HCPC_method", 169 "--HCPC_method",
170 default = 'ward', 170 default = 'ward',
171 type = 'character', 171 type = 'character',
207 type = 'character', 207 type = 'character',
208 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']" 208 help = "A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results [default :'%default']"
209 ), 209 ),
210 make_option( 210 make_option(
211 "--HCPC_kk", 211 "--HCPC_kk",
212 default = -1, 212 default = Inf,
213 type = 'numeric', 213 type = 'numeric',
214 help = "The maximum number of iterations for the consolidation [default :'%default']" 214 help = "The maximum number of iterations for the consolidation [default :'%default']"
215 ), 215 ),
216 make_option( 216 make_option(
217 "--HCPC_clust", 217 "--HCPC_clust",
218 default = "", 218 default = "",
219 type = 'character', 219 type = 'character',
220 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" 220 help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']"
221 ), 221 ),
222 make_option( 222 make_option(
223 "--mutual_info", 223 "--HCPC_mutual_info",
224 default = "", 224 default = "",
225 type = "character", 225 type = "character",
226 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" 226 help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']"
227 ),
228 make_option(
229 "--HCPC_cluster_description",
230 default = "",
231 type = "character",
232 help = "Output file with variables most contributing to clustering [default :'%default']"
227 ) 233 )
228 ) 234 )
229 235
230 opt = parse_args(OptionParser(option_list = option_list), 236 opt = parse_args(OptionParser(option_list = option_list),
231 args = commandArgs(trailingOnly = TRUE)) 237 args = commandArgs(trailingOnly = TRUE))
434 legend(x = 'topright', 440 legend(x = 'topright',
435 legend = as.character(factorColors$factor), 441 legend = as.character(factorColors$factor),
436 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) 442 col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7)
437 443
438 ## Normalized Mutual Information 444 ## Normalized Mutual Information
439 sink(opt$mutual_info) 445 sink(opt$HCPC_mutual_info)
440 res <- external_validation( 446 res <- external_validation(
441 true_labels = as.numeric(contrasting_factor$factor), 447 true_labels = as.numeric(contrasting_factor$factor),
442 clusters = as.numeric(res.hcpc$data.clust$clust), 448 clusters = as.numeric(res.hcpc$data.clust$clust),
443 summary_stats = TRUE 449 summary_stats = TRUE
444 ) 450 )
446 452
447 } else { 453 } else {
448 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) 454 legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE))
449 } 455 }
450 } 456 }
457
451 ## Clusters to which individual observations belong # used ? 458 ## Clusters to which individual observations belong # used ?
452 # Clust <- data.frame(Cluster = res.hcpc$data.clust[, (nrow(data) + 1)], 459 # Clust <- data.frame(Cluster = res.hcpc$data.clust[, (nrow(data) + 1)],
453 # Observation = rownames(res.hcpc$data.clust)) 460 # Observation = rownames(res.hcpc$data.clust))
454 # metadata <- data.frame(Observation=colnames(data), row.names=colnames(data)) 461 # metadata <- data.frame(Observation=colnames(data), row.names=colnames(data))
455 # metadata = merge(y = metadata, 462 # metadata = merge(y = metadata,
482 res_clustering <- data.frame(Cell = rownames(res.hcpc$data.clust), 489 res_clustering <- data.frame(Cell = rownames(res.hcpc$data.clust),
483 Cluster = res.hcpc$data.clust$clust) 490 Cluster = res.hcpc$data.clust$clust)
484 491
485 } 492 }
486 493
494 # Description of cluster by most contributing variables / gene expressions
495
496 # first transform list of vectors in a list of dataframes
497 extract_description <- lapply(res.hcpc$desc.var$quanti, as.data.frame)
498 # second, transfer rownames (genes) to column in the dataframe, before rbinding
499 extract_description_w_genes <- Map(cbind,
500 extract_description,
501 genes= lapply(extract_description, rownames)
502 )
503 # Then use data.table to collapse all generated dataframe, with the cluster id in first column
504 # using the {data.table} rbindlist function
505 cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id")
506 cluster_description = cluster_description[ ,c(8, 1, 2, 3,4,5,6,7)] # reorganize columns
507
508
509 # Finally, output cluster description data frame
510 write.table(
511 cluster_description,
512 file = opt$HCPC_cluster_description,
513 sep = "\t",
514 quote = F,
515 col.names = T,
516 row.names = F
517 )
518
487 } 519 }
488 520
489 ## Return coordinates file to user 521 ## Return coordinates file to user
490 522
491 if(opt$table_coordinates != ''){ 523 if(opt$table_coordinates != ''){
507 sep = "\t", 539 sep = "\t",
508 quote = F, 540 quote = F,
509 col.names = T, 541 col.names = T,
510 row.names = F 542 row.names = F
511 ) 543 )
512 } 544 }
513
514
515
516
517
518
519
520
521
522
523