Mercurial > repos > artbio > gsc_high_dimensions_visualisation
diff high_dim_visu.R @ 5:569334568afa draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_high_dimension_visualization commit 1b98c85982a2a9f9df4b318f672b9b68cff66a93"
author | artbio |
---|---|
date | Mon, 02 Sep 2019 04:39:20 -0400 |
parents | 8e17c31c536a |
children | 19bef589f876 |
line wrap: on
line diff
--- a/high_dim_visu.R Thu Jul 11 12:31:28 2019 -0400 +++ b/high_dim_visu.R Mon Sep 02 04:39:20 2019 -0400 @@ -2,7 +2,6 @@ options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") -requiredPackages = c('optparse', 'Rtsne', 'ggplot2', 'ggfortify') warnings() library(optparse) library(FactoMineR) @@ -12,6 +11,7 @@ library(ggfortify) library(RColorBrewer) library(ClusterR) +library(data.table) # Arguments option_list = list( @@ -161,9 +161,9 @@ ), make_option( "--HCPC_metric", - default = 'euclidian', + default = 'euclidean', type = 'character', - help = "Metric to be used for calculating dissimilarities between observations, available 'euclidian' or 'manhattan' [default : '%default' ]" + help = "Metric to be used for calculating dissimilarities between observations, available 'euclidean' or 'manhattan' [default : '%default' ]" ), make_option( "--HCPC_method", @@ -209,7 +209,7 @@ ), make_option( "--HCPC_kk", - default = -1, + default = Inf, type = 'numeric', help = "The maximum number of iterations for the consolidation [default :'%default']" ), @@ -220,10 +220,16 @@ help = "Output result of HCPC clustering : two column table (cell identifiers and clusters) [default :'%default']" ), make_option( - "--mutual_info", + "--HCPC_mutual_info", default = "", type = "character", help = "Output file of external validation of HCPC clustering with factor levels [default :'%default']" + ), + make_option( + "--HCPC_cluster_description", + default = "", + type = "character", + help = "Output file with variables most contributing to clustering [default :'%default']" ) ) @@ -436,7 +442,7 @@ col = factorColors$color, pch = 16, bty = 'n', xjust = 1, cex=0.7) ## Normalized Mutual Information - sink(opt$mutual_info) + sink(opt$HCPC_mutual_info) res <- external_validation( true_labels = as.numeric(contrasting_factor$factor), clusters = as.numeric(res.hcpc$data.clust$clust), @@ -448,6 +454,7 @@ legend.col(col = rev(brewer.pal(n = 11, name = "RdYlGn")), lev = cut(contrasting_factor$factor, 11, label = FALSE)) } } + ## Clusters to which individual observations belong # used ? # Clust <- data.frame(Cluster = res.hcpc$data.clust[, (nrow(data) + 1)], # Observation = rownames(res.hcpc$data.clust)) @@ -484,6 +491,31 @@ } +# Description of cluster by most contributing variables / gene expressions + +# first transform list of vectors in a list of dataframes +extract_description <- lapply(res.hcpc$desc.var$quanti, as.data.frame) +# second, transfer rownames (genes) to column in the dataframe, before rbinding +extract_description_w_genes <- Map(cbind, + extract_description, + genes= lapply(extract_description, rownames) + ) +# Then use data.table to collapse all generated dataframe, with the cluster id in first column +# using the {data.table} rbindlist function +cluster_description <- rbindlist(extract_description_w_genes, idcol = "cluster_id") +cluster_description = cluster_description[ ,c(8, 1, 2, 3,4,5,6,7)] # reorganize columns + + +# Finally, output cluster description data frame +write.table( + cluster_description, + file = opt$HCPC_cluster_description, + sep = "\t", + quote = F, + col.names = T, + row.names = F + ) + } ## Return coordinates file to user @@ -509,15 +541,4 @@ col.names = T, row.names = F ) -} - - - - - - - - - - - +}