Mercurial > repos > bgruening > music_construct_eset
diff scripts/dendrogram.R @ 1:be91cb6f48e7 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 683bb72ae92b5759a239b7e3bf4c5a229ed35b54"
author | bgruening |
---|---|
date | Fri, 26 Nov 2021 15:55:11 +0000 |
parents | 2cfd0db49bbc |
children | 7902cd31b9b5 |
line wrap: on
line diff
--- a/scripts/dendrogram.R Sun Sep 12 19:49:12 2021 +0000 +++ b/scripts/dendrogram.R Fri Nov 26 15:55:11 2021 +0000 @@ -17,31 +17,6 @@ args <- commandArgs(trailingOnly = TRUE) source(args[1]) -## We then perform bulk tissue cell type estimation with pre-grouping -## of cell types: C, list_of_cell_types, marker genes name, marker -## genes list. -## data.to.use = list( -## "C1" = list(cell.types = c("Neutro"), -## marker.names=NULL, -## marker.list=NULL), -## "C2" = list(cell.types = c("Podo"), -## marker.names=NULL, -## marker.list=NULL), -## "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"), -## marker.names = "Epithelial", -## marker.list = read_list("../test-data/epith.markers")), -## "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"), -## marker.names = "Immune", -## marker.list = read_list("../test-data/immune.markers")) -## ) -grouped_celltypes <- lapply(data.to.use, function(x) { - x$cell.types -}) -marker_groups <- lapply(data.to.use, function(x) { - x$marker.list -}) -names(marker_groups) <- names(data.to.use) - ## Perform the estimation ## Produce the first step information @@ -51,33 +26,107 @@ ## Plot the dendrogram of design matrix and cross-subject mean of ## realtive abundance -par(mfrow = c(1, 2)) -d <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean") +## Hierarchical clustering using Complete Linkage +d1 <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean") +hc1 <- hclust(d1, method = "complete") ## Hierarchical clustering using Complete Linkage -hc1 <- hclust(d, method = "complete") -## Plot the obtained dendrogram -plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)") -d <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean") -## Hierarchical clustering using Complete Linkage -hc2 <- hclust(d, method = "complete") -## Plot the obtained dendrogram -pdf(file = outfile_pdf, width = 8, height = 8) -plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)") +d2 <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean") +hc2 <- hclust(d2, method = "complete") + -cl_type <- as.character(scrna_eset[[celltypes_label]]) +if (length(data.to.use) > 0) { + ## We then perform bulk tissue cell type estimation with pre-grouping + ## of cell types: C, list_of_cell_types, marker genes name, marker + ## genes list. + ## data.to.use = list( + ## "C1" = list(cell.types = c("Neutro"), + ## marker.names=NULL, + ## marker.list=NULL), + ## "C2" = list(cell.types = c("Podo"), + ## marker.names=NULL, + ## marker.list=NULL), + ## "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"), + ## marker.names = "Epithelial", + ## marker.list = read_list("../test-data/epith.markers")), + ## "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"), + ## marker.names = "Immune", + ## marker.list = read_list("../test-data/immune.markers")) + ## ) + grouped_celltypes <- lapply(data.to.use, function(x) { + x$cell.types + }) + marker_groups <- lapply(data.to.use, function(x) { + x$marker.list + }) + names(marker_groups) <- names(data.to.use) + + + cl_type <- as.character(scrna_eset[[celltypes_label]]) + + for (cl in seq_len(length(grouped_celltypes))) { + cl_type[cl_type %in% + grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl] + } + pData(scrna_eset)[[clustertype_label]] <- factor( + cl_type, levels = c(names(grouped_celltypes), + "CD-Trans", "Novel1", "Novel2")) + + est_bulk <- music_prop.cluster( + bulk.eset = bulk_eset, sc.eset = scrna_eset, + group.markers = marker_groups, clusters = celltypes_label, + groups = clustertype_label, samples = samples_label, + clusters.type = grouped_celltypes + ) -for (cl in seq_len(length(grouped_celltypes))) { - cl_type[cl_type %in% grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl] -} -pData(scrna_eset)[[clustertype_label]] <- factor( - cl_type, levels = c(names(grouped_celltypes), - "CD-Trans", "Novel1", "Novel2")) + estimated_music_props <- est_bulk$Est.prop.weighted.cluster + ## NNLS is not calculated here + + ## Show different in estimation methods + ## Jitter plot of estimated cell type proportions + methods_list <- c("MuSiC") + + jitter_fig <- Jitter_Est( + list(data.matrix(estimated_music_props)), + method.name = methods_list, title = "Jitter plot of Est Proportions", + size = 2, alpha = 0.7) + + theme_minimal() + + labs(x = element_blank(), y = element_blank()) + + theme(axis.text = element_text(size = 6), + axis.text.x = element_blank(), + legend.position = "none") + + plot_box <- Boxplot_Est(list( + data.matrix(estimated_music_props)), + method.name = methods_list) + + theme_minimal() + + labs(x = element_blank(), y = element_blank()) + + theme(axis.text = element_text(size = 6), + axis.text.x = element_blank(), + legend.position = "none") -est_bulk <- music_prop.cluster( - bulk.eset = bulk_eset, sc.eset = scrna_eset, - group.markers = marker_groups, clusters = celltypes_label, - groups = clustertype_label, samples = samples_label, - clusters.type = grouped_celltypes) + plot_hmap <- Prop_heat_Est(list( + data.matrix(estimated_music_props)), + method.name = methods_list) + + labs(x = element_blank(), y = element_blank()) + + theme(axis.text.y = element_text(size = 6), + axis.text.x = element_text(angle = -90, size = 5), + plot.title = element_text(size = 9), + legend.key.width = unit(0.15, "cm"), + legend.text = element_text(size = 5), + legend.title = element_text(size = 5)) -write.table(est_bulk, file = outfile_tab, quote = F, col.names = NA, sep = "\t") -dev.off() +} + +pdf(file = outfile_pdf, width = 8, height = 8) +par(mfrow = c(1, 2)) +plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)") +plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)") +if (length(data.to.use) > 0) { + plot_grid(jitter_fig, plot_box, plot_hmap, ncol = 2, nrow = 2) +} +message(dev.off()) + +if (length(data.to.use) > 0) { + write.table(estimated_music_props, + file = outfile_tab, quote = F, col.names = NA, sep = "\t") +}