# HG changeset patch
# User bgruening
# Date 1730935288 0
# Node ID 48f0fb3061b1e56ea2d850890707443dc6a7713e
# Parent 7022ce682d2f07a4ae94a22a12950b15a22bcc67
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 7b4e1e85d9d288a904444eb9fcb96bcdc856b9ff
diff -r 7022ce682d2f -r 48f0fb3061b1 construct_eset.xml
--- a/construct_eset.xml Tue Oct 29 13:39:39 2024 +0000
+++ b/construct_eset.xml Wed Nov 06 23:21:28 2024 +0000
@@ -4,6 +4,7 @@
macros.xml
+
> /dev/stderr &&
@@ -228,4 +229,4 @@
}
-
\ No newline at end of file
+
diff -r 7022ce682d2f -r 48f0fb3061b1 macros.xml
--- a/macros.xml Tue Oct 29 13:39:39 2024 +0000
+++ b/macros.xml Wed Nov 06 23:21:28 2024 +0000
@@ -9,6 +9,11 @@
in 21.09
rdata.eset
-->
+
+
+ music_deconvolution
+
+
music-deconvolution
diff -r 7022ce682d2f -r 48f0fb3061b1 music-deconvolution.xml.orig
--- a/music-deconvolution.xml.orig Tue Oct 29 13:39:39 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,357 +0,0 @@
-
- estimate cell type proportions in bulk RNA-seq data
-
- macros.xml
-
-
-
-
-
-
-null_str_vec = function(gstr){
- tokens = unlist(as.vector(strsplit(gstr, split=",")))
- if (length(tokens) == 0){
- return(NULL)
- }
- if (length(tokens) == 1){
- return(tokens[[1]])
- }
- return(tokens)
-}
-
-bulk_eset = readRDS('$bulk_eset')
-scrna_eset = readRDS('$scrna_eset')
-use_disease_factor = FALSE
-maxyscale = NA
-
-#if str($do.method) == "estimateprops":
-
-maxyscale = as.numeric('$do.maxyscale') ## yields "NA" if blank
-phenotype_factors = null_str_vec('$do.phenotype_factors')
-phenotype_factors_always_exclude = null_str_vec('$do.phenotype_factors_always_exclude')
-celltypes_label = null_str_vec('$do.celltypes_label')
-samples_label = null_str_vec('$do.samples_label')
-celltypes = null_str_vec('$do.celltypes')
-methods = c("MuSiC", "NNLS")
-
- #if str($do.disease_factor.use) == "yes":
-use_disease_factor = TRUE
-<<<<<<< HEAD
-phenotype_scrna_target = null_str_vec('$do.disease_factor.phenotype_scrna_target')
-=======
->>>>>>> 768a6e5b (v3 update:)
-phenotype_target = null_str_vec('$do.disease_factor.phenotype_target')
-phenotype_target_threshold = as.numeric('$do.disease_factor.phenotype_target_threshold')
-sample_disease_group = null_str_vec('$do.disease_factor.sample_disease_group')
-sample_disease_group_scale = as.integer('$do.disease_factor.sample_disease_group_scale')
-<<<<<<< HEAD
-=======
-compare_title = null_str_vec('$do.disease_factor.compare_title')
->>>>>>> 768a6e5b (v3 update:)
- #end if
-
-outfile_pdf='$out_pdf'
-
-#elif str($do.method) == "dendrogram":
-
-celltypes_label = null_str_vec('$do.celltypes_label')
-clustertype_label = null_str_vec('$do.clustertype_label')
-samples_label = null_str_vec('$do.samples_label')
-celltypes = null_str_vec('$do.celltypes')
-
-data.to.use = list(
- #for $i, $repeat in enumerate( $do.cluster_groups )
- #if $i == 0:
- $repeat.cluster_id = list(cell.types = null_str_vec('$repeat.celltypes'),
- marker.names = null_str_vec('$repeat.marker_name'),
- marker.list = read_list('$repeat.marker_list'))
- #else
- , $repeat.cluster_id = list(cell.types = null_str_vec('$repeat.celltypes'),
- marker.names = null_str_vec('$repeat.marker_name'),
- marker.list = read_list('$repeat.marker_list'))
- #end if
- #end for
-)
-
-outfile_pdf='$out_pdf'
-outfile_tab='$out_tab'
-
-#else
- stop("No such option")
-#end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<<<<<<< HEAD
-
-
-
-
-
-
-
-
-
-
-
-
-=======
-
-
-
-
-
-
-
-
-
-
-
-
->>>>>>> 768a6e5b (v3 update:)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- do["method"] == "dendrogram" and len(do["cluster_groups"]) >0
-
-
- do["method"] == "estimateprops"
-
-
-
- do["method"] == "estimateprops" and do["disease_factor"]["use"] == "yes"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<<<<<<< HEAD
-
-=======
->>>>>>> 768a6e5b (v3 update:)
-
-
-
-
-
-<<<<<<< HEAD
-=======
-
->>>>>>> 768a6e5b (v3 update:)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- https://doi.org/10.1038/s41467-018-08023-x
-
-
\ No newline at end of file
diff -r 7022ce682d2f -r 48f0fb3061b1 scripts/dendrogram.R.orig
--- a/scripts/dendrogram.R.orig Tue Oct 29 13:39:39 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-##
-suppressWarnings(suppressPackageStartupMessages(library(xbioc)))
-suppressWarnings(suppressPackageStartupMessages(library(MuSiC)))
-suppressWarnings(suppressPackageStartupMessages(library(reshape2)))
-suppressWarnings(suppressPackageStartupMessages(library(cowplot)))
-## We use this script to generate a clustering dendrogram of cell
-## types, using the prior labelling from scRNA.
-
-read_list <- function(lfile) {
- if (lfile == "None") {
- return(NULL)
- }
-<<<<<<< HEAD
- return(read.table(file = lfile, header = FALSE, check.names = FALSE,
-=======
- return(read.table(file = lfile, header = FALSE, check.names=FALSE,
->>>>>>> 768a6e5b (v3 update:)
- stringsAsFactors = FALSE)$V1)
-}
-
-args <- commandArgs(trailingOnly = TRUE)
-source(args[1])
-
-
-## Perform the estimation
-## Produce the first step information
-sub.basis <- music_basis(scrna_eset, clusters = celltypes_label,
- samples = samples_label,
- select.ct = celltypes)
-
-## Plot the dendrogram of design matrix and cross-subject mean of
-## realtive abundance
-## Hierarchical clustering using Complete Linkage
-d1 <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
-hc1 <- hclust(d1, method = "complete")
-## Hierarchical clustering using Complete Linkage
-d2 <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean")
-hc2 <- hclust(d2, method = "complete")
-
-
-if (length(data.to.use) > 0) {
- ## We then perform bulk tissue cell type estimation with pre-grouping
- ## of cell types: C, list_of_cell_types, marker genes name, marker
- ## genes list.
- ## data.to.use = list(
- ## "C1" = list(cell.types = c("Neutro"),
- ## marker.names=NULL,
- ## marker.list=NULL),
- ## "C2" = list(cell.types = c("Podo"),
- ## marker.names=NULL,
- ## marker.list=NULL),
- ## "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"),
- ## marker.names = "Epithelial",
- ## marker.list = read_list("../test-data/epith.markers")),
- ## "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"),
- ## marker.names = "Immune",
- ## marker.list = read_list("../test-data/immune.markers"))
- ## )
- grouped_celltypes <- lapply(data.to.use, function(x) {
- x$cell.types
- })
- marker_groups <- lapply(data.to.use, function(x) {
- x$marker.list
- })
- names(marker_groups) <- names(data.to.use)
-
-
- cl_type <- as.character(scrna_eset[[celltypes_label]])
-
- for (cl in seq_len(length(grouped_celltypes))) {
- cl_type[cl_type %in%
- grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl]
- }
- pData(scrna_eset)[[clustertype_label]] <- factor(
- cl_type, levels = c(names(grouped_celltypes),
- "CD-Trans", "Novel1", "Novel2"))
-
- est_bulk <- music_prop.cluster(
- bulk.eset = bulk_eset, sc.eset = scrna_eset,
- group.markers = marker_groups, clusters = celltypes_label,
- groups = clustertype_label, samples = samples_label,
- clusters.type = grouped_celltypes
- )
-
- estimated_music_props <- est_bulk$Est.prop.weighted.cluster
- ## NNLS is not calculated here
-
- ## Show different in estimation methods
- ## Jitter plot of estimated cell type proportions
- methods_list <- c("MuSiC")
-
- jitter_fig <- Jitter_Est(
- list(data.matrix(estimated_music_props)),
- method.name = methods_list, title = "Jitter plot of Est Proportions",
- size = 2, alpha = 0.7) +
- theme_minimal() +
- labs(x = element_blank(), y = element_blank()) +
- theme(axis.text = element_text(size = 6),
- axis.text.x = element_blank(),
- legend.position = "none")
-
- plot_box <- Boxplot_Est(list(
- data.matrix(estimated_music_props)),
- method.name = methods_list) +
- theme_minimal() +
- labs(x = element_blank(), y = element_blank()) +
- theme(axis.text = element_text(size = 6),
- axis.text.x = element_blank(),
- legend.position = "none")
-
- plot_hmap <- Prop_heat_Est(list(
- data.matrix(estimated_music_props)),
- method.name = methods_list) +
- labs(x = element_blank(), y = element_blank()) +
- theme(axis.text.y = element_text(size = 6),
- axis.text.x = element_text(angle = -90, size = 5),
- plot.title = element_text(size = 9),
- legend.key.width = unit(0.15, "cm"),
- legend.text = element_text(size = 5),
- legend.title = element_text(size = 5))
-
-}
-
-pdf(file = outfile_pdf, width = 8, height = 8)
-par(mfrow = c(1, 2))
-plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)")
-plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)")
-if (length(data.to.use) > 0) {
- plot_grid(jitter_fig, plot_box, plot_hmap, ncol = 2, nrow = 2)
-}
-message(dev.off())
-
-if (length(data.to.use) > 0) {
- write.table(estimated_music_props,
- file = outfile_tab, quote = F, col.names = NA, sep = "\t")
-}
diff -r 7022ce682d2f -r 48f0fb3061b1 scripts/estimateprops.R.orig
--- a/scripts/estimateprops.R.orig Tue Oct 29 13:39:39 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,281 +0,0 @@
-suppressWarnings(suppressPackageStartupMessages(library(xbioc)))
-suppressWarnings(suppressPackageStartupMessages(library(MuSiC)))
-suppressWarnings(suppressPackageStartupMessages(library(reshape2)))
-suppressWarnings(suppressPackageStartupMessages(library(cowplot)))
-## We use this script to estimate the effectiveness of proportion methods
-
-## Load Conf
-args <- commandArgs(trailingOnly = TRUE)
-source(args[1])
-
-## Estimate cell type proportions
-est_prop <- music_prop(
- bulk.eset = bulk_eset, sc.eset = scrna_eset,
- clusters = celltypes_label,
- samples = samples_label, select.ct = celltypes, verbose = T)
-
-
-estimated_music_props <- est_prop$Est.prop.weighted
-estimated_nnls_props <- est_prop$Est.prop.allgene
-##
-estimated_music_props_flat <- melt(estimated_music_props)
-estimated_nnls_props_flat <- melt(estimated_nnls_props)
-
-scale_yaxes <- function(gplot, value) {
- if (is.na(value)) {
- gplot
- } else {
- gplot + scale_y_continuous(lim = c(0, value))
- }
-}
-
-sieve_data <- function(func, music_data, nnls_data) {
- if (func == "list") {
- res <- list(if ("MuSiC" %in% methods) music_data else NULL,
- if ("NNLS" %in% methods) nnls_data else NULL)
- res[lengths(res) > 0] ## filter out NULL elements
- } else if (func == "rbind") {
- rbind(if ("MuSiC" %in% methods) music_data else NULL,
- if ("NNLS" %in% methods) nnls_data else NULL)
- } else if (func == "c") {
- c(if ("MuSiC" %in% methods) music_data else NULL,
- if ("NNLS" %in% methods) nnls_data else NULL)
- }
-}
-
-
-## Show different in estimation methods
-## Jitter plot of estimated cell type proportions
-jitter_fig <- scale_yaxes(Jitter_Est(
- sieve_data("list",
- data.matrix(estimated_music_props),
- data.matrix(estimated_nnls_props)),
- method.name = methods, title = "Jitter plot of Est Proportions",
- size = 2, alpha = 0.7) + theme_minimal(), maxyscale)
-
-## Make a Plot
-## A more sophisticated jitter plot is provided as below. We separated
-## the T2D subjects and normal subjects by their disease factor levels.
-m_prop <- sieve_data("rbind",
- estimated_music_props_flat,
- estimated_nnls_props_flat)
-colnames(m_prop) <- c("Sub", "CellType", "Prop")
-
-if (is.null(celltypes)) {
- celltypes <- levels(m_prop$CellType)
- message("No celltypes declared, using:")
- message(celltypes)
-}
-
-if (is.null(phenotype_factors)) {
- phenotype_factors <- colnames(pData(bulk_eset))
-}
-## filter out unwanted factors like "sampleID" and "subjectName"
-phenotype_factors <- phenotype_factors[
- !(phenotype_factors %in% phenotype_factors_always_exclude)]
-message("Phenotype Factors to use:")
-message(paste0(phenotype_factors, collapse = ", "))
-
-m_prop$CellType <- factor(m_prop$CellType, levels = celltypes) # nolint
-m_prop$Method <- factor(rep(methods, each = nrow(estimated_music_props_flat)), # nolint
- levels = methods)
-
-if (use_disease_factor) {
-
- if (phenotype_target_threshold == -99) {
- phenotype_target_threshold <- -Inf
- message("phenotype target threshold set to -Inf")
- }
- ## the "2" here is to do with the sample groups, not number of methods
- m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint
- m_prop <- m_prop[!is.na(m_prop$Disease_factor), ]
- ## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2
- sample_groups <- c("Normal", sample_disease_group)
- m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint
- levels = sample_groups)
-
- ## Binary to scale: e.g. TRUE / 5 = 0.2
- m_prop$D <- (m_prop$Disease == # nolint
- sample_disease_group) / sample_disease_group_scale
- ## NA's are not included in the comparison below
- m_prop <- rbind(subset(m_prop, Disease != sample_disease_group),
- subset(m_prop, Disease == sample_disease_group))
-
- jitter_new <- scale_yaxes(
- ggplot(m_prop, aes(Method, Prop)) +
- geom_point(aes(fill = Method, color = Disease,
- stroke = D, shape = Disease),
- size = 2, alpha = 0.7,
- position = position_jitter(width = 0.25, height = 0)) +
- facet_wrap(~ CellType, scales = "free") +
- scale_colour_manual(values = c("white", "gray20")) +
- scale_shape_manual(values = c(21, 24)) + theme_minimal(), maxyscale)
-
-}
-
-if (use_disease_factor) {
-
- ## Plot to compare method effectiveness
- ## Create dataframe for beta cell proportions and Disease_factor levels
- ## - Ugly code. Essentially, doubles the cell type proportions for each
- ## set of MuSiC and NNLS methods
- m_prop_ana <- data.frame(
- pData(bulk_eset)[rep(1:nrow(estimated_music_props), length(methods)), #nolint
- phenotype_factors],
- ## get proportions of target cell type
- ct.prop = sieve_data("c",
- estimated_music_props[, phenotype_scrna_target],
- estimated_nnls_props[, phenotype_scrna_target]),
- ##
- Method = factor(rep(methods,
- each = nrow(estimated_music_props)),
- levels = methods))
- ## - fix headers
- colnames(m_prop_ana)[1:length(phenotype_factors)] <- phenotype_factors #nolint
- ## - drop NA for target phenotype (e.g. hba1c)
- m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_target]))
- m_prop_ana$Disease <- factor( # nolint
- ## - Here we set Normal/Disease assignments across the methods
- sample_groups[(
- m_prop_ana[phenotype_target] > phenotype_target_threshold) + 1
- ],
- sample_groups)
- ## - Then we scale this binary assignment to a plotable factor
- m_prop_ana$D <- (m_prop_ana$Disease == # nolint
- sample_disease_group) / sample_disease_group_scale
-
- jitt_compare <- scale_yaxes(
- ggplot(m_prop_ana, aes_string(phenotype_target, "ct.prop")) +
- geom_smooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) +
- geom_point(aes(fill = Method, color = Disease,
- stroke = D, shape = Disease),
- size = 2, alpha = 0.7) + facet_wrap(~ Method) +
- ggtitle(paste0(toupper(phenotype_target), " vs. ",
- toupper(phenotype_scrna_target),
- " Cell Type Proportion")) +
- theme_minimal() +
- ylab(paste0("Proportion of ",
- phenotype_scrna_target, " cells")) +
- xlab(paste0("Level of bulk factor (", phenotype_target, ")")) +
- scale_colour_manual(values = c("white", "gray20")) +
- scale_shape_manual(values = c(21, 24)), maxyscale)
-}
-
-## BoxPlot
-plot_box <- scale_yaxes(Boxplot_Est(
- sieve_data("list",
- data.matrix(estimated_music_props),
- data.matrix(estimated_nnls_props)),
- method.name = methods) +
- theme(axis.text.x = element_text(angle = -90),
- axis.text.y = element_text(size = 8)) +
- ggtitle(element_blank()) + theme_minimal(), maxyscale)
-
-## Heatmap
-plot_hmap <- Prop_heat_Est(
- sieve_data(
- "list",
- data.matrix(estimated_music_props),
- data.matrix(estimated_nnls_props)),
- method.name = methods) +
- theme(axis.text.x = element_text(angle = -90),
- axis.text.y = element_text(size = 6))
-
-pdf(file = outfile_pdf, width = 8, height = 8)
-if (length(celltypes) <= 8) {
- plot_grid(jitter_fig, plot_box, labels = "auto", ncol = 1, nrow = 2)
-} else {
- print(jitter_fig)
- plot_box
-}
-if (use_disease_factor) {
- plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2)
-}
-plot_hmap
-message(dev.off())
-
-writable <- function(obj, prefix, title) {
- write.table(obj,
- file = paste0("report_data/", prefix, "_",
- title, ".tabular"),
- quote = F, sep = "\t", col.names = NA)
-}
-
-## Output Proportions
-if ("NNLS" %in% methods) {
- writable(est_prop$Est.prop.allgene, "prop",
- "NNLS Estimated Proportions of Cell Types")
-}
-
-if ("MuSiC" %in% methods) {
- writable(est_prop$Est.prop.weighted, "prop",
- "Music Estimated Proportions of Cell Types")
- writable(est_prop$Weight.gene, "weightgene",
- "Music Estimated Proportions of Cell Types (by Gene)")
- writable(est_prop$r.squared.full, "rsquared",
- "Music R-sqr Estimated Proportions of Each Subject")
- writable(est_prop$Var.prop, "varprop",
- "Matrix of Variance of MuSiC Estimates")
-}
-
-
-<<<<<<< HEAD
-=======
-write.table(est_prop$Est.prop.weighted,
- file = paste0("report_data/prop_",
- "Music Estimated Proportions of Cell Types",
- ".tabular"),
- quote = F, sep = "\t", col.names = NA)
-write.table(est_prop$Est.prop.allgene,
- file = paste0("report_data/prop_",
- "NNLS Estimated Proportions of Cell Types",
- ".tabular"),
- quote = F, sep = "\t", col.names = NA)
-write.table(est_prop$Weight.gene,
- file = paste0("report_data/weightgene_",
- "Music Estimated Proportions of Cell Types (by Gene)",
- ".tabular"),
- quote = F, sep = "\t", col.names = NA)
-write.table(est_prop$r.squared.full,
- file = paste0("report_data/rsquared_",
- "Music R-sqr Estimated Proportions of Each Subject",
- ".tabular"),
- quote = F, sep = "\t", col.names = NA)
-write.table(est_prop$Var.prop,
- file = paste0("report_data/varprop_",
- "Matrix of Variance of MuSiC Estimates",
- ".tabular"),
- quote = F, sep = "\t", col.names = NA)
-
-
->>>>>>> 7a416140 (fitting summaries only apply when disease factor is used)
-if (use_disease_factor) {
- ## Summary table of linear regressions of disease factors
- for (meth in methods) {
- ##lm_beta_meth = lm(ct.prop ~ age + bmi + hba1c + gender, data =
- sub_data <- subset(m_prop_ana, Method == meth)
-
- ## We can only do regression where there are more than 1 factors
- ## so we must find and exclude the ones which are not
- gt1_facts <- sapply(phenotype_factors, function(facname) {
- return(length(unique(sort(sub_data[[facname]]))) == 1)
- })
- form_factors <- phenotype_factors
- exclude_facts <- names(gt1_facts)[gt1_facts]
- if (length(exclude_facts) > 0) {
- message("Factors with only one level will be excluded:")
- message(exclude_facts)
- form_factors <- phenotype_factors[
- !(phenotype_factors %in% exclude_facts)]
- }
- lm_beta_meth <- lm(as.formula(
- paste("ct.prop", paste(form_factors, collapse = " + "),
- sep = " ~ ")), data = sub_data)
- message(paste0("Summary: ", meth))
- capture.output(summary(lm_beta_meth),
- file = paste0("report_data/summ_Log of ",
- meth,
- " fitting.txt"))
- }
-}
-