# HG changeset patch
# User bgruening
# Date 1637942071 0
# Node ID 817eb707bbf4e9e4fce409f424252435f0ffc849
# Parent 2fed32b5aa02cf0f53a95aed9030b330b13494f9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 683bb72ae92b5759a239b7e3bf4c5a229ed35b54"
diff -r 2fed32b5aa02 -r 817eb707bbf4 inspect_eset.xml
--- a/inspect_eset.xml Sun Sep 12 19:48:26 2021 +0000
+++ b/inspect_eset.xml Fri Nov 26 15:54:31 2021 +0000
@@ -38,14 +38,15 @@
-
-
-
-
+
+
+
+
+
-
+
@@ -75,7 +76,6 @@
diff -r 2fed32b5aa02 -r 817eb707bbf4 macros.xml
--- a/macros.xml Sun Sep 12 19:48:26 2021 +0000
+++ b/macros.xml Fri Nov 26 15:54:31 2021 +0000
@@ -1,5 +1,5 @@
- 0
+ 1
@@ -15,11 +15,11 @@
^(([A-Za-z0-9+_ -]+)\s?,?)*$
- ^(([A-Za-z0-9+_ -]+)\s?)*$
+ ^(([A-Za-z0-9+_ -]+)\s?)+$
+ label="Comma list of cell types to use from scRNA dataset" help="If blank, then use all available cell types." >
diff -r 2fed32b5aa02 -r 817eb707bbf4 scripts/dendrogram.R
--- a/scripts/dendrogram.R Sun Sep 12 19:48:26 2021 +0000
+++ b/scripts/dendrogram.R Fri Nov 26 15:54:31 2021 +0000
@@ -17,31 +17,6 @@
args <- commandArgs(trailingOnly = TRUE)
source(args[1])
-## We then perform bulk tissue cell type estimation with pre-grouping
-## of cell types: C, list_of_cell_types, marker genes name, marker
-## genes list.
-## data.to.use = list(
-## "C1" = list(cell.types = c("Neutro"),
-## marker.names=NULL,
-## marker.list=NULL),
-## "C2" = list(cell.types = c("Podo"),
-## marker.names=NULL,
-## marker.list=NULL),
-## "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"),
-## marker.names = "Epithelial",
-## marker.list = read_list("../test-data/epith.markers")),
-## "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"),
-## marker.names = "Immune",
-## marker.list = read_list("../test-data/immune.markers"))
-## )
-grouped_celltypes <- lapply(data.to.use, function(x) {
- x$cell.types
-})
-marker_groups <- lapply(data.to.use, function(x) {
- x$marker.list
-})
-names(marker_groups) <- names(data.to.use)
-
## Perform the estimation
## Produce the first step information
@@ -51,33 +26,107 @@
## Plot the dendrogram of design matrix and cross-subject mean of
## realtive abundance
-par(mfrow = c(1, 2))
-d <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
+## Hierarchical clustering using Complete Linkage
+d1 <- dist(t(log(sub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
+hc1 <- hclust(d1, method = "complete")
## Hierarchical clustering using Complete Linkage
-hc1 <- hclust(d, method = "complete")
-## Plot the obtained dendrogram
-plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)")
-d <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean")
-## Hierarchical clustering using Complete Linkage
-hc2 <- hclust(d, method = "complete")
-## Plot the obtained dendrogram
-pdf(file = outfile_pdf, width = 8, height = 8)
-plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)")
+d2 <- dist(t(log(sub.basis$M.theta + 1e-8)), method = "euclidean")
+hc2 <- hclust(d2, method = "complete")
+
-cl_type <- as.character(scrna_eset[[celltypes_label]])
+if (length(data.to.use) > 0) {
+ ## We then perform bulk tissue cell type estimation with pre-grouping
+ ## of cell types: C, list_of_cell_types, marker genes name, marker
+ ## genes list.
+ ## data.to.use = list(
+ ## "C1" = list(cell.types = c("Neutro"),
+ ## marker.names=NULL,
+ ## marker.list=NULL),
+ ## "C2" = list(cell.types = c("Podo"),
+ ## marker.names=NULL,
+ ## marker.list=NULL),
+ ## "C3" = list(cell.types = c("Endo","CD-PC","LOH","CD-IC","DCT","PT"),
+ ## marker.names = "Epithelial",
+ ## marker.list = read_list("../test-data/epith.markers")),
+ ## "C4" = list(cell.types = c("Macro","Fib","B lymph","NK","T lymph"),
+ ## marker.names = "Immune",
+ ## marker.list = read_list("../test-data/immune.markers"))
+ ## )
+ grouped_celltypes <- lapply(data.to.use, function(x) {
+ x$cell.types
+ })
+ marker_groups <- lapply(data.to.use, function(x) {
+ x$marker.list
+ })
+ names(marker_groups) <- names(data.to.use)
+
+
+ cl_type <- as.character(scrna_eset[[celltypes_label]])
+
+ for (cl in seq_len(length(grouped_celltypes))) {
+ cl_type[cl_type %in%
+ grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl]
+ }
+ pData(scrna_eset)[[clustertype_label]] <- factor(
+ cl_type, levels = c(names(grouped_celltypes),
+ "CD-Trans", "Novel1", "Novel2"))
+
+ est_bulk <- music_prop.cluster(
+ bulk.eset = bulk_eset, sc.eset = scrna_eset,
+ group.markers = marker_groups, clusters = celltypes_label,
+ groups = clustertype_label, samples = samples_label,
+ clusters.type = grouped_celltypes
+ )
-for (cl in seq_len(length(grouped_celltypes))) {
- cl_type[cl_type %in% grouped_celltypes[[cl]]] <- names(grouped_celltypes)[cl]
-}
-pData(scrna_eset)[[clustertype_label]] <- factor(
- cl_type, levels = c(names(grouped_celltypes),
- "CD-Trans", "Novel1", "Novel2"))
+ estimated_music_props <- est_bulk$Est.prop.weighted.cluster
+ ## NNLS is not calculated here
+
+ ## Show different in estimation methods
+ ## Jitter plot of estimated cell type proportions
+ methods_list <- c("MuSiC")
+
+ jitter_fig <- Jitter_Est(
+ list(data.matrix(estimated_music_props)),
+ method.name = methods_list, title = "Jitter plot of Est Proportions",
+ size = 2, alpha = 0.7) +
+ theme_minimal() +
+ labs(x = element_blank(), y = element_blank()) +
+ theme(axis.text = element_text(size = 6),
+ axis.text.x = element_blank(),
+ legend.position = "none")
+
+ plot_box <- Boxplot_Est(list(
+ data.matrix(estimated_music_props)),
+ method.name = methods_list) +
+ theme_minimal() +
+ labs(x = element_blank(), y = element_blank()) +
+ theme(axis.text = element_text(size = 6),
+ axis.text.x = element_blank(),
+ legend.position = "none")
-est_bulk <- music_prop.cluster(
- bulk.eset = bulk_eset, sc.eset = scrna_eset,
- group.markers = marker_groups, clusters = celltypes_label,
- groups = clustertype_label, samples = samples_label,
- clusters.type = grouped_celltypes)
+ plot_hmap <- Prop_heat_Est(list(
+ data.matrix(estimated_music_props)),
+ method.name = methods_list) +
+ labs(x = element_blank(), y = element_blank()) +
+ theme(axis.text.y = element_text(size = 6),
+ axis.text.x = element_text(angle = -90, size = 5),
+ plot.title = element_text(size = 9),
+ legend.key.width = unit(0.15, "cm"),
+ legend.text = element_text(size = 5),
+ legend.title = element_text(size = 5))
-write.table(est_bulk, file = outfile_tab, quote = F, col.names = NA, sep = "\t")
-dev.off()
+}
+
+pdf(file = outfile_pdf, width = 8, height = 8)
+par(mfrow = c(1, 2))
+plot(hc1, cex = 0.6, hang = -1, main = "Cluster log(Design Matrix)")
+plot(hc2, cex = 0.6, hang = -1, main = "Cluster log(Mean of RA)")
+if (length(data.to.use) > 0) {
+ plot_grid(jitter_fig, plot_box, plot_hmap, ncol = 2, nrow = 2)
+}
+message(dev.off())
+
+if (length(data.to.use) > 0) {
+ write.table(estimated_music_props,
+ file = outfile_tab, quote = F, col.names = NA, sep = "\t")
+}
diff -r 2fed32b5aa02 -r 817eb707bbf4 scripts/estimateprops.R
--- a/scripts/estimateprops.R Sun Sep 12 19:48:26 2021 +0000
+++ b/scripts/estimateprops.R Fri Nov 26 15:54:31 2021 +0000
@@ -14,36 +14,67 @@
clusters = celltypes_label,
samples = samples_label, select.ct = celltypes, verbose = T)
+estimated_music_props <- est_prop$Est.prop.weighted
+estimated_nnls_props <- est_prop$Est.prop.allgene
## Show different in estimation methods
## Jitter plot of estimated cell type proportions
-jitter.fig <- Jitter_Est(
- list(data.matrix(est_prop$Est.prop.weighted),
- data.matrix(est_prop$Est.prop.allgene)),
- method.name = methods, title = "Jitter plot of Est Proportions")
+jitter_fig <- Jitter_Est(
+ list(data.matrix(estimated_music_props),
+ data.matrix(estimated_nnls_props)),
+ method.name = methods, title = "Jitter plot of Est Proportions",
+ size = 2, alpha = 0.7) + theme_minimal()
## Make a Plot
## A more sophisticated jitter plot is provided as below. We separated
-## the T2D subjects and normal subjects by their HbA1c levels.
-m_prop <- rbind(melt(est_prop$Est.prop.weighted),
- melt(est_prop$Est.prop.allgene))
+## the T2D subjects and normal subjects by their disease factor levels.
+estimated_music_props_flat <- melt(estimated_music_props)
+estimated_nnls_props_flat <- melt(estimated_nnls_props)
+
+m_prop <- rbind(estimated_music_props_flat,
+ estimated_nnls_props_flat)
+colnames(m_prop) <- c("Sub", "CellType", "Prop")
+
+if (is.null(celltypes)) {
+ celltypes <- levels(m_prop$CellType)
+ message("No celltypes declared, using:")
+ message(celltypes)
+}
-colnames(m_prop) <- c("Sub", "CellType", "Prop")
+if (phenotype_target_threshold == -99) {
+ phenotype_target_threshold <- -Inf
+ message("phenotype target threshold set to -Inf")
+}
+
+if (is.null(phenotype_factors)) {
+ phenotype_factors <- colnames(pData(bulk_eset))
+}
+## filter out unwanted factors like "sampleID" and "subjectName"
+phenotype_factors <- phenotype_factors[
+ !(phenotype_factors %in% phenotype_factors_always_exclude)]
+message("Phenotype Factors to use:")
+message(phenotype_factors)
+
m_prop$CellType <- factor(m_prop$CellType, levels = celltypes) # nolint
-m_prop$Method <- factor(rep(methods, each = 89 * 6), levels = methods) # nolint
-m_prop$HbA1c <- rep(bulk_eset$hba1c, 2 * 6) # nolint
-m_prop <- m_prop[!is.na(m_prop$HbA1c), ]
-m_prop$Disease <- factor(sample_groups[(m_prop$HbA1c > 6.5) + 1], # nolint
+m_prop$Method <- factor(rep(methods, each = nrow(estimated_music_props_flat)), # nolint
+ levels = methods)
+m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint
+m_prop <- m_prop[!is.na(m_prop$Disease_factor), ]
+## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2
+sample_groups <- c("Normal", sample_disease_group)
+m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint
levels = sample_groups)
+## Binary to scale: e.g. TRUE / 5 = 0.2
m_prop$D <- (m_prop$Disease == # nolint
sample_disease_group) / sample_disease_group_scale
-m_prop <- rbind(subset(m_prop, Disease == healthy_phenotype),
- subset(m_prop, Disease != healthy_phenotype))
+## NA's are not included in the comparison below
+m_prop <- rbind(subset(m_prop, Disease != sample_disease_group),
+ subset(m_prop, Disease == sample_disease_group))
-jitter.new <- ggplot(m_prop, aes(Method, Prop)) +
+jitter_new <- ggplot(m_prop, aes(Method, Prop)) +
geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),
size = 2, alpha = 0.7,
position = position_jitter(width = 0.25, height = 0)) +
@@ -52,20 +83,23 @@
scale_shape_manual(values = c(21, 24)) + theme_minimal()
## Plot to compare method effectiveness
-## Create dataframe for beta cell proportions and HbA1c levels
-m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:89, 2), phenotype_factors],
- ct.prop = c(est_prop$Est.prop.weighted[, 2],
- est_prop$Est.prop.allgene[, 2]),
- Method = factor(rep(methods, each = 89),
+## Create dataframe for beta cell proportions and Disease_factor levels
+m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:nrow(estimated_music_props), 2), #nolint
+ phenotype_factors],
+ ct.prop = c(estimated_music_props[, 2],
+ estimated_nnls_props[, 2]),
+ Method = factor(rep(methods,
+ each = nrow(estimated_music_props)),
levels = methods))
-colnames(m_prop_ana)[1:4] <- phenotype_factors
-m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_gene]))
+colnames(m_prop_ana)[1:length(phenotype_factors)] <- phenotype_factors #nolint
+m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_target]))
m_prop_ana$Disease <- factor(sample_groups[( # nolint
- m_prop_ana[phenotype_gene] > 6.5) + 1], sample_groups)
+ m_prop_ana[phenotype_target] > phenotype_target_threshold) + 1],
+ sample_groups)
m_prop_ana$D <- (m_prop_ana$Disease == # nolint
sample_disease_group) / sample_disease_group_scale
-jitt_compare <- ggplot(m_prop_ana, aes_string(phenotype_gene, "ct.prop")) +
+jitt_compare <- ggplot(m_prop_ana, aes_string(phenotype_target, "ct.prop")) +
geom_smooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) +
geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),
size = 2, alpha = 0.7) + facet_wrap(~ Method) +
@@ -73,21 +107,81 @@
scale_colour_manual(values = c("white", "gray20")) +
scale_shape_manual(values = c(21, 24))
+## BoxPlot
+plot_box <- Boxplot_Est(list(
+ data.matrix(estimated_music_props),
+ data.matrix(estimated_nnls_props)),
+ method.name = c("MuSiC", "NNLS")) +
+ theme(axis.text.x = element_text(angle = -90),
+ axis.text.y = element_text(size = 8)) +
+ ggtitle(element_blank()) + theme_minimal()
+
+## Heatmap
+plot_hmap <- Prop_heat_Est(list(
+ data.matrix(estimated_music_props),
+ data.matrix(estimated_nnls_props)),
+ method.name = c("MuSiC", "NNLS")) +
+ theme(axis.text.x = element_text(angle = -90),
+ axis.text.y = element_text(size = 6))
pdf(file = outfile_pdf, width = 8, height = 8)
-plot_grid(jitter.fig, jitter.new, labels = "auto", ncol = 1, nrow = 2)
-jitt_compare
-dev.off()
+plot_grid(jitter_fig, plot_box, labels = "auto", ncol = 1, nrow = 2)
+plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2)
+plot_hmap
+message(dev.off())
+
+## Output Proportions
+
+write.table(est_prop$Est.prop.weighted,
+ file = paste0("report_data/prop_",
+ "Music Estimated Proportions of Cell Types",
+ ".tabular"),
+ quote = F, sep = "\t", col.names = NA)
+write.table(est_prop$Est.prop.allgene,
+ file = paste0("report_data/prop_",
+ "NNLS Estimated Proportions of Cell Types",
+ ".tabular"),
+ quote = F, sep = "\t", col.names = NA)
+write.table(est_prop$Weight.gene,
+ file = paste0("report_data/weightgene_",
+ "Music Estimated Proportions of Cell Types (by Gene)",
+ ".tabular"),
+ quote = F, sep = "\t", col.names = NA)
+write.table(est_prop$r.squared.full,
+ file = paste0("report_data/rsquared_",
+ "Music R-sqr Estimated Proportions of Each Subject",
+ ".tabular"),
+ quote = F, sep = "\t", col.names = NA)
+write.table(est_prop$Var.prop,
+ file = paste0("report_data/varprop_",
+ "Matrix of Variance of MuSiC Estimates",
+ ".tabular"),
+ quote = F, sep = "\t", col.names = NA)
+
## Summary table
for (meth in methods) {
##lm_beta_meth = lm(ct.prop ~ age + bmi + hba1c + gender, data =
- ##subset(m_prop_ana, Method == meth))
+ sub_data <- subset(m_prop_ana, Method == meth)
+ ## We can only do regression where there are more than 1 factors
+ ## so we must find and exclude the ones which are not
+ gt1_facts <- sapply(phenotype_factors, function(facname) {
+ return(length(unique(sort(sub_data[[facname]]))) == 1)
+ })
+ form_factors <- phenotype_factors
+ exclude_facts <- names(gt1_facts)[gt1_facts]
+ if (length(exclude_facts) > 0) {
+ message("Factors with only one level will be excluded:")
+ message(exclude_facts)
+ form_factors <- phenotype_factors[
+ !(phenotype_factors %in% exclude_facts)]
+ }
lm_beta_meth <- lm(as.formula(
- paste("ct.prop", paste(phenotype_factors, collapse = " + "),
- sep = " ~ ")),
- data = subset(m_prop_ana, Method == meth))
- print(paste0("Summary: ", meth))
+ paste("ct.prop", paste(form_factors, collapse = " + "),
+ sep = " ~ ")), data = sub_data)
+ message(paste0("Summary: ", meth))
capture.output(summary(lm_beta_meth),
- file = paste0("report_data/summ_", meth, ".txt"))
+ file = paste0("report_data/summ_Log of ",
+ meth,
+ " fitting.txt"))
}
diff -r 2fed32b5aa02 -r 817eb707bbf4 scripts/inspect.R
--- a/scripts/inspect.R Sun Sep 12 19:48:26 2021 +0000
+++ b/scripts/inspect.R Fri Nov 26 15:54:31 2021 +0000
@@ -6,17 +6,19 @@
source(args[1])
printout <- function(text) {
- if (typeof(text) %in% c("list", "vector")) {
+ if (typeof(text) %in% c("list", "vector", "integer", "double", "numeric")) {
write.table(text, file = outfile_tab, quote = F, sep = "\t",
col.names = NA)
} else {
## text
+ print(typeof(text))
capture.output(text, file = outfile_tab) # nolint
}
}
-if (inspector %in% c("print", "pData", "fData", "dims", "experimentData",
- "exprs", "signature", "annotation", "abstract")) {
+if (inspector %in% c("print", "pData", "fData", "dims",
+ "experimentData", "protocolData", "exprs",
+ "signature", "annotation", "abstract")) {
op <- get(inspector)
tab <- op(rds_eset)
printout(tab)
diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/EMTABesethealthy.subset.rds
Binary file test-data/EMTABesethealthy.subset.rds has changed
diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/Mousebulkeset.rds
Binary file test-data/Mousebulkeset.rds has changed
diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/Mousesubeset.degenesonly2.half.rds
Binary file test-data/Mousesubeset.degenesonly2.half.rds has changed
diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/default_output.pdf
Binary file test-data/default_output.pdf has changed
diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/dendro.pdf
Binary file test-data/dendro.pdf has changed
diff -r 2fed32b5aa02 -r 817eb707bbf4 test-data/dendro_1.pdf
Binary file test-data/dendro_1.pdf has changed