Mercurial > repos > bgruening > music_construct_eset
comparison scripts/estimateprops.R @ 3:7ffaa0968da3 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 8beed1a19fcd9dc59f7746e1dfa735a2d5f29784"
author | bgruening |
---|---|
date | Thu, 10 Feb 2022 12:53:22 +0000 |
parents | 7902cd31b9b5 |
children |
comparison
equal
deleted
inserted
replaced
2:7902cd31b9b5 | 3:7ffaa0968da3 |
---|---|
12 est_prop <- music_prop( | 12 est_prop <- music_prop( |
13 bulk.eset = bulk_eset, sc.eset = scrna_eset, | 13 bulk.eset = bulk_eset, sc.eset = scrna_eset, |
14 clusters = celltypes_label, | 14 clusters = celltypes_label, |
15 samples = samples_label, select.ct = celltypes, verbose = T) | 15 samples = samples_label, select.ct = celltypes, verbose = T) |
16 | 16 |
17 | |
17 estimated_music_props <- est_prop$Est.prop.weighted | 18 estimated_music_props <- est_prop$Est.prop.weighted |
18 estimated_nnls_props <- est_prop$Est.prop.allgene | 19 estimated_nnls_props <- est_prop$Est.prop.allgene |
20 ## | |
21 estimated_music_props_flat <- melt(estimated_music_props) | |
22 estimated_nnls_props_flat <- melt(estimated_nnls_props) | |
19 | 23 |
20 scale_yaxes <- function(gplot, value) { | 24 scale_yaxes <- function(gplot, value) { |
21 if (is.na(value)) { | 25 if (is.na(value)) { |
22 gplot | 26 gplot |
23 } else { | 27 } else { |
24 gplot + scale_y_continuous(lim = c(0, value)) | 28 gplot + scale_y_continuous(lim = c(0, value)) |
25 } | 29 } |
26 } | 30 } |
27 | 31 |
32 sieve_data <- function(func, music_data, nnls_data) { | |
33 if (func == "list") { | |
34 res <- list(if ("MuSiC" %in% methods) music_data else NULL, | |
35 if ("NNLS" %in% methods) nnls_data else NULL) | |
36 res[lengths(res) > 0] ## filter out NULL elements | |
37 } else if (func == "rbind") { | |
38 rbind(if ("MuSiC" %in% methods) music_data else NULL, | |
39 if ("NNLS" %in% methods) nnls_data else NULL) | |
40 } else if (func == "c") { | |
41 c(if ("MuSiC" %in% methods) music_data else NULL, | |
42 if ("NNLS" %in% methods) nnls_data else NULL) | |
43 } | |
44 } | |
45 | |
46 | |
28 ## Show different in estimation methods | 47 ## Show different in estimation methods |
29 ## Jitter plot of estimated cell type proportions | 48 ## Jitter plot of estimated cell type proportions |
30 jitter_fig <- scale_yaxes(Jitter_Est( | 49 jitter_fig <- scale_yaxes(Jitter_Est( |
31 list(data.matrix(estimated_music_props), | 50 sieve_data("list", |
32 data.matrix(estimated_nnls_props)), | 51 data.matrix(estimated_music_props), |
52 data.matrix(estimated_nnls_props)), | |
33 method.name = methods, title = "Jitter plot of Est Proportions", | 53 method.name = methods, title = "Jitter plot of Est Proportions", |
34 size = 2, alpha = 0.7) + theme_minimal(), maxyscale) | 54 size = 2, alpha = 0.7) + theme_minimal(), maxyscale) |
35 | |
36 | 55 |
37 ## Make a Plot | 56 ## Make a Plot |
38 ## A more sophisticated jitter plot is provided as below. We separated | 57 ## A more sophisticated jitter plot is provided as below. We separated |
39 ## the T2D subjects and normal subjects by their disease factor levels. | 58 ## the T2D subjects and normal subjects by their disease factor levels. |
40 estimated_music_props_flat <- melt(estimated_music_props) | 59 m_prop <- sieve_data("rbind", |
41 estimated_nnls_props_flat <- melt(estimated_nnls_props) | 60 estimated_music_props_flat, |
42 | 61 estimated_nnls_props_flat) |
43 m_prop <- rbind(estimated_music_props_flat, | |
44 estimated_nnls_props_flat) | |
45 colnames(m_prop) <- c("Sub", "CellType", "Prop") | 62 colnames(m_prop) <- c("Sub", "CellType", "Prop") |
46 | 63 |
47 if (is.null(celltypes)) { | 64 if (is.null(celltypes)) { |
48 celltypes <- levels(m_prop$CellType) | 65 celltypes <- levels(m_prop$CellType) |
49 message("No celltypes declared, using:") | 66 message("No celltypes declared, using:") |
67 | 84 |
68 if (phenotype_target_threshold == -99) { | 85 if (phenotype_target_threshold == -99) { |
69 phenotype_target_threshold <- -Inf | 86 phenotype_target_threshold <- -Inf |
70 message("phenotype target threshold set to -Inf") | 87 message("phenotype target threshold set to -Inf") |
71 } | 88 } |
72 | 89 ## the "2" here is to do with the sample groups, not number of methods |
73 m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint | 90 m_prop$Disease_factor <- rep(bulk_eset[[phenotype_target]], 2 * length(celltypes)) # nolint |
74 m_prop <- m_prop[!is.na(m_prop$Disease_factor), ] | 91 m_prop <- m_prop[!is.na(m_prop$Disease_factor), ] |
75 ## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2 | 92 ## Generate a TRUE/FALSE table of Normal == 1 and Disease == 2 |
76 sample_groups <- c("Normal", sample_disease_group) | 93 sample_groups <- c("Normal", sample_disease_group) |
77 m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint | 94 m_prop$Disease <- factor(sample_groups[(m_prop$Disease_factor > phenotype_target_threshold) + 1], # nolint |
82 sample_disease_group) / sample_disease_group_scale | 99 sample_disease_group) / sample_disease_group_scale |
83 ## NA's are not included in the comparison below | 100 ## NA's are not included in the comparison below |
84 m_prop <- rbind(subset(m_prop, Disease != sample_disease_group), | 101 m_prop <- rbind(subset(m_prop, Disease != sample_disease_group), |
85 subset(m_prop, Disease == sample_disease_group)) | 102 subset(m_prop, Disease == sample_disease_group)) |
86 | 103 |
87 jitter_new <- scale_yaxes(ggplot(m_prop, aes(Method, Prop)) + | 104 jitter_new <- scale_yaxes( |
88 geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease), | 105 ggplot(m_prop, aes(Method, Prop)) + |
106 geom_point(aes(fill = Method, color = Disease, | |
107 stroke = D, shape = Disease), | |
89 size = 2, alpha = 0.7, | 108 size = 2, alpha = 0.7, |
90 position = position_jitter(width = 0.25, height = 0)) + | 109 position = position_jitter(width = 0.25, height = 0)) + |
91 facet_wrap(~ CellType, scales = "free") + | 110 facet_wrap(~ CellType, scales = "free") + |
92 scale_colour_manual(values = c("white", "gray20")) + | 111 scale_colour_manual(values = c("white", "gray20")) + |
93 scale_shape_manual(values = c(21, 24)) + theme_minimal(), maxyscale) | 112 scale_shape_manual(values = c(21, 24)) + theme_minimal(), maxyscale) |
98 | 117 |
99 ## Plot to compare method effectiveness | 118 ## Plot to compare method effectiveness |
100 ## Create dataframe for beta cell proportions and Disease_factor levels | 119 ## Create dataframe for beta cell proportions and Disease_factor levels |
101 ## - Ugly code. Essentially, doubles the cell type proportions for each | 120 ## - Ugly code. Essentially, doubles the cell type proportions for each |
102 ## set of MuSiC and NNLS methods | 121 ## set of MuSiC and NNLS methods |
103 m_prop_ana <- data.frame(pData(bulk_eset)[rep(1:nrow(estimated_music_props), 2), #nolint | 122 m_prop_ana <- data.frame( |
104 phenotype_factors], | 123 pData(bulk_eset)[rep(1:nrow(estimated_music_props), length(methods)), #nolint |
105 ## get proportions of target cell type | 124 phenotype_factors], |
106 ct.prop = c(estimated_music_props[, phenotype_scrna_target], | 125 ## get proportions of target cell type |
107 estimated_nnls_props[, phenotype_scrna_target]), | 126 ct.prop = sieve_data("c", |
108 ## | 127 estimated_music_props[, phenotype_scrna_target], |
109 Method = factor(rep(methods, | 128 estimated_nnls_props[, phenotype_scrna_target]), |
110 each = nrow(estimated_music_props)), | 129 ## |
111 levels = methods)) | 130 Method = factor(rep(methods, |
131 each = nrow(estimated_music_props)), | |
132 levels = methods)) | |
112 ## - fix headers | 133 ## - fix headers |
113 colnames(m_prop_ana)[1:length(phenotype_factors)] <- phenotype_factors #nolint | 134 colnames(m_prop_ana)[1:length(phenotype_factors)] <- phenotype_factors #nolint |
114 ## - drop NA for target phenotype (e.g. hba1c) | 135 ## - drop NA for target phenotype (e.g. hba1c) |
115 m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_target])) | 136 m_prop_ana <- subset(m_prop_ana, !is.na(m_prop_ana[phenotype_target])) |
116 m_prop_ana$Disease <- factor( # nolint | 137 m_prop_ana$Disease <- factor( # nolint |
117 ## - Here we set Normal/Disease assignments across the two MuSiC and NNLS methods | 138 ## - Here we set Normal/Disease assignments across the methods |
118 sample_groups[( | 139 sample_groups[( |
119 m_prop_ana[phenotype_target] > phenotype_target_threshold) + 1 | 140 m_prop_ana[phenotype_target] > phenotype_target_threshold) + 1 |
120 ], | 141 ], |
121 sample_groups) | 142 sample_groups) |
122 ## - Then we scale this binary assignment to a plotable factor | 143 ## - Then we scale this binary assignment to a plotable factor |
123 m_prop_ana$D <- (m_prop_ana$Disease == # nolint | 144 m_prop_ana$D <- (m_prop_ana$Disease == # nolint |
124 sample_disease_group) / sample_disease_group_scale | 145 sample_disease_group) / sample_disease_group_scale |
125 | 146 |
126 jitt_compare <- scale_yaxes(ggplot(m_prop_ana, aes_string(phenotype_target, "ct.prop")) + | 147 jitt_compare <- scale_yaxes( |
148 ggplot(m_prop_ana, aes_string(phenotype_target, "ct.prop")) + | |
127 geom_smooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) + | 149 geom_smooth(method = "lm", se = FALSE, col = "black", lwd = 0.25) + |
128 geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease), | 150 geom_point(aes(fill = Method, color = Disease, |
151 stroke = D, shape = Disease), | |
129 size = 2, alpha = 0.7) + facet_wrap(~ Method) + | 152 size = 2, alpha = 0.7) + facet_wrap(~ Method) + |
130 ggtitle(paste0(toupper(phenotype_target), " vs. ", | 153 ggtitle(paste0(toupper(phenotype_target), " vs. ", |
131 toupper(phenotype_scrna_target), " Cell Type Proportion")) + | 154 toupper(phenotype_scrna_target), |
155 " Cell Type Proportion")) + | |
132 theme_minimal() + | 156 theme_minimal() + |
133 ylab(paste0("Proportion of ", | 157 ylab(paste0("Proportion of ", |
134 phenotype_scrna_target, " cells")) + | 158 phenotype_scrna_target, " cells")) + |
135 xlab(paste0("Level of bulk factor (", phenotype_target, ")")) + | 159 xlab(paste0("Level of bulk factor (", phenotype_target, ")")) + |
136 scale_colour_manual(values = c("white", "gray20")) + | 160 scale_colour_manual(values = c("white", "gray20")) + |
137 scale_shape_manual(values = c(21, 24)), maxyscale) | 161 scale_shape_manual(values = c(21, 24)), maxyscale) |
138 } | 162 } |
139 | 163 |
140 ## BoxPlot | 164 ## BoxPlot |
141 plot_box <- scale_yaxes(Boxplot_Est(list( | 165 plot_box <- scale_yaxes(Boxplot_Est( |
142 data.matrix(estimated_music_props), | 166 sieve_data("list", |
143 data.matrix(estimated_nnls_props)), | 167 data.matrix(estimated_music_props), |
144 method.name = c("MuSiC", "NNLS")) + | 168 data.matrix(estimated_nnls_props)), |
169 method.name = methods) + | |
145 theme(axis.text.x = element_text(angle = -90), | 170 theme(axis.text.x = element_text(angle = -90), |
146 axis.text.y = element_text(size = 8)) + | 171 axis.text.y = element_text(size = 8)) + |
147 ggtitle(element_blank()) + theme_minimal(), maxyscale) | 172 ggtitle(element_blank()) + theme_minimal(), maxyscale) |
148 | 173 |
149 ## Heatmap | 174 ## Heatmap |
150 plot_hmap <- Prop_heat_Est(list( | 175 plot_hmap <- Prop_heat_Est( |
151 data.matrix(estimated_music_props), | 176 sieve_data( |
152 data.matrix(estimated_nnls_props)), | 177 "list", |
153 method.name = c("MuSiC", "NNLS")) + | 178 data.matrix(estimated_music_props), |
179 data.matrix(estimated_nnls_props)), | |
180 method.name = methods) + | |
154 theme(axis.text.x = element_text(angle = -90), | 181 theme(axis.text.x = element_text(angle = -90), |
155 axis.text.y = element_text(size = 6)) | 182 axis.text.y = element_text(size = 6)) |
156 | 183 |
157 pdf(file = outfile_pdf, width = 8, height = 8) | 184 pdf(file = outfile_pdf, width = 8, height = 8) |
158 if (length(celltypes) <= 8) { | 185 if (length(celltypes) <= 8) { |
165 plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2) | 192 plot_grid(jitter_new, jitt_compare, labels = "auto", ncol = 1, nrow = 2) |
166 } | 193 } |
167 plot_hmap | 194 plot_hmap |
168 message(dev.off()) | 195 message(dev.off()) |
169 | 196 |
197 writable <- function(obj, prefix, title) { | |
198 write.table(obj, | |
199 file = paste0("report_data/", prefix, "_", | |
200 title, ".tabular"), | |
201 quote = F, sep = "\t", col.names = NA) | |
202 } | |
203 | |
170 ## Output Proportions | 204 ## Output Proportions |
171 | 205 if ("NNLS" %in% methods) { |
172 write.table(est_prop$Est.prop.weighted, | 206 writable(est_prop$Est.prop.allgene, "prop", |
173 file = paste0("report_data/prop_", | 207 "NNLS Estimated Proportions of Cell Types") |
174 "Music Estimated Proportions of Cell Types", | 208 } |
175 ".tabular"), | 209 |
176 quote = F, sep = "\t", col.names = NA) | 210 if ("MuSiC" %in% methods) { |
177 write.table(est_prop$Est.prop.allgene, | 211 writable(est_prop$Est.prop.weighted, "prop", |
178 file = paste0("report_data/prop_", | 212 "Music Estimated Proportions of Cell Types") |
179 "NNLS Estimated Proportions of Cell Types", | 213 writable(est_prop$Weight.gene, "weightgene", |
180 ".tabular"), | 214 "Music Estimated Proportions of Cell Types (by Gene)") |
181 quote = F, sep = "\t", col.names = NA) | 215 writable(est_prop$r.squared.full, "rsquared", |
182 write.table(est_prop$Weight.gene, | 216 "Music R-sqr Estimated Proportions of Each Subject") |
183 file = paste0("report_data/weightgene_", | 217 writable(est_prop$Var.prop, "varprop", |
184 "Music Estimated Proportions of Cell Types (by Gene)", | 218 "Matrix of Variance of MuSiC Estimates") |
185 ".tabular"), | 219 } |
186 quote = F, sep = "\t", col.names = NA) | |
187 write.table(est_prop$r.squared.full, | |
188 file = paste0("report_data/rsquared_", | |
189 "Music R-sqr Estimated Proportions of Each Subject", | |
190 ".tabular"), | |
191 quote = F, sep = "\t", col.names = NA) | |
192 write.table(est_prop$Var.prop, | |
193 file = paste0("report_data/varprop_", | |
194 "Matrix of Variance of MuSiC Estimates", | |
195 ".tabular"), | |
196 quote = F, sep = "\t", col.names = NA) | |
197 | 220 |
198 | 221 |
199 if (use_disease_factor) { | 222 if (use_disease_factor) { |
200 ## Summary table of linear regressions of disease factors | 223 ## Summary table of linear regressions of disease factors |
201 for (meth in methods) { | 224 for (meth in methods) { |