Mercurial > repos > artbio > mutational_patterns
comparison mutational_patterns.R @ 4:7ba08c826888 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit e2d6ed12516e1bd24071962a0dfe0220cc348f3c"
author | artbio |
---|---|
date | Mon, 05 Oct 2020 19:29:42 +0000 |
parents | e332cf9dfa06 |
children | fe31d059a482 |
comparison
equal
deleted
inserted
replaced
3:e332cf9dfa06 | 4:7ba08c826888 |
---|---|
5 warnings() | 5 warnings() |
6 library(optparse) | 6 library(optparse) |
7 library(rjson) | 7 library(rjson) |
8 library(grid) | 8 library(grid) |
9 library(gridExtra) | 9 library(gridExtra) |
10 library(scales) | |
11 library(RColorBrewer) | |
10 | 12 |
11 # Arguments | 13 # Arguments |
12 option_list = list( | 14 option_list = list( |
13 make_option( | 15 make_option( |
14 "--inputs", | 16 "--inputs", |
115 tissue <- as.vector(metadata_table$level) | 117 tissue <- as.vector(metadata_table$level) |
116 } | 118 } |
117 | 119 |
118 ##### This is done for any section ###### | 120 ##### This is done for any section ###### |
119 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) | 121 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) |
122 qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] | |
123 col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) | |
124 col_vector <- col_vector[c(-32, -34, -39)] # 67-color palette | |
120 | 125 |
121 ###### Section 1 Mutation characteristics and spectrums ############# | 126 ###### Section 1 Mutation characteristics and spectrums ############# |
122 if (!is.na(opt$output_spectrum)[1]) { | 127 if (!is.na(opt$output_spectrum)[1]) { |
123 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) | 128 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) |
124 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) | 129 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) |
202 cancer_signatures = cancer_signatures[as.vector(new_order),] | 207 cancer_signatures = cancer_signatures[as.vector(new_order),] |
203 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type | 208 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type |
204 cancer_signatures = as.matrix(cancer_signatures[,4:33]) | 209 cancer_signatures = as.matrix(cancer_signatures[,4:33]) |
205 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels | 210 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels |
206 cosmic_tag <- "Signatures (Cosmic v2, March 2015)" | 211 cosmic_tag <- "Signatures (Cosmic v2, March 2015)" |
212 cosmic_colors <- col_vector[1:30] | |
207 } else { | 213 } else { |
208 sp_url <- "https://raw.githubusercontent.com/ARTbio/startbio/master/sigProfiler_SBS_signatures_2019_05_22.tsv" | 214 sp_url <- "https://raw.githubusercontent.com/ARTbio/startbio/master/sigProfiler_SBS_signatures_2019_05_22.tsv" |
209 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) | 215 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) |
210 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type) | 216 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type) |
211 cancer_signatures = cancer_signatures[as.vector(new_order),] | 217 cancer_signatures = cancer_signatures[as.vector(new_order),] |
212 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type | 218 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type |
213 cancer_signatures = as.matrix(cancer_signatures[,4:70]) | 219 cancer_signatures = as.matrix(cancer_signatures[,4:70]) |
214 colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels | 220 colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels |
215 cosmic_tag <- "Signatures (Cosmic v3, May 2019)" | 221 cosmic_tag <- "Signatures (Cosmic v3, May 2019)" |
222 cosmic_colors <- col_vector[1:67] | |
216 } | 223 } |
217 | 224 |
218 # Plot mutational profiles of the COSMIC signatures | 225 # Plot mutational profiles of the COSMIC signatures |
219 if (opt$cosmic_version == "v2") { | 226 if (opt$cosmic_version == "v2") { |
220 p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) | 227 p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) |
221 grid.arrange(p6, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3))) | 228 grid.arrange(p6, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3))) |
222 } else { | 229 } else { |
223 print(length(cancer_signatures)) | |
224 p6 <- plot_96_profile(cancer_signatures[,1:33], condensed = TRUE, ymax = 0.3) | 230 p6 <- plot_96_profile(cancer_signatures[,1:33], condensed = TRUE, ymax = 0.3) |
225 p6bis <- plot_96_profile(cancer_signatures[,34:67], condensed = TRUE, ymax = 0.3) | 231 p6bis <- plot_96_profile(cancer_signatures[,34:67], condensed = TRUE, ymax = 0.3) |
226 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)",gp=gpar(fontsize=12,font=3))) | 232 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)",gp=gpar(fontsize=12,font=3))) |
227 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)",gp=gpar(fontsize=12,font=3))) | 233 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)",gp=gpar(fontsize=12,font=3))) |
228 } | 234 } |
229 | 235 |
230 # Hierarchically cluster the COSMIC signatures based on their similarity with average linkage | |
231 # hclust_cosmic = cluster_signatures(cancer_signatures, method = "average") | |
232 # store signatures in new order | |
233 # cosmic_order = colnames(cancer_signatures)[hclust_cosmic$order] | |
234 # plot(hclust_cosmic) | |
235 | |
236 # Similarity between mutational profiles and COSMIC signatures | |
237 | |
238 | |
239 # The similarity between each mutational profile and each COSMIC signature, can be calculated | |
240 # with cos_sim_matrix, and visualized with plot_cosine_heatmap. The cosine similarity reflects | |
241 # how well each mutational profile can be explained by each signature individually. The advantage | |
242 # of this heatmap representation is that it shows in a glance the similarity in mutational | |
243 # profiles between samples, while at the same time providing information on which signatures | |
244 # are most prominent. The samples can be hierarchically clustered in plot_cosine_heatmap. | |
245 # The cosine similarity between two mutational profiles/signatures can be calculated with cos_sim : | |
246 # cos_sim(mut_mat[,1], cancer_signatures[,1]) | |
247 | |
248 # Calculate pairwise cosine similarity between mutational profiles and COSMIC signatures | |
249 | |
250 # cos_sim_samples_signatures = cos_sim_matrix(mut_mat, cancer_signatures) | |
251 # Plot heatmap with specified signature order | |
252 # p.trans <- plot_cosine_heatmap(cos_sim_samples_signatures, col_order = cosmic_order, cluster_rows = TRUE) | |
253 # grid.arrange(p.trans) | |
254 | 236 |
255 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles | 237 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles |
256 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures) | 238 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures) |
257 | 239 |
258 # Select signatures with some contribution (above a threshold) | |
259 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1] | |
260 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded | |
261 # Plot contribution barplots | 240 # Plot contribution barplots |
262 pc3 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "absolute") | 241 pc3 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "absolute") |
263 pc4 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "relative") | 242 pc4 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "relative") |
243 pc3_data <- pc3$data | |
244 pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | |
245 geom_bar(stat="identity", position='stack') + | |
246 coord_flip() + | |
247 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + | |
248 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + | |
249 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right") | |
250 pc4_data <- pc4$data | |
251 pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | |
252 geom_bar(stat="identity", position='fill') + | |
253 coord_flip() + | |
254 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + | |
255 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + | |
256 labs(x = "Samples", y = "Relative contribution") + theme_bw() + | |
257 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right") | |
264 ##### | 258 ##### |
265 # ggplot2 alternative | 259 # ggplot2 alternative |
266 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs | 260 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs |
267 pc3_data <- pc3$data | 261 pc3_data <- pc3$data |
268 pc3_data <- merge (pc3_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") | 262 pc3_data <- merge (pc3_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") |
269 pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | 263 pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + |
270 geom_bar(stat="identity", position='stack') + | 264 geom_bar(stat="identity", position='stack') + |
271 scale_fill_discrete(name="Cosmic\nSignature") + | 265 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + |
272 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + | 266 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + |
273 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + | 267 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right") + |
274 facet_grid(~level, scales = "free_x") | 268 facet_grid(~level, scales = "free_x") |
275 pc4_data <- pc4$data | 269 pc4_data <- pc4$data |
276 pc4_data <- merge (pc4_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") | 270 pc4_data <- merge (pc4_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") |
277 pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | 271 pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + |
278 geom_bar(stat="identity", position='fill') + | 272 geom_bar(stat="identity", position='fill') + |
279 scale_fill_discrete(name="Cosmic\nSignature") + | 273 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + |
280 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + | 274 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + |
281 labs(x = "Samples", y = "Relative contribution") + theme_bw() + | 275 labs(x = "Samples", y = "Relative contribution") + theme_bw() + |
282 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + | 276 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right") + |
283 facet_grid(~level, scales = "free_x") | 277 facet_grid(~level, scales = "free_x") |
284 } | 278 } |
285 # Combine the two plots: | 279 # Combine the two plots: |
286 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3))) | 280 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3))) |
281 | |
282 # Select signatures with some contribution (above a threshold) | |
283 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1] | |
284 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded | |
285 | |
287 ## pie charts of comic signatures contributions in samples | 286 ## pie charts of comic signatures contributions in samples |
288 | |
289 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,]))) | 287 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,]))) |
290 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie)) | 288 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie)) |
291 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100 | 289 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100 |
292 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents) | 290 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents) |
293 library(reshape2) | 291 library(reshape2) |
297 p7 <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) + | 295 p7 <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) + |
298 geom_bar(width = 1, stat = "identity") + | 296 geom_bar(width = 1, stat = "identity") + |
299 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + | 297 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + |
300 coord_polar("y", start=0) + facet_wrap(~ sample) + | 298 coord_polar("y", start=0) + facet_wrap(~ sample) + |
301 labs(x="", y="Samples", fill = cosmic_tag) + | 299 labs(x="", y="Samples", fill = cosmic_tag) + |
300 scale_fill_manual(name = paste0(length(select), " most contributing\nSignatures"), values = cosmic_colors[select]) | |
302 theme(axis.text = element_blank(), | 301 theme(axis.text = element_blank(), |
303 axis.ticks = element_blank(), | 302 axis.ticks = element_blank(), |
304 panel.grid = element_blank()) | 303 panel.grid = element_blank()) |
305 grid.arrange(p7) | 304 grid.arrange(p7) |
306 | 305 |
307 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering | 306 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering |
308 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") | 307 if (length(vcf_paths) > 1) { |
309 grid.arrange(p8) | 308 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") |
310 | 309 grid.arrange(p8) |
311 # Compare the reconstructed mutational profile of sample 1 with its original mutational profile | 310 } |
312 # plot_compare_profiles(mut_mat[,1], fit_res$reconstructed[,1], | |
313 # profile_names = c("Original", "Reconstructed"), | |
314 # condensed = TRUE) | |
315 | |
316 # Calculate the cosine similarity between all original and reconstructed mutational profiles with | |
317 # `cos_sim_matrix` | |
318 | 311 |
319 # calculate all pairwise cosine similarities | 312 # calculate all pairwise cosine similarities |
320 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed) | 313 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed) |
321 # extract cosine similarities per sample between original and reconstructed | 314 # extract cosine similarities per sample between original and reconstructed |
322 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec)) | 315 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec)) |