Mercurial > repos > artbio > mutational_patterns
comparison mutational_patterns.R @ 8:e0dad46148bf draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit 75fd87e806f9bee2f6ff7fcd3834e55eb21d8710"
author | artbio |
---|---|
date | Mon, 19 Oct 2020 07:47:24 +0000 |
parents | 34626b2b9907 |
children | 4ff3c984523b |
comparison
equal
deleted
inserted
replaced
7:34626b2b9907 | 8:e0dad46148bf |
---|---|
108 library(ref_genome, character.only = TRUE) | 108 library(ref_genome, character.only = TRUE) |
109 library(ggplot2) | 109 library(ggplot2) |
110 | 110 |
111 # Load the VCF files into a GRangesList: | 111 # Load the VCF files into a GRangesList: |
112 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome) | 112 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome) |
113 library(plyr) | |
113 if (!is.na(opt$levels)[1]) { # manage levels if there are | 114 if (!is.na(opt$levels)[1]) { # manage levels if there are |
114 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("element_identifier","level")) | 115 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("element_identifier","level")) |
115 library(plyr) | 116 } else { |
116 metadata_table <- join(vcf_table, levels_table, by = "element_identifier") | 117 levels_table <- data.frame(element_identifier=vcf_table$element_identifier, |
117 tissue <- as.vector(metadata_table$level) | 118 level=rep("nolabels", length(vcf_table$element_identifier))) |
118 } | 119 } |
120 metadata_table <- join(vcf_table, levels_table, by = "element_identifier") | |
121 tissue <- as.vector(metadata_table$level) | |
122 detach(package:plyr) | |
119 | 123 |
120 ##### This is done for any section ###### | 124 ##### This is done for any section ###### |
121 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) | 125 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) |
122 qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] | 126 qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] |
123 col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) | 127 col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) |
128 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) | 132 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) |
129 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) | 133 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) |
130 | 134 |
131 # mutation spectrum, total or by sample | 135 # mutation spectrum, total or by sample |
132 | 136 |
133 if (is.na(opt$levels)[1]) { | 137 if (length(levels(factor(levels_table$level))) == 1) { # (is.na(opt$levels)[1]) |
134 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend=TRUE) | 138 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend=TRUE) |
135 plot(p1) | 139 plot(p1) |
136 } else { | 140 } else { |
137 p2 <- plot_spectrum(type_occurrences, by = tissue, CT=TRUE) # by levels | 141 p2 <- plot_spectrum(type_occurrences, by = tissue, CT=TRUE) # by levels |
138 p3 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE) # total | 142 p3 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE) # total |
285 facet_grid(~level, scales = "free_x", space="free") | 289 facet_grid(~level, scales = "free_x", space="free") |
286 } | 290 } |
287 # Combine the two plots: | 291 # Combine the two plots: |
288 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3))) | 292 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3))) |
289 | 293 |
290 # Select signatures with some contribution (above a threshold) | 294 #### pie charts of comic signatures contributions in samples ### |
291 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1] | |
292 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded | |
293 | |
294 ## pie charts of comic signatures contributions in samples | |
295 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,]))) | |
296 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie)) | |
297 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100 | |
298 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents) | |
299 library(reshape2) | 295 library(reshape2) |
300 melted_sig_data_pie_percents <-melt(data=sig_data_pie_percents) | 296 library(dplyr) |
301 melted_sig_data_pie_percents$label <- sub("Sig.", "", melted_sig_data_pie_percents$variable) | 297 if (length(levels(factor(levels_table$level))) < 2) { |
302 melted_sig_data_pie_percents$pos <- cumsum(melted_sig_data_pie_percents$value) - melted_sig_data_pie_percents$value/2 | 298 fit_res_contrib <- as.data.frame(fit_res$contribution) |
303 p7 <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) + | 299 worklist <- cbind(signature=rownames(fit_res$contribution), |
304 geom_bar(width = 1, stat = "identity") + | 300 level=rep("nolabels", length(fit_res_contrib[,1])), |
305 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + | 301 fit_res_contrib, |
306 coord_polar("y", start=0) + facet_wrap(~ sample) + | 302 sum=rowSums(fit_res_contrib)) |
307 labs(x="", y="Samples", fill = cosmic_tag) + | 303 worklist <- worklist[order(worklist[ ,"sum"], decreasing = T), ] |
308 scale_fill_manual(name = paste0(length(select), " most contributing\nSignatures"), values = cosmic_colors[select]) | 304 worklist <- worklist[1:opt$signum,] |
309 theme(axis.text = element_blank(), | 305 worklist <- worklist[ , -length(worklist[1,])] |
310 axis.ticks = element_blank(), | 306 worklist <- melt(worklist) |
311 panel.grid = element_blank()) | 307 worklist <- worklist[,c(1,3,4,2)] |
308 } else { | |
309 worklist <- list() | |
310 for (i in levels(factor(levels_table$level))) { | |
311 fit_res$contribution[,levels_table$element_identifier[levels_table$level == i]] -> worklist[[i]] | |
312 sum <- rowSums(as.data.frame(worklist[[i]])) | |
313 worklist[[i]] <- cbind(worklist[[i]], sum) | |
314 worklist[[i]] <- worklist[[i]][order(worklist[[i]][ ,"sum"], decreasing = T), ] | |
315 worklist[[i]] <- worklist[[i]][1:opt$signum,] | |
316 worklist[[i]] <- worklist[[i]][ , -length(as.data.frame(worklist[[i]]))] | |
317 } | |
318 worklist <- as.data.frame(melt(worklist)) | |
319 } | |
320 | |
321 colnames(worklist) <- c("signature", "sample", "value", "level") | |
322 print(worklist) | |
323 worklist <- as.data.frame(worklist %>% group_by(sample) %>% mutate(value=value/sum(value)*100)) | |
324 worklist$pos <- cumsum(worklist$value) - worklist$value/2 | |
325 worklist$label <- factor(worklist$signature) | |
326 worklist$signature <- factor(worklist$signature) | |
327 p7 <- ggplot(worklist, aes(x="", y=value, group=signature, fill=signature)) + | |
328 geom_bar(width = 1, stat = "identity") + | |
329 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + | |
330 coord_polar("y", start=0) + facet_wrap(~ sample) + | |
331 labs(x="", y="Samples", fill = cosmic_tag) + | |
332 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), | |
333 values = cosmic_colors[as.numeric(levels(factor(worklist$label)))]) + | |
334 theme(axis.text = element_blank(), | |
335 axis.ticks = element_blank(), | |
336 panel.grid = element_blank()) | |
312 grid.arrange(p7) | 337 grid.arrange(p7) |
313 | 338 |
314 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering | 339 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering |
315 if (length(vcf_paths) > 1) { | 340 if (length(vcf_paths) > 1) { |
316 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") | 341 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") |