Mercurial > repos > artbio > mutational_patterns
comparison mutational_patterns.R @ 17:8c6ee1c2248f draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit e5d498dfc5a6a9aaea3d09037dea5d15c2d85dd2"
author | artbio |
---|---|
date | Tue, 05 Oct 2021 22:28:34 +0000 |
parents | 31e7a33ecd71 |
children | 8d9f31389f33 |
comparison
equal
deleted
inserted
replaced
16:31e7a33ecd71 | 17:8c6ee1c2248f |
---|---|
95 ), | 95 ), |
96 make_option( | 96 make_option( |
97 c("-r", "--rdata"), | 97 c("-r", "--rdata"), |
98 type = "character", | 98 type = "character", |
99 default = NULL, | 99 default = NULL, |
100 help = "Path to RData output file") | 100 help = "Path to RData output file" |
101 ), | |
102 make_option( | |
103 c("-t", "--tooldir"), | |
104 type = "character", | |
105 default = NULL, | |
106 help = "Path to tool directory, where tool data are stored") | |
107 | |
101 ) | 108 ) |
102 | 109 |
103 opt <- parse_args(OptionParser(option_list = option_list), | 110 opt <- parse_args(OptionParser(option_list = option_list), |
104 args = commandArgs(trailingOnly = TRUE)) | 111 args = commandArgs(trailingOnly = TRUE)) |
105 | 112 |
215 dev.off() | 222 dev.off() |
216 } | 223 } |
217 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures #### | 224 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures #### |
218 | 225 |
219 if (!is.na(opt$output_cosmic)[1]) { | 226 if (!is.na(opt$output_cosmic)[1]) { |
227 # Prepare cosmic signatures | |
228 cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE) | |
229 cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome & | |
230 cosmic_urls$cosmic_version == opt$cosmic_version] | |
231 cancer_sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file), | |
232 sep = "\t", header = TRUE) | |
233 row.names(cancer_sbs_signatures) <- cancer_sbs_signatures$Type | |
234 new_order <- match(row.names(mut_mat), cancer_sbs_signatures$Type) | |
235 cancer_sbs_signatures <- cancer_sbs_signatures[as.numeric(new_order), ] | |
236 cosmic_tag <- paste(opt$genome, "COSMIC", opt$cosmic_version, sep = " ") | |
237 cosmic_colors <- col_vector[seq_len(ncol(cancer_sbs_signatures) - 1)] | |
238 names(cosmic_colors) <- colnames(cancer_sbs_signatures[2:length(cancer_sbs_signatures)]) | |
239 cancer_sbs_matrix <- as.matrix(cancer_sbs_signatures[, 2:length(cancer_sbs_signatures)]) | |
240 | |
241 | |
242 # Plot mutational profiles of the COSMIC signatures | |
220 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69) | 243 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69) |
221 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix | |
222 if (opt$cosmic_version == "v2") { | 244 if (opt$cosmic_version == "v2") { |
223 sp_url <- "https://cancer.sanger.ac.uk/signatures/documents/420/COSMIC_v2_SBS_GRCh38.txt" | 245 p6 <- plot_96_profile(cancer_sbs_matrix, condensed = TRUE, ymax = 0.3) |
224 cancer_signatures <- read.table(sp_url, sep = "\t", header = TRUE) | 246 grid.arrange(p6, top = textGrob("COSMIC SBS signature profiles", gp = gpar(fontsize = 12, font = 3))) |
225 new_order <- match(row.names(pseudo_mut_mat), cancer_signatures$Type) | |
226 cancer_signatures <- cancer_signatures[as.vector(new_order), ] | |
227 row.names(cancer_signatures) <- cancer_signatures$Type | |
228 cancer_signatures <- as.matrix(cancer_signatures[, 2:31]) | |
229 colnames(cancer_signatures) <- gsub("Signature_", "", colnames(cancer_signatures)) # shorten signature labels | |
230 cosmic_tag <- "Signatures (Cosmic v2, March 2015)" | |
231 cosmic_colors <- col_vector[1:30] | |
232 names(cosmic_colors) <- colnames(cancer_signatures) | |
233 } else { | |
234 sp_url <- "https://cancer.sanger.ac.uk/signatures/documents/431/COSMIC_v3_SBS_GRCh38.txt" | |
235 cancer_signatures <- read.table(sp_url, sep = "\t", header = TRUE) | |
236 new_order <- match(row.names(pseudo_mut_mat), cancer_signatures$Type) | |
237 cancer_signatures <- cancer_signatures[as.vector(new_order), ] | |
238 row.names(cancer_signatures) <- cancer_signatures$Type | |
239 cancer_signatures <- as.matrix(cancer_signatures[, 2:68]) | |
240 colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels | |
241 cosmic_tag <- "Signatures (Cosmic v3, May 2019)" | |
242 cosmic_colors <- col_vector[1:67] | |
243 names(cosmic_colors) <- colnames(cancer_signatures) | |
244 } | |
245 | |
246 # Plot mutational profiles of the COSMIC signatures | |
247 if (opt$cosmic_version == "v2") { | |
248 p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) | |
249 grid.arrange(p6, top = textGrob("COSMIC signature profiles", gp = gpar(fontsize = 12, font = 3))) | |
250 } else { | 247 } else { |
251 p6 <- plot_96_profile(cancer_signatures[, 1:33], condensed = TRUE, ymax = 0.3) | 248 p6 <- plot_96_profile(cancer_sbs_matrix[, 1:trunc(ncol(cancer_sbs_matrix) / 2)], condensed = TRUE, ymax = 0.3) |
252 p6bis <- plot_96_profile(cancer_signatures[, 34:67], condensed = TRUE, ymax = 0.3) | 249 p6bis <- plot_96_profile(cancer_sbs_matrix[, (trunc(ncol(cancer_sbs_matrix) / 2) + 1):ncol(cancer_sbs_matrix)], |
250 condensed = TRUE, ymax = 0.3) | |
253 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)", | 251 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)", |
254 gp = gpar(fontsize = 12, font = 3))) | 252 gp = gpar(fontsize = 12, font = 3))) |
255 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)", | 253 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)", |
256 gp = gpar(fontsize = 12, font = 3))) | 254 gp = gpar(fontsize = 12, font = 3))) |
257 } | 255 } |
258 | 256 |
259 | 257 |
260 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles | 258 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles |
261 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures) | 259 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix |
260 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_sbs_matrix) | |
262 | 261 |
263 # Plot contribution barplots | 262 # Plot contribution barplots |
264 pc3 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "absolute") | 263 pc3 <- plot_contribution(fit_res$contribution, cancer_sbs_matrix, coord_flip = T, mode = "absolute") |
265 pc4 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "relative") | 264 pc4 <- plot_contribution(fit_res$contribution, cancer_sbs_matrix, coord_flip = T, mode = "relative") |
266 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs | 265 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs |
267 pc3_data <- pc3$data | 266 pc3_data <- pc3$data |
268 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + | 267 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
269 geom_bar(stat = "identity", position = "stack") + | 268 geom_bar(stat = "identity", position = "stack") + |
270 coord_flip() + | 269 coord_flip() + |
372 } | 371 } |
373 | 372 |
374 # export relative contribution matrix | 373 # export relative contribution matrix |
375 if (!is.na(opt$sig_contrib_matrix)) { | 374 if (!is.na(opt$sig_contrib_matrix)) { |
376 output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution)) | 375 output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution)) |
377 colnames(output_table) <- paste0("s", colnames(output_table)) | |
378 if (length(levels(factor(levels_table$level))) > 1) { | 376 if (length(levels(factor(levels_table$level))) > 1) { |
379 output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution), | 377 output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution), |
380 3], "-", colnames(fit_res$contribution)), | 378 3], "-", colnames(fit_res$contribution)), |
381 output_table) | 379 output_table) |
382 } else { | 380 } else { |