comparison mutational_patterns.R @ 18:8d9f31389f33 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit 1cb9c8fd0c74943a8e6de4c63ac5e4a84ef27430"
author artbio
date Sun, 17 Oct 2021 15:51:05 +0000
parents 8c6ee1c2248f
children 69f09dff98f9
comparison
equal deleted inserted replaced
17:8c6ee1c2248f 18:8d9f31389f33
33 type = "character", 33 type = "character",
34 help = "path to the tab separated file describing the levels in function of datasets" 34 help = "path to the tab separated file describing the levels in function of datasets"
35 ), 35 ),
36 make_option( 36 make_option(
37 "--cosmic_version", 37 "--cosmic_version",
38 default = "v2", 38 default = NA,
39 type = "character", 39 type = "character",
40 help = "Version of the Cosmic Signature set to be used to express mutational patterns" 40 help = "Version of the Cosmic Signature set to be used to express mutational profiles"
41 ),
42 make_option(
43 "--own_signatures",
44 default = NA,
45 type = "character",
46 help = "Path to the user-defined signature matrix"
41 ), 47 ),
42 make_option( 48 make_option(
43 "--signum", 49 "--signum",
44 default = 2, 50 default = 2,
45 type = "integer", 51 type = "integer",
46 help = "selects the N most significant signatures in samples to express mutational patterns" 52 help = "selects the N most significant signatures in samples to express mutational profiles"
47 ), 53 ),
48 make_option( 54 make_option(
49 "--nrun", 55 "--nrun",
50 default = 2, 56 default = 2,
51 type = "integer", 57 type = "integer",
80 default = NA, 86 default = NA,
81 type = "character", 87 type = "character",
82 help = "path to signature matrix" 88 help = "path to signature matrix"
83 ), 89 ),
84 make_option( 90 make_option(
85 "--output_cosmic", 91 "--output_sigpattern",
86 default = NA, 92 default = NA,
87 type = "character", 93 type = "character",
88 help = "path to output dataset" 94 help = "path to output dataset"
89 ), 95 ),
90 make_option( 96 make_option(
139 detach(package:plyr) 145 detach(package:plyr)
140 146
141 ##### This is done for any section ###### 147 ##### This is done for any section ######
142 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) 148 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome)
143 qual_col_pals <- brewer.pal.info[brewer.pal.info$category == "qual", ] 149 qual_col_pals <- brewer.pal.info[brewer.pal.info$category == "qual", ]
144 col_vector <- unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))))
145 col_vector <- col_vector[c(-32, -34, -39)] # 67-color palette
146 150
147 ###### Section 1 Mutation characteristics and spectrums ############# 151 ###### Section 1 Mutation characteristics and spectrums #############
148 if (!is.na(opt$output_spectrum)[1]) { 152 if (!is.na(opt$output_spectrum)[1]) {
149 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) 153 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69)
150 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) 154 type_occurrences <- mut_type_occurrences(vcfs, ref_genome)
219 nmf_res$reconstructed[, 1], 223 nmf_res$reconstructed[, 1],
220 profile_names = c("Original", "Reconstructed"), 224 profile_names = c("Original", "Reconstructed"),
221 condensed = TRUE) 225 condensed = TRUE)
222 dev.off() 226 dev.off()
223 } 227 }
224 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures #### 228
225 229 ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures ####
226 if (!is.na(opt$output_cosmic)[1]) { 230
231 if (!is.na(opt$output_sigpattern)[1]) {
227 # Prepare cosmic signatures 232 # Prepare cosmic signatures
228 cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE) 233 if (!is.na(opt$cosmic_version)) {
229 cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome & 234 cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE)
230 cosmic_urls$cosmic_version == opt$cosmic_version] 235 cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome &
231 cancer_sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file), 236 cosmic_urls$cosmic_version == opt$cosmic_version]
232 sep = "\t", header = TRUE) 237 sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file),
233 row.names(cancer_sbs_signatures) <- cancer_sbs_signatures$Type 238 sep = "\t", header = TRUE)
234 new_order <- match(row.names(mut_mat), cancer_sbs_signatures$Type) 239 tag <- paste(opt$genome, "COSMIC", opt$cosmic_version, sep = " ")
235 cancer_sbs_signatures <- cancer_sbs_signatures[as.numeric(new_order), ] 240 }
236 cosmic_tag <- paste(opt$genome, "COSMIC", opt$cosmic_version, sep = " ") 241 # Prepare user-defined signatures
237 cosmic_colors <- col_vector[seq_len(ncol(cancer_sbs_signatures) - 1)] 242 if (!is.na(opt$own_signatures)) {
238 names(cosmic_colors) <- colnames(cancer_sbs_signatures[2:length(cancer_sbs_signatures)]) 243 sbs_signatures <- read.table(opt$own_signatures, sep = "\t", header = TRUE)
239 cancer_sbs_matrix <- as.matrix(cancer_sbs_signatures[, 2:length(cancer_sbs_signatures)]) 244 tag <- paste(opt$genome, "User-Defined Signatures", sep = " ")
245 }
246 row.names(sbs_signatures) <- sbs_signatures$Type
247 # drop column Type of sbs_signatures
248 sbs_signatures <- subset(sbs_signatures, select = -c(Type))
249 # reorder substitutions of sbs_signatures to match mut_mat
250 sbs_signatures <- sbs_signatures[match(row.names(mut_mat), row.names(sbs_signatures)), ]
251 colnames(sbs_signatures) <- gsub("SBS", "", colnames(sbs_signatures))
252 # arrange signature colors
253 signature_colors <- c("#3f4100", "#6f53ff", "#6dc400", "#9d1fd7", "#009c06", "#001fae", "#8adb4d", "#5a67ff", "#d8c938", "#024bc3", "#d2ab00",
254 "#e36eff", "#00ac44", "#d000b0", "#01b071", "#ff64e2", "#006b21", "#b70090", "#60dc9f", "#5f0083", "#c0ce67", "#002981",
255 "#ffb53e", "#44005f", "#b59600", "#7d95ff", "#f47600", "#017bc4", "#ff2722", "#02cfec", "#ff233f", "#01b7b4", "#fd005c",
256 "#019560", "#ff57a9", "#88d896", "#b80067", "#abd27f", "#dc8eff", "#667b00", "#fba3ff", "#093f00", "#ff6494", "#009791",
257 "#c93200", "#4ac8ff", "#a60005", "#8fd4b6", "#ce0036", "#00634d", "#ff6035", "#2d1956", "#f0be6d", "#6a0058", "#957a00",
258 "#e4b4ff", "#4a5500", "#abc7fe", "#c95900", "#003d27", "#b10043", "#d5c68e", "#3e163e", "#b36b00", "#debaeb", "#605400",
259 "#7a0044", "#ffa06d", "#4c0d21", "#ff9cb5", "#3f1d02", "#ff958f", "#634a66", "#775500", "#6e0028", "#717653",
260 "#6c1000", "#693600")
261 signature_colors <- signature_colors[seq_len(ncol(sbs_signatures))]
262 names(signature_colors) <- colnames(sbs_signatures)
263 # To drop signature_colors <- signature_colors[colnames(sbs_signatures)]
264 # This is IMPORTANT since in Galaxy we do not use the embeded function get_known_signatures()
265 sbs_signatures <- as.matrix(sbs_signatures)
240 266
241 267
242 # Plot mutational profiles of the COSMIC signatures 268 # Plot mutational profiles of the COSMIC signatures
243 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69) 269
244 if (opt$cosmic_version == "v2") { 270 # to do: this is largely optional and should be graphically improved anyway
245 p6 <- plot_96_profile(cancer_sbs_matrix, condensed = TRUE, ymax = 0.3) 271
246 grid.arrange(p6, top = textGrob("COSMIC SBS signature profiles", gp = gpar(fontsize = 12, font = 3))) 272 pdf(opt$output_sigpattern, paper = "special", width = 11.69, height = 11.69)
247 } else { 273 for (i in head(seq(1, ncol(sbs_signatures), by = 20), -1)) {
248 p6 <- plot_96_profile(cancer_sbs_matrix[, 1:trunc(ncol(cancer_sbs_matrix) / 2)], condensed = TRUE, ymax = 0.3) 274 p6 <- plot_96_profile(sbs_signatures[, i:(i + 19)], condensed = TRUE, ymax = 0.3)
249 p6bis <- plot_96_profile(cancer_sbs_matrix[, (trunc(ncol(cancer_sbs_matrix) / 2) + 1):ncol(cancer_sbs_matrix)], 275 grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc((i + 1) / 20) + 1, " of ",
250 condensed = TRUE, ymax = 0.3) 276 trunc(ncol(sbs_signatures) / 20) + 1, " pages)"),
251 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)", 277 gp = gpar(fontsize = 12, font = 3)))
252 gp = gpar(fontsize = 12, font = 3))) 278 }
253 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)", 279 p6 <- plot_96_profile(sbs_signatures[, (trunc(ncol(sbs_signatures) / 20) * 20):(ncol(sbs_signatures))],
254 gp = gpar(fontsize = 12, font = 3))) 280 condensed = TRUE, ymax = 0.3)
255 } 281 grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc(ncol(sbs_signatures) / 20) + 1, " of ",
256 282 trunc(ncol(sbs_signatures) / 20) + 1, " pages)"),
257 283 gp = gpar(fontsize = 12, font = 3)))
258 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles 284 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles
259 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix 285 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix
260 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_sbs_matrix) 286 fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures)
261 287
262 # Plot contribution barplots 288 # Plot contribution barplots
263 pc3 <- plot_contribution(fit_res$contribution, cancer_sbs_matrix, coord_flip = T, mode = "absolute") 289 pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "absolute")
264 pc4 <- plot_contribution(fit_res$contribution, cancer_sbs_matrix, coord_flip = T, mode = "relative") 290 pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "relative")
265 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs 291 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs
266 pc3_data <- pc3$data 292 pc3_data <- pc3$data
267 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + 293 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
268 geom_bar(stat = "identity", position = "stack") + 294 geom_bar(stat = "identity", position = "stack") +
269 coord_flip() + 295 coord_flip() +
270 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + 296 scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors[]) +
271 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + 297 labs(x = "Samples", y = "Absolute contribution") + theme_bw() +
272 theme(panel.grid.minor.x = element_blank(), 298 theme(panel.grid.minor.x = element_blank(),
273 panel.grid.major.x = element_blank(), 299 panel.grid.major.x = element_blank(),
274 legend.position = "right", 300 legend.position = "right",
275 text = element_text(size = 8), 301 text = element_text(size = 8),
276 axis.text.x = element_text(angle = 90, hjust = 1)) 302 axis.text.x = element_text(angle = 90, hjust = 1))
277 pc4_data <- pc4$data 303 pc4_data <- pc4$data
278 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + 304 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
279 geom_bar(stat = "identity", position = "fill") + 305 geom_bar(stat = "identity", position = "fill") +
280 coord_flip() + 306 coord_flip() +
281 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + 307 scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors) +
282 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + 308 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
283 labs(x = "Samples", y = "Relative contribution") + theme_bw() + 309 labs(x = "Samples", y = "Relative contribution") + theme_bw() +
284 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right", 310 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right",
285 text = element_text(size = 8), 311 text = element_text(size = 8),
286 axis.text.x = element_text(angle = 90, hjust = 1)) 312 axis.text.x = element_text(angle = 90, hjust = 1))
290 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs 316 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs
291 pc3_data <- pc3$data 317 pc3_data <- pc3$data
292 pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") 318 pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier")
293 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + 319 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
294 geom_bar(stat = "identity", position = "stack") + 320 geom_bar(stat = "identity", position = "stack") +
295 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + 321 scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors) +
296 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + 322 labs(x = "Samples", y = "Absolute contribution") + theme_bw() +
297 theme(panel.grid.minor.x = element_blank(), 323 theme(panel.grid.minor.x = element_blank(),
298 panel.grid.major.x = element_blank(), 324 panel.grid.major.x = element_blank(),
299 legend.position = "right", 325 legend.position = "right",
300 text = element_text(size = 8), 326 text = element_text(size = 8),
302 facet_grid(~level, scales = "free_x", space = "free") 328 facet_grid(~level, scales = "free_x", space = "free")
303 pc4_data <- pc4$data 329 pc4_data <- pc4$data
304 pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") 330 pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier")
305 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + 331 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
306 geom_bar(stat = "identity", position = "fill") + 332 geom_bar(stat = "identity", position = "fill") +
307 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + 333 scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors) +
308 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + 334 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
309 labs(x = "Samples", y = "Relative contribution") + theme_bw() + 335 labs(x = "Samples", y = "Relative contribution") + theme_bw() +
310 theme(panel.grid.minor.x = element_blank(), 336 theme(panel.grid.minor.x = element_blank(),
311 panel.grid.major.x = element_blank(), 337 panel.grid.major.x = element_blank(),
312 legend.position = "right", 338 legend.position = "right",
314 axis.text.x = element_text(angle = 90, hjust = 1)) + 340 axis.text.x = element_text(angle = 90, hjust = 1)) +
315 facet_grid(~level, scales = "free_x", space = "free") 341 facet_grid(~level, scales = "free_x", space = "free")
316 } 342 }
317 # Combine the two plots: 343 # Combine the two plots:
318 grid.arrange(pc3, pc4, 344 grid.arrange(pc3, pc4,
319 top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns", 345 top = textGrob("Absolute and Relative Contributions of elementary signatures to mutational profiles",
320 gp = gpar(fontsize = 12, font = 3))) 346 gp = gpar(fontsize = 12, font = 3)))
321 347
322 #### pie charts of comic signatures contributions in samples ### 348 #### pie charts of comic signatures contributions in samples ###
323 library(reshape2) 349 library(reshape2)
324 library(dplyr) 350 library(dplyr)
354 worklist$signature <- factor(worklist$signature) 380 worklist$signature <- factor(worklist$signature)
355 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + 381 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) +
356 geom_bar(width = 1, stat = "identity") + 382 geom_bar(width = 1, stat = "identity") +
357 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "black", size = 3) + 383 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "black", size = 3) +
358 coord_polar("y", start = 0) + facet_wrap(.~sample) + 384 coord_polar("y", start = 0) + facet_wrap(.~sample) +
359 labs(x = "", y = "Samples", fill = cosmic_tag) + 385 labs(x = "", y = "Samples", fill = tag) +
360 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), 386 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"),
361 values = cosmic_colors[levels(worklist$signature)]) + 387 values = signature_colors[levels(worklist$signature)],
388 labels = names(signature_colors[levels(worklist$signature)])) +
362 theme(axis.text = element_blank(), 389 theme(axis.text = element_blank(),
363 axis.ticks = element_blank(), 390 axis.ticks = element_blank(),
364 panel.grid = element_blank()) 391 panel.grid = element_blank())
365 grid.arrange(p7) 392 grid.arrange(p7)
366 393
375 output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution)) 402 output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution))
376 if (length(levels(factor(levels_table$level))) > 1) { 403 if (length(levels(factor(levels_table$level))) > 1) {
377 output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution), 404 output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution),
378 3], "-", colnames(fit_res$contribution)), 405 3], "-", colnames(fit_res$contribution)),
379 output_table) 406 output_table)
407 colnames(output_table) <- gsub("X", "SBS", colnames(output_table))
380 } else { 408 } else {
381 output_table <- data.frame(sample = rownames(output_table), output_table) 409 output_table <- data.frame(sample = rownames(output_table), output_table)
410 colnames(output_table) <- gsub("X", "SBS", colnames(output_table))
382 } 411 }
383 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F) 412 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F)
384 } 413 }
385 414
386 # calculate all pairwise cosine similarities 415 # calculate all pairwise cosine similarities
410 theme_bw() + 439 theme_bw() +
411 theme(panel.grid.minor.y = element_blank(), 440 theme(panel.grid.minor.y = element_blank(),
412 panel.grid.major.y = element_blank()) + 441 panel.grid.major.y = element_blank()) +
413 # Add cut.off line 442 # Add cut.off line
414 geom_hline(aes(yintercept = .95)) 443 geom_hline(aes(yintercept = .95))
415 grid.arrange(p9, top = textGrob("Similarity between true and reconstructed profiles (with all Cosmic sig.)", gp = gpar(fontsize = 12, font = 3))) 444 grid.arrange(p9, top = textGrob("Similarity between true profiles and profiles reconstructed with elementary signatures", gp = gpar(fontsize = 12, font = 3)))
416 dev.off() 445 dev.off()
417 } 446 }
418 447
419 448
420 # Output RData file 449 # Output RData file