comparison mutational_patterns.R @ 24:ca6c19ee7da0 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit bad92e3210a78b5ebf47d6950f4dba10c1cbf07d
author artbio
date Tue, 05 Jul 2022 21:41:43 +0000
parents 83f8c93c34b4
children b00fef2b1c2c
comparison
equal deleted inserted replaced
23:83f8c93c34b4 24:ca6c19ee7da0
1 # load packages that are provided in the conda env 1 # load packages that are provided in the conda env
2 options(show.error.messages = F, 2 options(show.error.messages = FALSE,
3 error = function() { 3 error = function() {
4 cat(geterrmessage(), file = stderr()); q("no", 1, F) 4 cat(geterrmessage(), file = stderr())
5 q("no", 1, FALSE)
5 } 6 }
6 ) 7 )
7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 8 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
8 warnings() 9 warnings()
9 library(optparse) 10 library(optparse)
200 grid.arrange(p4) 201 grid.arrange(p4)
201 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures 202 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures
202 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter 203 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter
203 # to achieve stability and avoid local minima) 204 # to achieve stability and avoid local minima)
204 nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun) 205 nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun)
205 # Assign signature names 206 # Assign signature COSMICv3.2 names
206 colnames(nmf_res$signatures) <- paste0("SBS", 1:opt$newsignum) 207 cosmic_signatures <- get_known_signatures()
207 rownames(nmf_res$contribution) <- paste0("SBS", 1:opt$newsignum) 208 nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = 0.85)
209 sim_matrix <- cos_sim_matrix(cosmic_signatures, nmf_res$signatures)
210 plot_cosine_sim <- plot_cosine_heatmap(sim_matrix)
211 grid.arrange(plot_cosine_sim)
208 # Plot the 96-profile of the signatures: 212 # Plot the 96-profile of the signatures:
209 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) 213 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE)
210 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") 214 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq")
211 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) 215 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE)
212 newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = T), 216 newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE),
213 "[", new_sig_matrix$substitution, "]", 217 "[", new_sig_matrix$substitution, "]",
214 gsub("^.\\.", "", new_sig_matrix$context, perl = T)) 218 gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE))
215 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) 219 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]])
216 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") 220 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t")
217 grid.arrange(p5) 221 grid.arrange(p5)
218 # Visualize the contribution of the signatures in a barplot 222 # Visualize the contribution of the signatures in a barplot
219 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE) 223 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE)
327 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles 331 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles
328 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix 332 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix
329 fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures) 333 fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures)
330 334
331 # Plot contribution barplots 335 # Plot contribution barplots
332 pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "absolute") 336 pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute")
333 pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "relative") 337 pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative")
334 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs 338 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs
335 pc3_data <- pc3$data 339 pc3_data <- pc3$data
336 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + 340 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
337 geom_bar(stat = "identity", position = "stack") + 341 geom_bar(stat = "identity", position = "stack") +
338 coord_flip() + 342 coord_flip() +
395 fit_res_contrib <- as.data.frame(fit_res$contribution) 399 fit_res_contrib <- as.data.frame(fit_res$contribution)
396 worklist <- cbind(signature = rownames(fit_res$contribution), 400 worklist <- cbind(signature = rownames(fit_res$contribution),
397 level = rep("nolabels", length(fit_res_contrib[, 1])), 401 level = rep("nolabels", length(fit_res_contrib[, 1])),
398 fit_res_contrib, 402 fit_res_contrib,
399 sum = rowSums(fit_res_contrib)) 403 sum = rowSums(fit_res_contrib))
400 worklist <- worklist[order(worklist[, "sum"], decreasing = T), ] 404 worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ]
401 worklist <- worklist[1:opt$signum, ] 405 worklist <- worklist[1:opt$signum, ]
402 worklist <- worklist[, -length(worklist[1, ])] 406 worklist <- worklist[, -length(worklist[1, ])]
403 worklist <- melt(worklist) 407 worklist <- melt(worklist)
404 worklist <- worklist[, c(1, 3, 4, 2)] 408 worklist <- worklist[, c(1, 3, 4, 2)]
405 } else { 409 } else {
406 worklist <- list() 410 worklist <- list()
407 for (i in levels(factor(levels_table$level))) { 411 for (i in levels(factor(levels_table$level))) {
408 fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] -> worklist[[i]] 412 worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]]
409 sum <- rowSums(as.data.frame(worklist[[i]])) 413 sum <- rowSums(as.data.frame(worklist[[i]]))
410 worklist[[i]] <- cbind(worklist[[i]], sum) 414 worklist[[i]] <- cbind(worklist[[i]], sum)
411 worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = T), ] 415 worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ]
412 worklist[[i]] <- worklist[[i]][1:opt$signum, ] 416 worklist[[i]] <- worklist[[i]][1:opt$signum, ]
413 worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] 417 worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))]
414 } 418 }
415 worklist <- as.data.frame(melt(worklist)) 419 worklist <- as.data.frame(melt(worklist))
416 worklist[, 2] <- paste0(worklist[, 4], " - ", worklist[, 2]) 420 worklist[, 2] <- paste0(worklist[, 4], " - ", worklist[, 2])
422 worklist$label <- factor(gsub("SBS", "", worklist$signature)) 426 worklist$label <- factor(gsub("SBS", "", worklist$signature))
423 worklist$signature <- factor(worklist$signature) 427 worklist$signature <- factor(worklist$signature)
424 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + 428 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) +
425 geom_bar(width = 1, stat = "identity") + 429 geom_bar(width = 1, stat = "identity") +
426 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) + 430 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) +
427 coord_polar("y", start = 0) + facet_wrap(.~sample) + 431 coord_polar("y", start = 0) + facet_wrap(. ~ sample) +
428 labs(x = "", y = "Samples", fill = tag) + 432 labs(x = "", y = "Samples", fill = tag) +
429 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), 433 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"),
430 values = signature_colors[levels(worklist$signature)], 434 values = signature_colors[levels(worklist$signature)],
431 labels = names(signature_colors[levels(worklist$signature)])) + 435 labels = names(signature_colors[levels(worklist$signature)])) +
432 theme(axis.text = element_blank(), 436 theme(axis.text = element_blank(),
450 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) 454 colnames(output_table) <- gsub("X", "SBS", colnames(output_table))
451 } else { 455 } else {
452 output_table <- data.frame(sample = rownames(output_table), output_table) 456 output_table <- data.frame(sample = rownames(output_table), output_table)
453 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) 457 colnames(output_table) <- gsub("X", "SBS", colnames(output_table))
454 } 458 }
455 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F) 459 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = FALSE, row.names = FALSE)
456 } 460 }
457 461
458 # calculate all pairwise cosine similarities 462 # calculate all pairwise cosine similarities
459 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed) 463 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed)
460 # extract cosine similarities per sample between original and reconstructed 464 # extract cosine similarities per sample between original and reconstructed