Mercurial > repos > artbio > mutational_patterns
comparison mutational_patterns.R @ 24:ca6c19ee7da0 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit bad92e3210a78b5ebf47d6950f4dba10c1cbf07d
author | artbio |
---|---|
date | Tue, 05 Jul 2022 21:41:43 +0000 |
parents | 83f8c93c34b4 |
children | b00fef2b1c2c |
comparison
equal
deleted
inserted
replaced
23:83f8c93c34b4 | 24:ca6c19ee7da0 |
---|---|
1 # load packages that are provided in the conda env | 1 # load packages that are provided in the conda env |
2 options(show.error.messages = F, | 2 options(show.error.messages = FALSE, |
3 error = function() { | 3 error = function() { |
4 cat(geterrmessage(), file = stderr()); q("no", 1, F) | 4 cat(geterrmessage(), file = stderr()) |
5 q("no", 1, FALSE) | |
5 } | 6 } |
6 ) | 7 ) |
7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 8 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
8 warnings() | 9 warnings() |
9 library(optparse) | 10 library(optparse) |
200 grid.arrange(p4) | 201 grid.arrange(p4) |
201 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures | 202 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures |
202 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter | 203 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter |
203 # to achieve stability and avoid local minima) | 204 # to achieve stability and avoid local minima) |
204 nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun) | 205 nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun) |
205 # Assign signature names | 206 # Assign signature COSMICv3.2 names |
206 colnames(nmf_res$signatures) <- paste0("SBS", 1:opt$newsignum) | 207 cosmic_signatures <- get_known_signatures() |
207 rownames(nmf_res$contribution) <- paste0("SBS", 1:opt$newsignum) | 208 nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = 0.85) |
209 sim_matrix <- cos_sim_matrix(cosmic_signatures, nmf_res$signatures) | |
210 plot_cosine_sim <- plot_cosine_heatmap(sim_matrix) | |
211 grid.arrange(plot_cosine_sim) | |
208 # Plot the 96-profile of the signatures: | 212 # Plot the 96-profile of the signatures: |
209 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) | 213 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) |
210 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") | 214 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") |
211 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) | 215 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) |
212 newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = T), | 216 newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE), |
213 "[", new_sig_matrix$substitution, "]", | 217 "[", new_sig_matrix$substitution, "]", |
214 gsub("^.\\.", "", new_sig_matrix$context, perl = T)) | 218 gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)) |
215 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) | 219 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) |
216 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") | 220 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") |
217 grid.arrange(p5) | 221 grid.arrange(p5) |
218 # Visualize the contribution of the signatures in a barplot | 222 # Visualize the contribution of the signatures in a barplot |
219 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE) | 223 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE) |
327 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles | 331 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles |
328 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix | 332 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix |
329 fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures) | 333 fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures) |
330 | 334 |
331 # Plot contribution barplots | 335 # Plot contribution barplots |
332 pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "absolute") | 336 pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute") |
333 pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "relative") | 337 pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative") |
334 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs | 338 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs |
335 pc3_data <- pc3$data | 339 pc3_data <- pc3$data |
336 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + | 340 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
337 geom_bar(stat = "identity", position = "stack") + | 341 geom_bar(stat = "identity", position = "stack") + |
338 coord_flip() + | 342 coord_flip() + |
395 fit_res_contrib <- as.data.frame(fit_res$contribution) | 399 fit_res_contrib <- as.data.frame(fit_res$contribution) |
396 worklist <- cbind(signature = rownames(fit_res$contribution), | 400 worklist <- cbind(signature = rownames(fit_res$contribution), |
397 level = rep("nolabels", length(fit_res_contrib[, 1])), | 401 level = rep("nolabels", length(fit_res_contrib[, 1])), |
398 fit_res_contrib, | 402 fit_res_contrib, |
399 sum = rowSums(fit_res_contrib)) | 403 sum = rowSums(fit_res_contrib)) |
400 worklist <- worklist[order(worklist[, "sum"], decreasing = T), ] | 404 worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ] |
401 worklist <- worklist[1:opt$signum, ] | 405 worklist <- worklist[1:opt$signum, ] |
402 worklist <- worklist[, -length(worklist[1, ])] | 406 worklist <- worklist[, -length(worklist[1, ])] |
403 worklist <- melt(worklist) | 407 worklist <- melt(worklist) |
404 worklist <- worklist[, c(1, 3, 4, 2)] | 408 worklist <- worklist[, c(1, 3, 4, 2)] |
405 } else { | 409 } else { |
406 worklist <- list() | 410 worklist <- list() |
407 for (i in levels(factor(levels_table$level))) { | 411 for (i in levels(factor(levels_table$level))) { |
408 fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] -> worklist[[i]] | 412 worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] |
409 sum <- rowSums(as.data.frame(worklist[[i]])) | 413 sum <- rowSums(as.data.frame(worklist[[i]])) |
410 worklist[[i]] <- cbind(worklist[[i]], sum) | 414 worklist[[i]] <- cbind(worklist[[i]], sum) |
411 worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = T), ] | 415 worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ] |
412 worklist[[i]] <- worklist[[i]][1:opt$signum, ] | 416 worklist[[i]] <- worklist[[i]][1:opt$signum, ] |
413 worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] | 417 worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] |
414 } | 418 } |
415 worklist <- as.data.frame(melt(worklist)) | 419 worklist <- as.data.frame(melt(worklist)) |
416 worklist[, 2] <- paste0(worklist[, 4], " - ", worklist[, 2]) | 420 worklist[, 2] <- paste0(worklist[, 4], " - ", worklist[, 2]) |
422 worklist$label <- factor(gsub("SBS", "", worklist$signature)) | 426 worklist$label <- factor(gsub("SBS", "", worklist$signature)) |
423 worklist$signature <- factor(worklist$signature) | 427 worklist$signature <- factor(worklist$signature) |
424 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + | 428 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + |
425 geom_bar(width = 1, stat = "identity") + | 429 geom_bar(width = 1, stat = "identity") + |
426 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) + | 430 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) + |
427 coord_polar("y", start = 0) + facet_wrap(.~sample) + | 431 coord_polar("y", start = 0) + facet_wrap(. ~ sample) + |
428 labs(x = "", y = "Samples", fill = tag) + | 432 labs(x = "", y = "Samples", fill = tag) + |
429 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), | 433 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), |
430 values = signature_colors[levels(worklist$signature)], | 434 values = signature_colors[levels(worklist$signature)], |
431 labels = names(signature_colors[levels(worklist$signature)])) + | 435 labels = names(signature_colors[levels(worklist$signature)])) + |
432 theme(axis.text = element_blank(), | 436 theme(axis.text = element_blank(), |
450 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) | 454 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) |
451 } else { | 455 } else { |
452 output_table <- data.frame(sample = rownames(output_table), output_table) | 456 output_table <- data.frame(sample = rownames(output_table), output_table) |
453 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) | 457 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) |
454 } | 458 } |
455 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F) | 459 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = FALSE, row.names = FALSE) |
456 } | 460 } |
457 | 461 |
458 # calculate all pairwise cosine similarities | 462 # calculate all pairwise cosine similarities |
459 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed) | 463 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed) |
460 # extract cosine similarities per sample between original and reconstructed | 464 # extract cosine similarities per sample between original and reconstructed |