Mercurial > repos > artbio > mutational_patterns
diff mutational_patterns.R @ 24:ca6c19ee7da0 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit bad92e3210a78b5ebf47d6950f4dba10c1cbf07d
author | artbio |
---|---|
date | Tue, 05 Jul 2022 21:41:43 +0000 |
parents | 83f8c93c34b4 |
children | b00fef2b1c2c |
line wrap: on
line diff
--- a/mutational_patterns.R Wed Oct 27 00:46:47 2021 +0000 +++ b/mutational_patterns.R Tue Jul 05 21:41:43 2022 +0000 @@ -1,7 +1,8 @@ # load packages that are provided in the conda env -options(show.error.messages = F, +options(show.error.messages = FALSE, error = function() { - cat(geterrmessage(), file = stderr()); q("no", 1, F) + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) } ) loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") @@ -202,16 +203,19 @@ # (For larger datasets it is wise to perform more iterations by changing the nrun parameter # to achieve stability and avoid local minima) nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun) - # Assign signature names - colnames(nmf_res$signatures) <- paste0("SBS", 1:opt$newsignum) - rownames(nmf_res$contribution) <- paste0("SBS", 1:opt$newsignum) + # Assign signature COSMICv3.2 names + cosmic_signatures <- get_known_signatures() + nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = 0.85) + sim_matrix <- cos_sim_matrix(cosmic_signatures, nmf_res$signatures) + plot_cosine_sim <- plot_cosine_heatmap(sim_matrix) + grid.arrange(plot_cosine_sim) # Plot the 96-profile of the signatures: p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) - newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = T), + newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE), "[", new_sig_matrix$substitution, "]", - gsub("^.\\.", "", new_sig_matrix$context, perl = T)) + gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)) new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") grid.arrange(p5) @@ -329,8 +333,8 @@ fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures) # Plot contribution barplots - pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "absolute") - pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "relative") + pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute") + pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative") if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs pc3_data <- pc3$data pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + @@ -397,7 +401,7 @@ level = rep("nolabels", length(fit_res_contrib[, 1])), fit_res_contrib, sum = rowSums(fit_res_contrib)) - worklist <- worklist[order(worklist[, "sum"], decreasing = T), ] + worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ] worklist <- worklist[1:opt$signum, ] worklist <- worklist[, -length(worklist[1, ])] worklist <- melt(worklist) @@ -405,10 +409,10 @@ } else { worklist <- list() for (i in levels(factor(levels_table$level))) { - fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] -> worklist[[i]] + worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] sum <- rowSums(as.data.frame(worklist[[i]])) worklist[[i]] <- cbind(worklist[[i]], sum) - worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = T), ] + worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ] worklist[[i]] <- worklist[[i]][1:opt$signum, ] worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] } @@ -424,7 +428,7 @@ p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + geom_bar(width = 1, stat = "identity") + geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) + - coord_polar("y", start = 0) + facet_wrap(.~sample) + + coord_polar("y", start = 0) + facet_wrap(. ~ sample) + labs(x = "", y = "Samples", fill = tag) + scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), values = signature_colors[levels(worklist$signature)], @@ -452,7 +456,7 @@ output_table <- data.frame(sample = rownames(output_table), output_table) colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) } - write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F) + write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = FALSE, row.names = FALSE) } # calculate all pairwise cosine similarities