diff mutational_patterns.R @ 17:8c6ee1c2248f draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit e5d498dfc5a6a9aaea3d09037dea5d15c2d85dd2"
author artbio
date Tue, 05 Oct 2021 22:28:34 +0000
parents 31e7a33ecd71
children 8d9f31389f33
line wrap: on
line diff
--- a/mutational_patterns.R	Mon Oct 04 00:35:06 2021 +0000
+++ b/mutational_patterns.R	Tue Oct 05 22:28:34 2021 +0000
@@ -97,7 +97,14 @@
     c("-r", "--rdata"),
     type = "character",
     default = NULL,
-    help = "Path to RData output file")
+    help = "Path to RData output file"
+  ),
+  make_option(
+    c("-t", "--tooldir"),
+    type = "character",
+    default = NULL,
+    help = "Path to tool directory, where tool data are stored")
+
 )
 
 opt <- parse_args(OptionParser(option_list = option_list),
@@ -217,39 +224,30 @@
 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures ####
 
 if (!is.na(opt$output_cosmic)[1]) {
-    pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69)
-    pseudo_mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix
-    if (opt$cosmic_version == "v2") {
-        sp_url <- "https://cancer.sanger.ac.uk/signatures/documents/420/COSMIC_v2_SBS_GRCh38.txt"
-        cancer_signatures <- read.table(sp_url, sep = "\t", header = TRUE)
-        new_order <- match(row.names(pseudo_mut_mat), cancer_signatures$Type)
-        cancer_signatures <- cancer_signatures[as.vector(new_order), ]
-        row.names(cancer_signatures) <- cancer_signatures$Type
-        cancer_signatures <- as.matrix(cancer_signatures[, 2:31])
-        colnames(cancer_signatures) <- gsub("Signature_", "", colnames(cancer_signatures)) # shorten signature labels
-        cosmic_tag <- "Signatures (Cosmic v2, March 2015)"
-        cosmic_colors <- col_vector[1:30]
-        names(cosmic_colors) <- colnames(cancer_signatures)
-        } else {
-        sp_url <- "https://cancer.sanger.ac.uk/signatures/documents/431/COSMIC_v3_SBS_GRCh38.txt"
-        cancer_signatures <- read.table(sp_url, sep = "\t", header = TRUE)
-        new_order <- match(row.names(pseudo_mut_mat), cancer_signatures$Type)
-        cancer_signatures <- cancer_signatures[as.vector(new_order), ]
-        row.names(cancer_signatures) <- cancer_signatures$Type
-        cancer_signatures <- as.matrix(cancer_signatures[, 2:68])
-        colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels
-        cosmic_tag <- "Signatures (Cosmic v3, May 2019)"
-        cosmic_colors <- col_vector[1:67]
-        names(cosmic_colors) <- colnames(cancer_signatures)
-        }
+    # Prepare cosmic signatures
+    cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE)
+    cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome &
+                                       cosmic_urls$cosmic_version == opt$cosmic_version]
+    cancer_sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file),
+                                        sep = "\t", header = TRUE)
+    row.names(cancer_sbs_signatures) <- cancer_sbs_signatures$Type
+    new_order <- match(row.names(mut_mat), cancer_sbs_signatures$Type)
+    cancer_sbs_signatures <- cancer_sbs_signatures[as.numeric(new_order), ]
+    cosmic_tag <- paste(opt$genome, "COSMIC", opt$cosmic_version, sep = " ")
+    cosmic_colors <- col_vector[seq_len(ncol(cancer_sbs_signatures) - 1)]
+    names(cosmic_colors) <- colnames(cancer_sbs_signatures[2:length(cancer_sbs_signatures)])
+    cancer_sbs_matrix <- as.matrix(cancer_sbs_signatures[, 2:length(cancer_sbs_signatures)])
+
 
     # Plot mutational profiles of the COSMIC signatures
+    pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69)
     if (opt$cosmic_version == "v2") {
-        p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3)
-        grid.arrange(p6, top = textGrob("COSMIC signature profiles", gp = gpar(fontsize = 12, font = 3)))
+        p6 <- plot_96_profile(cancer_sbs_matrix, condensed = TRUE, ymax = 0.3)
+        grid.arrange(p6, top = textGrob("COSMIC SBS signature profiles", gp = gpar(fontsize = 12, font = 3)))
     } else {
-        p6 <- plot_96_profile(cancer_signatures[, 1:33], condensed = TRUE, ymax = 0.3)
-        p6bis <- plot_96_profile(cancer_signatures[, 34:67], condensed = TRUE, ymax = 0.3)
+        p6 <- plot_96_profile(cancer_sbs_matrix[, 1:trunc(ncol(cancer_sbs_matrix) / 2)], condensed = TRUE, ymax = 0.3)
+        p6bis <- plot_96_profile(cancer_sbs_matrix[, (trunc(ncol(cancer_sbs_matrix) / 2) + 1):ncol(cancer_sbs_matrix)],
+                                 condensed = TRUE, ymax = 0.3)
         grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)",
                      gp = gpar(fontsize = 12, font = 3)))
         grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)",
@@ -258,11 +256,12 @@
 
 
     # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles
-    fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures)
+    pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix
+    fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_sbs_matrix)
 
     # Plot contribution barplots
-    pc3 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "absolute")
-    pc4 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "relative")
+    pc3 <- plot_contribution(fit_res$contribution, cancer_sbs_matrix, coord_flip = T, mode = "absolute")
+    pc4 <- plot_contribution(fit_res$contribution, cancer_sbs_matrix, coord_flip = T, mode = "relative")
     if (is.na(opt$levels)[1]) {  # if there are NO levels to display in graphs
         pc3_data <- pc3$data
         pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
@@ -374,7 +373,6 @@
     # export relative contribution matrix
     if (!is.na(opt$sig_contrib_matrix)) {
         output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution))
-        colnames(output_table) <- paste0("s", colnames(output_table))
         if (length(levels(factor(levels_table$level))) > 1) {
             output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution),
                                                                     3], "-", colnames(fit_res$contribution)),