diff mutational_patterns.R @ 18:8d9f31389f33 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit 1cb9c8fd0c74943a8e6de4c63ac5e4a84ef27430"
author artbio
date Sun, 17 Oct 2021 15:51:05 +0000
parents 8c6ee1c2248f
children 69f09dff98f9
line wrap: on
line diff
--- a/mutational_patterns.R	Tue Oct 05 22:28:34 2021 +0000
+++ b/mutational_patterns.R	Sun Oct 17 15:51:05 2021 +0000
@@ -35,15 +35,21 @@
   ),
   make_option(
     "--cosmic_version",
-    default = "v2",
+    default = NA,
     type = "character",
-    help = "Version of the Cosmic Signature set to be used to express mutational patterns"
+    help = "Version of the Cosmic Signature set to be used to express mutational profiles"
+  ),
+  make_option(
+    "--own_signatures",
+    default = NA,
+    type = "character",
+    help = "Path to the user-defined signature matrix"
   ),
   make_option(
     "--signum",
     default = 2,
     type = "integer",
-    help = "selects the N most significant signatures in samples to express mutational patterns"
+    help = "selects the N most significant signatures in samples to express mutational profiles"
   ),
   make_option(
     "--nrun",
@@ -82,7 +88,7 @@
     help = "path to signature matrix"
   ),
   make_option(
-    "--output_cosmic",
+    "--output_sigpattern",
     default = NA,
     type = "character",
     help = "path to output dataset"
@@ -141,8 +147,6 @@
 ##### This is done for any section ######
 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome)
 qual_col_pals <- brewer.pal.info[brewer.pal.info$category == "qual", ]
-col_vector <- unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))))
-col_vector <- col_vector[c(-32, -34, -39)] # 67-color palette
 
 ###### Section 1 Mutation characteristics and spectrums #############
 if (!is.na(opt$output_spectrum)[1]) {
@@ -221,53 +225,75 @@
                           condensed = TRUE)
     dev.off()
     }
-##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures ####
 
-if (!is.na(opt$output_cosmic)[1]) {
+##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures ####
+
+if (!is.na(opt$output_sigpattern)[1]) {
     # Prepare cosmic signatures
-    cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE)
-    cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome &
-                                       cosmic_urls$cosmic_version == opt$cosmic_version]
-    cancer_sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file),
-                                        sep = "\t", header = TRUE)
-    row.names(cancer_sbs_signatures) <- cancer_sbs_signatures$Type
-    new_order <- match(row.names(mut_mat), cancer_sbs_signatures$Type)
-    cancer_sbs_signatures <- cancer_sbs_signatures[as.numeric(new_order), ]
-    cosmic_tag <- paste(opt$genome, "COSMIC", opt$cosmic_version, sep = " ")
-    cosmic_colors <- col_vector[seq_len(ncol(cancer_sbs_signatures) - 1)]
-    names(cosmic_colors) <- colnames(cancer_sbs_signatures[2:length(cancer_sbs_signatures)])
-    cancer_sbs_matrix <- as.matrix(cancer_sbs_signatures[, 2:length(cancer_sbs_signatures)])
+    if (!is.na(opt$cosmic_version)) {
+        cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE)
+        cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome &
+                                           cosmic_urls$cosmic_version == opt$cosmic_version]
+        sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file),
+                                            sep = "\t", header = TRUE)
+        tag <- paste(opt$genome, "COSMIC", opt$cosmic_version, sep = " ")
+    }
+    # Prepare user-defined signatures
+    if (!is.na(opt$own_signatures)) {
+        sbs_signatures <- read.table(opt$own_signatures, sep = "\t", header = TRUE)
+        tag <- paste(opt$genome, "User-Defined Signatures", sep = " ")
+    }
+    row.names(sbs_signatures) <- sbs_signatures$Type
+    # drop column Type of sbs_signatures
+    sbs_signatures <- subset(sbs_signatures, select = -c(Type))
+    # reorder substitutions of sbs_signatures to match mut_mat
+    sbs_signatures <- sbs_signatures[match(row.names(mut_mat), row.names(sbs_signatures)), ]
+    colnames(sbs_signatures) <- gsub("SBS", "", colnames(sbs_signatures))
+    # arrange signature colors
+    signature_colors <- c("#3f4100", "#6f53ff", "#6dc400", "#9d1fd7", "#009c06", "#001fae", "#8adb4d", "#5a67ff", "#d8c938", "#024bc3", "#d2ab00",
+                          "#e36eff", "#00ac44", "#d000b0", "#01b071", "#ff64e2", "#006b21", "#b70090", "#60dc9f", "#5f0083", "#c0ce67", "#002981",
+                          "#ffb53e", "#44005f", "#b59600", "#7d95ff", "#f47600", "#017bc4", "#ff2722", "#02cfec", "#ff233f", "#01b7b4", "#fd005c",
+                          "#019560", "#ff57a9", "#88d896", "#b80067", "#abd27f", "#dc8eff", "#667b00", "#fba3ff", "#093f00", "#ff6494", "#009791",
+                          "#c93200", "#4ac8ff", "#a60005", "#8fd4b6", "#ce0036", "#00634d", "#ff6035", "#2d1956", "#f0be6d", "#6a0058", "#957a00",
+                          "#e4b4ff", "#4a5500", "#abc7fe", "#c95900", "#003d27", "#b10043", "#d5c68e", "#3e163e", "#b36b00", "#debaeb", "#605400",
+                          "#7a0044", "#ffa06d", "#4c0d21", "#ff9cb5", "#3f1d02", "#ff958f", "#634a66", "#775500", "#6e0028", "#717653",
+                          "#6c1000", "#693600")
+    signature_colors <- signature_colors[seq_len(ncol(sbs_signatures))]
+    names(signature_colors) <- colnames(sbs_signatures)
+    # To drop signature_colors <- signature_colors[colnames(sbs_signatures)]
+    # This is IMPORTANT since in Galaxy we do not use the embeded function get_known_signatures()
+    sbs_signatures <- as.matrix(sbs_signatures)
 
 
     # Plot mutational profiles of the COSMIC signatures
-    pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69)
-    if (opt$cosmic_version == "v2") {
-        p6 <- plot_96_profile(cancer_sbs_matrix, condensed = TRUE, ymax = 0.3)
-        grid.arrange(p6, top = textGrob("COSMIC SBS signature profiles", gp = gpar(fontsize = 12, font = 3)))
-    } else {
-        p6 <- plot_96_profile(cancer_sbs_matrix[, 1:trunc(ncol(cancer_sbs_matrix) / 2)], condensed = TRUE, ymax = 0.3)
-        p6bis <- plot_96_profile(cancer_sbs_matrix[, (trunc(ncol(cancer_sbs_matrix) / 2) + 1):ncol(cancer_sbs_matrix)],
-                                 condensed = TRUE, ymax = 0.3)
-        grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)",
-                     gp = gpar(fontsize = 12, font = 3)))
-        grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)",
-                     gp = gpar(fontsize = 12, font = 3)))
+
+    # to do: this is largely optional and should be graphically improved anyway
+
+    pdf(opt$output_sigpattern, paper = "special", width = 11.69, height = 11.69)
+    for (i in head(seq(1, ncol(sbs_signatures), by = 20), -1)) {
+        p6 <- plot_96_profile(sbs_signatures[, i:(i + 19)], condensed = TRUE, ymax = 0.3)
+        grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc((i + 1) / 20) + 1, " of ",
+                                               trunc(ncol(sbs_signatures) / 20) + 1, " pages)"),
+                                        gp = gpar(fontsize = 12, font = 3)))
     }
-
-
+    p6 <- plot_96_profile(sbs_signatures[, (trunc(ncol(sbs_signatures) / 20) * 20):(ncol(sbs_signatures))],
+                          condensed = TRUE, ymax = 0.3)
+    grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc(ncol(sbs_signatures) / 20) + 1, " of ",
+                                               trunc(ncol(sbs_signatures) / 20) + 1, " pages)"),
+                                        gp = gpar(fontsize = 12, font = 3)))
     # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles
     pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix
-    fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_sbs_matrix)
+    fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures)
 
     # Plot contribution barplots
-    pc3 <- plot_contribution(fit_res$contribution, cancer_sbs_matrix, coord_flip = T, mode = "absolute")
-    pc4 <- plot_contribution(fit_res$contribution, cancer_sbs_matrix, coord_flip = T, mode = "relative")
+    pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "absolute")
+    pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "relative")
     if (is.na(opt$levels)[1]) {  # if there are NO levels to display in graphs
         pc3_data <- pc3$data
         pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
                geom_bar(stat = "identity", position = "stack") +
                coord_flip() +
-               scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) +
+               scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors[]) +
                labs(x = "Samples", y = "Absolute contribution") + theme_bw() +
                theme(panel.grid.minor.x = element_blank(),
                      panel.grid.major.x = element_blank(),
@@ -278,7 +304,7 @@
         pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
                geom_bar(stat = "identity", position = "fill") +
                coord_flip() +
-               scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) +
+               scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors) +
                scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
                labs(x = "Samples", y = "Relative contribution") + theme_bw() +
                theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right",
@@ -292,7 +318,7 @@
         pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier")
         pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
                geom_bar(stat = "identity", position = "stack") +
-               scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) +
+               scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors) +
                labs(x = "Samples", y = "Absolute contribution") + theme_bw() +
                theme(panel.grid.minor.x = element_blank(),
                      panel.grid.major.x = element_blank(),
@@ -304,7 +330,7 @@
         pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier")
         pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) +
                geom_bar(stat = "identity", position = "fill") +
-               scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) +
+               scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors) +
                scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
                labs(x = "Samples", y = "Relative contribution") + theme_bw() +
                theme(panel.grid.minor.x = element_blank(),
@@ -316,7 +342,7 @@
     }
     # Combine the two plots:
     grid.arrange(pc3, pc4,
-                 top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",
+                 top = textGrob("Absolute and Relative Contributions of elementary signatures to mutational profiles",
                  gp = gpar(fontsize = 12, font = 3)))
 
     #### pie charts of comic signatures contributions in samples ###
@@ -356,9 +382,10 @@
               geom_bar(width = 1, stat = "identity") +
               geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "black", size = 3) +
               coord_polar("y", start = 0) + facet_wrap(.~sample) +
-              labs(x = "", y = "Samples", fill = cosmic_tag) +
+              labs(x = "", y = "Samples", fill = tag) +
               scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"),
-                                values = cosmic_colors[levels(worklist$signature)]) +
+                                values = signature_colors[levels(worklist$signature)],
+                                labels = names(signature_colors[levels(worklist$signature)])) +
               theme(axis.text = element_blank(),
                     axis.ticks = element_blank(),
                     panel.grid  = element_blank())
@@ -377,8 +404,10 @@
             output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution),
                                                                     3], "-", colnames(fit_res$contribution)),
                                        output_table)
+            colnames(output_table) <- gsub("X", "SBS", colnames(output_table))
             } else {
-        output_table <- data.frame(sample = rownames(output_table), output_table)
+            output_table <- data.frame(sample = rownames(output_table), output_table)
+            colnames(output_table) <- gsub("X", "SBS", colnames(output_table))
         }
         write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F)
     }
@@ -412,7 +441,7 @@
                       panel.grid.major.y = element_blank()) +
                       # Add cut.off line
                       geom_hline(aes(yintercept = .95))
-    grid.arrange(p9, top = textGrob("Similarity between true and reconstructed profiles (with all Cosmic sig.)", gp = gpar(fontsize = 12, font = 3)))
+    grid.arrange(p9, top = textGrob("Similarity between true profiles and profiles reconstructed with elementary signatures", gp = gpar(fontsize = 12, font = 3)))
     dev.off()
 }