Mercurial > repos > artbio > mutational_patterns
comparison mutational_patterns.R @ 28:9a33a5a90a2c draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/mutational_patterns commit c55e91bc6716a71ee3a3f30da3f4c915f465c1d4
| author | artbio |
|---|---|
| date | Sun, 11 Feb 2024 01:12:41 +0000 |
| parents | af5c65ad5317 |
| children |
comparison
equal
deleted
inserted
replaced
| 27:2c9759c7ba72 | 28:9a33a5a90a2c |
|---|---|
| 1 # load packages that are provided in the conda env | 1 # load packages that are provided in the conda env |
| 2 options(show.error.messages = FALSE, | 2 options( |
| 3 error = function() { | 3 show.error.messages = FALSE, |
| 4 cat(geterrmessage(), file = stderr()) | 4 error = function() { |
| 5 q("no", 1, FALSE) | 5 cat(geterrmessage(), file = stderr()) |
| 6 } | 6 q("no", 1, FALSE) |
| 7 ) | 7 } |
| 8 ) | |
| 8 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 9 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
| 9 warnings() | 10 warnings() |
| 10 library(optparse) | 11 library(optparse) |
| 11 library(rjson) | 12 library(rjson) |
| 12 library(grid) | 13 library(grid) |
| 14 library(scales) | 15 library(scales) |
| 15 library(RColorBrewer) | 16 library(RColorBrewer) |
| 16 | 17 |
| 17 # Arguments | 18 # Arguments |
| 18 option_list <- list( | 19 option_list <- list( |
| 19 make_option( | 20 make_option( |
| 20 "--inputs", | 21 "--inputs", |
| 21 default = NA, | 22 default = NA, |
| 22 type = "character", | 23 type = "character", |
| 23 help = "json formatted dictionary of datasets and their paths" | 24 help = "json formatted dictionary of datasets and their paths" |
| 24 ), | 25 ), |
| 25 make_option( | 26 make_option( |
| 26 "--genome", | 27 "--genome", |
| 27 default = NA, | 28 default = NA, |
| 28 type = "character", | 29 type = "character", |
| 29 help = "genome name in the BSgenome bioconductor package" | 30 help = "genome name in the BSgenome bioconductor package" |
| 30 ), | 31 ), |
| 31 make_option( | 32 make_option( |
| 32 "--levels", | 33 "--levels", |
| 33 default = NA, | 34 default = NA, |
| 34 type = "character", | 35 type = "character", |
| 35 help = "path to the tab separated file describing the levels in function of datasets" | 36 help = "path to the tab separated file describing the levels in function of datasets" |
| 36 ), | 37 ), |
| 37 make_option( | 38 make_option( |
| 38 "--cosmic_version", | 39 "--cosmic_version", |
| 39 default = NA, | 40 default = NA, |
| 40 type = "character", | 41 type = "character", |
| 41 help = "Version of the Cosmic Signature set to be used to express mutational profiles" | 42 help = "Version of the Cosmic Signature set to be used to express mutational profiles" |
| 42 ), | 43 ), |
| 43 make_option( | 44 make_option( |
| 44 "--own_signatures", | 45 "--own_signatures", |
| 45 default = NA, | 46 default = NA, |
| 46 type = "character", | 47 type = "character", |
| 47 help = "Path to the user-defined signature matrix" | 48 help = "Path to the user-defined signature matrix" |
| 48 ), | 49 ), |
| 49 make_option( | 50 make_option( |
| 50 "--signum", | 51 "--signum", |
| 51 default = 2, | 52 default = 2, |
| 52 type = "integer", | 53 type = "integer", |
| 53 help = "selects the N most significant signatures in samples to express mutational profiles" | 54 help = "selects the N most significant signatures in samples to express mutational profiles" |
| 54 ), | 55 ), |
| 55 make_option( | 56 make_option( |
| 56 "--nrun", | 57 "--nrun", |
| 57 default = 2, | 58 default = 2, |
| 58 type = "integer", | 59 type = "integer", |
| 59 help = "Number of runs to fit signatures" | 60 help = "Number of runs to fit signatures" |
| 60 ), | 61 ), |
| 61 make_option( | 62 make_option( |
| 62 "--rank", | 63 "--rank", |
| 63 default = 2, | 64 default = 2, |
| 64 type = "integer", | 65 type = "integer", |
| 65 help = "number of ranks to display for parameter optimization" | 66 help = "number of ranks to display for parameter optimization" |
| 66 ), | 67 ), |
| 67 make_option( | 68 make_option( |
| 68 "--newsignum", | 69 "--newsignum", |
| 69 default = 2, | 70 default = 2, |
| 70 type = "integer", | 71 type = "integer", |
| 71 help = "Number of new signatures to be captured" | 72 help = "Number of new signatures to be captured" |
| 72 ), | 73 ), |
| 73 make_option( | 74 make_option( |
| 74 "--cosmic_id_threshold", | 75 "--cosmic_id_threshold", |
| 75 default = 0.85, | 76 default = 0.85, |
| 76 type = "double", | 77 type = "double", |
| 77 help = "minimu cosine similarity to rename a new signature according to cosmic v3.2" | 78 help = "minimu cosine similarity to rename a new signature according to cosmic v3.2" |
| 78 ), | 79 ), |
| 79 make_option( | 80 make_option( |
| 80 "--output_spectrum", | 81 "--output_spectrum", |
| 81 default = NA, | 82 default = NA, |
| 82 type = "character", | 83 type = "character", |
| 83 help = "path to output dataset" | 84 help = "path to output dataset" |
| 84 ), | 85 ), |
| 85 make_option( | 86 make_option( |
| 86 "--output_denovo", | 87 "--output_denovo", |
| 87 default = NA, | 88 default = NA, |
| 88 type = "character", | 89 type = "character", |
| 89 help = "path to output dataset" | 90 help = "path to output dataset" |
| 90 ), | 91 ), |
| 91 make_option( | 92 make_option( |
| 92 "--sigmatrix", | 93 "--sigmatrix", |
| 93 default = NA, | 94 default = NA, |
| 94 type = "character", | 95 type = "character", |
| 95 help = "path to signature matrix" | 96 help = "path to signature matrix" |
| 96 ), | 97 ), |
| 97 make_option( | 98 make_option( |
| 98 "--output_sigpattern", | 99 "--output_sigpattern", |
| 99 default = NA, | 100 default = NA, |
| 100 type = "character", | 101 type = "character", |
| 101 help = "path to output dataset" | 102 help = "path to output dataset" |
| 102 ), | 103 ), |
| 103 make_option( | 104 make_option( |
| 104 "--display_signatures", | 105 "--display_signatures", |
| 105 default = NA, | 106 default = NA, |
| 106 type = "character", | 107 type = "character", |
| 107 help = "display input signature profiles if set to yes" | 108 help = "display input signature profiles if set to yes" |
| 108 ), | 109 ), |
| 109 make_option( | 110 make_option( |
| 110 "--sig_contrib_matrix", | 111 "--sig_contrib_matrix", |
| 111 default = NA, | 112 default = NA, |
| 112 type = "character", | 113 type = "character", |
| 113 help = "path to signature contribution matrix" | 114 help = "path to signature contribution matrix" |
| 114 ), | 115 ), |
| 115 make_option( | 116 make_option( |
| 116 "--colors", | 117 "--colors", |
| 117 default = NA, | 118 default = NA, |
| 118 type = "character", | 119 type = "character", |
| 119 help = "color palette to display signatures" | 120 help = "color palette to display signatures" |
| 120 ), | 121 ), |
| 121 make_option( | 122 make_option( |
| 122 c("-r", "--rdata"), | 123 c("-r", "--rdata"), |
| 123 type = "character", | 124 type = "character", |
| 124 default = NULL, | 125 default = NULL, |
| 125 help = "Path to RData output file" | 126 help = "Path to RData output file" |
| 126 ), | 127 ), |
| 127 make_option( | 128 make_option( |
| 128 c("-t", "--tooldir"), | 129 c("-t", "--tooldir"), |
| 129 type = "character", | 130 type = "character", |
| 130 default = NULL, | 131 default = NULL, |
| 131 help = "Path to tool directory, where tool data are stored") | 132 help = "Path to tool directory, where tool data are stored" |
| 132 | 133 ) |
| 133 ) | 134 ) |
| 134 | 135 |
| 135 opt <- parse_args(OptionParser(option_list = option_list), | 136 opt <- parse_args(OptionParser(option_list = option_list), |
| 136 args = commandArgs(trailingOnly = TRUE)) | 137 args = commandArgs(trailingOnly = TRUE) |
| 138 ) | |
| 137 | 139 |
| 138 ################ Manage input data #################### | 140 ################ Manage input data #################### |
| 139 json_dict <- opt$inputs | 141 json_dict <- opt$inputs |
| 140 parser <- newJSONParser() | 142 parser <- newJSONParser() |
| 141 parser$addData(json_dict) | 143 parser$addData(json_dict) |
| 150 library(ggplot2) | 152 library(ggplot2) |
| 151 | 153 |
| 152 # Load the VCF files into a GRangesList: | 154 # Load the VCF files into a GRangesList: |
| 153 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome) | 155 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome) |
| 154 library(plyr) | 156 library(plyr) |
| 155 if (!is.na(opt$levels)[1]) { # manage levels if there are | 157 if (!is.na(opt$levels)[1]) { # manage levels if there are |
| 156 levels_table <- read.delim(opt$levels, header = FALSE, | 158 levels_table <- read.delim(opt$levels, |
| 157 col.names = c("element_identifier", "level")) | 159 header = FALSE, |
| 158 } else { | 160 col.names = c("element_identifier", "level") |
| 159 levels_table <- data.frame(element_identifier = vcf_table$element_identifier, | 161 ) |
| 160 level = rep("nolabels", length(vcf_table$element_identifier))) | 162 } else { |
| 163 levels_table <- data.frame( | |
| 164 element_identifier = vcf_table$element_identifier, | |
| 165 level = rep("nolabels", length(vcf_table$element_identifier)) | |
| 166 ) | |
| 161 } | 167 } |
| 162 metadata_table <- join(vcf_table, levels_table, by = "element_identifier") | 168 metadata_table <- join(vcf_table, levels_table, by = "element_identifier") |
| 163 tissue <- as.vector(metadata_table$level) | 169 tissue <- as.vector(metadata_table$level) |
| 164 detach(package:plyr) | 170 detach(package:plyr) |
| 165 | 171 |
| 179 plot(p1) | 185 plot(p1) |
| 180 } else { | 186 } else { |
| 181 p2 <- plot_spectrum(type_occurrences, by = tissue, CT = TRUE) # by levels | 187 p2 <- plot_spectrum(type_occurrences, by = tissue, CT = TRUE) # by levels |
| 182 p3 <- plot_spectrum(type_occurrences, CT = TRUE, legend = TRUE) # total | 188 p3 <- plot_spectrum(type_occurrences, CT = TRUE, legend = TRUE) # total |
| 183 grid.arrange(p2, p3, ncol = 2, widths = c(4, 2.3), heights = c(4, 1)) | 189 grid.arrange(p2, p3, ncol = 2, widths = c(4, 2.3), heights = c(4, 1)) |
| 184 } | 190 } |
| 185 plot_96_profile(mut_mat, condensed = TRUE) | 191 plot_96_profile(mut_mat, condensed = TRUE) |
| 186 dev.off() | 192 dev.off() |
| 187 } | 193 } |
| 188 | 194 |
| 189 ###### Section 2: De novo mutational signature extraction using NMF ####### | 195 ###### Section 2: De novo mutational signature extraction using NMF ####### |
| 190 # opt$rank cannot be higher than the number of samples and | 196 # opt$rank cannot be higher than the number of samples and |
| 191 # likewise, opt$signum cannot be higher thant the number of samples | 197 # likewise, opt$signum cannot be higher thant the number of samples |
| 192 if (!is.na(opt$output_denovo)[1]) { | 198 if (!is.na(opt$output_denovo)[1]) { |
| 193 | |
| 194 if (opt$rank > length(element_identifiers)) { | 199 if (opt$rank > length(element_identifiers)) { |
| 195 opt$rank <- length(element_identifiers) | 200 opt$rank <- length(element_identifiers) |
| 196 } | 201 } |
| 197 if (opt$signum > length(element_identifiers)) { | 202 if (opt$signum > length(element_identifiers)) { |
| 198 opt$signum <- length(element_identifiers) | 203 opt$signum <- length(element_identifiers) |
| 199 } | 204 } |
| 200 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix | 205 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix |
| 201 # Use the NMF package to generate an estimate rank plot | 206 # Use the NMF package to generate an estimate rank plot |
| 202 library("NMF") | 207 library("NMF") |
| 203 estimate <- nmf(pseudo_mut_mat, rank = 1:opt$rank, method = "brunet", nrun = opt$nrun, seed = 123456) | 208 estimate <- nmf(pseudo_mut_mat, rank = 1:opt$rank, method = "brunet", nrun = opt$nrun, seed = 123456) |
| 204 # And plot it | 209 # And plot it |
| 219 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) | 224 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) |
| 220 grid.arrange(p5) | 225 grid.arrange(p5) |
| 221 # write matrix of deno signatures for user | 226 # write matrix of deno signatures for user |
| 222 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") | 227 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") |
| 223 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) | 228 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) |
| 224 newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE), | 229 newcol <- paste0( |
| 225 "[", new_sig_matrix$substitution, "]", | 230 gsub("\\..", "", new_sig_matrix$context, perl = TRUE), |
| 226 gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)) | 231 "[", new_sig_matrix$substitution, "]", |
| 232 gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE) | |
| 233 ) | |
| 227 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) | 234 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) |
| 228 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") | 235 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") |
| 229 # Visualize the contribution of the signatures in a barplot | 236 # Visualize the contribution of the signatures in a barplot |
| 230 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE) | 237 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE) |
| 231 # Visualize the contribution of the signatures in absolute number of mutations | 238 # Visualize the contribution of the signatures in absolute number of mutations |
| 239 # can be plotted in a user-specified order. | 246 # can be plotted in a user-specified order. |
| 240 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: | 247 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: |
| 241 pch1 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = TRUE) | 248 pch1 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = TRUE) |
| 242 # Plot signature contribution as a heatmap without sample clustering: | 249 # Plot signature contribution as a heatmap without sample clustering: |
| 243 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE) | 250 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE) |
| 244 #Combine the plots into one figure: | 251 # Combine the plots into one figure: |
| 245 grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6)) | 252 grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6)) |
| 246 | 253 |
| 247 # Compare the reconstructed mutational profile with the original mutational profile: | 254 # Compare the reconstructed mutational profile with the original mutational profile: |
| 248 pch3 <- plot_original_vs_reconstructed(pseudo_mut_mat, nmf_res$reconstructed, y_intercept = 0.95) | 255 pch3 <- plot_original_vs_reconstructed(pseudo_mut_mat, nmf_res$reconstructed, y_intercept = 0.95) |
| 249 grid.arrange(pch3) | 256 grid.arrange(pch3) |
| 250 dev.off() | 257 dev.off() |
| 251 } | 258 } |
| 252 | 259 |
| 253 ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures #### | 260 ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures #### |
| 254 | 261 |
| 255 if (!is.na(opt$output_sigpattern)[1]) { | 262 if (!is.na(opt$output_sigpattern)[1]) { |
| 256 # Prepare cosmic signatures | 263 # Prepare cosmic signatures |
| 257 if (!is.na(opt$cosmic_version)) { | 264 if (!is.na(opt$cosmic_version)) { |
| 258 cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE) | 265 cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE) |
| 259 cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome & | 266 cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome & |
| 260 cosmic_urls$cosmic_version == opt$cosmic_version] | 267 cosmic_urls$cosmic_version == opt$cosmic_version] |
| 261 sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file), | 268 sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file), |
| 262 sep = "\t", header = TRUE) | 269 sep = "\t", header = TRUE |
| 270 ) | |
| 263 tag <- paste(gsub("BSgenome.Hsapiens.UCSC.", "", opt$genome), "COSMIC", opt$cosmic_version, sep = " ") | 271 tag <- paste(gsub("BSgenome.Hsapiens.UCSC.", "", opt$genome), "COSMIC", opt$cosmic_version, sep = " ") |
| 264 } | 272 } |
| 265 # Prepare user-defined signatures | 273 # Prepare user-defined signatures |
| 266 if (!is.na(opt$own_signatures)) { | 274 if (!is.na(opt$own_signatures)) { |
| 267 sbs_signatures <- read.table(opt$own_signatures, sep = "\t", header = TRUE) | 275 sbs_signatures <- read.table(opt$own_signatures, sep = "\t", header = TRUE) |
| 272 sbs_signatures <- subset(sbs_signatures, select = -c(Type)) | 280 sbs_signatures <- subset(sbs_signatures, select = -c(Type)) |
| 273 # reorder substitutions of sbs_signatures to match mut_mat | 281 # reorder substitutions of sbs_signatures to match mut_mat |
| 274 sbs_signatures <- sbs_signatures[match(row.names(mut_mat), row.names(sbs_signatures)), ] | 282 sbs_signatures <- sbs_signatures[match(row.names(mut_mat), row.names(sbs_signatures)), ] |
| 275 # arrange signature colors | 283 # arrange signature colors |
| 276 if (opt$colors == "intense") { | 284 if (opt$colors == "intense") { |
| 277 signature_colors <- c("#3f4100", "#6f53ff", "#6dc400", "#9d1fd7", "#009c06", "#001fae", "#c4bedf", "#8adb4d", "#5a67ff", "#d8c938", "#024bc3", | 285 signature_colors <- c( |
| 278 "#d2ab00", "#e36eff", "#cad5b3", "#00ac44", "#d000b0", "#01b071", "#ff64e2", "#006b21", "#b70090", "#60dc9f", "#5f0083", | 286 "#3f4100", "#6f53ff", "#6dc400", "#9d1fd7", "#009c06", "#001fae", "#c4bedf", "#8adb4d", "#5a67ff", "#d8c938", "#024bc3", |
| 279 "#c0ce67", "#002981", "#e6b8b3", "#ffb53e", "#44005f", "#b59600", "#7d95ff", "#f47600", "#017bc4", "#ff2722", "#02cfec", | 287 "#d2ab00", "#e36eff", "#cad5b3", "#00ac44", "#d000b0", "#01b071", "#ff64e2", "#006b21", "#b70090", "#60dc9f", "#5f0083", |
| 280 "#ff233f", "#01b7b4", "#fd005c", "#019560", "#ff57a9", "#88d896", "#b80067", "#abd27f", "#dc8eff", "#667b00", "#fba3ff", | 288 "#c0ce67", "#002981", "#e6b8b3", "#ffb53e", "#44005f", "#b59600", "#7d95ff", "#f47600", "#017bc4", "#ff2722", "#02cfec", |
| 281 "#093f00", "#ff6494", "#009791", "#c93200", "#4ac8ff", "#a60005", "#8fd4b6", "#ce0036", "#00634d", "#ff6035", "#2d1956", | 289 "#ff233f", "#01b7b4", "#fd005c", "#019560", "#ff57a9", "#88d896", "#b80067", "#abd27f", "#dc8eff", "#667b00", "#fba3ff", |
| 282 "#f0be6d", "#6a0058", "#957a00", "#e4b4ff", "#4a5500", "#abc7fe", "#c95900", "#003d27", "#b10043", "#d5c68e", "#3e163e", | 290 "#093f00", "#ff6494", "#009791", "#c93200", "#4ac8ff", "#a60005", "#8fd4b6", "#ce0036", "#00634d", "#ff6035", "#2d1956", |
| 283 "#b36b00", "#debaeb", "#605400", "#7a0044", "#ffa06d", "#4c0d21", "#ff9cb5", "#3f1d02", "#ff958f", "#634a66", "#775500", | 291 "#f0be6d", "#6a0058", "#957a00", "#e4b4ff", "#4a5500", "#abc7fe", "#c95900", "#003d27", "#b10043", "#d5c68e", "#3e163e", |
| 284 "#6e0028", "#717653", "#6c1000", "#693600") | 292 "#b36b00", "#debaeb", "#605400", "#7a0044", "#ffa06d", "#4c0d21", "#ff9cb5", "#3f1d02", "#ff958f", "#634a66", "#775500", |
| 293 "#6e0028", "#717653", "#6c1000", "#693600" | |
| 294 ) | |
| 285 } else { | 295 } else { |
| 286 signature_colors <- c("#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0", "#F0027F", "#c4bedf", "#BF5B17", "#666666", "#1B9E77", "#D95F02", | 296 signature_colors <- c( |
| 287 "#7570B3", "#E7298A", "#cad5b3", "#66A61E", "#E6AB02", "#A6761D", "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", | 297 "#7FC97F", "#BEAED4", "#FDC086", "#FFFF99", "#386CB0", "#F0027F", "#c4bedf", "#BF5B17", "#666666", "#1B9E77", "#D95F02", |
| 288 "#E31A1C", "#B3E2CD", "#e6b8b3", "#FF7F00", "#CAB2D6", "#6A3D9A", "#B15928", "#FBB4AE", "#B3CDE3", "#CCEBC5", "#DECBE4", | 298 "#7570B3", "#E7298A", "#cad5b3", "#66A61E", "#E6AB02", "#A6761D", "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", |
| 289 "#FED9A6", "#FFFFCC", "#E5D8BD", "#FDDAEC", "#F2F2F2", "#B3E2CD", "#FDCDAC", "#CBD5E8", "#F4CAE4", "#E6F5C9", "#FFF2AE", | 299 "#E31A1C", "#B3E2CD", "#e6b8b3", "#FF7F00", "#CAB2D6", "#6A3D9A", "#B15928", "#FBB4AE", "#B3CDE3", "#CCEBC5", "#DECBE4", |
| 290 "#F1E2CC", "#CCCCCC", "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FFFF33", "#A65628", "#F781BF", "#999999", "#66C2A5", | 300 "#FED9A6", "#FFFFCC", "#E5D8BD", "#FDDAEC", "#F2F2F2", "#B3E2CD", "#FDCDAC", "#CBD5E8", "#F4CAE4", "#E6F5C9", "#FFF2AE", |
| 291 "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", | 301 "#F1E2CC", "#CCCCCC", "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FFFF33", "#A65628", "#F781BF", "#999999", "#66C2A5", |
| 292 "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#FFED6F", "#3f1d02", "#ff958f", "#634a66", "#775500", | 302 "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", |
| 293 "#6e0028", "#717653", "#6c1000", "#693600") | 303 "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#FFED6F", "#3f1d02", "#ff958f", "#634a66", "#775500", |
| 294 } | 304 "#6e0028", "#717653", "#6c1000", "#693600" |
| 295 names(signature_colors) <- c("SBS1", "SBS2", "SBS3", "SBS4", "SBS5", "SBS6", "SBS7", "SBS7a", "SBS7b", "SBS7c", "SBS7d", | 305 ) |
| 296 "SBS8", "SBS9", "SBS10", "SBS10a", "SBS10b", "SBS10c", "SBS10d", "SBS11", "SBS12", "SBS13", "SBS14", | 306 } |
| 297 "SBS15", "SBS16", "SBS17", "SBS17a", "SBS17b", "SBS18", "SBS19", "SBS20", "SBS21", "SBS22", "SBS23", | 307 names(signature_colors) <- c( |
| 298 "SBS24", "SBS25", "SBS26", "SBS27", "SBS28", "SBS29", "SBS30", "SBS31", "SBS32", "SBS33", "SBS34", | 308 "SBS1", "SBS2", "SBS3", "SBS4", "SBS5", "SBS6", "SBS7", "SBS7a", "SBS7b", "SBS7c", "SBS7d", |
| 299 "SBS35", "SBS36", "SBS37", "SBS38", "SBS39", "SBS40", "SBS41", "SBS42", "SBS43", "SBS44", "SBS45", | 309 "SBS8", "SBS9", "SBS10", "SBS10a", "SBS10b", "SBS10c", "SBS10d", "SBS11", "SBS12", "SBS13", "SBS14", |
| 300 "SBS46", "SBS47", "SBS48", "SBS49", "SBS50", "SBS51", "SBS52", "SBS53", "SBS54", "SBS55", "SBS56", | 310 "SBS15", "SBS16", "SBS17", "SBS17a", "SBS17b", "SBS18", "SBS19", "SBS20", "SBS21", "SBS22", "SBS23", |
| 301 "SBS57", "SBS58", "SBS59", "SBS60", "SBS84", "SBS85", "SBS86", "SBS87", "SBS88", "SBS89", "SBS90", | 311 "SBS24", "SBS25", "SBS26", "SBS27", "SBS28", "SBS29", "SBS30", "SBS31", "SBS32", "SBS33", "SBS34", |
| 302 "SBS91", "SBS92", "SBS93", "SBS94") | 312 "SBS35", "SBS36", "SBS37", "SBS38", "SBS39", "SBS40", "SBS41", "SBS42", "SBS43", "SBS44", "SBS45", |
| 313 "SBS46", "SBS47", "SBS48", "SBS49", "SBS50", "SBS51", "SBS52", "SBS53", "SBS54", "SBS55", "SBS56", | |
| 314 "SBS57", "SBS58", "SBS59", "SBS60", "SBS84", "SBS85", "SBS86", "SBS87", "SBS88", "SBS89", "SBS90", | |
| 315 "SBS91", "SBS92", "SBS93", "SBS94" | |
| 316 ) | |
| 303 | 317 |
| 304 # if signature names provided are not compliant with cosmic nomenclature, | 318 # if signature names provided are not compliant with cosmic nomenclature, |
| 305 # we attribute these names to the active signature_colors vector, adjusted to the length | 319 # we attribute these names to the active signature_colors vector, adjusted to the length |
| 306 # of the signature names. | 320 # of the signature names. |
| 307 | 321 |
| 308 if (! all(colnames(sbs_signatures) %in% names(signature_colors))) { # provided signature are not all included in cosmic names | 322 if (!all(colnames(sbs_signatures) %in% names(signature_colors))) { # provided signature are not all included in cosmic names |
| 309 signature_colors <- signature_colors[seq_along(sbs_signatures)] | 323 signature_colors <- signature_colors[seq_along(sbs_signatures)] |
| 310 names(signature_colors) <- colnames(sbs_signatures) | 324 names(signature_colors) <- colnames(sbs_signatures) |
| 311 } | 325 } |
| 312 | 326 |
| 313 # This is IMPORTANT since in Galaxy we do not use the embeded function get_known_signatures() | 327 # This is IMPORTANT since in Galaxy we do not use the embeded function get_known_signatures() |
| 318 | 332 |
| 319 pdf(opt$output_sigpattern, paper = "special", width = 11.69, height = 11.69) | 333 pdf(opt$output_sigpattern, paper = "special", width = 11.69, height = 11.69) |
| 320 if (opt$display_signatures == "yes") { | 334 if (opt$display_signatures == "yes") { |
| 321 for (i in head(seq(1, ncol(sbs_signatures), by = 20), -1)) { | 335 for (i in head(seq(1, ncol(sbs_signatures), by = 20), -1)) { |
| 322 p6 <- plot_96_profile(sbs_signatures[, i:(i + 19)], condensed = TRUE, ymax = 0.3) | 336 p6 <- plot_96_profile(sbs_signatures[, i:(i + 19)], condensed = TRUE, ymax = 0.3) |
| 323 grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc((i + 1) / 20) + 1, " of ", | 337 grid.arrange(p6, top = textGrob( |
| 324 trunc(ncol(sbs_signatures) / 20) + 1, " pages)"), | 338 paste0( |
| 325 gp = gpar(fontsize = 12, font = 3))) | 339 tag, " profiles (", trunc((i + 1) / 20) + 1, " of ", |
| 340 trunc(ncol(sbs_signatures) / 20) + 1, " pages)" | |
| 341 ), | |
| 342 gp = gpar(fontsize = 12, font = 3) | |
| 343 )) | |
| 326 } | 344 } |
| 327 p6 <- plot_96_profile(sbs_signatures[, (trunc(ncol(sbs_signatures) / 20) * 20):(ncol(sbs_signatures))], | 345 p6 <- plot_96_profile(sbs_signatures[, (trunc(ncol(sbs_signatures) / 20) * 20):(ncol(sbs_signatures))], |
| 328 condensed = TRUE, ymax = 0.3) | 346 condensed = TRUE, ymax = 0.3 |
| 329 grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc(ncol(sbs_signatures) / 20) + 1, " of ", | 347 ) |
| 330 trunc(ncol(sbs_signatures) / 20) + 1, " pages)"), | 348 grid.arrange(p6, top = textGrob( |
| 331 gp = gpar(fontsize = 12, font = 3))) | 349 paste0( |
| 350 tag, " profiles (", trunc(ncol(sbs_signatures) / 20) + 1, " of ", | |
| 351 trunc(ncol(sbs_signatures) / 20) + 1, " pages)" | |
| 352 ), | |
| 353 gp = gpar(fontsize = 12, font = 3) | |
| 354 )) | |
| 332 } | 355 } |
| 333 | 356 |
| 334 | 357 |
| 335 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles | 358 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles |
| 336 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix | 359 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix |
| 337 fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures) | 360 fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures) |
| 338 | 361 |
| 339 # Plot contribution barplots | 362 # Plot contribution barplots |
| 340 pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute") | 363 pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "absolute") |
| 341 pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative") | 364 pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = TRUE, mode = "relative") |
| 342 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs | 365 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs |
| 343 pc3_data <- pc3$data | 366 pc3_data <- pc3$data |
| 344 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + | 367 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
| 345 geom_bar(stat = "identity", position = "stack") + | 368 geom_bar(stat = "identity", position = "stack") + |
| 346 coord_flip() + | 369 coord_flip() + |
| 347 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + | 370 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + |
| 348 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + | 371 labs(x = "Samples", y = "Absolute contribution") + |
| 349 theme(panel.grid.minor.x = element_blank(), | 372 theme_bw() + |
| 350 panel.grid.major.x = element_blank(), | 373 theme( |
| 351 legend.position = "right", | 374 panel.grid.minor.x = element_blank(), |
| 352 text = element_text(size = 8), | 375 panel.grid.major.x = element_blank(), |
| 353 axis.text.x = element_text(angle = 90, hjust = 1)) | 376 legend.position = "right", |
| 377 text = element_text(size = 8), | |
| 378 axis.text.x = element_text(angle = 90, hjust = 1) | |
| 379 ) | |
| 354 pc4_data <- pc4$data | 380 pc4_data <- pc4$data |
| 355 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + | 381 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
| 356 geom_bar(stat = "identity", position = "fill") + | 382 geom_bar(stat = "identity", position = "fill") + |
| 357 coord_flip() + | 383 coord_flip() + |
| 358 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + | 384 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + |
| 359 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + | 385 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + |
| 360 labs(x = "Samples", y = "Relative contribution") + theme_bw() + | 386 labs(x = "Samples", y = "Relative contribution") + |
| 361 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right", | 387 theme_bw() + |
| 362 text = element_text(size = 8), | 388 theme( |
| 363 axis.text.x = element_text(angle = 90, hjust = 1)) | 389 panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right", |
| 390 text = element_text(size = 8), | |
| 391 axis.text.x = element_text(angle = 90, hjust = 1) | |
| 392 ) | |
| 364 } | 393 } |
| 365 ##### | 394 ##### |
| 366 # ggplot2 alternative | 395 # ggplot2 alternative |
| 367 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs | 396 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs |
| 368 pc3_data <- pc3$data | 397 pc3_data <- pc3$data |
| 369 pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") | 398 pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") |
| 370 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + | 399 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
| 371 geom_bar(stat = "identity", position = "stack") + | 400 geom_bar(stat = "identity", position = "stack") + |
| 372 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + | 401 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + |
| 373 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + | 402 labs(x = "Samples", y = "Absolute contribution") + |
| 374 theme(panel.grid.minor.x = element_blank(), | 403 theme_bw() + |
| 375 panel.grid.major.x = element_blank(), | 404 theme( |
| 376 legend.position = "right", | 405 panel.grid.minor.x = element_blank(), |
| 377 text = element_text(size = 8), | 406 panel.grid.major.x = element_blank(), |
| 378 axis.text.x = element_text(angle = 90, hjust = 1)) + | 407 legend.position = "right", |
| 379 facet_grid(~level, scales = "free_x", space = "free") | 408 text = element_text(size = 8), |
| 409 axis.text.x = element_text(angle = 90, hjust = 1) | |
| 410 ) + | |
| 411 facet_grid(~level, scales = "free_x", space = "free") | |
| 380 pc4_data <- pc4$data | 412 pc4_data <- pc4$data |
| 381 pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") | 413 pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") |
| 382 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + | 414 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
| 383 geom_bar(stat = "identity", position = "fill") + | 415 geom_bar(stat = "identity", position = "fill") + |
| 384 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + | 416 scale_fill_manual(name = tag, values = signature_colors[colnames(sbs_signatures)]) + |
| 385 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + | 417 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + |
| 386 labs(x = "Samples", y = "Relative contribution") + theme_bw() + | 418 labs(x = "Samples", y = "Relative contribution") + |
| 387 theme(panel.grid.minor.x = element_blank(), | 419 theme_bw() + |
| 388 panel.grid.major.x = element_blank(), | 420 theme( |
| 389 legend.position = "right", | 421 panel.grid.minor.x = element_blank(), |
| 390 text = element_text(size = 8), | 422 panel.grid.major.x = element_blank(), |
| 391 axis.text.x = element_text(angle = 90, hjust = 1)) + | 423 legend.position = "right", |
| 392 facet_grid(~level, scales = "free_x", space = "free") | 424 text = element_text(size = 8), |
| 425 axis.text.x = element_text(angle = 90, hjust = 1) | |
| 426 ) + | |
| 427 facet_grid(~level, scales = "free_x", space = "free") | |
| 393 } | 428 } |
| 394 # Combine the two plots: | 429 # Combine the two plots: |
| 395 grid.arrange(pc3, pc4, | 430 grid.arrange(pc3, pc4, |
| 396 top = textGrob("Absolute and Relative Contributions of elementary signatures to mutational profiles", | 431 top = textGrob("Absolute and Relative Contributions of elementary signatures to mutational profiles", |
| 397 gp = gpar(fontsize = 12, font = 3))) | 432 gp = gpar(fontsize = 12, font = 3) |
| 433 ) | |
| 434 ) | |
| 398 | 435 |
| 399 #### pie charts of comic signatures contributions in samples ### | 436 #### pie charts of comic signatures contributions in samples ### |
| 400 library(reshape2) | 437 library(reshape2) |
| 401 library(dplyr) | 438 library(dplyr) |
| 402 if (length(levels(factor(levels_table$level))) < 2) { | 439 if (length(levels(factor(levels_table$level))) < 2) { |
| 403 fit_res_contrib <- as.data.frame(fit_res$contribution) | 440 fit_res_contrib <- as.data.frame(fit_res$contribution) |
| 404 worklist <- cbind(signature = rownames(fit_res$contribution), | 441 worklist <- cbind( |
| 405 level = rep("nolabels", length(fit_res_contrib[, 1])), | 442 signature = rownames(fit_res$contribution), |
| 406 fit_res_contrib, | 443 level = rep("nolabels", length(fit_res_contrib[, 1])), |
| 407 sum = rowSums(fit_res_contrib)) | 444 fit_res_contrib, |
| 445 sum = rowSums(fit_res_contrib) | |
| 446 ) | |
| 408 worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ] | 447 worklist <- worklist[order(worklist[, "sum"], decreasing = TRUE), ] |
| 409 worklist <- worklist[1:opt$signum, ] | 448 worklist <- worklist[1:opt$signum, ] |
| 410 worklist <- worklist[, -length(worklist[1, ])] | 449 worklist <- worklist[, -length(worklist[1, ])] |
| 411 worklist <- melt(worklist) | 450 worklist <- melt(worklist) |
| 412 worklist <- worklist[, c(1, 3, 4, 2)] | 451 worklist <- worklist[, c(1, 3, 4, 2)] |
| 413 } else { | 452 } else { |
| 414 worklist <- list() | 453 worklist <- list() |
| 415 for (i in levels(factor(levels_table$level))) { | 454 for (i in levels(factor(levels_table$level))) { |
| 416 worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] | 455 worklist[[i]] <- fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] |
| 417 sum <- rowSums(as.data.frame(worklist[[i]])) | 456 sum <- rowSums(as.data.frame(worklist[[i]])) |
| 418 worklist[[i]] <- cbind(worklist[[i]], sum) | 457 worklist[[i]] <- cbind(worklist[[i]], sum) |
| 419 worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ] | 458 worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = TRUE), ] |
| 420 worklist[[i]] <- worklist[[i]][1:opt$signum, ] | 459 worklist[[i]] <- worklist[[i]][1:opt$signum, ] |
| 421 worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] | 460 worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] |
| 422 } | 461 } |
| 423 worklist <- as.data.frame(melt(worklist)) | 462 worklist <- as.data.frame(melt(worklist)) |
| 424 worklist[, 2] <- paste0(worklist[, 4], " - ", worklist[, 2]) | 463 worklist[, 2] <- paste0(worklist[, 4], " - ", worklist[, 2]) |
| 425 } | 464 } |
| 426 | 465 |
| 427 colnames(worklist) <- c("signature", "sample", "value", "level") | 466 colnames(worklist) <- c("signature", "sample", "value", "level") |
| 428 worklist <- as.data.frame(worklist %>% group_by(sample) %>% mutate(value = value / sum(value) * 100)) | 467 worklist <- as.data.frame(worklist %>% group_by(sample) %>% mutate(value = value / sum(value) * 100)) |
| 429 worklist$pos <- cumsum(worklist$value) - worklist$value / 2 | 468 worklist$pos <- cumsum(worklist$value) - worklist$value / 2 |
| 430 worklist$label <- factor(gsub("SBS", "", worklist$signature)) | 469 worklist$label <- factor(gsub("SBS", "", worklist$signature)) |
| 431 worklist$signature <- factor(worklist$signature) | 470 worklist$signature <- factor(worklist$signature) |
| 432 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + | 471 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + |
| 433 geom_bar(width = 1, stat = "identity") + | 472 geom_bar(width = 1, stat = "identity") + |
| 434 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) + | 473 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "white", size = 3) + |
| 435 coord_polar("y", start = 0) + facet_wrap(. ~ sample) + | 474 coord_polar("y", start = 0) + |
| 436 labs(x = "", y = "Samples", fill = tag) + | 475 facet_wrap(. ~ sample) + |
| 437 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), | 476 labs(x = "", y = "Samples", fill = tag) + |
| 438 values = signature_colors[levels(worklist$signature)], | 477 scale_fill_manual( |
| 439 labels = names(signature_colors[levels(worklist$signature)])) + | 478 name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), |
| 440 theme(axis.text = element_blank(), | 479 values = signature_colors[levels(worklist$signature)], |
| 441 axis.ticks = element_blank(), | 480 labels = names(signature_colors[levels(worklist$signature)]) |
| 442 panel.grid = element_blank()) | 481 ) + |
| 482 theme( | |
| 483 axis.text = element_blank(), | |
| 484 axis.ticks = element_blank(), | |
| 485 panel.grid = element_blank() | |
| 486 ) | |
| 443 grid.arrange(p7) | 487 grid.arrange(p7) |
| 444 | 488 |
| 445 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering | 489 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering |
| 446 if (length(vcf_paths) > 1) { | 490 if (length(vcf_paths) > 1) { |
| 447 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") | 491 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") |
| 450 | 494 |
| 451 # export relative contribution matrix | 495 # export relative contribution matrix |
| 452 if (!is.na(opt$sig_contrib_matrix)) { | 496 if (!is.na(opt$sig_contrib_matrix)) { |
| 453 output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution)) | 497 output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution)) |
| 454 if (length(levels(factor(levels_table$level))) > 1) { | 498 if (length(levels(factor(levels_table$level))) > 1) { |
| 455 output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution), | 499 output_table <- data.frame( |
| 456 3], "-", colnames(fit_res$contribution)), | 500 sample = paste0(metadata_table[ |
| 457 output_table) | 501 metadata_table$element_identifier == colnames(fit_res$contribution), |
| 502 3 | |
| 503 ], "-", colnames(fit_res$contribution)), | |
| 504 output_table | |
| 505 ) | |
| 458 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) | 506 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) |
| 459 } else { | 507 } else { |
| 460 output_table <- data.frame(sample = rownames(output_table), output_table) | 508 output_table <- data.frame(sample = rownames(output_table), output_table) |
| 461 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) | 509 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) |
| 462 } | 510 } |
| 463 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = FALSE, row.names = FALSE) | 511 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = FALSE, row.names = FALSE) |
| 464 } | 512 } |
| 478 # Adjust data frame for plotting with gpplot | 526 # Adjust data frame for plotting with gpplot |
| 479 colnames(cos_sim_ori_rec) <- "cos_sim" | 527 colnames(cos_sim_ori_rec) <- "cos_sim" |
| 480 cos_sim_ori_rec$sample <- row.names(cos_sim_ori_rec) | 528 cos_sim_ori_rec$sample <- row.names(cos_sim_ori_rec) |
| 481 # Make barplot | 529 # Make barplot |
| 482 p9 <- ggplot(cos_sim_ori_rec, aes(y = cos_sim, x = sample)) + | 530 p9 <- ggplot(cos_sim_ori_rec, aes(y = cos_sim, x = sample)) + |
| 483 geom_bar(stat = "identity", fill = "skyblue4") + | 531 geom_bar(stat = "identity", fill = "skyblue4") + |
| 484 coord_cartesian(ylim = c(0.8, 1)) + | 532 coord_cartesian(ylim = c(0.8, 1)) + |
| 485 # coord_flip(ylim=c(0.8,1)) + | 533 # coord_flip(ylim=c(0.8,1)) + |
| 486 ylab("Cosine similarity\n original VS reconstructed") + | 534 ylab("Cosine similarity\n original VS reconstructed") + |
| 487 xlab("") + | 535 xlab("") + |
| 488 # Reverse order of the samples such that first is up | 536 # Reverse order of the samples such that first is up |
| 489 # xlim(rev(levels(factor(cos_sim_ori_rec$sample)))) + | 537 # xlim(rev(levels(factor(cos_sim_ori_rec$sample)))) + |
| 490 theme_bw() + | 538 theme_bw() + |
| 491 theme(panel.grid.minor.y = element_blank(), | 539 theme( |
| 492 panel.grid.major.y = element_blank(), | 540 panel.grid.minor.y = element_blank(), |
| 493 axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1) | 541 panel.grid.major.y = element_blank(), |
| 494 ) + | 542 axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1) |
| 495 # Add cut.off line | 543 ) + |
| 496 geom_hline(aes(yintercept = .95)) | 544 # Add cut.off line |
| 545 geom_hline(aes(yintercept = .95)) | |
| 497 grid.arrange(p9, top = textGrob("Similarity between true profiles and profiles reconstructed with elementary signatures", gp = gpar(fontsize = 12, font = 3))) | 546 grid.arrange(p9, top = textGrob("Similarity between true profiles and profiles reconstructed with elementary signatures", gp = gpar(fontsize = 12, font = 3))) |
| 498 dev.off() | 547 dev.off() |
| 499 } | 548 } |
| 500 | 549 |
| 501 | 550 |
| 502 # Output RData file | 551 # Output RData file |
| 503 if (!is.null(opt$rdata)) { | 552 if (!is.null(opt$rdata)) { |
| 504 save.image(file = opt$rdata) | 553 save.image(file = opt$rdata) |
| 505 } | 554 } |
