Mercurial > repos > artbio > mutational_patterns
comparison mutational_patterns.R @ 18:8d9f31389f33 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit 1cb9c8fd0c74943a8e6de4c63ac5e4a84ef27430"
author | artbio |
---|---|
date | Sun, 17 Oct 2021 15:51:05 +0000 |
parents | 8c6ee1c2248f |
children | 69f09dff98f9 |
comparison
equal
deleted
inserted
replaced
17:8c6ee1c2248f | 18:8d9f31389f33 |
---|---|
33 type = "character", | 33 type = "character", |
34 help = "path to the tab separated file describing the levels in function of datasets" | 34 help = "path to the tab separated file describing the levels in function of datasets" |
35 ), | 35 ), |
36 make_option( | 36 make_option( |
37 "--cosmic_version", | 37 "--cosmic_version", |
38 default = "v2", | 38 default = NA, |
39 type = "character", | 39 type = "character", |
40 help = "Version of the Cosmic Signature set to be used to express mutational patterns" | 40 help = "Version of the Cosmic Signature set to be used to express mutational profiles" |
41 ), | |
42 make_option( | |
43 "--own_signatures", | |
44 default = NA, | |
45 type = "character", | |
46 help = "Path to the user-defined signature matrix" | |
41 ), | 47 ), |
42 make_option( | 48 make_option( |
43 "--signum", | 49 "--signum", |
44 default = 2, | 50 default = 2, |
45 type = "integer", | 51 type = "integer", |
46 help = "selects the N most significant signatures in samples to express mutational patterns" | 52 help = "selects the N most significant signatures in samples to express mutational profiles" |
47 ), | 53 ), |
48 make_option( | 54 make_option( |
49 "--nrun", | 55 "--nrun", |
50 default = 2, | 56 default = 2, |
51 type = "integer", | 57 type = "integer", |
80 default = NA, | 86 default = NA, |
81 type = "character", | 87 type = "character", |
82 help = "path to signature matrix" | 88 help = "path to signature matrix" |
83 ), | 89 ), |
84 make_option( | 90 make_option( |
85 "--output_cosmic", | 91 "--output_sigpattern", |
86 default = NA, | 92 default = NA, |
87 type = "character", | 93 type = "character", |
88 help = "path to output dataset" | 94 help = "path to output dataset" |
89 ), | 95 ), |
90 make_option( | 96 make_option( |
139 detach(package:plyr) | 145 detach(package:plyr) |
140 | 146 |
141 ##### This is done for any section ###### | 147 ##### This is done for any section ###### |
142 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) | 148 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) |
143 qual_col_pals <- brewer.pal.info[brewer.pal.info$category == "qual", ] | 149 qual_col_pals <- brewer.pal.info[brewer.pal.info$category == "qual", ] |
144 col_vector <- unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) | |
145 col_vector <- col_vector[c(-32, -34, -39)] # 67-color palette | |
146 | 150 |
147 ###### Section 1 Mutation characteristics and spectrums ############# | 151 ###### Section 1 Mutation characteristics and spectrums ############# |
148 if (!is.na(opt$output_spectrum)[1]) { | 152 if (!is.na(opt$output_spectrum)[1]) { |
149 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) | 153 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) |
150 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) | 154 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) |
219 nmf_res$reconstructed[, 1], | 223 nmf_res$reconstructed[, 1], |
220 profile_names = c("Original", "Reconstructed"), | 224 profile_names = c("Original", "Reconstructed"), |
221 condensed = TRUE) | 225 condensed = TRUE) |
222 dev.off() | 226 dev.off() |
223 } | 227 } |
224 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures #### | 228 |
225 | 229 ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures #### |
226 if (!is.na(opt$output_cosmic)[1]) { | 230 |
231 if (!is.na(opt$output_sigpattern)[1]) { | |
227 # Prepare cosmic signatures | 232 # Prepare cosmic signatures |
228 cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE) | 233 if (!is.na(opt$cosmic_version)) { |
229 cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome & | 234 cosmic_urls <- read.delim(paste0(opt$tooldir, "cosmic_urls.tsv"), sep = "\t", header = TRUE) |
230 cosmic_urls$cosmic_version == opt$cosmic_version] | 235 cosmic_sbs_file <- cosmic_urls$url[cosmic_urls$genome == opt$genome & |
231 cancer_sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file), | 236 cosmic_urls$cosmic_version == opt$cosmic_version] |
232 sep = "\t", header = TRUE) | 237 sbs_signatures <- read.table(paste0(opt$tooldir, cosmic_sbs_file), |
233 row.names(cancer_sbs_signatures) <- cancer_sbs_signatures$Type | 238 sep = "\t", header = TRUE) |
234 new_order <- match(row.names(mut_mat), cancer_sbs_signatures$Type) | 239 tag <- paste(opt$genome, "COSMIC", opt$cosmic_version, sep = " ") |
235 cancer_sbs_signatures <- cancer_sbs_signatures[as.numeric(new_order), ] | 240 } |
236 cosmic_tag <- paste(opt$genome, "COSMIC", opt$cosmic_version, sep = " ") | 241 # Prepare user-defined signatures |
237 cosmic_colors <- col_vector[seq_len(ncol(cancer_sbs_signatures) - 1)] | 242 if (!is.na(opt$own_signatures)) { |
238 names(cosmic_colors) <- colnames(cancer_sbs_signatures[2:length(cancer_sbs_signatures)]) | 243 sbs_signatures <- read.table(opt$own_signatures, sep = "\t", header = TRUE) |
239 cancer_sbs_matrix <- as.matrix(cancer_sbs_signatures[, 2:length(cancer_sbs_signatures)]) | 244 tag <- paste(opt$genome, "User-Defined Signatures", sep = " ") |
245 } | |
246 row.names(sbs_signatures) <- sbs_signatures$Type | |
247 # drop column Type of sbs_signatures | |
248 sbs_signatures <- subset(sbs_signatures, select = -c(Type)) | |
249 # reorder substitutions of sbs_signatures to match mut_mat | |
250 sbs_signatures <- sbs_signatures[match(row.names(mut_mat), row.names(sbs_signatures)), ] | |
251 colnames(sbs_signatures) <- gsub("SBS", "", colnames(sbs_signatures)) | |
252 # arrange signature colors | |
253 signature_colors <- c("#3f4100", "#6f53ff", "#6dc400", "#9d1fd7", "#009c06", "#001fae", "#8adb4d", "#5a67ff", "#d8c938", "#024bc3", "#d2ab00", | |
254 "#e36eff", "#00ac44", "#d000b0", "#01b071", "#ff64e2", "#006b21", "#b70090", "#60dc9f", "#5f0083", "#c0ce67", "#002981", | |
255 "#ffb53e", "#44005f", "#b59600", "#7d95ff", "#f47600", "#017bc4", "#ff2722", "#02cfec", "#ff233f", "#01b7b4", "#fd005c", | |
256 "#019560", "#ff57a9", "#88d896", "#b80067", "#abd27f", "#dc8eff", "#667b00", "#fba3ff", "#093f00", "#ff6494", "#009791", | |
257 "#c93200", "#4ac8ff", "#a60005", "#8fd4b6", "#ce0036", "#00634d", "#ff6035", "#2d1956", "#f0be6d", "#6a0058", "#957a00", | |
258 "#e4b4ff", "#4a5500", "#abc7fe", "#c95900", "#003d27", "#b10043", "#d5c68e", "#3e163e", "#b36b00", "#debaeb", "#605400", | |
259 "#7a0044", "#ffa06d", "#4c0d21", "#ff9cb5", "#3f1d02", "#ff958f", "#634a66", "#775500", "#6e0028", "#717653", | |
260 "#6c1000", "#693600") | |
261 signature_colors <- signature_colors[seq_len(ncol(sbs_signatures))] | |
262 names(signature_colors) <- colnames(sbs_signatures) | |
263 # To drop signature_colors <- signature_colors[colnames(sbs_signatures)] | |
264 # This is IMPORTANT since in Galaxy we do not use the embeded function get_known_signatures() | |
265 sbs_signatures <- as.matrix(sbs_signatures) | |
240 | 266 |
241 | 267 |
242 # Plot mutational profiles of the COSMIC signatures | 268 # Plot mutational profiles of the COSMIC signatures |
243 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69) | 269 |
244 if (opt$cosmic_version == "v2") { | 270 # to do: this is largely optional and should be graphically improved anyway |
245 p6 <- plot_96_profile(cancer_sbs_matrix, condensed = TRUE, ymax = 0.3) | 271 |
246 grid.arrange(p6, top = textGrob("COSMIC SBS signature profiles", gp = gpar(fontsize = 12, font = 3))) | 272 pdf(opt$output_sigpattern, paper = "special", width = 11.69, height = 11.69) |
247 } else { | 273 for (i in head(seq(1, ncol(sbs_signatures), by = 20), -1)) { |
248 p6 <- plot_96_profile(cancer_sbs_matrix[, 1:trunc(ncol(cancer_sbs_matrix) / 2)], condensed = TRUE, ymax = 0.3) | 274 p6 <- plot_96_profile(sbs_signatures[, i:(i + 19)], condensed = TRUE, ymax = 0.3) |
249 p6bis <- plot_96_profile(cancer_sbs_matrix[, (trunc(ncol(cancer_sbs_matrix) / 2) + 1):ncol(cancer_sbs_matrix)], | 275 grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc((i + 1) / 20) + 1, " of ", |
250 condensed = TRUE, ymax = 0.3) | 276 trunc(ncol(sbs_signatures) / 20) + 1, " pages)"), |
251 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)", | 277 gp = gpar(fontsize = 12, font = 3))) |
252 gp = gpar(fontsize = 12, font = 3))) | 278 } |
253 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)", | 279 p6 <- plot_96_profile(sbs_signatures[, (trunc(ncol(sbs_signatures) / 20) * 20):(ncol(sbs_signatures))], |
254 gp = gpar(fontsize = 12, font = 3))) | 280 condensed = TRUE, ymax = 0.3) |
255 } | 281 grid.arrange(p6, top = textGrob(paste0(tag, " profiles (", trunc(ncol(sbs_signatures) / 20) + 1, " of ", |
256 | 282 trunc(ncol(sbs_signatures) / 20) + 1, " pages)"), |
257 | 283 gp = gpar(fontsize = 12, font = 3))) |
258 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles | 284 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles |
259 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix | 285 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix |
260 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_sbs_matrix) | 286 fit_res <- fit_to_signatures(pseudo_mut_mat, sbs_signatures) |
261 | 287 |
262 # Plot contribution barplots | 288 # Plot contribution barplots |
263 pc3 <- plot_contribution(fit_res$contribution, cancer_sbs_matrix, coord_flip = T, mode = "absolute") | 289 pc3 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "absolute") |
264 pc4 <- plot_contribution(fit_res$contribution, cancer_sbs_matrix, coord_flip = T, mode = "relative") | 290 pc4 <- plot_contribution(fit_res$contribution, sbs_signatures, coord_flip = T, mode = "relative") |
265 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs | 291 if (is.na(opt$levels)[1]) { # if there are NO levels to display in graphs |
266 pc3_data <- pc3$data | 292 pc3_data <- pc3$data |
267 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + | 293 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
268 geom_bar(stat = "identity", position = "stack") + | 294 geom_bar(stat = "identity", position = "stack") + |
269 coord_flip() + | 295 coord_flip() + |
270 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + | 296 scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors[]) + |
271 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + | 297 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + |
272 theme(panel.grid.minor.x = element_blank(), | 298 theme(panel.grid.minor.x = element_blank(), |
273 panel.grid.major.x = element_blank(), | 299 panel.grid.major.x = element_blank(), |
274 legend.position = "right", | 300 legend.position = "right", |
275 text = element_text(size = 8), | 301 text = element_text(size = 8), |
276 axis.text.x = element_text(angle = 90, hjust = 1)) | 302 axis.text.x = element_text(angle = 90, hjust = 1)) |
277 pc4_data <- pc4$data | 303 pc4_data <- pc4$data |
278 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + | 304 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
279 geom_bar(stat = "identity", position = "fill") + | 305 geom_bar(stat = "identity", position = "fill") + |
280 coord_flip() + | 306 coord_flip() + |
281 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + | 307 scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors) + |
282 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + | 308 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + |
283 labs(x = "Samples", y = "Relative contribution") + theme_bw() + | 309 labs(x = "Samples", y = "Relative contribution") + theme_bw() + |
284 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right", | 310 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right", |
285 text = element_text(size = 8), | 311 text = element_text(size = 8), |
286 axis.text.x = element_text(angle = 90, hjust = 1)) | 312 axis.text.x = element_text(angle = 90, hjust = 1)) |
290 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs | 316 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs |
291 pc3_data <- pc3$data | 317 pc3_data <- pc3$data |
292 pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") | 318 pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") |
293 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + | 319 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
294 geom_bar(stat = "identity", position = "stack") + | 320 geom_bar(stat = "identity", position = "stack") + |
295 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + | 321 scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors) + |
296 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + | 322 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + |
297 theme(panel.grid.minor.x = element_blank(), | 323 theme(panel.grid.minor.x = element_blank(), |
298 panel.grid.major.x = element_blank(), | 324 panel.grid.major.x = element_blank(), |
299 legend.position = "right", | 325 legend.position = "right", |
300 text = element_text(size = 8), | 326 text = element_text(size = 8), |
302 facet_grid(~level, scales = "free_x", space = "free") | 328 facet_grid(~level, scales = "free_x", space = "free") |
303 pc4_data <- pc4$data | 329 pc4_data <- pc4$data |
304 pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") | 330 pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") |
305 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + | 331 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
306 geom_bar(stat = "identity", position = "fill") + | 332 geom_bar(stat = "identity", position = "fill") + |
307 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + | 333 scale_fill_manual(name = "Cosmic\nSignatures", values = signature_colors) + |
308 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + | 334 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + |
309 labs(x = "Samples", y = "Relative contribution") + theme_bw() + | 335 labs(x = "Samples", y = "Relative contribution") + theme_bw() + |
310 theme(panel.grid.minor.x = element_blank(), | 336 theme(panel.grid.minor.x = element_blank(), |
311 panel.grid.major.x = element_blank(), | 337 panel.grid.major.x = element_blank(), |
312 legend.position = "right", | 338 legend.position = "right", |
314 axis.text.x = element_text(angle = 90, hjust = 1)) + | 340 axis.text.x = element_text(angle = 90, hjust = 1)) + |
315 facet_grid(~level, scales = "free_x", space = "free") | 341 facet_grid(~level, scales = "free_x", space = "free") |
316 } | 342 } |
317 # Combine the two plots: | 343 # Combine the two plots: |
318 grid.arrange(pc3, pc4, | 344 grid.arrange(pc3, pc4, |
319 top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns", | 345 top = textGrob("Absolute and Relative Contributions of elementary signatures to mutational profiles", |
320 gp = gpar(fontsize = 12, font = 3))) | 346 gp = gpar(fontsize = 12, font = 3))) |
321 | 347 |
322 #### pie charts of comic signatures contributions in samples ### | 348 #### pie charts of comic signatures contributions in samples ### |
323 library(reshape2) | 349 library(reshape2) |
324 library(dplyr) | 350 library(dplyr) |
354 worklist$signature <- factor(worklist$signature) | 380 worklist$signature <- factor(worklist$signature) |
355 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + | 381 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + |
356 geom_bar(width = 1, stat = "identity") + | 382 geom_bar(width = 1, stat = "identity") + |
357 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "black", size = 3) + | 383 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "black", size = 3) + |
358 coord_polar("y", start = 0) + facet_wrap(.~sample) + | 384 coord_polar("y", start = 0) + facet_wrap(.~sample) + |
359 labs(x = "", y = "Samples", fill = cosmic_tag) + | 385 labs(x = "", y = "Samples", fill = tag) + |
360 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), | 386 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), |
361 values = cosmic_colors[levels(worklist$signature)]) + | 387 values = signature_colors[levels(worklist$signature)], |
388 labels = names(signature_colors[levels(worklist$signature)])) + | |
362 theme(axis.text = element_blank(), | 389 theme(axis.text = element_blank(), |
363 axis.ticks = element_blank(), | 390 axis.ticks = element_blank(), |
364 panel.grid = element_blank()) | 391 panel.grid = element_blank()) |
365 grid.arrange(p7) | 392 grid.arrange(p7) |
366 | 393 |
375 output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution)) | 402 output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution)) |
376 if (length(levels(factor(levels_table$level))) > 1) { | 403 if (length(levels(factor(levels_table$level))) > 1) { |
377 output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution), | 404 output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution), |
378 3], "-", colnames(fit_res$contribution)), | 405 3], "-", colnames(fit_res$contribution)), |
379 output_table) | 406 output_table) |
407 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) | |
380 } else { | 408 } else { |
381 output_table <- data.frame(sample = rownames(output_table), output_table) | 409 output_table <- data.frame(sample = rownames(output_table), output_table) |
410 colnames(output_table) <- gsub("X", "SBS", colnames(output_table)) | |
382 } | 411 } |
383 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F) | 412 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F) |
384 } | 413 } |
385 | 414 |
386 # calculate all pairwise cosine similarities | 415 # calculate all pairwise cosine similarities |
410 theme_bw() + | 439 theme_bw() + |
411 theme(panel.grid.minor.y = element_blank(), | 440 theme(panel.grid.minor.y = element_blank(), |
412 panel.grid.major.y = element_blank()) + | 441 panel.grid.major.y = element_blank()) + |
413 # Add cut.off line | 442 # Add cut.off line |
414 geom_hline(aes(yintercept = .95)) | 443 geom_hline(aes(yintercept = .95)) |
415 grid.arrange(p9, top = textGrob("Similarity between true and reconstructed profiles (with all Cosmic sig.)", gp = gpar(fontsize = 12, font = 3))) | 444 grid.arrange(p9, top = textGrob("Similarity between true profiles and profiles reconstructed with elementary signatures", gp = gpar(fontsize = 12, font = 3))) |
416 dev.off() | 445 dev.off() |
417 } | 446 } |
418 | 447 |
419 | 448 |
420 # Output RData file | 449 # Output RData file |