Mercurial > repos > artbio > mutational_patterns
comparison mutational_patterns.R @ 0:924c527fb379 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit e1f3ca871f13569401f41a5af9d0e281bf372540"
| author | artbio |
|---|---|
| date | Sun, 13 Sep 2020 18:40:29 +0000 |
| parents | |
| children | aea952be68cb |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:924c527fb379 |
|---|---|
| 1 # load packages that are provided in the conda env | |
| 2 options( show.error.messages=F, | |
| 3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
| 4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
| 5 warnings() | |
| 6 library(optparse) | |
| 7 library(rjson) | |
| 8 library(MutationalPatterns) | |
| 9 library(ggplot2) | |
| 10 | |
| 11 # Arguments | |
| 12 option_list = list( | |
| 13 make_option( | |
| 14 "--inputs", | |
| 15 default = NA, | |
| 16 type = 'character', | |
| 17 help = "json formatted dictionary of datasets and their paths" | |
| 18 ), | |
| 19 make_option( | |
| 20 "--genome", | |
| 21 default = NA, | |
| 22 type = 'character', | |
| 23 help = "genome name in the BSgenome bioconductor package" | |
| 24 ), | |
| 25 make_option( | |
| 26 "--levels", | |
| 27 default = NA, | |
| 28 type = 'character', | |
| 29 help = "path to the tab separated file describing the levels in function of datasets" | |
| 30 ), | |
| 31 make_option( | |
| 32 "--signum", | |
| 33 default = 2, | |
| 34 type = 'integer', | |
| 35 help = "selects the N most significant signatures in samples to express mutational patterns" | |
| 36 ), | |
| 37 make_option( | |
| 38 "--output", | |
| 39 default = NA, | |
| 40 type = 'character', | |
| 41 help = "path to output dataset" | |
| 42 ) | |
| 43 ) | |
| 44 | |
| 45 opt = parse_args(OptionParser(option_list = option_list), | |
| 46 args = commandArgs(trailingOnly = TRUE)) | |
| 47 | |
| 48 json_dict <- opt$inputs | |
| 49 parser <- newJSONParser() | |
| 50 parser$addData(json_dict) | |
| 51 fileslist <- parser$getObject() | |
| 52 vcf_files <- attr(fileslist, "names") | |
| 53 sample_names <- unname(unlist(fileslist)) | |
| 54 pdf(opt$output, paper = "special", width = 11.69, height = 11.69) | |
| 55 ref_genome <- opt$genome | |
| 56 library(ref_genome, character.only = TRUE) | |
| 57 | |
| 58 # Load the VCF files into a GRangesList: | |
| 59 vcfs <- read_vcfs_as_granges(vcf_files, sample_names, ref_genome) | |
| 60 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("sample_name","level")) | |
| 61 vcf_table <- data.frame(path=vcf_files, sample_name=sample_names) | |
| 62 metadata_table <- merge(vcf_table, levels_table, by.x=2, by.y=1) | |
| 63 levels <- metadata_table$level | |
| 64 muts = mutations_from_vcf(vcfs[[1]]) | |
| 65 types = mut_type(vcfs[[1]]) | |
| 66 context = mut_context(vcfs[[1]], ref_genome) | |
| 67 type_context = type_context(vcfs[[1]], ref_genome) | |
| 68 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) | |
| 69 # p1 <- plot_spectrum(type_occurrences) | |
| 70 # p2 <- plot_spectrum(type_occurrences, CT = TRUE) | |
| 71 # p3 <- plot_spectrum(type_occurrences, CT = TRUE, legend = FALSE) | |
| 72 # | |
| 73 # plot(p2) | |
| 74 # p4 <- plot_spectrum(type_occurrences, by = levels, CT = TRUE, legend = TRUE) | |
| 75 # palette <- c("pink", "orange", "blue", "lightblue", "green", "red", "purple") | |
| 76 # p5 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE, colors=palette) | |
| 77 # | |
| 78 # plot(p4) | |
| 79 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) | |
| 80 # plot_96_profile(mut_mat[,1:length(as.data.frame(mut_mat))], condensed = TRUE) | |
| 81 mut_mat <- mut_mat + 0.0001 | |
| 82 # library("NMF") | |
| 83 # estimate <- nmf(mut_mat, rank=2:5, method="brunet", nrun=100, seed=123456) | |
| 84 # plot(estimate) | |
| 85 # nmf_res <- extract_signatures(mut_mat, rank = 4, nrun = 100) | |
| 86 # colnames(nmf_res$signatures) <- c("Signature A", "Signature B", "Signature C", "Signature D") | |
| 87 # rownames(nmf_res$contribution) <- c("Signature A", "Signature B", "Signature C", "Signature D") | |
| 88 # plot_96_profile(nmf_res$signatures, condensed = TRUE) | |
| 89 sp_url <- paste("https://cancer.sanger.ac.uk/cancergenome/assets/", "signatures_probabilities.txt", sep = "") | |
| 90 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) | |
| 91 new_order = match(row.names(mut_mat), cancer_signatures$Somatic.Mutation.Type) | |
| 92 cancer_signatures = cancer_signatures[as.vector(new_order),] | |
| 93 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type | |
| 94 cancer_signatures = as.matrix(cancer_signatures[,4:33]) | |
| 95 # plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) | |
| 96 hclust_cosmic = cluster_signatures(cancer_signatures, method = "average") | |
| 97 cosmic_order = colnames(cancer_signatures)[hclust_cosmic$order] | |
| 98 # plot(hclust_cosmic) | |
| 99 cos_sim(mut_mat[,1], cancer_signatures[,1]) | |
| 100 cos_sim_samples_signatures = cos_sim_matrix(mut_mat, cancer_signatures) | |
| 101 plot_cosine_heatmap(cos_sim_samples_signatures, col_order = cosmic_order, cluster_rows = TRUE) | |
| 102 fit_res <- fit_to_signatures(mut_mat, cancer_signatures) | |
| 103 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1] | |
| 104 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded | |
| 105 plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "absolute") | |
| 106 plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "relative") | |
| 107 plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") | |
| 108 | |
| 109 | |
| 110 sig5data <- as.data.frame(t(head(fit_res$contribution[select,]))) | |
| 111 colnames(sig5data) <- gsub("nature", "", colnames(sig5data)) | |
| 112 sig5data_percents <- sig5data / (apply(sig5data,1,sum)) * 100 | |
| 113 sig5data_percents$sample <- rownames(sig5data_percents) | |
| 114 library(reshape2) | |
| 115 melted_sig5data_percents <-melt(data=sig5data_percents) | |
| 116 melted_sig5data_percents$label <- sub("Sig.", "", melted_sig5data_percents$variable) | |
| 117 melted_sig5data_percents$pos <- cumsum(melted_sig5data_percents$value) - melted_sig5data_percents$value/2 | |
| 118 ggplot(melted_sig5data_percents, aes(x="", y=value, group=variable, fill=variable)) + | |
| 119 geom_bar(width = 1, stat = "identity") + | |
| 120 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + | |
| 121 coord_polar("y", start=0) + facet_wrap(~ sample) + | |
| 122 labs(x="", y="Samples", fill = "Signatures (Cosmic_v2,March 2015)") + | |
| 123 theme(axis.text = element_blank(), | |
| 124 axis.ticks = element_blank(), | |
| 125 panel.grid = element_blank()) | |
| 126 dev.off() |
