Mercurial > repos > artbio > mutational_patterns
comparison mutational_patterns.R @ 3:e332cf9dfa06 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit 0f7593f703ba0dfd12aea1c0460e371f08b57d2f"
| author | artbio |
|---|---|
| date | Thu, 24 Sep 2020 01:32:52 +0000 |
| parents | aea952be68cb |
| children | 7ba08c826888 |
comparison
equal
deleted
inserted
replaced
| 2:aea952be68cb | 3:e332cf9dfa06 |
|---|---|
| 27 default = NA, | 27 default = NA, |
| 28 type = 'character', | 28 type = 'character', |
| 29 help = "path to the tab separated file describing the levels in function of datasets" | 29 help = "path to the tab separated file describing the levels in function of datasets" |
| 30 ), | 30 ), |
| 31 make_option( | 31 make_option( |
| 32 "--cosmic_version", | |
| 33 default = "v2", | |
| 34 type = 'character', | |
| 35 help = "Version of the Cosmic Signature set to be used to express mutational patterns" | |
| 36 ), | |
| 37 make_option( | |
| 32 "--signum", | 38 "--signum", |
| 33 default = 2, | 39 default = 2, |
| 34 type = 'integer', | 40 type = 'integer', |
| 35 help = "selects the N most significant signatures in samples to express mutational patterns" | 41 help = "selects the N most significant signatures in samples to express mutational patterns" |
| 36 ), | 42 ), |
| 50 "--newsignum", | 56 "--newsignum", |
| 51 default = 2, | 57 default = 2, |
| 52 type = 'integer', | 58 type = 'integer', |
| 53 help = "Number of new signatures to be captured" | 59 help = "Number of new signatures to be captured" |
| 54 ), | 60 ), |
| 55 | |
| 56 make_option( | 61 make_option( |
| 57 "--output_spectrum", | 62 "--output_spectrum", |
| 58 default = NA, | 63 default = NA, |
| 59 type = 'character', | 64 type = 'character', |
| 60 help = "path to output dataset" | 65 help = "path to output dataset" |
| 64 default = NA, | 69 default = NA, |
| 65 type = 'character', | 70 type = 'character', |
| 66 help = "path to output dataset" | 71 help = "path to output dataset" |
| 67 ), | 72 ), |
| 68 make_option( | 73 make_option( |
| 74 "--sigmatrix", | |
| 75 default = NA, | |
| 76 type = 'character', | |
| 77 help = "path to signature matrix" | |
| 78 ), | |
| 79 make_option( | |
| 69 "--output_cosmic", | 80 "--output_cosmic", |
| 70 default = NA, | 81 default = NA, |
| 71 type = 'character', | 82 type = 'character', |
| 72 help = "path to output dataset" | 83 help = "path to output dataset" |
| 73 ) | 84 ), |
| 85 make_option( | |
| 86 c("-r", "--rdata"), | |
| 87 type="character", | |
| 88 default=NULL, | |
| 89 help="Path to RData output file") | |
| 74 ) | 90 ) |
| 75 | 91 |
| 76 opt = parse_args(OptionParser(option_list = option_list), | 92 opt = parse_args(OptionParser(option_list = option_list), |
| 77 args = commandArgs(trailingOnly = TRUE)) | 93 args = commandArgs(trailingOnly = TRUE)) |
| 78 | 94 |
| 79 ################ Manage input data #################### | 95 ################ Manage input data #################### |
| 80 json_dict <- opt$inputs | 96 json_dict <- opt$inputs |
| 81 parser <- newJSONParser() | 97 parser <- newJSONParser() |
| 82 parser$addData(json_dict) | 98 parser$addData(json_dict) |
| 83 fileslist <- parser$getObject() | 99 fileslist <- parser$getObject() |
| 84 vcf_files <- attr(fileslist, "names") | 100 vcf_paths <- attr(fileslist, "names") |
| 85 sample_names <- unname(unlist(fileslist)) | 101 element_identifiers <- unname(unlist(fileslist)) |
| 86 ref_genome <- opt$genome | 102 ref_genome <- opt$genome |
| 87 vcf_table <- data.frame(sample_name=sample_names, path=vcf_files) | 103 vcf_table <- data.frame(element_identifier=element_identifiers, path=vcf_paths) |
| 88 | 104 |
| 89 library(MutationalPatterns) | 105 library(MutationalPatterns) |
| 90 library(ref_genome, character.only = TRUE) | 106 library(ref_genome, character.only = TRUE) |
| 107 library(ggplot2) | |
| 91 | 108 |
| 92 # Load the VCF files into a GRangesList: | 109 # Load the VCF files into a GRangesList: |
| 93 vcfs <- read_vcfs_as_granges(vcf_files, sample_names, ref_genome) | 110 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome) |
| 94 if (!is.na(opt$levels)[1]) { # manage levels if there are | 111 if (!is.na(opt$levels)[1]) { # manage levels if there are |
| 95 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("sample_name","level")) | 112 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("element_identifier","level")) |
| 96 library(dplyr) | 113 library(plyr) |
| 97 metadata_table <- left_join(vcf_table, levels_table, by = "sample_name") | 114 metadata_table <- join(vcf_table, levels_table, by = "element_identifier") |
| 98 # detach("package:dplyr", unload=TRUE) | 115 tissue <- as.vector(metadata_table$level) |
| 99 tissue <- metadata_table$level | |
| 100 } | 116 } |
| 101 | 117 |
| 102 ##### This is done for any section ###### | 118 ##### This is done for any section ###### |
| 103 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) | 119 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) |
| 104 | |
| 105 | 120 |
| 106 ###### Section 1 Mutation characteristics and spectrums ############# | 121 ###### Section 1 Mutation characteristics and spectrums ############# |
| 107 if (!is.na(opt$output_spectrum)[1]) { | 122 if (!is.na(opt$output_spectrum)[1]) { |
| 108 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) | 123 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) |
| 109 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) | 124 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) |
| 112 | 127 |
| 113 if (is.na(opt$levels)[1]) { | 128 if (is.na(opt$levels)[1]) { |
| 114 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend=TRUE) | 129 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend=TRUE) |
| 115 plot(p1) | 130 plot(p1) |
| 116 } else { | 131 } else { |
| 117 p2 <- plot_spectrum(type_occurrences, by = tissue, CT=TRUE, legend=TRUE) # by sample | 132 p2 <- plot_spectrum(type_occurrences, by = tissue, CT=TRUE) # by levels |
| 118 p3 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE) # total | 133 p3 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE) # total |
| 119 grid.arrange(p2, p3, ncol=2, widths=c(4,2.3), heights=c(4,1)) | 134 grid.arrange(p2, p3, ncol=2, widths=c(4,2.3), heights=c(4,1)) |
| 120 } | 135 } |
| 121 plot_96_profile(mut_mat, condensed = TRUE) | 136 plot_96_profile(mut_mat, condensed = TRUE) |
| 122 dev.off() | 137 dev.off() |
| 123 } | 138 } |
| 124 | 139 |
| 125 ###### Section 2: De novo mutational signature extraction using NMF ####### | 140 ###### Section 2: De novo mutational signature extraction using NMF ####### |
| 126 if (!is.na(opt$output_denovo)[1]) { | 141 if (!is.na(opt$output_denovo)[1]) { |
| 127 mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix | 142 # opt$rank cannot be higher than the number of samples |
| 143 if (opt$rank > length(element_identifiers)) {opt$rank <-length(element_identifiers)} | |
| 144 # likewise, opt$signum cannot be higher thant the number of samples | |
| 145 if (opt$signum > length(element_identifiers)) {opt$signum <-length(element_identifiers)} | |
| 146 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix | |
| 128 # Use the NMF package to generate an estimate rank plot | 147 # Use the NMF package to generate an estimate rank plot |
| 129 library("NMF") | 148 library("NMF") |
| 130 estimate <- nmf(mut_mat, rank=1:opt$rank+1, method="brunet", nrun=opt$nrun, seed=123456) | 149 estimate <- nmf(pseudo_mut_mat, rank=1:opt$rank, method="brunet", nrun=opt$nrun, seed=123456) |
| 131 # And plot it | 150 # And plot it |
| 132 pdf(opt$output_denovo, paper = "special", width = 11.69, height = 11.69) | 151 pdf(opt$output_denovo, paper = "special", width = 11.69, height = 11.69) |
| 133 p.trans <- plot(estimate) | 152 p4 <- plot(estimate) |
| 134 grid.arrange(p.trans) | 153 grid.arrange(p4) |
| 135 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures | 154 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures |
| 136 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter | 155 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter |
| 137 # to achieve stability and avoid local minima) | 156 # to achieve stability and avoid local minima) |
| 138 nmf_res <- extract_signatures(mut_mat, rank=opt$rank, nrun=opt$nrun) | 157 nmf_res <- extract_signatures(pseudo_mut_mat, rank=opt$newsignum, nrun=opt$nrun) |
| 139 # Assign signature names | 158 # Assign signature names |
| 140 colnames(nmf_res$signatures) <- paste0("NewSig_", 1:opt$rank) | 159 colnames(nmf_res$signatures) <- paste0("NewSig_", 1:opt$newsignum) |
| 141 rownames(nmf_res$contribution) <- paste0("NewSig_", 1:opt$rank) | 160 rownames(nmf_res$contribution) <- paste0("NewSig_", 1:opt$newsignum) |
| 142 # Plot the 96-profile of the signatures: | 161 # Plot the 96-profile of the signatures: |
| 143 p.trans <- plot_96_profile(nmf_res$signatures, condensed = TRUE) | 162 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) |
| 144 grid.arrange(p.trans) | 163 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ variable, value.var = "value") |
| 164 new_sig_matrix = format(new_sig_matrix, scientific=TRUE) | |
| 165 write.table(new_sig_matrix, file=opt$sigmatrix, quote = FALSE, row.names = FALSE, sep="\t") | |
| 166 grid.arrange(p5) | |
| 145 # Visualize the contribution of the signatures in a barplot | 167 # Visualize the contribution of the signatures in a barplot |
| 146 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="relative", coord_flip = TRUE) | 168 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="relative", coord_flip = TRUE) |
| 147 # Visualize the contribution of the signatures in absolute number of mutations | 169 # Visualize the contribution of the signatures in absolute number of mutations |
| 148 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="absolute", coord_flip = TRUE) | 170 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="absolute", coord_flip = TRUE) |
| 149 # Combine the two plots: | 171 # Combine the two plots: |
| 153 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots. | 175 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots. |
| 154 # The samples can be hierarchically clustered based on their euclidean dis- tance. The signatures | 176 # The samples can be hierarchically clustered based on their euclidean dis- tance. The signatures |
| 155 # can be plotted in a user-specified order. | 177 # can be plotted in a user-specified order. |
| 156 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: | 178 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: |
| 157 pch1 <- plot_contribution_heatmap(nmf_res$contribution, | 179 pch1 <- plot_contribution_heatmap(nmf_res$contribution, |
| 158 sig_order = paste0("NewSig_", 1:opt$rank)) | 180 sig_order = paste0("NewSig_", 1:opt$newsignum)) |
| 159 # Plot signature contribution as a heatmap without sample clustering: | 181 # Plot signature contribution as a heatmap without sample clustering: |
| 160 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples=FALSE) | 182 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples=FALSE) |
| 161 #Combine the plots into one figure: | 183 #Combine the plots into one figure: |
| 162 grid.arrange(pch1, pch2, ncol = 2, widths = c(2,1.6)) | 184 grid.arrange(pch1, pch2, ncol = 2, widths = c(2,1.6)) |
| 163 | 185 |
| 164 # Compare the reconstructed mutational profile with the original mutational profile: | 186 # Compare the reconstructed mutational profile with the original mutational profile: |
| 165 plot_compare_profiles(mut_mat[,1], | 187 plot_compare_profiles(pseudo_mut_mat[,1], |
| 166 nmf_res$reconstructed[,1], | 188 nmf_res$reconstructed[,1], |
| 167 profile_names = c("Original", "Reconstructed"), | 189 profile_names = c("Original", "Reconstructed"), |
| 168 condensed = TRUE) | 190 condensed = TRUE) |
| 169 dev.off() | 191 dev.off() |
| 170 } | 192 } |
| 171 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures #### | 193 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures #### |
| 172 | 194 |
| 173 if (!is.na(opt$output_cosmic)[1]) { | 195 if (!is.na(opt$output_cosmic)[1]) { |
| 174 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69) | 196 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69) |
| 175 sp_url <- paste("https://cancer.sanger.ac.uk/cancergenome/assets/", "signatures_probabilities.txt", sep = "") | 197 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix |
| 176 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) | 198 if (opt$cosmic_version == "v2") { |
| 177 mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix | 199 sp_url <- paste("https://cancer.sanger.ac.uk/cancergenome/assets/", "signatures_probabilities.txt", sep = "") |
| 178 new_order = match(row.names(mut_mat), cancer_signatures$Somatic.Mutation.Type) | 200 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) |
| 179 cancer_signatures = cancer_signatures[as.vector(new_order),] | 201 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type) |
| 180 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type | 202 cancer_signatures = cancer_signatures[as.vector(new_order),] |
| 181 cancer_signatures = as.matrix(cancer_signatures[,4:33]) | 203 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type |
| 204 cancer_signatures = as.matrix(cancer_signatures[,4:33]) | |
| 205 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels | |
| 206 cosmic_tag <- "Signatures (Cosmic v2, March 2015)" | |
| 207 } else { | |
| 208 sp_url <- "https://raw.githubusercontent.com/ARTbio/startbio/master/sigProfiler_SBS_signatures_2019_05_22.tsv" | |
| 209 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) | |
| 210 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type) | |
| 211 cancer_signatures = cancer_signatures[as.vector(new_order),] | |
| 212 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type | |
| 213 cancer_signatures = as.matrix(cancer_signatures[,4:70]) | |
| 214 colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels | |
| 215 cosmic_tag <- "Signatures (Cosmic v3, May 2019)" | |
| 216 } | |
| 182 | 217 |
| 183 # Plot mutational profiles of the COSMIC signatures | 218 # Plot mutational profiles of the COSMIC signatures |
| 184 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels | 219 if (opt$cosmic_version == "v2") { |
| 185 p.trans <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) | 220 p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) |
| 186 grid.arrange(p.trans, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3))) | 221 grid.arrange(p6, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3))) |
| 222 } else { | |
| 223 print(length(cancer_signatures)) | |
| 224 p6 <- plot_96_profile(cancer_signatures[,1:33], condensed = TRUE, ymax = 0.3) | |
| 225 p6bis <- plot_96_profile(cancer_signatures[,34:67], condensed = TRUE, ymax = 0.3) | |
| 226 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)",gp=gpar(fontsize=12,font=3))) | |
| 227 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)",gp=gpar(fontsize=12,font=3))) | |
| 228 } | |
| 187 | 229 |
| 188 # Hierarchically cluster the COSMIC signatures based on their similarity with average linkage | 230 # Hierarchically cluster the COSMIC signatures based on their similarity with average linkage |
| 189 # hclust_cosmic = cluster_signatures(cancer_signatures, method = "average") | 231 # hclust_cosmic = cluster_signatures(cancer_signatures, method = "average") |
| 190 # store signatures in new order | 232 # store signatures in new order |
| 191 # cosmic_order = colnames(cancer_signatures)[hclust_cosmic$order] | 233 # cosmic_order = colnames(cancer_signatures)[hclust_cosmic$order] |
| 209 # Plot heatmap with specified signature order | 251 # Plot heatmap with specified signature order |
| 210 # p.trans <- plot_cosine_heatmap(cos_sim_samples_signatures, col_order = cosmic_order, cluster_rows = TRUE) | 252 # p.trans <- plot_cosine_heatmap(cos_sim_samples_signatures, col_order = cosmic_order, cluster_rows = TRUE) |
| 211 # grid.arrange(p.trans) | 253 # grid.arrange(p.trans) |
| 212 | 254 |
| 213 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles | 255 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles |
| 214 fit_res <- fit_to_signatures(mut_mat, cancer_signatures) | 256 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures) |
| 215 | 257 |
| 216 # Select signatures with some contribution (above a threshold) | 258 # Select signatures with some contribution (above a threshold) |
| 217 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1] | 259 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1] |
| 218 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded | 260 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded |
| 219 # Plot contribution barplots | 261 # Plot contribution barplots |
| 220 pc1 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "absolute") | 262 pc3 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "absolute") |
| 221 pc2 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "relative") | 263 pc4 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "relative") |
| 264 ##### | |
| 265 # ggplot2 alternative | |
| 266 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs | |
| 267 pc3_data <- pc3$data | |
| 268 pc3_data <- merge (pc3_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") | |
| 269 pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | |
| 270 geom_bar(stat="identity", position='stack') + | |
| 271 scale_fill_discrete(name="Cosmic\nSignature") + | |
| 272 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + | |
| 273 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + | |
| 274 facet_grid(~level, scales = "free_x") | |
| 275 pc4_data <- pc4$data | |
| 276 pc4_data <- merge (pc4_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") | |
| 277 pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | |
| 278 geom_bar(stat="identity", position='fill') + | |
| 279 scale_fill_discrete(name="Cosmic\nSignature") + | |
| 280 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + | |
| 281 labs(x = "Samples", y = "Relative contribution") + theme_bw() + | |
| 282 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + | |
| 283 facet_grid(~level, scales = "free_x") | |
| 284 } | |
| 222 # Combine the two plots: | 285 # Combine the two plots: |
| 223 grid.arrange(pc1, pc2) | 286 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3))) |
| 224 ## pie charts of comic signatures contributions in samples | 287 ## pie charts of comic signatures contributions in samples |
| 225 | 288 |
| 226 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,]))) | 289 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,]))) |
| 227 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie)) | 290 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie)) |
| 228 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100 | 291 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100 |
| 229 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents) | 292 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents) |
| 230 library(reshape2) | 293 library(reshape2) |
| 231 melted_sig_data_pie_percents <-melt(data=sig_data_pie_percents) | 294 melted_sig_data_pie_percents <-melt(data=sig_data_pie_percents) |
| 232 melted_sig_data_pie_percents$label <- sub("Sig.", "", melted_sig_data_pie_percents$variable) | 295 melted_sig_data_pie_percents$label <- sub("Sig.", "", melted_sig_data_pie_percents$variable) |
| 233 melted_sig_data_pie_percents$pos <- cumsum(melted_sig_data_pie_percents$value) - melted_sig_data_pie_percents$value/2 | 296 melted_sig_data_pie_percents$pos <- cumsum(melted_sig_data_pie_percents$value) - melted_sig_data_pie_percents$value/2 |
| 234 library(ggplot2) | 297 p7 <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) + |
| 235 p.trans <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) + | |
| 236 geom_bar(width = 1, stat = "identity") + | 298 geom_bar(width = 1, stat = "identity") + |
| 237 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + | 299 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + |
| 238 coord_polar("y", start=0) + facet_wrap(~ sample) + | 300 coord_polar("y", start=0) + facet_wrap(~ sample) + |
| 239 labs(x="", y="Samples", fill = "Signatures (Cosmic_v2,March 2015)") + | 301 labs(x="", y="Samples", fill = cosmic_tag) + |
| 240 theme(axis.text = element_blank(), | 302 theme(axis.text = element_blank(), |
| 241 axis.ticks = element_blank(), | 303 axis.ticks = element_blank(), |
| 242 panel.grid = element_blank()) | 304 panel.grid = element_blank()) |
| 243 grid.arrange(p.trans) | 305 grid.arrange(p7) |
| 244 | 306 |
| 245 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering | 307 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering |
| 246 p.trans <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") | 308 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") |
| 247 grid.arrange(p.trans) | 309 grid.arrange(p8) |
| 248 | 310 |
| 249 # Compare the reconstructed mutational profile of sample 1 with its original mutational profile | 311 # Compare the reconstructed mutational profile of sample 1 with its original mutational profile |
| 250 # plot_compare_profiles(mut_mat[,1], fit_res$reconstructed[,1], | 312 # plot_compare_profiles(mut_mat[,1], fit_res$reconstructed[,1], |
| 251 # profile_names = c("Original", "Reconstructed"), | 313 # profile_names = c("Original", "Reconstructed"), |
| 252 # condensed = TRUE) | 314 # condensed = TRUE) |
| 253 | 315 |
| 254 # Calculate the cosine similarity between all original and reconstructed mutational profiles with | 316 # Calculate the cosine similarity between all original and reconstructed mutational profiles with |
| 255 # `cos_sim_matrix` | 317 # `cos_sim_matrix` |
| 256 | 318 |
| 257 # calculate all pairwise cosine similarities | 319 # calculate all pairwise cosine similarities |
| 258 cos_sim_ori_rec <- cos_sim_matrix(mut_mat, fit_res$reconstructed) | 320 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed) |
| 259 # extract cosine similarities per sample between original and reconstructed | 321 # extract cosine similarities per sample between original and reconstructed |
| 260 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec)) | 322 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec)) |
| 261 | 323 |
| 262 # We can use ggplot to make a barplot of the cosine similarities between the original and | 324 # We can use ggplot to make a barplot of the cosine similarities between the original and |
| 263 # reconstructed mutational profile of each sample. This clearly shows how well each mutational | 325 # reconstructed mutational profile of each sample. This clearly shows how well each mutational |
| 268 | 330 |
| 269 # Adjust data frame for plotting with gpplot | 331 # Adjust data frame for plotting with gpplot |
| 270 colnames(cos_sim_ori_rec) = "cos_sim" | 332 colnames(cos_sim_ori_rec) = "cos_sim" |
| 271 cos_sim_ori_rec$sample = row.names(cos_sim_ori_rec) | 333 cos_sim_ori_rec$sample = row.names(cos_sim_ori_rec) |
| 272 # Make barplot | 334 # Make barplot |
| 273 p.trans <- ggplot(cos_sim_ori_rec, aes(y=cos_sim, x=sample)) + | 335 p9 <- ggplot(cos_sim_ori_rec, aes(y=cos_sim, x=sample)) + |
| 274 geom_bar(stat="identity", fill = "skyblue4") + | 336 geom_bar(stat="identity", fill = "skyblue4") + |
| 275 coord_cartesian(ylim=c(0.8, 1)) + | 337 coord_cartesian(ylim=c(0.8, 1)) + |
| 276 # coord_flip(ylim=c(0.8,1)) + | 338 # coord_flip(ylim=c(0.8,1)) + |
| 277 ylab("Cosine similarity\n original VS reconstructed") + | 339 ylab("Cosine similarity\n original VS reconstructed") + |
| 278 xlab("") + | 340 xlab("") + |
| 281 theme_bw() + | 343 theme_bw() + |
| 282 theme(panel.grid.minor.y=element_blank(), | 344 theme(panel.grid.minor.y=element_blank(), |
| 283 panel.grid.major.y=element_blank()) + | 345 panel.grid.major.y=element_blank()) + |
| 284 # Add cut.off line | 346 # Add cut.off line |
| 285 geom_hline(aes(yintercept=.95)) | 347 geom_hline(aes(yintercept=.95)) |
| 286 grid.arrange(p.trans, top = textGrob("Similarity between true and reconstructed profiles (with all Cosmic sig.)",gp=gpar(fontsize=12,font=3))) | 348 grid.arrange(p9, top = textGrob("Similarity between true and reconstructed profiles (with all Cosmic sig.)",gp=gpar(fontsize=12,font=3))) |
| 287 | |
| 288 dev.off() | 349 dev.off() |
| 289 } | 350 } |
| 351 | |
| 352 | |
| 353 # Output RData file | |
| 354 if (!is.null(opt$rdata)) { | |
| 355 save.image(file=opt$rdata) | |
| 356 } |
