Mercurial > repos > artbio > mutational_patterns
comparison mutational_patterns.R @ 14:56c8869a231e draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit 518fb067e8206ecafbf673a5e4cf375ccead11e3"
author | artbio |
---|---|
date | Fri, 04 Jun 2021 22:35:48 +0000 |
parents | 6741b819cc15 |
children | 8182d1625433 |
comparison
equal
deleted
inserted
replaced
13:6741b819cc15 | 14:56c8869a231e |
---|---|
1 # load packages that are provided in the conda env | 1 # load packages that are provided in the conda env |
2 options( show.error.messages=F, | 2 options(show.error.messages = F, |
3 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 3 error = function() { |
4 cat(geterrmessage(), file = stderr()); q("no", 1, F) | |
5 } | |
6 ) | |
4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
5 warnings() | 8 warnings() |
6 library(optparse) | 9 library(optparse) |
7 library(rjson) | 10 library(rjson) |
8 library(grid) | 11 library(grid) |
9 library(gridExtra) | 12 library(gridExtra) |
10 library(scales) | 13 library(scales) |
11 library(RColorBrewer) | 14 library(RColorBrewer) |
12 | 15 |
13 # Arguments | 16 # Arguments |
14 option_list = list( | 17 option_list <- list( |
15 make_option( | 18 make_option( |
16 "--inputs", | 19 "--inputs", |
17 default = NA, | 20 default = NA, |
18 type = 'character', | 21 type = "character", |
19 help = "json formatted dictionary of datasets and their paths" | 22 help = "json formatted dictionary of datasets and their paths" |
20 ), | 23 ), |
21 make_option( | 24 make_option( |
22 "--genome", | 25 "--genome", |
23 default = NA, | 26 default = NA, |
24 type = 'character', | 27 type = "character", |
25 help = "genome name in the BSgenome bioconductor package" | 28 help = "genome name in the BSgenome bioconductor package" |
26 ), | 29 ), |
27 make_option( | 30 make_option( |
28 "--levels", | 31 "--levels", |
29 default = NA, | 32 default = NA, |
30 type = 'character', | 33 type = "character", |
31 help = "path to the tab separated file describing the levels in function of datasets" | 34 help = "path to the tab separated file describing the levels in function of datasets" |
32 ), | 35 ), |
33 make_option( | 36 make_option( |
34 "--cosmic_version", | 37 "--cosmic_version", |
35 default = "v2", | 38 default = "v2", |
36 type = 'character', | 39 type = "character", |
37 help = "Version of the Cosmic Signature set to be used to express mutational patterns" | 40 help = "Version of the Cosmic Signature set to be used to express mutational patterns" |
38 ), | 41 ), |
39 make_option( | 42 make_option( |
40 "--signum", | 43 "--signum", |
41 default = 2, | 44 default = 2, |
42 type = 'integer', | 45 type = "integer", |
43 help = "selects the N most significant signatures in samples to express mutational patterns" | 46 help = "selects the N most significant signatures in samples to express mutational patterns" |
44 ), | 47 ), |
45 make_option( | 48 make_option( |
46 "--nrun", | 49 "--nrun", |
47 default = 2, | 50 default = 2, |
48 type = 'integer', | 51 type = "integer", |
49 help = "Number of runs to fit signatures" | 52 help = "Number of runs to fit signatures" |
50 ), | 53 ), |
51 make_option( | 54 make_option( |
52 "--rank", | 55 "--rank", |
53 default = 2, | 56 default = 2, |
54 type = 'integer', | 57 type = "integer", |
55 help = "number of ranks to display for parameter optimization" | 58 help = "number of ranks to display for parameter optimization" |
56 ), | 59 ), |
57 make_option( | 60 make_option( |
58 "--newsignum", | 61 "--newsignum", |
59 default = 2, | 62 default = 2, |
60 type = 'integer', | 63 type = "integer", |
61 help = "Number of new signatures to be captured" | 64 help = "Number of new signatures to be captured" |
62 ), | 65 ), |
63 make_option( | 66 make_option( |
64 "--output_spectrum", | 67 "--output_spectrum", |
65 default = NA, | 68 default = NA, |
66 type = 'character', | 69 type = "character", |
67 help = "path to output dataset" | 70 help = "path to output dataset" |
68 ), | 71 ), |
69 make_option( | 72 make_option( |
70 "--output_denovo", | 73 "--output_denovo", |
71 default = NA, | 74 default = NA, |
72 type = 'character', | 75 type = "character", |
73 help = "path to output dataset" | 76 help = "path to output dataset" |
74 ), | 77 ), |
75 make_option( | 78 make_option( |
76 "--sigmatrix", | 79 "--sigmatrix", |
77 default = NA, | 80 default = NA, |
78 type = 'character', | 81 type = "character", |
79 help = "path to signature matrix" | 82 help = "path to signature matrix" |
80 ), | 83 ), |
81 make_option( | 84 make_option( |
82 "--output_cosmic", | 85 "--output_cosmic", |
83 default = NA, | 86 default = NA, |
84 type = 'character', | 87 type = "character", |
85 help = "path to output dataset" | 88 help = "path to output dataset" |
86 ), | 89 ), |
87 make_option( | 90 make_option( |
88 "--sig_contrib_matrix", | 91 "--sig_contrib_matrix", |
89 default = NA, | 92 default = NA, |
90 type = 'character', | 93 type = "character", |
91 help = "path to signature contribution matrix" | 94 help = "path to signature contribution matrix" |
92 ), | 95 ), |
93 make_option( | 96 make_option( |
94 c("-r", "--rdata"), | 97 c("-r", "--rdata"), |
95 type="character", | 98 type = "character", |
96 default=NULL, | 99 default = NULL, |
97 help="Path to RData output file") | 100 help = "Path to RData output file") |
98 ) | 101 ) |
99 | 102 |
100 opt = parse_args(OptionParser(option_list = option_list), | 103 opt <- parse_args(OptionParser(option_list = option_list), |
101 args = commandArgs(trailingOnly = TRUE)) | 104 args = commandArgs(trailingOnly = TRUE)) |
102 | 105 |
103 ################ Manage input data #################### | 106 ################ Manage input data #################### |
104 json_dict <- opt$inputs | 107 json_dict <- opt$inputs |
105 parser <- newJSONParser() | 108 parser <- newJSONParser() |
106 parser$addData(json_dict) | 109 parser$addData(json_dict) |
107 fileslist <- parser$getObject() | 110 fileslist <- parser$getObject() |
108 vcf_paths <- attr(fileslist, "names") | 111 vcf_paths <- attr(fileslist, "names") |
109 element_identifiers <- unname(unlist(fileslist)) | 112 element_identifiers <- unname(unlist(fileslist)) |
110 ref_genome <- opt$genome | 113 ref_genome <- opt$genome |
111 vcf_table <- data.frame(element_identifier=as.character(element_identifiers), path=vcf_paths) | 114 vcf_table <- data.frame(element_identifier = as.character(element_identifiers), path = vcf_paths) |
112 | 115 |
113 library(MutationalPatterns) | 116 library(MutationalPatterns) |
114 library(ref_genome, character.only = TRUE) | 117 library(ref_genome, character.only = TRUE) |
115 library(ggplot2) | 118 library(ggplot2) |
116 | 119 |
117 # Load the VCF files into a GRangesList: | 120 # Load the VCF files into a GRangesList: |
118 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome) | 121 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome) |
119 library(plyr) | 122 library(plyr) |
120 if (!is.na(opt$levels)[1]) { # manage levels if there are | 123 if (!is.na(opt$levels)[1]) { # manage levels if there are |
121 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("element_identifier","level")) | 124 levels_table <- read.delim(opt$levels, header = FALSE, |
125 col.names = c("element_identifier", "level")) | |
122 } else { | 126 } else { |
123 levels_table <- data.frame(element_identifier=vcf_table$element_identifier, | 127 levels_table <- data.frame(element_identifier = vcf_table$element_identifier, |
124 level=rep("nolabels", length(vcf_table$element_identifier))) | 128 level = rep("nolabels", length(vcf_table$element_identifier))) |
125 } | 129 } |
126 metadata_table <- join(vcf_table, levels_table, by = "element_identifier") | 130 metadata_table <- join(vcf_table, levels_table, by = "element_identifier") |
127 tissue <- as.vector(metadata_table$level) | 131 tissue <- as.vector(metadata_table$level) |
128 detach(package:plyr) | 132 detach(package:plyr) |
129 | 133 |
130 ##### This is done for any section ###### | 134 ##### This is done for any section ###### |
131 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) | 135 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) |
132 qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] | 136 qual_col_pals <- brewer.pal.info[brewer.pal.info$category == "qual", ] |
133 col_vector = unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) | 137 col_vector <- unique(unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))) |
134 col_vector <- col_vector[c(-32, -34, -39)] # 67-color palette | 138 col_vector <- col_vector[c(-32, -34, -39)] # 67-color palette |
135 | 139 |
136 ###### Section 1 Mutation characteristics and spectrums ############# | 140 ###### Section 1 Mutation characteristics and spectrums ############# |
137 if (!is.na(opt$output_spectrum)[1]) { | 141 if (!is.na(opt$output_spectrum)[1]) { |
138 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) | 142 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) |
139 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) | 143 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) |
140 | 144 |
141 # mutation spectrum, total or by sample | 145 # mutation spectrum, total or by sample |
142 | 146 |
143 if (length(levels(factor(levels_table$level))) == 1) { # (is.na(opt$levels)[1]) | 147 if (length(levels(factor(levels_table$level))) == 1) { |
144 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend=TRUE) | 148 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend = TRUE) |
145 plot(p1) | 149 plot(p1) |
146 } else { | 150 } else { |
147 p2 <- plot_spectrum(type_occurrences, by = tissue, CT=TRUE) # by levels | 151 p2 <- plot_spectrum(type_occurrences, by = tissue, CT = TRUE) # by levels |
148 p3 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE) # total | 152 p3 <- plot_spectrum(type_occurrences, CT = TRUE, legend = TRUE) # total |
149 grid.arrange(p2, p3, ncol=2, widths=c(4,2.3), heights=c(4,1)) | 153 grid.arrange(p2, p3, ncol = 2, widths = c(4, 2.3), heights = c(4, 1)) |
150 } | 154 } |
151 plot_96_profile(mut_mat, condensed = TRUE) | 155 plot_96_profile(mut_mat, condensed = TRUE) |
152 dev.off() | 156 dev.off() |
153 } | 157 } |
154 | 158 |
155 ###### Section 2: De novo mutational signature extraction using NMF ####### | 159 ###### Section 2: De novo mutational signature extraction using NMF ####### |
160 # opt$rank cannot be higher than the number of samples and | |
161 # likewise, opt$signum cannot be higher thant the number of samples | |
156 if (!is.na(opt$output_denovo)[1]) { | 162 if (!is.na(opt$output_denovo)[1]) { |
157 # opt$rank cannot be higher than the number of samples | 163 |
158 if (opt$rank > length(element_identifiers)) {opt$rank <-length(element_identifiers)} | 164 if (opt$rank > length(element_identifiers)) { |
159 # likewise, opt$signum cannot be higher thant the number of samples | 165 opt$rank <- length(element_identifiers) |
160 if (opt$signum > length(element_identifiers)) {opt$signum <-length(element_identifiers)} | 166 } |
167 if (opt$signum > length(element_identifiers)) { | |
168 opt$signum <- length(element_identifiers) | |
169 } | |
161 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix | 170 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix |
162 # Use the NMF package to generate an estimate rank plot | 171 # Use the NMF package to generate an estimate rank plot |
163 library("NMF") | 172 library("NMF") |
164 estimate <- nmf(pseudo_mut_mat, rank=1:opt$rank, method="brunet", nrun=opt$nrun, seed=123456) | 173 estimate <- nmf(pseudo_mut_mat, rank = 1:opt$rank, method = "brunet", nrun = opt$nrun, seed = 123456) |
165 # And plot it | 174 # And plot it |
166 pdf(opt$output_denovo, paper = "special", width = 11.69, height = 11.69) | 175 pdf(opt$output_denovo, paper = "special", width = 11.69, height = 11.69) |
167 p4 <- plot(estimate) | 176 p4 <- plot(estimate) |
168 grid.arrange(p4) | 177 grid.arrange(p4) |
169 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures | 178 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures |
170 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter | 179 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter |
171 # to achieve stability and avoid local minima) | 180 # to achieve stability and avoid local minima) |
172 nmf_res <- extract_signatures(pseudo_mut_mat, rank=opt$newsignum, nrun=opt$nrun) | 181 nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun) |
173 # Assign signature names | 182 # Assign signature names |
174 colnames(nmf_res$signatures) <- paste0("NewSig_", 1:opt$newsignum) | 183 colnames(nmf_res$signatures) <- paste0("NewSig_", 1:opt$newsignum) |
175 rownames(nmf_res$contribution) <- paste0("NewSig_", 1:opt$newsignum) | 184 rownames(nmf_res$contribution) <- paste0("NewSig_", 1:opt$newsignum) |
176 # Plot the 96-profile of the signatures: | 185 # Plot the 96-profile of the signatures: |
177 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) | 186 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) |
178 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ variable, value.var = "value") | 187 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ variable, value.var = "value") |
179 new_sig_matrix = format(new_sig_matrix, scientific=TRUE) | 188 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) |
180 write.table(new_sig_matrix, file=opt$sigmatrix, quote = FALSE, row.names = FALSE, sep="\t") | 189 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") |
181 grid.arrange(p5) | 190 grid.arrange(p5) |
182 # Visualize the contribution of the signatures in a barplot | 191 # Visualize the contribution of the signatures in a barplot |
183 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="relative", coord_flip = TRUE) | 192 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE) |
184 # Visualize the contribution of the signatures in absolute number of mutations | 193 # Visualize the contribution of the signatures in absolute number of mutations |
185 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="absolute", coord_flip = TRUE) | 194 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "absolute", coord_flip = TRUE) |
186 # Combine the two plots: | 195 # Combine the two plots: |
187 grid.arrange(pc1, pc2) | 196 grid.arrange(pc1, pc2) |
188 | 197 |
189 # The relative contribution of each signature for each sample can also be plotted as a heatmap with | 198 # The relative contribution of each signature for each sample can also be plotted as a heatmap with |
190 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots. | 199 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots. |
191 # The samples can be hierarchically clustered based on their euclidean dis- tance. The signatures | 200 # The samples can be hierarchically clustered based on their euclidean dis- tance. The signatures |
192 # can be plotted in a user-specified order. | 201 # can be plotted in a user-specified order. |
193 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: | 202 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: |
194 pch1 <- plot_contribution_heatmap(nmf_res$contribution, | 203 pch1 <- plot_contribution_heatmap(nmf_res$contribution, |
195 sig_order = paste0("NewSig_", 1:opt$newsignum)) | 204 sig_order = paste0("NewSig_", 1:opt$newsignum)) |
196 # Plot signature contribution as a heatmap without sample clustering: | 205 # Plot signature contribution as a heatmap without sample clustering: |
197 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples=FALSE) | 206 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE) |
198 #Combine the plots into one figure: | 207 #Combine the plots into one figure: |
199 grid.arrange(pch1, pch2, ncol = 2, widths = c(2,1.6)) | 208 grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6)) |
200 | 209 |
201 # Compare the reconstructed mutational profile with the original mutational profile: | 210 # Compare the reconstructed mutational profile with the original mutational profile: |
202 plot_compare_profiles(pseudo_mut_mat[,1], | 211 plot_compare_profiles(pseudo_mut_mat[, 1], |
203 nmf_res$reconstructed[,1], | 212 nmf_res$reconstructed[, 1], |
204 profile_names = c("Original", "Reconstructed"), | 213 profile_names = c("Original", "Reconstructed"), |
205 condensed = TRUE) | 214 condensed = TRUE) |
206 dev.off() | 215 dev.off() |
207 } | 216 } |
208 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures #### | 217 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures #### |
209 | 218 |
210 if (!is.na(opt$output_cosmic)[1]) { | 219 if (!is.na(opt$output_cosmic)[1]) { |
211 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69) | 220 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69) |
212 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix | 221 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix |
213 if (opt$cosmic_version == "v2") { | 222 if (opt$cosmic_version == "v2") { |
214 sp_url <- paste("https://cancer.sanger.ac.uk/cancergenome/assets/", "signatures_probabilities.txt", sep = "") | 223 sp_url <- paste("https://cancer.sanger.ac.uk/cancergenome/assets/", "signatures_probabilities.txt", sep = "") |
215 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) | 224 cancer_signatures <- read.table(sp_url, sep = "\t", header = TRUE) |
216 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type) | 225 new_order <- match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type) |
217 cancer_signatures = cancer_signatures[as.vector(new_order),] | 226 cancer_signatures <- cancer_signatures[as.vector(new_order), ] |
218 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type | 227 row.names(cancer_signatures) <- cancer_signatures$Somatic.Mutation.Type |
219 cancer_signatures = as.matrix(cancer_signatures[,4:33]) | 228 cancer_signatures <- as.matrix(cancer_signatures[, 4:33]) |
220 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels | 229 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels |
221 cosmic_tag <- "Signatures (Cosmic v2, March 2015)" | 230 cosmic_tag <- "Signatures (Cosmic v2, March 2015)" |
222 cosmic_colors <- col_vector[1:30] | 231 cosmic_colors <- col_vector[1:30] |
223 } else { | 232 } else { |
224 sp_url <- "https://raw.githubusercontent.com/ARTbio/startbio/master/sigProfiler_SBS_signatures_2019_05_22.tsv" | 233 sp_url <- "https://raw.githubusercontent.com/ARTbio/startbio/master/sigProfiler_SBS_signatures_2019_05_22.tsv" |
225 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) | 234 cancer_signatures <- read.table(sp_url, sep = "\t", header = TRUE) |
226 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type) | 235 new_order <- match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type) |
227 cancer_signatures = cancer_signatures[as.vector(new_order),] | 236 cancer_signatures <- cancer_signatures[as.vector(new_order), ] |
228 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type | 237 row.names(cancer_signatures) <- cancer_signatures$Somatic.Mutation.Type |
229 cancer_signatures = as.matrix(cancer_signatures[,4:70]) | 238 cancer_signatures <- as.matrix(cancer_signatures[, 4:70]) |
230 colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels | 239 colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels |
231 cosmic_tag <- "Signatures (Cosmic v3, May 2019)" | 240 cosmic_tag <- "Signatures (Cosmic v3, May 2019)" |
232 cosmic_colors <- col_vector[1:67] | 241 cosmic_colors <- col_vector[1:67] |
233 } | 242 } |
234 | 243 |
235 # Plot mutational profiles of the COSMIC signatures | 244 # Plot mutational profiles of the COSMIC signatures |
236 if (opt$cosmic_version == "v2") { | 245 if (opt$cosmic_version == "v2") { |
237 p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) | 246 p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) |
238 grid.arrange(p6, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3))) | 247 grid.arrange(p6, top = textGrob("COSMIC signature profiles", gp = gpar(fontsize = 12, font = 3))) |
239 } else { | 248 } else { |
240 p6 <- plot_96_profile(cancer_signatures[,1:33], condensed = TRUE, ymax = 0.3) | 249 p6 <- plot_96_profile(cancer_signatures[, 1:33], condensed = TRUE, ymax = 0.3) |
241 p6bis <- plot_96_profile(cancer_signatures[,34:67], condensed = TRUE, ymax = 0.3) | 250 p6bis <- plot_96_profile(cancer_signatures[, 34:67], condensed = TRUE, ymax = 0.3) |
242 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)",gp=gpar(fontsize=12,font=3))) | 251 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)", |
243 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)",gp=gpar(fontsize=12,font=3))) | 252 gp = gpar(fontsize = 12, font = 3))) |
244 } | 253 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)", |
245 | 254 gp = gpar(fontsize = 12, font = 3))) |
246 | 255 } |
256 | |
257 | |
247 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles | 258 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles |
248 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures) | 259 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures) |
249 | 260 |
250 # Plot contribution barplots | 261 # Plot contribution barplots |
251 pc3 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "absolute") | 262 pc3 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "absolute") |
252 pc4 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "relative") | 263 pc4 <- plot_contribution(fit_res$contribution, cancer_signatures, coord_flip = T, mode = "relative") |
253 pc3_data <- pc3$data | 264 pc3_data <- pc3$data |
254 pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | 265 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
255 geom_bar(stat="identity", position='stack') + | 266 geom_bar(stat = "identity", position = "stack") + |
256 coord_flip() + | 267 coord_flip() + |
257 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + | 268 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + |
258 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + | 269 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + |
259 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right", | 270 theme(panel.grid.minor.x = element_blank(), |
260 text = element_text(size=8), | 271 panel.grid.major.x = element_blank(), |
261 axis.text.x = element_text(angle=90, hjust=1)) | 272 legend.position = "right", |
273 text = element_text(size = 8), | |
274 axis.text.x = element_text(angle = 90, hjust = 1)) | |
262 pc4_data <- pc4$data | 275 pc4_data <- pc4$data |
263 pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | 276 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
264 geom_bar(stat="identity", position='fill') + | 277 geom_bar(stat = "identity", position = "fill") + |
265 coord_flip() + | 278 coord_flip() + |
266 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + | 279 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + |
267 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + | 280 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + |
268 labs(x = "Samples", y = "Relative contribution") + theme_bw() + | 281 labs(x = "Samples", y = "Relative contribution") + theme_bw() + |
269 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right", | 282 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position = "right", |
270 text = element_text(size=8), | 283 text = element_text(size = 8), |
271 axis.text.x = element_text(angle=90, hjust=1)) | 284 axis.text.x = element_text(angle = 90, hjust = 1)) |
272 ##### | 285 ##### |
273 # ggplot2 alternative | 286 # ggplot2 alternative |
274 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs | 287 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs |
275 pc3_data <- pc3$data | 288 pc3_data <- pc3$data |
276 pc3_data <- merge (pc3_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") | 289 pc3_data <- merge(pc3_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") |
277 pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | 290 pc3 <- ggplot(pc3_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
278 geom_bar(stat="identity", position='stack') + | 291 geom_bar(stat = "identity", position = "stack") + |
279 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + | 292 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + |
280 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + | 293 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + |
281 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right", | 294 theme(panel.grid.minor.x = element_blank(), |
282 text = element_text(size=8), | 295 panel.grid.major.x = element_blank(), |
283 axis.text.x = element_text(angle=90, hjust=1)) + | 296 legend.position = "right", |
284 facet_grid(~level, scales = "free_x", space="free") | 297 text = element_text(size = 8), |
298 axis.text.x = element_text(angle = 90, hjust = 1)) + | |
299 facet_grid(~level, scales = "free_x", space = "free") | |
285 pc4_data <- pc4$data | 300 pc4_data <- pc4$data |
286 pc4_data <- merge (pc4_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") | 301 pc4_data <- merge(pc4_data, metadata_table[, c(1, 3)], by.x = "Sample", by.y = "element_identifier") |
287 pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | 302 pc4 <- ggplot(pc4_data, aes(x = Sample, y = Contribution, fill = as.factor(Signature))) + |
288 geom_bar(stat="identity", position='fill') + | 303 geom_bar(stat = "identity", position = "fill") + |
289 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + | 304 scale_fill_manual(name = "Cosmic\nSignatures", values = cosmic_colors) + |
290 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + | 305 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + |
291 labs(x = "Samples", y = "Relative contribution") + theme_bw() + | 306 labs(x = "Samples", y = "Relative contribution") + theme_bw() + |
292 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank(), legend.position="right", | 307 theme(panel.grid.minor.x = element_blank(), |
293 text = element_text(size=8), | 308 panel.grid.major.x = element_blank(), |
294 axis.text.x = element_text(angle=90, hjust=1)) + | 309 legend.position = "right", |
295 facet_grid(~level, scales = "free_x", space="free") | 310 text = element_text(size = 8), |
311 axis.text.x = element_text(angle = 90, hjust = 1)) + | |
312 facet_grid(~level, scales = "free_x", space = "free") | |
296 } | 313 } |
297 # Combine the two plots: | 314 # Combine the two plots: |
298 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3))) | 315 grid.arrange(pc3, pc4, |
299 | 316 top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns", |
317 gp = gpar(fontsize = 12, font = 3))) | |
318 | |
300 #### pie charts of comic signatures contributions in samples ### | 319 #### pie charts of comic signatures contributions in samples ### |
301 library(reshape2) | 320 library(reshape2) |
302 library(dplyr) | 321 library(dplyr) |
303 if (length(levels(factor(levels_table$level))) < 2) { | 322 if (length(levels(factor(levels_table$level))) < 2) { |
304 fit_res_contrib <- as.data.frame(fit_res$contribution) | 323 fit_res_contrib <- as.data.frame(fit_res$contribution) |
305 worklist <- cbind(signature=rownames(fit_res$contribution), | 324 worklist <- cbind(signature = rownames(fit_res$contribution), |
306 level=rep("nolabels", length(fit_res_contrib[,1])), | 325 level = rep("nolabels", length(fit_res_contrib[, 1])), |
307 fit_res_contrib, | 326 fit_res_contrib, |
308 sum=rowSums(fit_res_contrib)) | 327 sum = rowSums(fit_res_contrib)) |
309 worklist <- worklist[order(worklist[ ,"sum"], decreasing = T), ] | 328 worklist <- worklist[order(worklist[, "sum"], decreasing = T), ] |
310 worklist <- worklist[1:opt$signum,] | 329 worklist <- worklist[1:opt$signum, ] |
311 worklist <- worklist[ , -length(worklist[1,])] | 330 worklist <- worklist[, -length(worklist[1, ])] |
312 worklist <- melt(worklist) | 331 worklist <- melt(worklist) |
313 worklist <- worklist[,c(1,3,4,2)] | 332 worklist <- worklist[, c(1, 3, 4, 2)] |
314 } else { | 333 } else { |
315 worklist <- list() | 334 worklist <- list() |
316 for (i in levels(factor(levels_table$level))) { | 335 for (i in levels(factor(levels_table$level))) { |
317 fit_res$contribution[,levels_table$element_identifier[levels_table$level == i]] -> worklist[[i]] | 336 fit_res$contribution[, levels_table$element_identifier[levels_table$level == i]] -> worklist[[i]] |
318 sum <- rowSums(as.data.frame(worklist[[i]])) | 337 sum <- rowSums(as.data.frame(worklist[[i]])) |
319 worklist[[i]] <- cbind(worklist[[i]], sum) | 338 worklist[[i]] <- cbind(worklist[[i]], sum) |
320 worklist[[i]] <- worklist[[i]][order(worklist[[i]][ ,"sum"], decreasing = T), ] | 339 worklist[[i]] <- worklist[[i]][order(worklist[[i]][, "sum"], decreasing = T), ] |
321 worklist[[i]] <- worklist[[i]][1:opt$signum,] | 340 worklist[[i]] <- worklist[[i]][1:opt$signum, ] |
322 worklist[[i]] <- worklist[[i]][ , -length(as.data.frame(worklist[[i]]))] | 341 worklist[[i]] <- worklist[[i]][, -length(as.data.frame(worklist[[i]]))] |
323 } | 342 } |
324 worklist <- as.data.frame(melt(worklist)) | 343 worklist <- as.data.frame(melt(worklist)) |
325 worklist[,2] <- paste0(worklist[,4], " - ", worklist[,2]) | 344 worklist[, 2] <- paste0(worklist[, 4], " - ", worklist[, 2]) |
326 head(worklist) | 345 head(worklist) |
327 } | 346 } |
328 | 347 |
329 colnames(worklist) <- c("signature", "sample", "value", "level") | 348 colnames(worklist) <- c("signature", "sample", "value", "level") |
330 worklist <- as.data.frame(worklist %>% group_by(sample) %>% mutate(value=value/sum(value)*100)) | 349 worklist <- as.data.frame(worklist %>% group_by(sample) %>% mutate(value = value / sum(value) * 100)) |
331 worklist$pos <- cumsum(worklist$value) - worklist$value/2 | 350 worklist$pos <- cumsum(worklist$value) - worklist$value / 2 |
332 worklist$label <- factor(worklist$signature) | 351 worklist$label <- factor(worklist$signature) |
333 worklist$signature <- factor(worklist$signature) | 352 worklist$signature <- factor(worklist$signature) |
334 p7 <- ggplot(worklist, aes(x="", y=value, group=signature, fill=signature)) + | 353 p7 <- ggplot(worklist, aes(x = "", y = value, group = signature, fill = signature)) + |
335 geom_bar(width = 1, stat = "identity") + | 354 geom_bar(width = 1, stat = "identity") + |
336 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + | 355 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color = "black", size = 3) + |
337 coord_polar("y", start=0) + facet_wrap(.~sample) + | 356 coord_polar("y", start = 0) + facet_wrap(.~sample) + |
338 labs(x="", y="Samples", fill = cosmic_tag) + | 357 labs(x = "", y = "Samples", fill = cosmic_tag) + |
339 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), | 358 scale_fill_manual(name = paste0(opt$signum, " most contributing\nsignatures\n(in each label/tissue)"), |
340 values = cosmic_colors[as.numeric(levels(factor(worklist$label)))]) + | 359 values = cosmic_colors[as.numeric(levels(factor(worklist$label)))]) + |
341 theme(axis.text = element_blank(), | 360 theme(axis.text = element_blank(), |
342 axis.ticks = element_blank(), | 361 axis.ticks = element_blank(), |
343 panel.grid = element_blank()) | 362 panel.grid = element_blank()) |
346 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering | 365 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering |
347 if (length(vcf_paths) > 1) { | 366 if (length(vcf_paths) > 1) { |
348 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") | 367 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") |
349 grid.arrange(p8) | 368 grid.arrange(p8) |
350 } | 369 } |
351 | 370 |
352 # export relative contribution matrix | 371 # export relative contribution matrix |
353 if (!is.na(opt$sig_contrib_matrix)) { | 372 if (!is.na(opt$sig_contrib_matrix)) { |
354 output_table <- t(fit_res$contribution)/rowSums(t(fit_res$contribution)) | 373 output_table <- t(fit_res$contribution) / rowSums(t(fit_res$contribution)) |
355 colnames(output_table) <- paste0("s", colnames(output_table)) | 374 colnames(output_table) <- paste0("s", colnames(output_table)) |
356 if (length(levels(factor(levels_table$level))) > 1) { | 375 if (length(levels(factor(levels_table$level))) > 1) { |
357 output_table <- data.frame(sample=paste0(metadata_table[metadata_table$element_identifier==colnames(fit_res$contribution), | 376 output_table <- data.frame(sample = paste0(metadata_table[metadata_table$element_identifier == colnames(fit_res$contribution), |
358 3], "-", colnames(fit_res$contribution) ), | 377 3], "-", colnames(fit_res$contribution)), |
359 output_table) | 378 output_table) |
360 } else { | 379 } else { |
361 output_table <- data.frame(sample=rownames(output_table), output_table) | 380 output_table <- data.frame(sample = rownames(output_table), output_table) |
362 } | 381 } |
363 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F) | 382 write.table(output_table, file = opt$sig_contrib_matrix, sep = "\t", quote = F, row.names = F) |
364 } | 383 } |
365 | 384 |
366 # calculate all pairwise cosine similarities | 385 # calculate all pairwise cosine similarities |
367 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed) | 386 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed) |
368 # extract cosine similarities per sample between original and reconstructed | 387 # extract cosine similarities per sample between original and reconstructed |
369 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec)) | 388 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec)) |
370 | 389 |
371 # We can use ggplot to make a barplot of the cosine similarities between the original and | 390 # We can use ggplot to make a barplot of the cosine similarities between the original and |
372 # reconstructed mutational profile of each sample. This clearly shows how well each mutational | 391 # reconstructed mutational profile of each sample. This clearly shows how well each mutational |
373 # profile can be reconstructed with the COSMIC mutational signatures. Two identical profiles | 392 # profile can be reconstructed with the COSMIC mutational signatures. Two identical profiles |
374 # have a cosine similarity of 1. The lower the cosine similarity between original and | 393 # have a cosine similarity of 1. The lower the cosine similarity between original and |
375 # reconstructed, the less well the original mutational profile can be reconstructed with | 394 # reconstructed, the less well the original mutational profile can be reconstructed with |
376 # the COSMIC signatures. You could use, for example, cosine similarity of 0.95 as a cutoff. | 395 # the COSMIC signatures. You could use, for example, cosine similarity of 0.95 as a cutoff. |
377 | 396 |
378 # Adjust data frame for plotting with gpplot | 397 # Adjust data frame for plotting with gpplot |
379 colnames(cos_sim_ori_rec) = "cos_sim" | 398 colnames(cos_sim_ori_rec) <- "cos_sim" |
380 cos_sim_ori_rec$sample = row.names(cos_sim_ori_rec) | 399 cos_sim_ori_rec$sample <- row.names(cos_sim_ori_rec) |
381 # Make barplot | 400 # Make barplot |
382 p9 <- ggplot(cos_sim_ori_rec, aes(y=cos_sim, x=sample)) + | 401 p9 <- ggplot(cos_sim_ori_rec, aes(y = cos_sim, x = sample)) + |
383 geom_bar(stat="identity", fill = "skyblue4") + | 402 geom_bar(stat = "identity", fill = "skyblue4") + |
384 coord_cartesian(ylim=c(0.8, 1)) + | 403 coord_cartesian(ylim = c(0.8, 1)) + |
385 # coord_flip(ylim=c(0.8,1)) + | 404 # coord_flip(ylim=c(0.8,1)) + |
386 ylab("Cosine similarity\n original VS reconstructed") + | 405 ylab("Cosine similarity\n original VS reconstructed") + |
387 xlab("") + | 406 xlab("") + |
388 # Reverse order of the samples such that first is up | 407 # Reverse order of the samples such that first is up |
389 # xlim(rev(levels(factor(cos_sim_ori_rec$sample)))) + | 408 # xlim(rev(levels(factor(cos_sim_ori_rec$sample)))) + |
390 theme_bw() + | 409 theme_bw() + |
391 theme(panel.grid.minor.y=element_blank(), | 410 theme(panel.grid.minor.y = element_blank(), |
392 panel.grid.major.y=element_blank()) + | 411 panel.grid.major.y = element_blank()) + |
393 # Add cut.off line | 412 # Add cut.off line |
394 geom_hline(aes(yintercept=.95)) | 413 geom_hline(aes(yintercept = .95)) |
395 grid.arrange(p9, top = textGrob("Similarity between true and reconstructed profiles (with all Cosmic sig.)",gp=gpar(fontsize=12,font=3))) | 414 grid.arrange(p9, top = textGrob("Similarity between true and reconstructed profiles (with all Cosmic sig.)", gp = gpar(fontsize = 12, font = 3))) |
396 dev.off() | 415 dev.off() |
397 } | 416 } |
398 | 417 |
399 | 418 |
400 # Output RData file | 419 # Output RData file |
401 if (!is.null(opt$rdata)) { | 420 if (!is.null(opt$rdata)) { |
402 save.image(file=opt$rdata) | 421 save.image(file = opt$rdata) |
403 } | 422 } |