Mercurial > repos > artbio > mutational_patterns
comparison mutational_patterns.R @ 3:e332cf9dfa06 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit 0f7593f703ba0dfd12aea1c0460e371f08b57d2f"
author | artbio |
---|---|
date | Thu, 24 Sep 2020 01:32:52 +0000 |
parents | aea952be68cb |
children | 7ba08c826888 |
comparison
equal
deleted
inserted
replaced
2:aea952be68cb | 3:e332cf9dfa06 |
---|---|
27 default = NA, | 27 default = NA, |
28 type = 'character', | 28 type = 'character', |
29 help = "path to the tab separated file describing the levels in function of datasets" | 29 help = "path to the tab separated file describing the levels in function of datasets" |
30 ), | 30 ), |
31 make_option( | 31 make_option( |
32 "--cosmic_version", | |
33 default = "v2", | |
34 type = 'character', | |
35 help = "Version of the Cosmic Signature set to be used to express mutational patterns" | |
36 ), | |
37 make_option( | |
32 "--signum", | 38 "--signum", |
33 default = 2, | 39 default = 2, |
34 type = 'integer', | 40 type = 'integer', |
35 help = "selects the N most significant signatures in samples to express mutational patterns" | 41 help = "selects the N most significant signatures in samples to express mutational patterns" |
36 ), | 42 ), |
50 "--newsignum", | 56 "--newsignum", |
51 default = 2, | 57 default = 2, |
52 type = 'integer', | 58 type = 'integer', |
53 help = "Number of new signatures to be captured" | 59 help = "Number of new signatures to be captured" |
54 ), | 60 ), |
55 | |
56 make_option( | 61 make_option( |
57 "--output_spectrum", | 62 "--output_spectrum", |
58 default = NA, | 63 default = NA, |
59 type = 'character', | 64 type = 'character', |
60 help = "path to output dataset" | 65 help = "path to output dataset" |
64 default = NA, | 69 default = NA, |
65 type = 'character', | 70 type = 'character', |
66 help = "path to output dataset" | 71 help = "path to output dataset" |
67 ), | 72 ), |
68 make_option( | 73 make_option( |
74 "--sigmatrix", | |
75 default = NA, | |
76 type = 'character', | |
77 help = "path to signature matrix" | |
78 ), | |
79 make_option( | |
69 "--output_cosmic", | 80 "--output_cosmic", |
70 default = NA, | 81 default = NA, |
71 type = 'character', | 82 type = 'character', |
72 help = "path to output dataset" | 83 help = "path to output dataset" |
73 ) | 84 ), |
85 make_option( | |
86 c("-r", "--rdata"), | |
87 type="character", | |
88 default=NULL, | |
89 help="Path to RData output file") | |
74 ) | 90 ) |
75 | 91 |
76 opt = parse_args(OptionParser(option_list = option_list), | 92 opt = parse_args(OptionParser(option_list = option_list), |
77 args = commandArgs(trailingOnly = TRUE)) | 93 args = commandArgs(trailingOnly = TRUE)) |
78 | 94 |
79 ################ Manage input data #################### | 95 ################ Manage input data #################### |
80 json_dict <- opt$inputs | 96 json_dict <- opt$inputs |
81 parser <- newJSONParser() | 97 parser <- newJSONParser() |
82 parser$addData(json_dict) | 98 parser$addData(json_dict) |
83 fileslist <- parser$getObject() | 99 fileslist <- parser$getObject() |
84 vcf_files <- attr(fileslist, "names") | 100 vcf_paths <- attr(fileslist, "names") |
85 sample_names <- unname(unlist(fileslist)) | 101 element_identifiers <- unname(unlist(fileslist)) |
86 ref_genome <- opt$genome | 102 ref_genome <- opt$genome |
87 vcf_table <- data.frame(sample_name=sample_names, path=vcf_files) | 103 vcf_table <- data.frame(element_identifier=element_identifiers, path=vcf_paths) |
88 | 104 |
89 library(MutationalPatterns) | 105 library(MutationalPatterns) |
90 library(ref_genome, character.only = TRUE) | 106 library(ref_genome, character.only = TRUE) |
107 library(ggplot2) | |
91 | 108 |
92 # Load the VCF files into a GRangesList: | 109 # Load the VCF files into a GRangesList: |
93 vcfs <- read_vcfs_as_granges(vcf_files, sample_names, ref_genome) | 110 vcfs <- read_vcfs_as_granges(vcf_paths, element_identifiers, ref_genome) |
94 if (!is.na(opt$levels)[1]) { # manage levels if there are | 111 if (!is.na(opt$levels)[1]) { # manage levels if there are |
95 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("sample_name","level")) | 112 levels_table <- read.delim(opt$levels, header=FALSE, col.names=c("element_identifier","level")) |
96 library(dplyr) | 113 library(plyr) |
97 metadata_table <- left_join(vcf_table, levels_table, by = "sample_name") | 114 metadata_table <- join(vcf_table, levels_table, by = "element_identifier") |
98 # detach("package:dplyr", unload=TRUE) | 115 tissue <- as.vector(metadata_table$level) |
99 tissue <- metadata_table$level | |
100 } | 116 } |
101 | 117 |
102 ##### This is done for any section ###### | 118 ##### This is done for any section ###### |
103 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) | 119 mut_mat <- mut_matrix(vcf_list = vcfs, ref_genome = ref_genome) |
104 | |
105 | 120 |
106 ###### Section 1 Mutation characteristics and spectrums ############# | 121 ###### Section 1 Mutation characteristics and spectrums ############# |
107 if (!is.na(opt$output_spectrum)[1]) { | 122 if (!is.na(opt$output_spectrum)[1]) { |
108 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) | 123 pdf(opt$output_spectrum, paper = "special", width = 11.69, height = 11.69) |
109 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) | 124 type_occurrences <- mut_type_occurrences(vcfs, ref_genome) |
112 | 127 |
113 if (is.na(opt$levels)[1]) { | 128 if (is.na(opt$levels)[1]) { |
114 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend=TRUE) | 129 p1 <- plot_spectrum(type_occurrences, CT = TRUE, legend=TRUE) |
115 plot(p1) | 130 plot(p1) |
116 } else { | 131 } else { |
117 p2 <- plot_spectrum(type_occurrences, by = tissue, CT=TRUE, legend=TRUE) # by sample | 132 p2 <- plot_spectrum(type_occurrences, by = tissue, CT=TRUE) # by levels |
118 p3 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE) # total | 133 p3 <- plot_spectrum(type_occurrences, CT=TRUE, legend=TRUE) # total |
119 grid.arrange(p2, p3, ncol=2, widths=c(4,2.3), heights=c(4,1)) | 134 grid.arrange(p2, p3, ncol=2, widths=c(4,2.3), heights=c(4,1)) |
120 } | 135 } |
121 plot_96_profile(mut_mat, condensed = TRUE) | 136 plot_96_profile(mut_mat, condensed = TRUE) |
122 dev.off() | 137 dev.off() |
123 } | 138 } |
124 | 139 |
125 ###### Section 2: De novo mutational signature extraction using NMF ####### | 140 ###### Section 2: De novo mutational signature extraction using NMF ####### |
126 if (!is.na(opt$output_denovo)[1]) { | 141 if (!is.na(opt$output_denovo)[1]) { |
127 mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix | 142 # opt$rank cannot be higher than the number of samples |
143 if (opt$rank > length(element_identifiers)) {opt$rank <-length(element_identifiers)} | |
144 # likewise, opt$signum cannot be higher thant the number of samples | |
145 if (opt$signum > length(element_identifiers)) {opt$signum <-length(element_identifiers)} | |
146 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small pseudocount to the mutation count matrix | |
128 # Use the NMF package to generate an estimate rank plot | 147 # Use the NMF package to generate an estimate rank plot |
129 library("NMF") | 148 library("NMF") |
130 estimate <- nmf(mut_mat, rank=1:opt$rank+1, method="brunet", nrun=opt$nrun, seed=123456) | 149 estimate <- nmf(pseudo_mut_mat, rank=1:opt$rank, method="brunet", nrun=opt$nrun, seed=123456) |
131 # And plot it | 150 # And plot it |
132 pdf(opt$output_denovo, paper = "special", width = 11.69, height = 11.69) | 151 pdf(opt$output_denovo, paper = "special", width = 11.69, height = 11.69) |
133 p.trans <- plot(estimate) | 152 p4 <- plot(estimate) |
134 grid.arrange(p.trans) | 153 grid.arrange(p4) |
135 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures | 154 # Extract 4 (PARAMETIZE) mutational signatures from the mutation count matrix with extract_signatures |
136 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter | 155 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter |
137 # to achieve stability and avoid local minima) | 156 # to achieve stability and avoid local minima) |
138 nmf_res <- extract_signatures(mut_mat, rank=opt$rank, nrun=opt$nrun) | 157 nmf_res <- extract_signatures(pseudo_mut_mat, rank=opt$newsignum, nrun=opt$nrun) |
139 # Assign signature names | 158 # Assign signature names |
140 colnames(nmf_res$signatures) <- paste0("NewSig_", 1:opt$rank) | 159 colnames(nmf_res$signatures) <- paste0("NewSig_", 1:opt$newsignum) |
141 rownames(nmf_res$contribution) <- paste0("NewSig_", 1:opt$rank) | 160 rownames(nmf_res$contribution) <- paste0("NewSig_", 1:opt$newsignum) |
142 # Plot the 96-profile of the signatures: | 161 # Plot the 96-profile of the signatures: |
143 p.trans <- plot_96_profile(nmf_res$signatures, condensed = TRUE) | 162 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) |
144 grid.arrange(p.trans) | 163 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ variable, value.var = "value") |
164 new_sig_matrix = format(new_sig_matrix, scientific=TRUE) | |
165 write.table(new_sig_matrix, file=opt$sigmatrix, quote = FALSE, row.names = FALSE, sep="\t") | |
166 grid.arrange(p5) | |
145 # Visualize the contribution of the signatures in a barplot | 167 # Visualize the contribution of the signatures in a barplot |
146 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="relative", coord_flip = TRUE) | 168 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="relative", coord_flip = TRUE) |
147 # Visualize the contribution of the signatures in absolute number of mutations | 169 # Visualize the contribution of the signatures in absolute number of mutations |
148 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="absolute", coord_flip = TRUE) | 170 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode="absolute", coord_flip = TRUE) |
149 # Combine the two plots: | 171 # Combine the two plots: |
153 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots. | 175 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots. |
154 # The samples can be hierarchically clustered based on their euclidean dis- tance. The signatures | 176 # The samples can be hierarchically clustered based on their euclidean dis- tance. The signatures |
155 # can be plotted in a user-specified order. | 177 # can be plotted in a user-specified order. |
156 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: | 178 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: |
157 pch1 <- plot_contribution_heatmap(nmf_res$contribution, | 179 pch1 <- plot_contribution_heatmap(nmf_res$contribution, |
158 sig_order = paste0("NewSig_", 1:opt$rank)) | 180 sig_order = paste0("NewSig_", 1:opt$newsignum)) |
159 # Plot signature contribution as a heatmap without sample clustering: | 181 # Plot signature contribution as a heatmap without sample clustering: |
160 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples=FALSE) | 182 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples=FALSE) |
161 #Combine the plots into one figure: | 183 #Combine the plots into one figure: |
162 grid.arrange(pch1, pch2, ncol = 2, widths = c(2,1.6)) | 184 grid.arrange(pch1, pch2, ncol = 2, widths = c(2,1.6)) |
163 | 185 |
164 # Compare the reconstructed mutational profile with the original mutational profile: | 186 # Compare the reconstructed mutational profile with the original mutational profile: |
165 plot_compare_profiles(mut_mat[,1], | 187 plot_compare_profiles(pseudo_mut_mat[,1], |
166 nmf_res$reconstructed[,1], | 188 nmf_res$reconstructed[,1], |
167 profile_names = c("Original", "Reconstructed"), | 189 profile_names = c("Original", "Reconstructed"), |
168 condensed = TRUE) | 190 condensed = TRUE) |
169 dev.off() | 191 dev.off() |
170 } | 192 } |
171 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures #### | 193 ##### Section 3: Find optimal contribution of known signatures: COSMIC mutational signatures #### |
172 | 194 |
173 if (!is.na(opt$output_cosmic)[1]) { | 195 if (!is.na(opt$output_cosmic)[1]) { |
174 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69) | 196 pdf(opt$output_cosmic, paper = "special", width = 11.69, height = 11.69) |
175 sp_url <- paste("https://cancer.sanger.ac.uk/cancergenome/assets/", "signatures_probabilities.txt", sep = "") | 197 pseudo_mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix |
176 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) | 198 if (opt$cosmic_version == "v2") { |
177 mut_mat <- mut_mat + 0.0001 # First add a small psuedocount to the mutation count matrix | 199 sp_url <- paste("https://cancer.sanger.ac.uk/cancergenome/assets/", "signatures_probabilities.txt", sep = "") |
178 new_order = match(row.names(mut_mat), cancer_signatures$Somatic.Mutation.Type) | 200 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) |
179 cancer_signatures = cancer_signatures[as.vector(new_order),] | 201 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type) |
180 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type | 202 cancer_signatures = cancer_signatures[as.vector(new_order),] |
181 cancer_signatures = as.matrix(cancer_signatures[,4:33]) | 203 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type |
204 cancer_signatures = as.matrix(cancer_signatures[,4:33]) | |
205 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels | |
206 cosmic_tag <- "Signatures (Cosmic v2, March 2015)" | |
207 } else { | |
208 sp_url <- "https://raw.githubusercontent.com/ARTbio/startbio/master/sigProfiler_SBS_signatures_2019_05_22.tsv" | |
209 cancer_signatures = read.table(sp_url, sep = "\t", header = TRUE) | |
210 new_order = match(row.names(pseudo_mut_mat), cancer_signatures$Somatic.Mutation.Type) | |
211 cancer_signatures = cancer_signatures[as.vector(new_order),] | |
212 row.names(cancer_signatures) = cancer_signatures$Somatic.Mutation.Type | |
213 cancer_signatures = as.matrix(cancer_signatures[,4:70]) | |
214 colnames(cancer_signatures) <- gsub("SBS", "", colnames(cancer_signatures)) # shorten signature labels | |
215 cosmic_tag <- "Signatures (Cosmic v3, May 2019)" | |
216 } | |
182 | 217 |
183 # Plot mutational profiles of the COSMIC signatures | 218 # Plot mutational profiles of the COSMIC signatures |
184 colnames(cancer_signatures) <- gsub("Signature.", "", colnames(cancer_signatures)) # shorten signature labels | 219 if (opt$cosmic_version == "v2") { |
185 p.trans <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) | 220 p6 <- plot_96_profile(cancer_signatures, condensed = TRUE, ymax = 0.3) |
186 grid.arrange(p.trans, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3))) | 221 grid.arrange(p6, top = textGrob("COSMIC signature profiles",gp=gpar(fontsize=12,font=3))) |
222 } else { | |
223 print(length(cancer_signatures)) | |
224 p6 <- plot_96_profile(cancer_signatures[,1:33], condensed = TRUE, ymax = 0.3) | |
225 p6bis <- plot_96_profile(cancer_signatures[,34:67], condensed = TRUE, ymax = 0.3) | |
226 grid.arrange(p6, top = textGrob("COSMIC signature profiles (on two pages)",gp=gpar(fontsize=12,font=3))) | |
227 grid.arrange(p6bis, top = textGrob("COSMIC signature profiles (continued)",gp=gpar(fontsize=12,font=3))) | |
228 } | |
187 | 229 |
188 # Hierarchically cluster the COSMIC signatures based on their similarity with average linkage | 230 # Hierarchically cluster the COSMIC signatures based on their similarity with average linkage |
189 # hclust_cosmic = cluster_signatures(cancer_signatures, method = "average") | 231 # hclust_cosmic = cluster_signatures(cancer_signatures, method = "average") |
190 # store signatures in new order | 232 # store signatures in new order |
191 # cosmic_order = colnames(cancer_signatures)[hclust_cosmic$order] | 233 # cosmic_order = colnames(cancer_signatures)[hclust_cosmic$order] |
209 # Plot heatmap with specified signature order | 251 # Plot heatmap with specified signature order |
210 # p.trans <- plot_cosine_heatmap(cos_sim_samples_signatures, col_order = cosmic_order, cluster_rows = TRUE) | 252 # p.trans <- plot_cosine_heatmap(cos_sim_samples_signatures, col_order = cosmic_order, cluster_rows = TRUE) |
211 # grid.arrange(p.trans) | 253 # grid.arrange(p.trans) |
212 | 254 |
213 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles | 255 # Find optimal contribution of COSMIC signatures to reconstruct 96 mutational profiles |
214 fit_res <- fit_to_signatures(mut_mat, cancer_signatures) | 256 fit_res <- fit_to_signatures(pseudo_mut_mat, cancer_signatures) |
215 | 257 |
216 # Select signatures with some contribution (above a threshold) | 258 # Select signatures with some contribution (above a threshold) |
217 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1] | 259 threshold <- tail(sort(unlist(rowSums(fit_res$contribution), use.names = FALSE)), opt$signum)[1] |
218 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded | 260 select <- which(rowSums(fit_res$contribution) >= threshold) # ensure opt$signum best signatures in samples are retained, the others discarded |
219 # Plot contribution barplots | 261 # Plot contribution barplots |
220 pc1 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "absolute") | 262 pc3 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "absolute") |
221 pc2 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "relative") | 263 pc4 <- plot_contribution(fit_res$contribution[select,], cancer_signatures[,select], coord_flip = T, mode = "relative") |
264 ##### | |
265 # ggplot2 alternative | |
266 if (!is.na(opt$levels)[1]) { # if there are levels to display in graphs | |
267 pc3_data <- pc3$data | |
268 pc3_data <- merge (pc3_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") | |
269 pc3 <- ggplot(pc3_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | |
270 geom_bar(stat="identity", position='stack') + | |
271 scale_fill_discrete(name="Cosmic\nSignature") + | |
272 labs(x = "Samples", y = "Absolute contribution") + theme_bw() + | |
273 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + | |
274 facet_grid(~level, scales = "free_x") | |
275 pc4_data <- pc4$data | |
276 pc4_data <- merge (pc4_data, metadata_table[,c(1,3)], by.x="Sample", by.y="element_identifier") | |
277 pc4 <- ggplot(pc4_data, aes(x=Sample, y=Contribution, fill=as.factor(Signature))) + | |
278 geom_bar(stat="identity", position='fill') + | |
279 scale_fill_discrete(name="Cosmic\nSignature") + | |
280 scale_y_continuous(labels = scales::percent_format(accuracy = 1)) + | |
281 labs(x = "Samples", y = "Relative contribution") + theme_bw() + | |
282 theme(panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank()) + | |
283 facet_grid(~level, scales = "free_x") | |
284 } | |
222 # Combine the two plots: | 285 # Combine the two plots: |
223 grid.arrange(pc1, pc2) | 286 grid.arrange(pc3, pc4, top = textGrob("Absolute and Relative Contributions of Cosmic signatures to mutational patterns",gp=gpar(fontsize=12,font=3))) |
224 ## pie charts of comic signatures contributions in samples | 287 ## pie charts of comic signatures contributions in samples |
225 | 288 |
226 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,]))) | 289 sig_data_pie <- as.data.frame(t(head(fit_res$contribution[select,]))) |
227 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie)) | 290 colnames(sig_data_pie) <- gsub("nature", "", colnames(sig_data_pie)) |
228 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100 | 291 sig_data_pie_percents <- sig_data_pie / (apply(sig_data_pie,1,sum)) * 100 |
229 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents) | 292 sig_data_pie_percents$sample <- rownames(sig_data_pie_percents) |
230 library(reshape2) | 293 library(reshape2) |
231 melted_sig_data_pie_percents <-melt(data=sig_data_pie_percents) | 294 melted_sig_data_pie_percents <-melt(data=sig_data_pie_percents) |
232 melted_sig_data_pie_percents$label <- sub("Sig.", "", melted_sig_data_pie_percents$variable) | 295 melted_sig_data_pie_percents$label <- sub("Sig.", "", melted_sig_data_pie_percents$variable) |
233 melted_sig_data_pie_percents$pos <- cumsum(melted_sig_data_pie_percents$value) - melted_sig_data_pie_percents$value/2 | 296 melted_sig_data_pie_percents$pos <- cumsum(melted_sig_data_pie_percents$value) - melted_sig_data_pie_percents$value/2 |
234 library(ggplot2) | 297 p7 <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) + |
235 p.trans <- ggplot(melted_sig_data_pie_percents, aes(x="", y=value, group=variable, fill=variable)) + | |
236 geom_bar(width = 1, stat = "identity") + | 298 geom_bar(width = 1, stat = "identity") + |
237 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + | 299 geom_text(aes(label = label), position = position_stack(vjust = 0.5), color="black", size=3) + |
238 coord_polar("y", start=0) + facet_wrap(~ sample) + | 300 coord_polar("y", start=0) + facet_wrap(~ sample) + |
239 labs(x="", y="Samples", fill = "Signatures (Cosmic_v2,March 2015)") + | 301 labs(x="", y="Samples", fill = cosmic_tag) + |
240 theme(axis.text = element_blank(), | 302 theme(axis.text = element_blank(), |
241 axis.ticks = element_blank(), | 303 axis.ticks = element_blank(), |
242 panel.grid = element_blank()) | 304 panel.grid = element_blank()) |
243 grid.arrange(p.trans) | 305 grid.arrange(p7) |
244 | 306 |
245 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering | 307 # Plot relative contribution of the cancer signatures in each sample as a heatmap with sample clustering |
246 p.trans <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") | 308 p8 <- plot_contribution_heatmap(fit_res$contribution, cluster_samples = TRUE, method = "complete") |
247 grid.arrange(p.trans) | 309 grid.arrange(p8) |
248 | 310 |
249 # Compare the reconstructed mutational profile of sample 1 with its original mutational profile | 311 # Compare the reconstructed mutational profile of sample 1 with its original mutational profile |
250 # plot_compare_profiles(mut_mat[,1], fit_res$reconstructed[,1], | 312 # plot_compare_profiles(mut_mat[,1], fit_res$reconstructed[,1], |
251 # profile_names = c("Original", "Reconstructed"), | 313 # profile_names = c("Original", "Reconstructed"), |
252 # condensed = TRUE) | 314 # condensed = TRUE) |
253 | 315 |
254 # Calculate the cosine similarity between all original and reconstructed mutational profiles with | 316 # Calculate the cosine similarity between all original and reconstructed mutational profiles with |
255 # `cos_sim_matrix` | 317 # `cos_sim_matrix` |
256 | 318 |
257 # calculate all pairwise cosine similarities | 319 # calculate all pairwise cosine similarities |
258 cos_sim_ori_rec <- cos_sim_matrix(mut_mat, fit_res$reconstructed) | 320 cos_sim_ori_rec <- cos_sim_matrix(pseudo_mut_mat, fit_res$reconstructed) |
259 # extract cosine similarities per sample between original and reconstructed | 321 # extract cosine similarities per sample between original and reconstructed |
260 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec)) | 322 cos_sim_ori_rec <- as.data.frame(diag(cos_sim_ori_rec)) |
261 | 323 |
262 # We can use ggplot to make a barplot of the cosine similarities between the original and | 324 # We can use ggplot to make a barplot of the cosine similarities between the original and |
263 # reconstructed mutational profile of each sample. This clearly shows how well each mutational | 325 # reconstructed mutational profile of each sample. This clearly shows how well each mutational |
268 | 330 |
269 # Adjust data frame for plotting with gpplot | 331 # Adjust data frame for plotting with gpplot |
270 colnames(cos_sim_ori_rec) = "cos_sim" | 332 colnames(cos_sim_ori_rec) = "cos_sim" |
271 cos_sim_ori_rec$sample = row.names(cos_sim_ori_rec) | 333 cos_sim_ori_rec$sample = row.names(cos_sim_ori_rec) |
272 # Make barplot | 334 # Make barplot |
273 p.trans <- ggplot(cos_sim_ori_rec, aes(y=cos_sim, x=sample)) + | 335 p9 <- ggplot(cos_sim_ori_rec, aes(y=cos_sim, x=sample)) + |
274 geom_bar(stat="identity", fill = "skyblue4") + | 336 geom_bar(stat="identity", fill = "skyblue4") + |
275 coord_cartesian(ylim=c(0.8, 1)) + | 337 coord_cartesian(ylim=c(0.8, 1)) + |
276 # coord_flip(ylim=c(0.8,1)) + | 338 # coord_flip(ylim=c(0.8,1)) + |
277 ylab("Cosine similarity\n original VS reconstructed") + | 339 ylab("Cosine similarity\n original VS reconstructed") + |
278 xlab("") + | 340 xlab("") + |
281 theme_bw() + | 343 theme_bw() + |
282 theme(panel.grid.minor.y=element_blank(), | 344 theme(panel.grid.minor.y=element_blank(), |
283 panel.grid.major.y=element_blank()) + | 345 panel.grid.major.y=element_blank()) + |
284 # Add cut.off line | 346 # Add cut.off line |
285 geom_hline(aes(yintercept=.95)) | 347 geom_hline(aes(yintercept=.95)) |
286 grid.arrange(p.trans, top = textGrob("Similarity between true and reconstructed profiles (with all Cosmic sig.)",gp=gpar(fontsize=12,font=3))) | 348 grid.arrange(p9, top = textGrob("Similarity between true and reconstructed profiles (with all Cosmic sig.)",gp=gpar(fontsize=12,font=3))) |
287 | |
288 dev.off() | 349 dev.off() |
289 } | 350 } |
351 | |
352 | |
353 # Output RData file | |
354 if (!is.null(opt$rdata)) { | |
355 save.image(file=opt$rdata) | |
356 } |