Mercurial > repos > artbio > mutational_patterns
comparison mutational_patterns.R @ 25:b00fef2b1c2c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mutational_patterns commit eeb46055822c6805c209af0c450ae941100960bd
author | artbio |
---|---|
date | Wed, 06 Jul 2022 11:43:09 +0000 |
parents | ca6c19ee7da0 |
children | af5c65ad5317 |
comparison
equal
deleted
inserted
replaced
24:ca6c19ee7da0 | 25:b00fef2b1c2c |
---|---|
67 make_option( | 67 make_option( |
68 "--newsignum", | 68 "--newsignum", |
69 default = 2, | 69 default = 2, |
70 type = "integer", | 70 type = "integer", |
71 help = "Number of new signatures to be captured" | 71 help = "Number of new signatures to be captured" |
72 ), | |
73 make_option( | |
74 "--cosmic_id_threshold", | |
75 default = 0.85, | |
76 type = "double", | |
77 help = "minimu cosine similarity to rename a new signature according to cosmic v3.2" | |
72 ), | 78 ), |
73 make_option( | 79 make_option( |
74 "--output_spectrum", | 80 "--output_spectrum", |
75 default = NA, | 81 default = NA, |
76 type = "character", | 82 type = "character", |
203 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter | 209 # (For larger datasets it is wise to perform more iterations by changing the nrun parameter |
204 # to achieve stability and avoid local minima) | 210 # to achieve stability and avoid local minima) |
205 nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun) | 211 nmf_res <- extract_signatures(pseudo_mut_mat, rank = opt$newsignum, nrun = opt$nrun) |
206 # Assign signature COSMICv3.2 names | 212 # Assign signature COSMICv3.2 names |
207 cosmic_signatures <- get_known_signatures() | 213 cosmic_signatures <- get_known_signatures() |
208 nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = 0.85) | 214 nmf_res <- rename_nmf_signatures(nmf_res, cosmic_signatures, cutoff = opt$cosmic_id_threshold) |
209 sim_matrix <- cos_sim_matrix(cosmic_signatures, nmf_res$signatures) | 215 sim_matrix <- cos_sim_matrix(cosmic_signatures, nmf_res$signatures) |
210 plot_cosine_sim <- plot_cosine_heatmap(sim_matrix) | 216 plot_cosine_sim <- plot_cosine_heatmap(sim_matrix) |
211 grid.arrange(plot_cosine_sim) | 217 grid.arrange(plot_cosine_sim) |
212 # Plot the 96-profile of the signatures: | 218 # Plot the 96-profile of the signatures: |
213 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) | 219 p5 <- plot_96_profile(nmf_res$signatures, condensed = TRUE) |
220 grid.arrange(p5) | |
221 # write matrix of de novo signatures for user | |
214 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") | 222 new_sig_matrix <- reshape2::dcast(p5$data, substitution + context ~ sample, value.var = "freq") |
215 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) | 223 new_sig_matrix <- format(new_sig_matrix, scientific = TRUE) |
216 newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE), | 224 newcol <- paste0(gsub("\\..", "", new_sig_matrix$context, perl = TRUE), |
217 "[", new_sig_matrix$substitution, "]", | 225 "[", new_sig_matrix$substitution, "]", |
218 gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)) | 226 gsub("^.\\.", "", new_sig_matrix$context, perl = TRUE)) |
219 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) | 227 new_sig_matrix <- cbind(Type = newcol, new_sig_matrix[, seq_along(new_sig_matrix)[-c(1, 2)]]) |
220 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") | 228 write.table(new_sig_matrix, file = opt$sigmatrix, quote = FALSE, row.names = FALSE, sep = "\t") |
221 grid.arrange(p5) | |
222 # Visualize the contribution of the signatures in a barplot | 229 # Visualize the contribution of the signatures in a barplot |
223 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE) | 230 pc1 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "relative", coord_flip = TRUE) |
224 # Visualize the contribution of the signatures in absolute number of mutations | 231 # Visualize the contribution of the signatures in absolute number of mutations |
225 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "absolute", coord_flip = TRUE) | 232 pc2 <- plot_contribution(nmf_res$contribution, nmf_res$signature, mode = "absolute", coord_flip = TRUE) |
226 # Combine the two plots: | 233 # Combine the two plots: |
229 # The relative contribution of each signature for each sample can also be plotted as a heatmap with | 236 # The relative contribution of each signature for each sample can also be plotted as a heatmap with |
230 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots. | 237 # plot_contribution_heatmap, which might be easier to interpret and compare than stacked barplots. |
231 # The samples can be hierarchically clustered based on their Euclidean distance. The signatures | 238 # The samples can be hierarchically clustered based on their Euclidean distance. The signatures |
232 # can be plotted in a user-specified order. | 239 # can be plotted in a user-specified order. |
233 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: | 240 # Plot signature contribution as a heatmap with sample clustering dendrogram and a specified signature order: |
234 pch1 <- plot_contribution_heatmap(nmf_res$contribution, | 241 pch1 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = TRUE) |
235 sig_order = paste0("NewSig_", 1:opt$newsignum)) | |
236 # Plot signature contribution as a heatmap without sample clustering: | 242 # Plot signature contribution as a heatmap without sample clustering: |
237 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE) | 243 pch2 <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = FALSE) |
238 #Combine the plots into one figure: | 244 #Combine the plots into one figure: |
239 grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6)) | 245 grid.arrange(pch1, pch2, ncol = 2, widths = c(2, 1.6)) |
240 | 246 |
241 # Compare the reconstructed mutational profile with the original mutational profile: | 247 # Compare the reconstructed mutational profile with the original mutational profile: |
242 plot_compare_profiles(pseudo_mut_mat[, 1], | 248 pch3 <- plot_original_vs_reconstructed(pseudo_mut_mat, nmf_res$reconstructed, y_intercept = 0.95) |
243 nmf_res$reconstructed[, 1], | 249 grid.arrange(pch3) |
244 profile_names = c("Original", "Reconstructed"), | |
245 condensed = TRUE) | |
246 dev.off() | 250 dev.off() |
247 } | 251 } |
248 | 252 |
249 ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures #### | 253 ##### Section 3: Find optimal contribution of known signatures: COSMIC or OWN mutational signatures #### |
250 | 254 |