Mercurial > repos > galaxyp > mqppep_preproc
comparison MaxQuantProcessingScript.R @ 0:8dfd5d2b5903 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3a7b3609d6e514c9e8f980ecb684960c6b2252fe
author | galaxyp |
---|---|
date | Mon, 11 Jul 2022 19:22:54 +0000 |
parents | |
children | b76c75521d91 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8dfd5d2b5903 |
---|---|
1 #!/usr/bin/env Rscript | |
2 | |
3 # This is the implementation for the | |
4 # "MaxQuant Phosphopeptide Localization Probability Cutoff" | |
5 # Galaxy tool (mqppep_lclztn_filter) | |
6 # It is adapted from the MaxQuant Processing Script written by Larry Cheng. | |
7 | |
8 # libraries | |
9 library(optparse) | |
10 library(data.table) | |
11 library(stringr) | |
12 library(ggplot2) | |
13 | |
14 # title: "MaxQuant Processing Script" | |
15 # author: "Larry Cheng" | |
16 # date: "February 19, 2018" | |
17 # | |
18 # # MaxQuant Processing Script | |
19 # Takes MaxQuant Phospho (STY)sites.txt file as input | |
20 # and performs the following (in order): | |
21 # 1) Runs the Proteomics Quality Control software | |
22 # 2) Remove contaminant and reverse sequence rows | |
23 # 3) Filters rows based on localization probability | |
24 # 4) Extract the quantitative data | |
25 # 5) Sequences phosphopeptides | |
26 # 6) Merges multiply phosphorylated peptides | |
27 # 7) Filters out phosphopeptides based on enrichment | |
28 # The output file contains the phosphopeptide (first column) | |
29 # and the quantitative values for each sample. | |
30 # | |
31 # ## Revision History | |
32 # Rev. 2022-02-10 :wrap for inclusion in Galaxy | |
33 # Rev. 2018-02-19 :break up analysis script into "MaxQuant Processing Script" | |
34 # and "Phosphopeptide Processing Script" | |
35 # Rev. 2017-12-12 :added PTXQC | |
36 # added additional plots and table outputs for quality control | |
37 # allowed for more than 2 samples to be grouped together | |
38 # (up to 26 (eg, 1A, 1B, 1C, etc)) | |
39 # converted from .r to .rmd file to knit report | |
40 # for quality control | |
41 # Rev. 2016-09-11 :automated the FDR cutoffs; removed the option to data | |
42 # impute multiple times | |
43 # Rev. 2016-09-09 :added filter to eliminate contaminant & reverse sequence rows | |
44 # Rev. 2016-09-01 :moved the collapse step from after ANOVA filter to prior to | |
45 # preANOVA file output | |
46 # Rev. 2016-08-22 :use regexSampleNames <- "\\.(\\d + )[AB]$" | |
47 # so that it looks at the end of string | |
48 # Rev. 2016-08-05 :Removed vestigial line (ppeptides <- ....) | |
49 # Rev. 2016-07-03 :Removed row names from the write.table() output for | |
50 # ANOVA and PreANOVA | |
51 # Rev. 2016-06-25 :Set default Localization Probability cutoff to 0.75 | |
52 # Rev. 2016-06-23 :fixed a bug in filtering for pY enrichment by resetting | |
53 # the row numbers afterwards | |
54 # Rev. 2016-06-21 :test18 + standardized the regexpression in protocol | |
55 | |
56 | |
57 ### FUNCTION DECLARATIONS begin ---------------------------------------------- | |
58 | |
59 # Read first line of file at filePath | |
60 # adapted from: https://stackoverflow.com/a/35761217/15509512 | |
61 read_first_line <- function(filepath) { | |
62 con <- file(filepath, "r") | |
63 line <- readLines(con, n = 1) | |
64 close(con) | |
65 return(line) | |
66 } | |
67 | |
68 # Move columns to the end of dataframe | |
69 # - data: the dataframe | |
70 # - move: a vector of column names, each of which is an element of names(data) | |
71 movetolast <- function(data, move) { | |
72 data[c(setdiff(names(data), move), move)] | |
73 } | |
74 | |
75 # Generate phosphopeptide and build list when applied | |
76 phosphopeptide_func <- function(df) { | |
77 # generate peptide sequence and list of phosphopositions | |
78 phosphoprobsequence <- | |
79 strsplit(as.character(df["Phospho (STY) Score diffs"]), "")[[1]] | |
80 output <- vector() | |
81 phosphopeptide <- "" | |
82 counter <- 0 # keep track of position in peptide | |
83 phosphopositions <- | |
84 vector() # keep track of phosphorylation positions in peptide | |
85 score_diff <- "" | |
86 for (chara in phosphoprobsequence) { | |
87 # build peptide sequence | |
88 if (!( | |
89 chara == " " || | |
90 chara == "(" || | |
91 chara == ")" || | |
92 chara == "." || | |
93 chara == "-" || | |
94 chara == "0" || | |
95 chara == "1" || | |
96 chara == "2" || | |
97 chara == "3" || | |
98 chara == "4" || | |
99 chara == "5" || | |
100 chara == "6" || | |
101 chara == "7" || | |
102 chara == "8" || | |
103 chara == "9") | |
104 ) { | |
105 phosphopeptide <- paste(phosphopeptide, chara, sep = "") | |
106 counter <- counter + 1 | |
107 } | |
108 # generate score_diff | |
109 if (chara == "-" || | |
110 chara == "." || | |
111 chara == "0" || | |
112 chara == "1" || | |
113 chara == "2" || | |
114 chara == "3" || | |
115 chara == "4" || | |
116 chara == "5" || | |
117 chara == "6" || | |
118 chara == "7" || | |
119 chara == "8" || | |
120 chara == "9" | |
121 ) { | |
122 score_diff <- paste(score_diff, chara, sep = "") | |
123 } | |
124 # evaluate score_diff | |
125 if (chara == ")") { | |
126 score_diff <- as.numeric(score_diff) | |
127 # only consider a phosphoresidue if score_diff > 0 | |
128 if (score_diff > 0) { | |
129 phosphopositions <- append(phosphopositions, counter) | |
130 } | |
131 score_diff <- "" | |
132 } | |
133 } | |
134 | |
135 # generate phosphopeptide sequence (ie, peptide sequence with "p"'s) | |
136 counter <- 1 | |
137 phosphoposition_correction1 <- | |
138 -1 # used to correct phosphosposition as "p"'s | |
139 # are inserted into the phosphopeptide string | |
140 phosphoposition_correction2 <- | |
141 0 # used to correct phosphosposition as "p"'s | |
142 # are inserted into the phosphopeptide string | |
143 while (counter <= length(phosphopositions)) { | |
144 phosphopeptide <- | |
145 paste( | |
146 substr( | |
147 phosphopeptide, | |
148 0, | |
149 phosphopositions[counter] + phosphoposition_correction1 | |
150 ), | |
151 "p", | |
152 substr( | |
153 phosphopeptide, | |
154 phosphopositions[counter] + phosphoposition_correction2, | |
155 nchar(phosphopeptide) | |
156 ), | |
157 sep = "" | |
158 ) | |
159 counter <- counter + 1 | |
160 phosphoposition_correction1 <- phosphoposition_correction1 + 1 | |
161 phosphoposition_correction2 <- phosphoposition_correction2 + 1 | |
162 } | |
163 # building phosphopeptide list | |
164 output <- append(output, phosphopeptide) | |
165 return(output) | |
166 } | |
167 | |
168 ### FUNCTION DECLARATIONS end ------------------------------------------------ | |
169 | |
170 | |
171 ### EXTRACT ARGUMENTS begin -------------------------------------------------- | |
172 | |
173 # parse options | |
174 option_list <- list( | |
175 make_option( | |
176 c("-i", "--input"), | |
177 action = "store", | |
178 type = "character", | |
179 help = "A MaxQuant Phospho (STY)Sites.txt" | |
180 ) | |
181 , | |
182 make_option( | |
183 c("-o", "--output"), | |
184 action = "store", | |
185 type = "character", | |
186 help = "path to output file" | |
187 ) | |
188 , | |
189 make_option( | |
190 c("-E", "--enrichGraph"), | |
191 action = "store", | |
192 type = "character", | |
193 help = "path to enrichment graph PDF" | |
194 ) | |
195 , | |
196 make_option( | |
197 c("-F", "--enrichGraph_svg"), | |
198 action = "store", | |
199 type = "character", | |
200 help = "path to enrichment graph SVG" | |
201 ) | |
202 , | |
203 make_option( | |
204 c("-L", "--locProbCutoffGraph"), | |
205 action = "store", | |
206 type = "character", | |
207 help = "path to location-proability cutoff graph PDF" | |
208 ) | |
209 , | |
210 make_option( | |
211 c("-M", "--locProbCutoffGraph_svg"), | |
212 action = "store", | |
213 type = "character", | |
214 help = "path to location-proability cutoff graph SVG" | |
215 ) | |
216 , | |
217 make_option( | |
218 c("-e", "--enriched"), | |
219 action = "store", | |
220 type = "character", | |
221 help = "pY or pST enriched samples (ie, 'Y' or 'ST')" | |
222 ) | |
223 # default = "^Number of Phospho [(]STY[)]$", | |
224 , | |
225 make_option( | |
226 c("-p", "--phosphoCol"), | |
227 action = "store", | |
228 type = "character", | |
229 help = paste0("PERL-compatible regular expression matching", | |
230 " header of column having number of 'Phospho (STY)'") | |
231 ) | |
232 # default = "^Intensity[^_]", | |
233 , | |
234 make_option( | |
235 c("-s", "--startCol"), | |
236 action = "store", | |
237 type = "character", | |
238 help = paste0("PERL-compatible regular expression matching", | |
239 " header of column having first sample intensity") | |
240 ) | |
241 # default = 1, | |
242 , | |
243 make_option( | |
244 c("-I", "--intervalCol"), | |
245 action = "store", | |
246 type = "integer", | |
247 help = paste0("Column interval between the Intensities of samples", | |
248 " (eg, 1 if subsequent column; 2 if every other column") | |
249 ) | |
250 # default = 0.75, | |
251 , | |
252 make_option( | |
253 c("-l", "--localProbCutoff"), | |
254 action = "store", | |
255 type = "double", | |
256 help = "Localization Probability Cutoff" | |
257 ) | |
258 # default = "sum", | |
259 , | |
260 make_option( | |
261 c("-f", "--collapse_func"), | |
262 action = "store", | |
263 type = "character", | |
264 help = paste0("merge identical phosphopeptides", | |
265 " by ('sum' or 'average') the intensities") | |
266 ) | |
267 # default = "filtered_data.txt", | |
268 , | |
269 make_option( | |
270 c("-r", "--filtered_data"), | |
271 action = "store", | |
272 type = "character", | |
273 help = "filtered_data.txt" | |
274 ) | |
275 # default = "quantData.txt", | |
276 , | |
277 make_option( | |
278 c("-q", "--quant_data"), | |
279 action = "store", | |
280 type = "character", | |
281 help = "quantData.txt" | |
282 ) | |
283 ) | |
284 args <- parse_args(OptionParser(option_list = option_list)) | |
285 # Check parameter values | |
286 | |
287 ### EXTRACT ARGUMENTS end ---------------------------------------------------- | |
288 | |
289 | |
290 ### EXTRACT PARAMETERS from arguments begin ---------------------------------- | |
291 | |
292 if (!file.exists(args$input)) { | |
293 stop((paste("File", args$input, "does not exist"))) | |
294 } | |
295 | |
296 phospho_col_pattern <- "^Number of Phospho [(][STY][STY]*[)]$" | |
297 start_col_pattern <- "^Intensity[^_]" | |
298 phospho_col_pattern <- read_first_line(args$phosphoCol) | |
299 start_col_pattern <- read_first_line(args$startCol) | |
300 | |
301 sink(getConnection(2)) | |
302 | |
303 input_file_name <- args$input | |
304 filtered_filename <- args$filtered_data | |
305 quant_file_name <- args$quant_data | |
306 interval_col <- as.integer(args$intervalCol) | |
307 | |
308 first_line <- read_first_line(input_file_name) | |
309 col_headers <- | |
310 unlist(strsplit( | |
311 x = first_line, | |
312 split = c("\t"), | |
313 fixed = TRUE | |
314 )) | |
315 sink(getConnection(2)) | |
316 sink() | |
317 | |
318 | |
319 intensity_header_cols <- | |
320 grep(pattern = start_col_pattern, x = col_headers, perl = TRUE) | |
321 if (length(intensity_header_cols) == 0) { | |
322 err_msg <- | |
323 paste("Found no intensity columns matching pattern:", | |
324 start_col_pattern) | |
325 # Divert output to stderr | |
326 sink(getConnection(2)) | |
327 print(err_msg) | |
328 sink() | |
329 stop(err_msg) | |
330 } | |
331 | |
332 | |
333 phospho_col <- | |
334 grep(pattern = phospho_col_pattern, x = col_headers, perl = TRUE)[1] | |
335 if (is.na(phospho_col)) { | |
336 err_msg <- | |
337 paste("Found no 'number of phospho sites' columns matching pattern:", | |
338 phospho_col_pattern) | |
339 # Divert output to stderr | |
340 sink(getConnection(2)) | |
341 print(err_msg) | |
342 sink() | |
343 stop(err_msg) | |
344 } | |
345 | |
346 | |
347 i_count <- 0 | |
348 this_column <- 1 | |
349 last_value <- intensity_header_cols[1] | |
350 intensity_cols <- c(last_value) | |
351 | |
352 while (length(intensity_header_cols) >= interval_col * i_count) { | |
353 i_count <- 1 + i_count | |
354 this_column <- interval_col + this_column | |
355 if (last_value + interval_col != intensity_header_cols[this_column]) | |
356 break | |
357 last_value <- intensity_header_cols[this_column] | |
358 if (length(intensity_header_cols) < interval_col * i_count) | |
359 break | |
360 intensity_cols <- | |
361 c(intensity_cols, intensity_header_cols[this_column]) | |
362 } | |
363 | |
364 start_col <- intensity_cols[1] | |
365 num_samples <- i_count | |
366 | |
367 output_filename <- args$output | |
368 enrich_graph_filename <- args$enrichGraph | |
369 loc_prob_cutoff_graph_filename <- args$locProbCutoffGraph | |
370 enrich_graph_filename_svg <- args$enrichGraph_svg | |
371 loc_prob_cutoff_graph_fn_svg <- args$locProbCutoffGraph_svg | |
372 | |
373 local_prob_cutoff <- args$localProbCutoff | |
374 enriched <- args$enriched | |
375 collapse_fn <- args$collapse_func | |
376 | |
377 ### EXTRACT PARAMETERS from arguments end ------------------------------------ | |
378 | |
379 | |
380 # Proteomics Quality Control for MaxQuant Results | |
381 # (Bielow C et al. J Proteome Res. 2016 PMID: 26653327) | |
382 # is run by the Galaxy MaxQuant wrapper and need not be invoked here. | |
383 | |
384 | |
385 # Read & filter out contaminants, reverse sequences, & localization probability | |
386 # --- | |
387 full_data <- | |
388 read.table( | |
389 file = input_file_name, | |
390 sep = "\t", | |
391 header = TRUE, | |
392 quote = "" | |
393 ) | |
394 | |
395 # Filter out contaminant rows and reverse rows | |
396 filtered_data <- subset(full_data, !grepl("CON__", Proteins)) | |
397 filtered_data <- | |
398 subset(filtered_data, !grepl("_MYCOPLASMA", Proteins)) | |
399 filtered_data <- | |
400 subset(filtered_data, !grepl("CONTAMINANT_", Proteins)) | |
401 filtered_data <- | |
402 subset(filtered_data, !grepl("REV__", Protein) | |
403 ) # since REV__ rows are blank in the first column (Proteins) | |
404 write.table( | |
405 filtered_data, | |
406 file = filtered_filename, | |
407 sep = "\t", | |
408 quote = FALSE, | |
409 col.names = TRUE, | |
410 row.names = FALSE | |
411 ) | |
412 # ... | |
413 | |
414 | |
415 # Filter out data with localization probability below localProbCutoff | |
416 # --- | |
417 # Data filtered by localization probability | |
418 loc_prob_filtered_data <- | |
419 filtered_data[ | |
420 filtered_data$Localization.prob >= local_prob_cutoff, | |
421 ] | |
422 # ... | |
423 | |
424 | |
425 # Localization probability -- visualize locprob cutoff | |
426 # --- | |
427 loc_prob_graph_data <- | |
428 data.frame( | |
429 group = c(paste(">", toString(local_prob_cutoff), sep = ""), | |
430 paste("<", toString(local_prob_cutoff), sep = "")), | |
431 value = c( | |
432 nrow(loc_prob_filtered_data) / nrow(filtered_data) * 100, | |
433 (nrow(filtered_data) - nrow(loc_prob_filtered_data)) | |
434 / nrow(filtered_data) * 100 | |
435 ) | |
436 ) | |
437 gigi <- | |
438 ggplot(loc_prob_graph_data, aes(x = "", y = value, fill = group)) + | |
439 geom_bar(width = 0.5, | |
440 stat = "identity", | |
441 color = "black") + | |
442 labs(x = NULL, | |
443 y = "percent", | |
444 title = "Phosphopeptides partitioned by localization-probability cutoff" | |
445 ) + | |
446 scale_fill_discrete(name = "phosphopeptide\nlocalization-\nprobability") + | |
447 theme_minimal() + | |
448 theme( | |
449 legend.position = "right", | |
450 legend.title = element_text(), | |
451 plot.title = element_text(hjust = 0.5), | |
452 plot.subtitle = element_text(hjust = 0.5), | |
453 plot.title.position = "plot" | |
454 ) | |
455 pdf(loc_prob_cutoff_graph_filename) | |
456 print(gigi) | |
457 dev.off() | |
458 svg(loc_prob_cutoff_graph_fn_svg) | |
459 print(gigi) | |
460 dev.off() | |
461 # ... | |
462 | |
463 | |
464 # Extract quantitative values from filtered data | |
465 # --- | |
466 quant_data <- | |
467 loc_prob_filtered_data[, seq(from = start_col, | |
468 by = interval_col, | |
469 length.out = num_samples)] | |
470 # ... | |
471 | |
472 | |
473 # Generate Phosphopeptide Sequence | |
474 # for latest version of MaxQuant (Version 1.5.3.30) | |
475 # --- | |
476 metadata_df <- | |
477 data.frame( | |
478 loc_prob_filtered_data[, 1:8], | |
479 loc_prob_filtered_data[, phospho_col], | |
480 loc_prob_filtered_data[, phospho_col + 1], | |
481 loc_prob_filtered_data[, phospho_col + 2], | |
482 loc_prob_filtered_data[, phospho_col + 3], | |
483 loc_prob_filtered_data[, phospho_col + 4], | |
484 loc_prob_filtered_data[, phospho_col + 5], | |
485 loc_prob_filtered_data[, phospho_col + 6], | |
486 loc_prob_filtered_data[, phospho_col + 7], | |
487 quant_data | |
488 ) | |
489 colnames(metadata_df) <- | |
490 c( | |
491 "Proteins", | |
492 "Positions within proteins", | |
493 "Leading proteins", | |
494 "Protein", | |
495 "Protein names", | |
496 "Gene names", | |
497 "Fasta headers", | |
498 "Localization prob", | |
499 "Number of Phospho (STY)", | |
500 "Amino Acid", | |
501 "Sequence window", | |
502 "Modification window", | |
503 "Peptide window coverage", | |
504 "Phospho (STY) Probabilities", | |
505 "Phospho (STY) Score diffs", | |
506 "Position in peptide", | |
507 colnames(quant_data) | |
508 ) | |
509 # 'phosphopeptide_func' generates a phosphopeptide sequence | |
510 # for each row of data. | |
511 # for the 'apply' function: MARGIN 1 == rows, 2 == columns, c(1, 2) = both | |
512 metadata_df$phosphopeptide <- | |
513 apply(X = metadata_df, MARGIN = 1, FUN = phosphopeptide_func) | |
514 colnames(metadata_df)[1] <- "Phosphopeptide" | |
515 # Move the quant data columns to the right end of the data.frame | |
516 metadata_df <- movetolast(metadata_df, c(colnames(quant_data))) | |
517 # ... | |
518 | |
519 | |
520 # Write quantitative values for debugging purposes | |
521 # --- | |
522 quant_write <- cbind(metadata_df[, "Sequence window"], quant_data) | |
523 colnames(quant_write)[1] <- "Sequence.Window" | |
524 write.table( | |
525 quant_write, | |
526 file = quant_file_name, | |
527 sep = "\t", | |
528 quote = FALSE, | |
529 col.names = TRUE, | |
530 row.names = FALSE | |
531 ) | |
532 # ... | |
533 | |
534 | |
535 # Make new data frame containing only Phosphopeptides | |
536 # that are to be mapped to quant data (merge_df) | |
537 # --- | |
538 metadata_df <- | |
539 setDT(metadata_df, keep.rownames = TRUE) # row name will be used to map | |
540 merge_df <- | |
541 data.frame( | |
542 as.integer(metadata_df$rn), | |
543 metadata_df$phosphopeptide # row index to merge data frames | |
544 ) | |
545 colnames(merge_df) <- c("rn", "Phosphopeptide") | |
546 # ... | |
547 | |
548 | |
549 # Add Phosphopeptide column to quant columns for quality control checking | |
550 # --- | |
551 quant_data_qc <- as.data.frame(quant_data) | |
552 setDT(quant_data_qc, keep.rownames = TRUE) # will use to match rowname to data | |
553 quant_data_qc$rn <- as.integer(quant_data_qc$rn) | |
554 quant_data_qc <- merge(merge_df, quant_data_qc, by = "rn") | |
555 quant_data_qc$rn <- NULL # remove rn column | |
556 # ... | |
557 | |
558 | |
559 # Collapse multiphosphorylated peptides | |
560 # --- | |
561 quant_data_qc_collapsed <- | |
562 data.table(quant_data_qc, key = "Phosphopeptide") | |
563 quant_data_qc_collapsed <- | |
564 aggregate(. ~ Phosphopeptide, quant_data_qc, FUN = collapse_fn) | |
565 # ... | |
566 print("quant_data_qc_collapsed") | |
567 head(quant_data_qc_collapsed) | |
568 | |
569 # Compute (as string) % of phosphopeptides that are multiphosphorylated | |
570 # (for use in next step) | |
571 # --- | |
572 pct_multiphos <- | |
573 ( | |
574 nrow(quant_data_qc) - nrow(quant_data_qc_collapsed) | |
575 ) / (2 * nrow(quant_data_qc)) | |
576 pct_multiphos <- sprintf("%0.1f%s", 100 * pct_multiphos, "%") | |
577 # ... | |
578 | |
579 | |
580 # Compute and visualize breakdown of pY, pS, and pT before enrichment filter | |
581 # --- | |
582 py_data <- | |
583 quant_data_qc_collapsed[ | |
584 str_detect(quant_data_qc_collapsed$Phosphopeptide, "pY"), | |
585 ] | |
586 ps_data <- | |
587 quant_data_qc_collapsed[ | |
588 str_detect(quant_data_qc_collapsed$Phosphopeptide, "pS"), | |
589 ] | |
590 pt_data <- | |
591 quant_data_qc_collapsed[ | |
592 str_detect(quant_data_qc_collapsed$Phosphopeptide, "pT"), | |
593 ] | |
594 | |
595 py_num <- nrow(py_data) | |
596 ps_num <- nrow(ps_data) | |
597 pt_num <- nrow(pt_data) | |
598 | |
599 # Visualize enrichment | |
600 enrich_graph_data <- data.frame(group = c("pY", "pS", "pT"), | |
601 value = c(py_num, ps_num, pt_num)) | |
602 | |
603 enrich_graph_data <- | |
604 enrich_graph_data[ | |
605 enrich_graph_data$value > 0, | |
606 ] | |
607 | |
608 # Plot pie chart with legend | |
609 # start: https://stackoverflow.com/a/62522478/15509512 | |
610 # refine: https://www.statology.org/ggplot-pie-chart/ | |
611 # colors: https://colorbrewer2.org/#type=diverging&scheme=BrBG&n=8 | |
612 slices <- enrich_graph_data$value | |
613 phosphoresidue <- enrich_graph_data$group | |
614 pct <- round(100 * slices / sum(slices)) | |
615 lbls <- | |
616 paste(enrich_graph_data$group, "\n", pct, "%\n(", slices, ")", sep = "") | |
617 slc_ctr <- c() | |
618 run_tot <- 0 | |
619 for (p in pct) { | |
620 slc_ctr <- c(slc_ctr, run_tot + p / 2.0) | |
621 run_tot <- run_tot + p | |
622 } | |
623 lbl_y <- 100 - slc_ctr | |
624 df <- | |
625 data.frame(slices, | |
626 pct, | |
627 lbls, | |
628 phosphoresidue = factor(phosphoresidue, levels = phosphoresidue)) | |
629 gigi <- ggplot(df | |
630 , aes(x = 1, y = pct, fill = phosphoresidue)) + | |
631 geom_col(position = "stack", orientation = "x") + | |
632 geom_text(aes(x = 1, y = lbl_y, label = lbls), col = "black") + | |
633 coord_polar(theta = "y", direction = -1) + | |
634 labs( | |
635 x = NULL | |
636 , | |
637 y = NULL | |
638 , | |
639 title = "Percentages (and counts) of phosphosites, by type of residue" | |
640 , | |
641 caption = sprintf( | |
642 "Roughly %s of peptides have multiple phosphosites.", | |
643 pct_multiphos | |
644 ) | |
645 ) + | |
646 labs(x = NULL, y = NULL, fill = NULL) + | |
647 theme_classic() + | |
648 theme( | |
649 legend.position = "right" | |
650 , | |
651 axis.line = element_blank() | |
652 , | |
653 axis.text = element_blank() | |
654 , | |
655 axis.ticks = element_blank() | |
656 , | |
657 plot.title = element_text(hjust = 0.5) | |
658 , | |
659 plot.subtitle = element_text(hjust = 0.5) | |
660 , | |
661 plot.caption = element_text(hjust = 0.5) | |
662 , | |
663 plot.title.position = "plot" | |
664 ) + | |
665 scale_fill_manual(breaks = phosphoresidue, | |
666 values = c("#c7eae5", "#f6e8c3", "#dfc27d")) | |
667 | |
668 pdf(enrich_graph_filename) | |
669 print(gigi) | |
670 dev.off() | |
671 svg(enrich_graph_filename_svg) | |
672 print(gigi) | |
673 dev.off() | |
674 # ... | |
675 | |
676 | |
677 # Filter phosphopeptides by enrichment | |
678 # -- | |
679 if (enriched == "Y") { | |
680 quant_data_qc_enrichment <- quant_data_qc_collapsed[ | |
681 str_detect(quant_data_qc_collapsed$Phosphopeptide, "pY"), | |
682 ] | |
683 } else if (enriched == "ST") { | |
684 quant_data_qc_enrichment <- quant_data_qc_collapsed[ | |
685 str_detect(quant_data_qc_collapsed$Phosphopeptide, "pS") | | |
686 str_detect(quant_data_qc_collapsed$Phosphopeptide, "pT"), | |
687 ] | |
688 } else { | |
689 print("Error in enriched variable. Set to either 'Y' or 'ST'") | |
690 } | |
691 # ... | |
692 | |
693 print("quant_data_qc_enrichment") | |
694 head(quant_data_qc_enrichment) | |
695 | |
696 # Write phosphopeptides filtered by enrichment | |
697 # -- | |
698 write.table( | |
699 quant_data_qc_enrichment, | |
700 file = output_filename, | |
701 sep = "\t", | |
702 quote = FALSE, | |
703 row.names = FALSE | |
704 ) | |
705 # ... |