Mercurial > repos > galaxyp > mqppep_preproc
comparison mqppep_anova_script.Rmd @ 3:bae3a23461c9 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3dcf0d08f006b888061ff83eadc65e550d751869
author | galaxyp |
---|---|
date | Tue, 31 Jan 2023 22:27:00 +0000 |
parents | a5e7469dfdfa |
children | 5c2beb4eb41c |
comparison
equal
deleted
inserted
replaced
2:a5e7469dfdfa | 3:bae3a23461c9 |
---|---|
46 meanPercentile: 50 | 46 meanPercentile: 50 |
47 #meanPercentile: 1 | 47 #meanPercentile: 1 |
48 # for small random value imputation, what should `s / mean(x)` ratio be? | 48 # for small random value imputation, what should `s / mean(x)` ratio be? |
49 sdPercentile: 1.0 | 49 sdPercentile: 1.0 |
50 # output path for imputed data file | 50 # output path for imputed data file |
51 imputedDataFilename: "test-data/limbo/imputedDataFilename.txt" | 51 imputedDataFilename: "test-data/imputedDataFilename.txt" |
52 # output path for imputed/quantile-normalized/log-transformed data file | 52 # output path for imputed/quantile-normalized/log-transformed data file |
53 imputedQNLTDataFile: "test-data/limbo/imputedQNLTDataFile.txt" | 53 imputedQNLTDataFile: "test-data/imputedQNLTDataFile.txt" |
54 # output path for contents of `stats_metadata_v` table | 54 # output path for contents of `stats_metadata_v` table |
55 anovaKseaMetadata: "test-data/limbo/anovaKseaMetadata.txt" | 55 anovaKseaMetadata: "test-data/anovaKseaMetadata.txt" |
56 # how to test one variable with > 2 categories (e.g., aov or kruskal.test) | 56 # how to test one variable with > 2 categories (e.g., aov or kruskal.test) |
57 oneWayManyCategories: !r c("aov", "kruskal.test", "oneway.test")[1] | 57 oneWayManyCategories: !r c("aov", "kruskal.test", "oneway.test")[1] |
58 # how to test one variable with 2 categories (e.g., oneway.test) | 58 # how to test one variable with 2 categories (e.g., oneway.test) |
59 oneWayTwoCategories: !r c("aov", "kruskal.test", "oneway.test")[3] | 59 oneWayTwoCategories: !r c("aov", "kruskal.test", "oneway.test")[3] |
60 # what should be the minimum quality for consideration in both | 60 # what should be the minimum quality for consideration in both |
97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2" | 97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2" |
98 # should debugging trace messages be printed? | 98 # should debugging trace messages be printed? |
99 showEnrichedSubstrates: FALSE | 99 showEnrichedSubstrates: FALSE |
100 # should debugging nb/nbe messages be printed? | 100 # should debugging nb/nbe messages be printed? |
101 printNBMsgs: FALSE | 101 printNBMsgs: FALSE |
102 # showld row-scaling be applied to heatmaps: "none" or "row" | 102 #printNBMsgs: TRUE |
103 defaultHeatMapRowScaling: "none" | 103 # should row-scaling be applied to heatmaps: "none" or "row" |
104 defaultHeatMapRowScaling: !r c("none", "row")[1] | |
105 # how missing values be displayed on heat maps: "NA" or " " | |
106 heatMapNAcellNote: !r c(" ", "NA")[1] | |
107 # how missing values be displayed on heat maps: "NA" or " " | |
108 heatMapNAgrey: "#D8D8D8" | |
109 # temporary hack | |
110 heatMapNAsubstitute: TRUE | |
111 # what color should be used for missing values be displayed on heat maps | |
112 heatMapNAcellColor: "grey15" | |
104 # should debugging trace messages be printed? | 113 # should debugging trace messages be printed? |
105 printTraceMsgs: FALSE | 114 printTraceMsgs: FALSE |
106 # when debugging files are needed, set debugFileBasePath to the path | 115 # when debugging files are needed, set debugFileBasePath to the path |
107 # to the directory where they should be written | 116 # to the directory where they should be written |
108 debugFileBasePath: !r if (TRUE) NULL else "test-data" | 117 debugFileBasePath: !r if (TRUE) NULL else "test-data" |
122 nbe <- if (!print_nb_messages) { | 131 nbe <- if (!print_nb_messages) { |
123 function(...) invisible() | 132 function(...) invisible() |
124 } else { | 133 } else { |
125 function(..., f = cat, file = stderr()) { | 134 function(..., f = cat, file = stderr()) { |
126 cat( | 135 cat( |
127 stringi::stri_unescape_unicode("\nNBE \\u2203\\u2283\\u2200"), | 136 stringi::stri_unescape_unicode("\nN.B. \\u2203\\u2283\\u2200"), |
128 ..., | 137 ..., |
129 file = file | 138 file = file |
130 ) | 139 ) |
131 } | 140 } |
132 } | 141 } |
146 | 155 |
147 if (print_nb_messages) nbe("library(gplots)") | 156 if (print_nb_messages) nbe("library(gplots)") |
148 library(gplots) | 157 library(gplots) |
149 if (print_nb_messages) nbe("library(caret)") | 158 if (print_nb_messages) nbe("library(caret)") |
150 # load caret for nearZeroVar | 159 # load caret for nearZeroVar |
151 if (print_nb_messages) nbe("Please ignore the messages about systemd, if any.\n") | 160 if (print_nb_messages) nbe("Please ignore any messages about systemd.\n") |
152 library(caret) | 161 library(caret) |
153 if (print_nb_messages) nbe("library(DBI)") | 162 if (print_nb_messages) nbe("library(DBI)") |
154 library(DBI) | 163 library(DBI) |
155 if (print_nb_messages) nbe("library(RSQLite)") | 164 if (print_nb_messages) nbe("library(RSQLite)") |
156 library(RSQLite) | 165 library(RSQLite) |
340 msg, "\n" | 349 msg, "\n" |
341 ) | 350 ) |
342 } | 351 } |
343 } | 352 } |
344 | 353 |
354 divert_warnings <- | |
355 function(expr, classes = "warning") { | |
356 withCallingHandlers( | |
357 expr, | |
358 warning = function(w) { | |
359 cat(" divert_warnings: ", w$message, "\n", file = stderr()) | |
360 if (inherits(w, classes)) | |
361 tryInvokeRestart("muffleWarning") | |
362 } | |
363 ) | |
364 } | |
365 | |
345 # ref: https://tug.org/texinfohtml/latex2e.html | 366 # ref: https://tug.org/texinfohtml/latex2e.html |
346 # LaTeX sets aside the following characters for special purposes. | 367 # LaTeX sets aside the following characters for special purposes. |
347 # For example, the percent sign % is for comments. | 368 # For example, the percent sign % is for comments. |
348 # They are called reserved characters or special characters. | 369 # They are called reserved characters or special characters. |
349 # They are all discussed elsewhere in this manual. | 370 # They are all discussed elsewhere in this manual. |
358 # As to the last three characters, to get a tilde in the text body font, | 379 # As to the last three characters, to get a tilde in the text body font, |
359 # use \~{} (omitting the curly braces would result in the next character | 380 # use \~{} (omitting the curly braces would result in the next character |
360 # receiving a tilde accent). | 381 # receiving a tilde accent). |
361 # Similarly, to get a text body font circumflex use \^{}. | 382 # Similarly, to get a text body font circumflex use \^{}. |
362 # To get a backslash in the font of the text body enter \textbackslash{}. | 383 # To get a backslash in the font of the text body enter \textbackslash{}. |
363 whack_math <- | 384 whack_math <- if (TRUE) { |
364 function(v) { | 385 function(v) { |
365 v <- as.character(v) | 386 v <- as.character(v) |
387 w <- v | |
366 w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE) | 388 w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE) |
367 w <- Reduce( | 389 w <- Reduce( |
368 f = function(l, r) { | 390 f = function(l, r) { |
369 gsub(r, paste0("\\", r), l, fixed = TRUE) | 391 ptrn <- paste0("\\", r) |
392 for (i in seq_along(l)) { | |
393 if (!grepl(ptrn, l[i], fixed = TRUE)) { | |
394 l[i] <- gsub(r, ptrn, l[i], fixed = TRUE) | |
395 } | |
396 } | |
397 l | |
370 }, | 398 }, |
371 x = c("#", "$", "%", "&", "{", "}", "_"), | 399 x = c("#", "$", "%", "&", "{", "}", "_"), |
372 init = w | 400 init = w |
373 ) | 401 ) |
374 w <- gsub("^", "\\^{}", w, fixed = TRUE) | 402 w <- gsub("^", "\\^{}", w, fixed = TRUE) |
403 w <- gsub("\\textbackslash \\_", "\\_", w, fixed = TRUE) | |
404 return(w) | |
405 } | |
406 } else { | |
407 function(v) { | |
408 v <- as.character(v) | |
409 w <- v | |
410 w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE) | |
411 w <- Reduce( | |
412 f = function(l, r) { | |
413 if (length(l) > 1) { | |
414 cat("whack_math: deparse1(v) = `", deparse1(v), "`\n", | |
415 file = stderr()) | |
416 cat("whack_math: v = `", v, "`\n", file = stderr()) | |
417 cat(" Reduce f: l = `", l, | |
418 "` r = `", r, | |
419 "`\n", file = stderr() | |
420 ) | |
421 divert_warnings(grepl(r, l, fixed = TRUE)) | |
422 } | |
423 ptrn <- paste0("\\", r) | |
424 cat("ptrn: `", ptrn, "`\n", file = stderr()) | |
425 for (i in seq_along(l)) { | |
426 cat(" before l[i] = ", l[i], "\n", file = stderr()) | |
427 if (!grepl(ptrn, l[i], fixed = TRUE)) { | |
428 l[i] <- gsub(r, ptrn, l[i], fixed = TRUE) | |
429 } | |
430 cat(" after l[i] = ", l[i], "\n", file = stderr()) | |
431 } | |
432 l | |
433 }, | |
434 x = c("#", "$", "%", "&", "{", "}", "_"), | |
435 init = w | |
436 ) | |
437 w <- gsub("^", "\\^{}", w, fixed = TRUE) | |
438 w <- gsub("\\textbackslash \\_", "\\_", w, fixed = TRUE) | |
375 return(w) | 439 return(w) |
376 if (all(v == w)) | 440 if (all(v == w)) |
377 v | 441 v |
378 else | 442 else |
379 paste0("\\texttt{", w, "}") | 443 paste0("\\texttt{", w, "}") |
380 } | 444 } |
445 } | |
381 whack_underscores <- whack_math | 446 whack_underscores <- whack_math |
382 | 447 |
383 ## dump params to stderr (remove this eventually) | 448 ## dump params to stderr (remove this eventually) |
384 | 449 |
385 if (FALSE) nbe(see_variable(params)) | 450 if (FALSE) nbe(see_variable(params)) |
578 g_intensity_min_per_class <- params$intensityMinValuesPerGroup | 643 g_intensity_min_per_class <- params$intensityMinValuesPerGroup |
579 if (!is.integer(g_intensity_min_per_class) || g_intensity_min_per_class < 0) { | 644 if (!is.integer(g_intensity_min_per_class) || g_intensity_min_per_class < 0) { |
580 cat("invalid intensityMinValuesPerGroup (must be integer > -1") | 645 cat("invalid intensityMinValuesPerGroup (must be integer > -1") |
581 knitr::knit_exit() | 646 knitr::knit_exit() |
582 } | 647 } |
583 | 648 g_correlate_substrates <- params$correlateSubstrates |
584 if (is.na(as.logical(g_correlate_substrates <- params$correlateSubstrates))) { | 649 if (is.na(as.logical(g_correlate_substrates))) { |
585 cat("invalid correlateSubstrates (must be TRUE or FALSE)") | 650 cat("invalid correlateSubstrates (must be TRUE or FALSE)") |
586 knitr::knit_exit() | 651 knitr::knit_exit() |
587 } | 652 } |
588 | 653 g_filter_cov_var_gt_1 <- params$filterCovVarGT1 |
589 if (is.na(as.logical(g_filter_cov_var_gt_1 <- params$filterCovVarGT1))) { | 654 if (is.na(as.logical(g_filter_cov_var_gt_1))) { |
590 cat("invalid filterCovVarGT1 parameter (must be TRUE or FALSE)") | 655 cat("invalid filterCovVarGT1 parameter (must be TRUE or FALSE)") |
591 knitr::knit_exit() | 656 knitr::knit_exit() |
592 } | 657 } |
593 | 658 |
594 # TODO Validate >> 0 < 30 | 659 # TODO Validate >> 0 < 30 |
617 value = rvalue, | 682 value = rvalue, |
618 pos = pf | 683 pos = pf |
619 ) | 684 ) |
620 invisible(rvalue) | 685 invisible(rvalue) |
621 } | 686 } |
687 # Infix concatenation | |
688 `%||%` <- function(lvalue, rvalue) paste0(lvalue, rvalue) | |
689 | |
622 | 690 |
623 ### FUNCTIONS | 691 ### FUNCTIONS |
624 | 692 |
625 no_op <- | 693 no_op <- |
626 function() { | 694 function() { |
703 }, | 771 }, |
704 simplify = "array" | 772 simplify = "array" |
705 ) | 773 ) |
706 grpl_rslt <- grpl_rslt + grpl_rslt_v | 774 grpl_rslt <- grpl_rslt + grpl_rslt_v |
707 } | 775 } |
708 rslt <- unname(grpl_rslt > 0) | 776 unname(grpl_rslt > 0) |
709 } | 777 } |
710 | 778 |
711 ##' Produce positions in a vector where succeeding value != current valus | 779 ##' Produce positions in a vector where succeeding value != current valus |
712 ##' | 780 ##' |
713 ##' @title Search vector for neighboring positions having different values | 781 ##' @title Search vector for neighboring positions having different values |
826 pvalue | 894 pvalue |
827 } | 895 } |
828 | 896 |
829 # This code adapted from matrixcalc::is.positive.definite | 897 # This code adapted from matrixcalc::is.positive.definite |
830 # Notably, this simply tests without calling stop() | 898 # Notably, this simply tests without calling stop() |
899 # nolint start | |
900 # squash un-actionable cyclomatic_complexity warning | |
831 is_positive_definite <- function(x, tol = 1e-08) { | 901 is_positive_definite <- function(x, tol = 1e-08) { |
902 # nolint end | |
832 if (!is.matrix(x)) | 903 if (!is.matrix(x)) |
833 return(FALSE) | 904 return(FALSE) |
834 if (!is.numeric(x)) | 905 if (!is.numeric(x)) |
835 return(FALSE) | 906 return(FALSE) |
836 if (nrow(x) < 1) | 907 if (nrow(x) < 1) |
970 } | 1041 } |
971 invisible(x) | 1042 invisible(x) |
972 } | 1043 } |
973 | 1044 |
974 # Use this like print.data.frame, from which it is adapted: | 1045 # Use this like print.data.frame, from which it is adapted: |
1046 # nolint start | |
1047 # squash un-actionable cyclomatic_complexity warning | |
1048 # squash un-actionable "no visible global for ..." | |
975 data_frame_tabbing_latex <- | 1049 data_frame_tabbing_latex <- |
976 function( | 1050 function( |
1051 # nolint end | |
977 x, | 1052 x, |
978 # vector of tab stops, in inches | 1053 # vector of tab stops, in inches |
979 tabstops, | 1054 tabstops, |
980 # vector of headings, registered with tab-stops | 1055 # vector of headings, registered with tab-stops |
981 headings = colnames(x), | 1056 headings = colnames(x), |
1149 return(s) | 1224 return(s) |
1150 } | 1225 } |
1151 nolatex_verbatim <- | 1226 nolatex_verbatim <- |
1152 function(expr) eval(expr) | 1227 function(expr) eval(expr) |
1153 | 1228 |
1229 error_knitrexit_stop <- | |
1230 function(e) { | |
1231 cat("Caught error:\n\n") | |
1232 str(e) | |
1233 knitr::knit_exit() | |
1234 stop(e) | |
1235 } | |
1236 | |
1154 latex_verbatim <- | 1237 latex_verbatim <- |
1155 function(expr) { | 1238 function(expr) { |
1156 arg_string <- deparse1(substitute(expr)) | |
1157 cat("\n\\begin{verbatim}\n___\n") | 1239 cat("\n\\begin{verbatim}\n___\n") |
1158 tryCatch( | 1240 tryCatch( |
1159 expr = expr, | 1241 expr = expr, |
1160 error = param_df_exit, | 1242 error = param_df_exit, |
1161 #ACE error = | |
1162 #ACE function(e) { | |
1163 #ACE cat("Caught error:\n\n") | |
1164 #ACE str(e) | |
1165 #ACE knitr::knit_exit() | |
1166 #ACE stop(e) | |
1167 #ACE }, | |
1168 finally = cat("...\n\\end{verbatim}\n") | 1243 finally = cat("...\n\\end{verbatim}\n") |
1169 ) | 1244 ) |
1170 } | 1245 } |
1171 | 1246 |
1172 latex_samepage <- | 1247 latex_samepage <- |
1173 function(expr) { | 1248 function(expr) { |
1174 arg_string <- deparse1(substitute(expr)) | |
1175 cat("\n\\begin{samepage}\n") | 1249 cat("\n\\begin{samepage}\n") |
1176 tryCatch( | 1250 tryCatch( |
1177 expr = expr, | 1251 expr = expr, |
1178 error = param_df_exit, | 1252 error = param_df_exit, |
1179 #ACE error = | |
1180 #ACE function(e) { | |
1181 #ACE cat("Caught error:\n\n") | |
1182 #ACE str(e) | |
1183 #ACE knitr::knit_exit() | |
1184 #ACE stop(e) | |
1185 #ACE }, | |
1186 finally = cat("\n\\end{samepage}\n") | 1253 finally = cat("\n\\end{samepage}\n") |
1187 ) | 1254 ) |
1188 } | 1255 } |
1189 | 1256 |
1190 # return the result of invocation after showing parameters | 1257 # return the result of invocation after showing parameters |
1191 # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/ | 1258 # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/ |
1192 latex_show_invocation <- | 1259 latex_show_invocation <- |
1193 function(f, f_name = deparse1(substitute(f)), head_patch = FALSE) { | 1260 function(f, f_name = deparse1(substitute(f)), head_patch = FALSE) { |
1194 function(...) { | 1261 function(...) { |
1195 my_env <- (as.list(environment())) | |
1196 va <- list(...) | 1262 va <- list(...) |
1197 my_rslt <- new_env() | 1263 my_rslt <- new_env() |
1198 my_rslt$rslt <- NULL | 1264 my_rslt$rslt <- NULL |
1199 latex_verbatim( | 1265 latex_verbatim( |
1200 expr = { | 1266 expr = { |
1201 cat(sprintf("\n .. Local variables for '%s':\n\n", f_name)) | 1267 cat(sprintf("\n .. Local variables for '%s':\n\n", f_name)) |
1202 str(va) | 1268 str(va) |
1203 if (!head_patch) { | 1269 if (!head_patch) { |
1204 # return this result | 1270 # return this result |
1205 # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/ | 1271 # ref: |
1272 # https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/ | |
1206 cat(sprintf("\n .. Invoking '%s'\n", f_name)) | 1273 cat(sprintf("\n .. Invoking '%s'\n", f_name)) |
1207 tryCatch( | 1274 tryCatch( |
1208 { | 1275 { |
1209 cat("\n\\end{verbatim}\n") | 1276 cat("\n\\end{verbatim}\n") |
1210 rslt <- do.call(f, va) | 1277 rslt <- do.call(f, va) |
1211 }, | 1278 }, |
1212 error = param_df_exit, | 1279 error = param_df_exit, |
1213 #ACE error = function(e) { | |
1214 #ACE cat("\n\\begin{verbatim}\n") | |
1215 #ACE str(e) | |
1216 #ACE cat("\n\\end{verbatim}\n") | |
1217 #ACE knitr::knit_exit() | |
1218 #ACE stop(e) | |
1219 #ACE }, | |
1220 finally = cat("\n\\begin{verbatim}\n") | 1280 finally = cat("\n\\begin{verbatim}\n") |
1221 ) | 1281 ) |
1222 cat(sprintf("\n .. '%s' returned:\n", f_name)) | 1282 cat(sprintf("\n .. '%s' returned:\n", f_name)) |
1223 str(rslt) | 1283 str(rslt) |
1224 my_rslt$rslt <- rslt | 1284 my_rslt$rslt <- rslt |
1246 collapse = collapse_string | 1306 collapse = collapse_string |
1247 ) | 1307 ) |
1248 ) | 1308 ) |
1249 } | 1309 } |
1250 | 1310 |
1251 latex_itemized_collapsed <- function(collapse_string, v, underscore_whack = TRUE) { | 1311 latex_itemized_collapsed <- |
1312 function(collapse_string, v, underscore_whack = TRUE) { | |
1252 cat("\\begin{itemize}\n\\item ") | 1313 cat("\\begin{itemize}\n\\item ") |
1253 latex_collapsed_vector(collapse_string, v, underscore_whack) | 1314 latex_collapsed_vector(collapse_string, v, underscore_whack) |
1254 cat("\n\\end{itemize}\n") | 1315 cat("\n\\end{itemize}\n") |
1255 } | 1316 } |
1256 | 1317 |
1257 latex_itemized_list <- function(v, underscore_whack = TRUE) { | 1318 latex_itemized_list <- function(v, underscore_whack = TRUE) { |
1258 latex_itemized_collapsed("\n\\item ", v, underscore_whack) | 1319 latex_itemized_collapsed("\n\\item ", v, underscore_whack) |
1259 } | 1320 } |
1260 | 1321 |
1261 latex_enumerated_collapsed <- function(collapse_string, v, underscore_whack = TRUE) { | 1322 latex_enumerated_collapsed <- |
1323 function(collapse_string, v, underscore_whack = TRUE) { | |
1262 cat("\\begin{enumerate}\n\\item ") | 1324 cat("\\begin{enumerate}\n\\item ") |
1263 latex_collapsed_vector(collapse_string, v, underscore_whack) | 1325 latex_collapsed_vector(collapse_string, v, underscore_whack) |
1264 cat("\n\\end{enumerate}\n") | 1326 cat("\n\\end{enumerate}\n") |
1265 } | 1327 } |
1266 | 1328 |
1300 } | 1362 } |
1301 | 1363 |
1302 # N.B. use con = "" to emulate regular cat | 1364 # N.B. use con = "" to emulate regular cat |
1303 fcat0 <- | 1365 fcat0 <- |
1304 function(..., sprtr = " ", cnnctn = file()) { | 1366 function(..., sprtr = " ", cnnctn = file()) { |
1305 cat0(..., sep = sprtr, file = cnnctn) | 1367 gsubfn::cat0(..., sep = sprtr, file = cnnctn) |
1306 invisible(cnnctn) | 1368 invisible(cnnctn) |
1307 } | 1369 } |
1308 | 1370 |
1309 hypersub <- | 1371 hypersub <- |
1310 function(s) { | 1372 function(s) { |
1426 #' | 1488 #' |
1427 #' Hornbeck et al. (2015) Nucleic Acids Res. 43:D512-20 | 1489 #' Hornbeck et al. (2015) Nucleic Acids Res. 43:D512-20 |
1428 #' | 1490 #' |
1429 #' Horn et al. (2014) Nature Methods 11(6):603-4 | 1491 #' Horn et al. (2014) Nature Methods 11(6):603-4 |
1430 #' | 1492 #' |
1493 # nolint start | |
1494 # squash un-actionable cyclomatic_complexity warning | |
1431 ksea_scores <- function( | 1495 ksea_scores <- function( |
1496 # nolint end | |
1432 # For human data, typically, ksdata = KSEAapp::ksdata | 1497 # For human data, typically, ksdata = KSEAapp::ksdata |
1433 ksdata, | 1498 ksdata, |
1434 | 1499 |
1435 # Input data file having columns: | 1500 # Input data file having columns: |
1436 # - Protein : abbreviated protein name | 1501 # - Protein : abbreviated protein name |
1454 | 1519 |
1455 # Minimum substrate count, necessary to adjust the p-value appropriately. | 1520 # Minimum substrate count, necessary to adjust the p-value appropriately. |
1456 minimum_substrate_count | 1521 minimum_substrate_count |
1457 | 1522 |
1458 ) { | 1523 ) { |
1524 if (FALSE) { | |
1525 if (print_nb_messages) nbe("Output contents of `ksdata` table\n") | |
1526 cat_variable(ksdata) | |
1527 cat("\n\\clearpage\n") | |
1528 data_frame_tabbing_latex( | |
1529 x = ksdata[order(ksdata$GENE, ksdata$SUB_GENE), ], | |
1530 tabstops = c(0.3, 0.3, 1.2, 0.3, 0.3, 0.3, 0.3, | |
1531 0.8, 0.3, 0.8, 0.3, 0.3, 0.3), | |
1532 caption = "ksdata dump", | |
1533 use_subsubsection_header = FALSE | |
1534 ) | |
1535 } | |
1459 # no px$FC should be <= 0, but abs(px$FC) is used below as a precaution. | 1536 # no px$FC should be <= 0, but abs(px$FC) is used below as a precaution. |
1460 if (length(grep(";", px$Residue.Both)) == 0) { | 1537 if (length(grep(";", px$Residue.Both)) == 0) { |
1461 # There are no Residue.Both entries having semicolons, so new is | 1538 # There are no Residue.Both entries having semicolons, so new is |
1462 # simply px except two columns are renamed and a column is added | 1539 # simply px except two columns are renamed and a column is added |
1463 # for log2(abs(fold-change)) | 1540 # for log2(abs(fold-change)) |
1507 # To take the magnitude into account without taking the direction into | 1584 # To take the magnitude into account without taking the direction into |
1508 # account, set params$kseaUseAbsoluteLog2FC to TRUE | 1585 # account, set params$kseaUseAbsoluteLog2FC to TRUE |
1509 # | 1586 # |
1510 # Should KSEA be performed aggregating signed log2FC or absolute? | 1587 # Should KSEA be performed aggregating signed log2FC or absolute? |
1511 # FALSE use raw log2FC for KSEA as for KSEAapp::KSEA.Scores | 1588 # FALSE use raw log2FC for KSEA as for KSEAapp::KSEA.Scores |
1589 # nolint start | |
1590 # squash un-actionable "no visible global for ..." | |
1512 if (params$kseaUseAbsoluteLog2FC) { | 1591 if (params$kseaUseAbsoluteLog2FC) { |
1592 # nolint end | |
1513 # TRUE use abs(log2FC) for KSEA as Justin requested; this is a | 1593 # TRUE use abs(log2FC) for KSEA as Justin requested; this is a |
1514 # justifiable deviation from the KSEAapp::KSEA.Scores algorithm. | 1594 # justifiable deviation from the KSEAapp::KSEA.Scores algorithm. |
1515 new$log2_fc <- abs(new$log2_fc) | 1595 new$log2_fc <- abs(new$log2_fc) |
1516 } | 1596 } |
1517 | 1597 |
1520 # set to TRUE to monitor filtration on stderr | 1600 # set to TRUE to monitor filtration on stderr |
1521 sink(stderr()) | 1601 sink(stderr()) |
1522 cat(see_variable(networkin, "\n")) | 1602 cat(see_variable(networkin, "\n")) |
1523 } | 1603 } |
1524 ksdata_filtered <- | 1604 ksdata_filtered <- |
1525 sqldf( | 1605 sqldf::sqldf( |
1526 sprintf("%s %s", | 1606 sprintf("%s %s", |
1527 "select * from ksdata where not Source = 'NetworKIN'", | 1607 "select * from ksdata where not Source = 'NetworKIN'", |
1528 if (networkin) | 1608 if (networkin) |
1529 sprintf("or networkin_score >= %d", networkin_cutoff) | 1609 sprintf("or networkin_score >= %d", networkin_cutoff) |
1530 else | 1610 else |
1531 "" | 1611 "" |
1532 ) | 1612 ) |
1533 ) | 1613 ) |
1614 if (FALSE) write.csv(x = ksdata_filtered, file = "ksdata_filtered.csv") | |
1534 if (monitor_filtration_on_stderr) { | 1615 if (monitor_filtration_on_stderr) { |
1535 cat(see_variable(sqldf( | 1616 cat(see_variable(sqldf::sqldf( |
1536 "select count(*), Source from ksdata group by Source"), "\n")) | 1617 "select count(*), Source from ksdata group by Source"), "\n")) |
1537 cat(see_variable(sqldf( | 1618 cat(see_variable(sqldf::sqldf( |
1538 "select count(*), Source from ksdata_filtered group by Source"), "\n")) | 1619 "select count(*), Source from ksdata_filtered group by Source"), "\n")) |
1539 sink() | 1620 sink() |
1540 } | 1621 } |
1541 | 1622 |
1542 ############################################################################ | 1623 ############################################################################ |
1602 ksdata_dataset_abbrev$Substrate.Gene, | 1683 ksdata_dataset_abbrev$Substrate.Gene, |
1603 ksdata_dataset_abbrev$Substrate.Mod, | 1684 ksdata_dataset_abbrev$Substrate.Mod, |
1604 ksdata_dataset_abbrev$p), | 1685 ksdata_dataset_abbrev$p), |
1605 ] | 1686 ] |
1606 if (print_nb_messages) nbe(see_variable(ksdata_dataset_abbrev)) | 1687 if (print_nb_messages) nbe(see_variable(ksdata_dataset_abbrev)) |
1688 if (FALSE) write.csv( | |
1689 x = ksdata_dataset_abbrev, | |
1690 file = "ksdata_dataset_abbrev_no_aggregation.csv", | |
1691 row.names = FALSE | |
1692 ) | |
1693 | |
1694 # For some kinases, the only difference between kinase names in two databases | |
1695 # is the case of the the letters; dedup to eliminate this false distinction | |
1696 # The only imperfection with this approach is when the NetworKIN-only fliter | |
1697 # is in force and there is a duplication between NetworKIN and HPRD (which | |
1698 # outranks NetworKIN). Presently, the script does not invoke with this | |
1699 # function with the NetworKIN-only filter active. | |
1700 dedup_sql <- " | |
1701 select | |
1702 r.'Kinase.Gene', r.'Substrate.Gene', r.'Substrate.Mod', | |
1703 r.Peptide, r.p, r.FC, r.log2FC, r.Source | |
1704 from | |
1705 ( select | |
1706 rowid, | |
1707 rank() over | |
1708 (partition by | |
1709 lower(k.'Kinase.Gene'), k.p, k.FC | |
1710 order by | |
1711 k.Source | |
1712 ) as rank, | |
1713 * | |
1714 from | |
1715 ksdata_dataset_abbrev k | |
1716 ) as r | |
1717 where r.rank = 1 | |
1718 " | |
1719 ksdata_dataset_abbrev <- sqldf::sqldf(x = dedup_sql) | |
1720 if (FALSE) write.csv( | |
1721 x = ksdata_dataset_abbrev, | |
1722 file = "ksdata_dataset_abbrev_no_aggregation_dedup.csv", | |
1723 row.names = FALSE | |
1724 ) | |
1725 | |
1607 # First aggregation step to account for multiply phosphorylated peptides | 1726 # First aggregation step to account for multiply phosphorylated peptides |
1608 # and differing peptide sequences; the goal here is to combine results | 1727 # and differing peptide sequences; the goal here is to combine results |
1609 # for all measurements of the same substrate. | 1728 # for all measurements of the same substrate. |
1610 # SELECT `Kinase.Gene`, `Substrate.Gene`, `Substrate.Mod`, | 1729 # SELECT `Kinase.Gene`, `Substrate.Gene`, `Substrate.Mod`, |
1611 # `Source`, avg(log2FC) AS log2FC | 1730 # `Source`, avg(log2FC) AS log2FC |
1654 log2FC ~ Kinase.Gene, | 1773 log2FC ~ Kinase.Gene, |
1655 data = ksdata_dataset_abbrev, | 1774 data = ksdata_dataset_abbrev, |
1656 FUN = mean | 1775 FUN = mean |
1657 ) | 1776 ) |
1658 if (print_nb_messages) nbe(see_variable(mean_fc), "\n") | 1777 if (print_nb_messages) nbe(see_variable(mean_fc), "\n") |
1778 if (FALSE) write.csv(x = mean_fc, file = "mean_fc.csv") | |
1659 | 1779 |
1660 # for contrast j | 1780 # for contrast j |
1661 # for each kinase i | 1781 # for each kinase i |
1662 # extract log2 of fold-change (from `new` above) | 1782 # extract log2 of fold-change (from `new` above) |
1663 # (used in KSEA.Scores.R lines # 130 & 132) | 1783 # (used in KSEA.Scores.R lines # 130 & 132) |
1679 mean(log2_fc_j_each_i, na.rm = TRUE) | 1799 mean(log2_fc_j_each_i, na.rm = TRUE) |
1680 | 1800 |
1681 # Reorder mean_fc, although I don't see why | 1801 # Reorder mean_fc, although I don't see why |
1682 # (KSEA.Scores.R line 128 | 1802 # (KSEA.Scores.R line 128 |
1683 mean_fc <- mean_fc[order(mean_fc[, 1]), ] | 1803 mean_fc <- mean_fc[order(mean_fc[, 1]), ] |
1804 if (FALSE) write.csv(x = mean_fc, file = "mean_fc.csv") | |
1684 # mean_fc columns so far: "Kinase.Gene", "log2FC" | 1805 # mean_fc columns so far: "Kinase.Gene", "log2FC" |
1685 # - Kinase.Gene | 1806 # - Kinase.Gene |
1686 # indicates the gene name for each kinase. | 1807 # indicates the gene name for each kinase. |
1687 | 1808 |
1688 # Create column 3: mS | 1809 # Create column 3: mS |
1734 , | 1855 , |
1735 drop = FALSE | 1856 drop = FALSE |
1736 ] | 1857 ] |
1737 } | 1858 } |
1738 | 1859 |
1739 #ACE nb(see_variable(nrow(mean_fc)), "\n") | |
1740 # Create column 8: FDR, deduced by Benjamini-Hochberg adustment from p-value | 1860 # Create column 8: FDR, deduced by Benjamini-Hochberg adustment from p-value |
1741 # - FDR | 1861 # - FDR |
1742 # is the p-value adjusted for multiple hypothesis testing | 1862 # is the p-value adjusted for multiple hypothesis testing |
1743 # using the Benjamini & Hochberg method." | 1863 # using the Benjamini & Hochberg method." |
1744 # (KSEA.Scores.R line # 134) | 1864 # (KSEA.Scores.R line # 134) |
1745 mean_fc$fdr <- | 1865 mean_fc$fdr <- |
1746 p.adjust(mean_fc$p_value, method = "fdr") | 1866 p.adjust(mean_fc$p_value, method = "fdr") |
1747 | 1867 |
1748 # It makes no sense to leave Z-scores negative when using | 1868 # It makes no sense to leave Z-scores negative when using |
1749 # absolute value of fold-change | 1869 # absolute value of fold-change |
1870 # nolint start | |
1871 # squash un-actionable "no visible global for ..." | |
1750 if (params$kseaUseAbsoluteLog2FC) { | 1872 if (params$kseaUseAbsoluteLog2FC) { |
1873 # nolint end | |
1751 mean_fc$z_score <- abs(mean_fc$z_score) | 1874 mean_fc$z_score <- abs(mean_fc$z_score) |
1752 } | 1875 } |
1753 | 1876 |
1754 # Remove second column (log2FC), which is duplicated as mS | 1877 # Remove second column (log2FC), which is duplicated as mS |
1755 # (KSEA.Scores.R line # 136) | 1878 # (KSEA.Scores.R line # 136) |
1807 } | 1930 } |
1808 ) | 1931 ) |
1809 | 1932 |
1810 k <- k[selector < ksea_cutoff_threshold, ] | 1933 k <- k[selector < ksea_cutoff_threshold, ] |
1811 nrow_k <- nrow(k) | 1934 nrow_k <- nrow(k) |
1812 | |
1813 #ACE nbe(see_variable(fdr_barplot_dataframe <- k)) | |
1814 | 1935 |
1815 if (nrow_k > 0) { | 1936 if (nrow_k > 0) { |
1816 max_nchar_rowname <- max(nchar(rownames(k))) | 1937 max_nchar_rowname <- max(nchar(rownames(k))) |
1817 my_cex_names <- 1.0 / (1 + nrow_k / 50) | 1938 my_cex_names <- 1.0 / (1 + nrow_k / 50) |
1818 | 1939 |
1976 "m_s", | 2097 "m_s", |
1977 "z_score", | 2098 "z_score", |
1978 "p_value", | 2099 "p_value", |
1979 "fdr" | 2100 "fdr" |
1980 ) | 2101 ) |
1981 #ACE output_order <- with(output_df, order(fdr)) | 2102 |
2103 # nolint start | |
1982 output_order <- with(output_df, order(p_value)) | 2104 output_order <- with(output_df, order(p_value)) |
2105 # nolint end | |
1983 output_df <- output_df[output_order, ] | 2106 output_df <- output_df[output_order, ] |
1984 | 2107 |
1985 output_df[, 2] <- sprintf("%0.3g", output_df[, 2]) | 2108 output_df[, 2] <- sprintf("%0.3g", output_df[, 2]) |
1986 output_df$fdr <- sprintf("%0.4f", output_df$fdr) | 2109 output_df$fdr <- sprintf("%0.4f", output_df$fdr) |
1987 output_df$p_value <- sprintf("%0.2e", output_df$p_value) | 2110 output_df$p_value <- sprintf("%0.2e", output_df$p_value) |
1988 output_df$z_score <- sprintf("%0.2f", output_df$z_score) | 2111 output_df$z_score <- sprintf("%0.2f", output_df$z_score) |
1989 output_df$m_s <- sprintf("%d", output_df$m_s) | 2112 output_df$m_s <- sprintf("%d", output_df$m_s) |
1990 output_df$enrichment <- sprintf("%0.3g", output_df$enrichment) | 2113 output_df$enrichment <- sprintf("%0.3g", output_df$enrichment) |
1991 output_ncol <- ncol(output_df) | |
1992 colnames(output_df) <- | 2114 colnames(output_df) <- |
1993 c( | 2115 c( |
1994 "Kinase", | 2116 "Kinase or motif", |
1995 "\\(\\overline{{\\lvert}\\log_2 (\\text{fold-change}){\\rvert}}\\)", | 2117 "\\(\\overline{{\\lvert}\\log_2 (\\text{fold-change}){\\rvert}}\\)", |
1996 "Enrichment", | 2118 "Enrichment", |
1997 "Substrates", | 2119 "Substrates", |
1998 "z-score", | 2120 "z-score", |
1999 "p-value", | 2121 "p-value", |
2020 ) | 2142 ) |
2021 if (sum(selector < ksea_cutoff_threshold) > 0) { | 2143 if (sum(selector < ksea_cutoff_threshold) > 0) { |
2022 if (print_nb_messages) nbe(see_variable(output_df)) | 2144 if (print_nb_messages) nbe(see_variable(output_df)) |
2023 math_caption <- gsub("{", "\\{", caption, fixed = TRUE) | 2145 math_caption <- gsub("{", "\\{", caption, fixed = TRUE) |
2024 math_caption <- gsub("}", "\\}", math_caption, fixed = TRUE) | 2146 math_caption <- gsub("}", "\\}", math_caption, fixed = TRUE) |
2025 # with ( | |
2026 # output_df, | |
2027 # ) | |
2028 if (TRUE) { | 2147 if (TRUE) { |
2029 output_df$Kinase <- whack_underscores(output_df$Kinase) | |
2030 data_frame_tabbing_latex( | 2148 data_frame_tabbing_latex( |
2031 x = output_df, | 2149 x = output_df, |
2032 # vector of tab stops, in inches | 2150 # vector of tab stops, in inches |
2033 tabstops = c(1.0, 1.2, 1.0, 1.0, 1.0, 1.0), | 2151 tabstops = c(1.8, 1.2, 0.8, 0.8, 0.8, 0.8, 0.8), |
2034 # vector of headings, registered with tab-stops | 2152 # vector of headings, registered with tab-stops |
2035 headings = colnames(output_df), | 2153 headings = colnames(output_df), |
2036 # digits to pass to format.data.frame | 2154 # digits to pass to format.data.frame |
2037 digits = NULL, | 2155 digits = NULL, |
2038 # maximumn number of rows to print | 2156 # maximumn number of rows to print |
2055 # https://tug.org/texinfohtml/latex2e.html#Font-sizes | 2173 # https://tug.org/texinfohtml/latex2e.html#Font-sizes |
2056 charactersize = "small", | 2174 charactersize = "small", |
2057 # set verbatim to TRUE to debug output | 2175 # set verbatim to TRUE to debug output |
2058 verbatim = FALSE | 2176 verbatim = FALSE |
2059 ) | 2177 ) |
2060 } else { | |
2061 data_frame_table_latex( | |
2062 x = output_df, | |
2063 justification = "l c c c c c c", | |
2064 centered = TRUE, | |
2065 caption = sprintf( | |
2066 "\\text{%s}, KSEA %s < %s", | |
2067 math_caption, | |
2068 ksea_cutoff_statistic, | |
2069 ksea_cutoff_threshold | |
2070 ), | |
2071 anchor = anchor, | |
2072 underscore_whack = FALSE | |
2073 ) | |
2074 } | 2178 } |
2075 } else { | 2179 } else { |
2076 cat( | 2180 cat( |
2077 sprintf( | 2181 sprintf( |
2078 "\\break | 2182 "\\break |
2130 mycol <- unique(append(mycol_neg, mycol_pos)) | 2234 mycol <- unique(append(mycol_neg, mycol_pos)) |
2131 color_breaks <- list(breaks_all, mycol) | 2235 color_breaks <- list(breaks_all, mycol) |
2132 return(color_breaks) | 2236 return(color_breaks) |
2133 } | 2237 } |
2134 | 2238 |
2239 # nolint start | |
2240 # squash un-actionable cyclomatic_complexity warning | |
2135 hm2plus <- function( | 2241 hm2plus <- function( |
2242 # nolint end | |
2136 x, | 2243 x, |
2137 mat = matrix( | 2244 mat = matrix( |
2138 c( | 2245 c( |
2139 c(0, 4, 0), | 2246 c(0, 4, 0), |
2140 c(0, 3, 3), | 2247 c(0, 3, 3), |
2159 key_par = list(), | 2266 key_par = list(), |
2160 hclustfun = hclust, | 2267 hclustfun = hclust, |
2161 ... | 2268 ... |
2162 ) { | 2269 ) { |
2163 | 2270 |
2164 varargs <- list(...) | |
2165 if (FALSE) # this is to avoid commenting out code to pass linting... | 2271 if (FALSE) # this is to avoid commenting out code to pass linting... |
2166 my_hm2 <- latex_show_invocation(heatmap.2, head_patch = FALSE) | 2272 my_hm2 <- latex_show_invocation(gplots::heatmap.2, head_patch = FALSE) |
2167 else | 2273 else |
2168 my_hm2 <- heatmap.2 | 2274 my_hm2 <- gplots::heatmap.2 |
2169 | 2275 |
2170 x <- as.matrix(x) | 2276 x <- as.matrix(x) |
2171 if (sum(!is.na(x)) < 1) | 2277 if (sum(!is.na(x)) < 1) |
2172 return(NULL) | 2278 return(NULL) |
2173 color_count <- 1 + max(64, length(as.vector(x))) # 8 was not enough | 2279 color_count <- 1 + max(64, length(as.vector(x))) # 8 was not enough |
2175 min_nonax <- min(x, na.rm = TRUE) | 2281 min_nonax <- min(x, na.rm = TRUE) |
2176 max_nonax <- max(x, na.rm = TRUE) | 2282 max_nonax <- max(x, na.rm = TRUE) |
2177 if (print_nb_messages) nb("within hm2plus", see_variable(divergent), "\n") | 2283 if (print_nb_messages) nb("within hm2plus", see_variable(divergent), "\n") |
2178 if (divergent) { | 2284 if (divergent) { |
2179 zlim <- max(abs(min_nonax), abs(max_nonax)) | 2285 zlim <- max(abs(min_nonax), abs(max_nonax)) |
2180 if (print_nb_messages) nb(see_variable(pre_zlim <- zlim, "\n")) | 2286 if (print_nb_messages) nb(see_variable(zlim, "\n")) |
2181 breaks <- (zlim) / (break_count:1) | 2287 breaks <- (zlim) / (break_count:1) |
2182 if (print_nb_messages) nb(see_variable(breaks, "\n")) | 2288 if (print_nb_messages) nb(see_variable(breaks, "\n")) |
2183 breaks <- breaks - median(breaks) | 2289 breaks <- breaks - median(breaks) |
2184 zlim <- c(-zlim, zlim) | 2290 zlim <- c(-zlim, zlim) |
2185 if (print_nb_messages) nb(see_variable(zlim, "\n")) | 2291 if (print_nb_messages) nb(see_variable(zlim, "\n")) |
2186 } else { | 2292 } else { |
2187 zlim <- max(abs(min_nonax), abs(max_nonax)) | 2293 zlim <- max(abs(min_nonax), abs(max_nonax)) |
2188 if (print_nb_messages) nb(see_variable(pre_zlim <- zlim, "\n")) | 2294 if (print_nb_messages) nb(see_variable(zlim, "\n")) |
2189 breaks <- zlim / (break_count:1) | 2295 breaks <- zlim / (break_count:1) |
2190 if (print_nb_messages) nb(see_variable(breaks, "\n")) | 2296 if (print_nb_messages) nb(see_variable(breaks, "\n")) |
2191 if (max_nonax < 0) { | 2297 if (max_nonax < 0) { |
2192 breaks <- breaks - zlim | 2298 breaks <- breaks - zlim |
2193 zlim <- c(-zlim, 0) | 2299 zlim <- c(-zlim, 0) |
2195 zlim <- c(0, zlim) | 2301 zlim <- c(0, zlim) |
2196 } | 2302 } |
2197 if (print_nb_messages) nb(see_variable(zlim, "\n")) | 2303 if (print_nb_messages) nb(see_variable(zlim, "\n")) |
2198 } | 2304 } |
2199 nonax <- x | 2305 nonax <- x |
2200 nonax[is.na(x)] <- min_nonax | 2306 # nolint start |
2307 # squash un-actionable "no visible global for ..." | |
2308 if (params$heatMapNAsubstitute) nonax[is.na(x)] <- min_nonax | |
2309 # nolint end | |
2201 if (is.null(widths)) widths <- c(denwid, 4 - denwid, 1.5) | 2310 if (is.null(widths)) widths <- c(denwid, 4 - denwid, 1.5) |
2202 if (is.null(heights)) heights <- c(0.4, denhgt, 4.0) | 2311 if (is.null(heights)) heights <- c(0.4, denhgt, 4.0) |
2203 colors <- | 2312 colors <- |
2204 if (divergent && min_nonax < 0) { | 2313 if (divergent && min_nonax < 0) { |
2205 # divergent colors on both sides of zero | 2314 # divergent colors on both sides of zero |
2210 } else if (divergent && max_nonax < 0) { | 2319 } else if (divergent && max_nonax < 0) { |
2211 # "divergent" colors < zero | 2320 # "divergent" colors < zero |
2212 colorRampPalette(c("red", "white"))(color_count) | 2321 colorRampPalette(c("red", "white"))(color_count) |
2213 } else { | 2322 } else { |
2214 # "non-divergent" colors including zero | 2323 # "non-divergent" colors including zero |
2215 hcl.colors(color_count, "YlOrRd", rev = TRUE) | 2324 tmp <- hcl.colors(color_count, "YlOrRd", rev = TRUE) |
2325 # nolint start | |
2326 # squash un-actionable "no visible global for ..." | |
2327 tmp[1] <- params$heatMapNAgrey | |
2328 # nolint end | |
2329 tmp | |
2216 } | 2330 } |
2217 | 2331 |
2218 #ACE if (print_nb_messages) nb("within hm2plus", see_variable(key_par), "\n") | 2332 nbe("There are ", sum(is.na(x)), " NA values") |
2219 #ACE if (print_nb_messages) nb(see_variable(colors, "\n")) | 2333 |
2220 #ACE key_par$col = colors | |
2221 #ACE key_par$breaks = breaks | |
2222 | |
2223 if (print_nb_messages) nb(see_variable(par(), "\n")) #ACE TODO remove me | |
2224 if (print_nb_messages) cat("\\leavevmode\n\\linebreak\n") #ACE TODO remove me | |
2225 suppressWarnings( | 2334 suppressWarnings( |
2226 my_hm2( | 2335 my_hm2( |
2227 x = x, | 2336 x = x, |
2228 col = colors, | 2337 col = colors, |
2229 #ACE symkey = FALSE, | |
2230 density.info = density_info, | 2338 density.info = density_info, |
2231 srtCol = srtcol, | 2339 srtCol = srtcol, |
2232 srtRow = srtrow, | 2340 srtRow = srtrow, |
2233 margins = margins, | 2341 margins = margins, |
2234 lwid = widths, | 2342 lwid = widths, |
2239 lmat = mat, | 2347 lmat = mat, |
2240 notecol = notecol, | 2348 notecol = notecol, |
2241 trace = trace, | 2349 trace = trace, |
2242 bg = "yellow", | 2350 bg = "yellow", |
2243 hclustfun = hclustfun, | 2351 hclustfun = hclustfun, |
2244 #ACE breaks = breaks, | |
2245 oldstyle = FALSE, | 2352 oldstyle = FALSE, |
2246 ... # varargs | 2353 ... # varargs |
2247 ) | 2354 ) |
2248 ) | 2355 ) |
2249 # implicitly returning value returned by heatmap.2 | 2356 # implicitly returning value returned by heatmap.2 |
2250 } | 2357 } |
2251 | 2358 |
2252 # draw_kseaapp_summary_heatmap is a helper function for ksea_heatmap | 2359 # draw_kseaapp_summary_heatmap is a helper function for ksea_heatmap |
2360 # nolint start | |
2361 # squash un-actionable cyclomatic_complexity warning | |
2253 draw_kseaapp_summary_heatmap <- function( | 2362 draw_kseaapp_summary_heatmap <- function( |
2363 # nolint end | |
2254 x, # matrix with row/col names already formatted | 2364 x, # matrix with row/col names already formatted |
2255 sample_cluster, # a binary input of TRUE or FALSE, | 2365 sample_cluster, # a binary input of TRUE or FALSE, |
2256 # indicating whether or not to perform | 2366 # indicating whether or not to perform |
2257 # hierarchical clustering of the sample columns | 2367 # hierarchical clustering of the sample columns |
2258 merged_asterisk, # matrix having dimensions of x, values "*" or "" | 2368 merged_asterisk, # matrix having dimensions of x, values "*" or "" |
2293 cat_variable(x) | 2403 cat_variable(x) |
2294 return(FALSE) | 2404 return(FALSE) |
2295 } | 2405 } |
2296 max_nchar_rowname <- max(nchar(rownames(x))) | 2406 max_nchar_rowname <- max(nchar(rownames(x))) |
2297 max_nchar_colname <- max(nchar(colnames(x))) | 2407 max_nchar_colname <- max(nchar(colnames(x))) |
2298 my_limit <- g_intensity_hm_rows | |
2299 | 2408 |
2300 my_row_cex_scale <- master_cex * 150 / nrow_x | 2409 my_row_cex_scale <- master_cex * 150 / nrow_x |
2301 #ACE row cex shrink hack begin | 2410 # row cex shrink hack begin |
2302 my_row_cex_scale <- master_cex * 150 / max(nrow_x, ncol_x) | 2411 my_row_cex_scale <- master_cex * 150 / max(nrow_x, ncol_x) |
2303 #ACE row cex shrink hack end | 2412 # row cex shrink hack end |
2304 | 2413 |
2305 my_col_cex_scale <- 3.0 | 2414 my_col_cex_scale <- 3.0 |
2306 #ACE col cex shrink hack begin | 2415 # col cex shrink hack begin |
2307 if (ncol_x > 40) | 2416 if (ncol_x > 40) |
2308 my_col_cex_scale <- 3.0 * 40 / ncol_x | 2417 my_col_cex_scale <- 3.0 * 40 / ncol_x |
2309 #ACE col cex shrink hack end | 2418 # col cex shrink hack end |
2310 | 2419 |
2311 my_asterisk_scale <- 0.4 * my_row_cex_scale | 2420 my_asterisk_scale <- 0.4 * my_row_cex_scale |
2312 my_row_warp <- 1 | 2421 my_row_warp <- 1 |
2313 my_note_warp <- 2 | 2422 my_note_warp <- 2 |
2314 my_row_warp <- 1 | 2423 my_row_warp <- 1 |
2336 ) | 2445 ) |
2337 my_margins <- c(1, 1) + | 2446 my_margins <- c(1, 1) + |
2338 c( | 2447 c( |
2339 margins[1] * 0.08 * max_nchar_colname * my_col_cex, | 2448 margins[1] * 0.08 * max_nchar_colname * my_col_cex, |
2340 margins[2] * 0.04 * max_nchar_rowname * my_row_cex | 2449 margins[2] * 0.04 * max_nchar_rowname * my_row_cex |
2341 ) | |
2342 | |
2343 my_notecex <- | |
2344 my_scale * | |
2345 min( | |
2346 1.1, | |
2347 my_row_cex_asterisk * my_note_warp, | |
2348 my_col_cex * my_note_warp | |
2349 ) | 2450 ) |
2350 | 2451 |
2351 if (print_trace_messages) { | 2452 if (print_trace_messages) { |
2352 cat_variable(my_heights, suffix = "; ") | 2453 cat_variable(my_heights, suffix = "; ") |
2353 cat_variable(my_margins, suffix = "\n\n") | 2454 cat_variable(my_margins, suffix = "\n\n") |
2401 return(TRUE) | 2502 return(TRUE) |
2402 } | 2503 } |
2403 | 2504 |
2404 # function drawing heatmap of contrast fold-change for each kinase, | 2505 # function drawing heatmap of contrast fold-change for each kinase, |
2405 # adapted from KSEAapp::KSEA.Heatmap | 2506 # adapted from KSEAapp::KSEA.Heatmap |
2507 # nolint start | |
2508 # squash un-actionable cyclomatic_complexity warning | |
2406 ksea_heatmap <- function( | 2509 ksea_heatmap <- function( |
2510 # nolint end | |
2407 # the data frame outputs from the KSEA.Scores() function, in list format | 2511 # the data frame outputs from the KSEA.Scores() function, in list format |
2408 score_list, | 2512 score_list, |
2409 # a character vector of all the sample names for heatmap annotation: | 2513 # a character vector of all the sample names for heatmap annotation: |
2410 # - the names must be in the same order as the data in score_list | 2514 # - the names must be in the same order as the data in score_list |
2411 # - please avoid long names, as they may get cropped in the final image | 2515 # - please avoid long names, as they may get cropped in the final image |
2427 # hierarchical clustering of the sample columns | 2531 # hierarchical clustering of the sample columns |
2428 sample_cluster, | 2532 sample_cluster, |
2429 # a binary input of TRUE or FALSE, indicating whether or not to export | 2533 # a binary input of TRUE or FALSE, indicating whether or not to export |
2430 # the heatmap as a .png image into the working directory | 2534 # the heatmap as a .png image into the working directory |
2431 export = FALSE, | 2535 export = FALSE, |
2432 # bottom and right margins; adjust as needehttps://tex.stackexchange.com/a/56795d if contrast names are too long | 2536 # bottom and right margins; adjust as needed if contrast names are too long |
2433 margins = c(6, 6), | 2537 margins = c(6, 6), |
2434 # print which kinases? | 2538 # print which kinases? |
2435 # - Mandatory argument, must be one of const_ksea_.*_kinases | 2539 # - Mandatory argument, must be one of const_ksea_.*_kinases |
2436 which_kinases, | 2540 which_kinases, |
2437 # additional arguments to gplots::heatmap.2, such as: | 2541 # additional arguments to gplots::heatmap.2, such as: |
2482 asterisk_rows <- rowSums(merged_asterisk == "*") > 0 | 2586 asterisk_rows <- rowSums(merged_asterisk == "*") > 0 |
2483 all_rows <- rownames(merged_stats) | 2587 all_rows <- rownames(merged_stats) |
2484 names(asterisk_rows) <- all_rows | 2588 names(asterisk_rows) <- all_rows |
2485 non_asterisk_rows <- names(asterisk_rows[asterisk_rows == FALSE]) | 2589 non_asterisk_rows <- names(asterisk_rows[asterisk_rows == FALSE]) |
2486 asterisk_rows <- names(asterisk_rows[asterisk_rows == TRUE]) | 2590 asterisk_rows <- names(asterisk_rows[asterisk_rows == TRUE]) |
2487 merged_scores_asterisk <- merged_scores[names(asterisk_rows), , drop = FALSE] | |
2488 merged_scores_non_asterisk <- merged_scores[names(non_asterisk_rows), , drop = FALSE] | |
2489 | 2591 |
2490 row_list <- list() | 2592 row_list <- list() |
2491 row_list[[const_ksea_astrsk_kinases]] <- asterisk_rows | 2593 row_list[[const_ksea_astrsk_kinases]] <- asterisk_rows |
2492 row_list[[const_ksea_all_kinases]] <- all_rows | 2594 row_list[[const_ksea_all_kinases]] <- all_rows |
2493 row_list[[const_ksea_nonastrsk_kinases]] <- non_asterisk_rows | 2595 row_list[[const_ksea_nonastrsk_kinases]] <- non_asterisk_rows |
2611 } | 2713 } |
2612 | 2714 |
2613 keep_var_gtr_0 <- keep_var_gtr_min(0) | 2715 keep_var_gtr_0 <- keep_var_gtr_min(0) |
2614 | 2716 |
2615 # function drawing heatmap of phosphopeptide intensities | 2717 # function drawing heatmap of phosphopeptide intensities |
2718 # nolint start | |
2719 # squash un-actionable cyclomatic_complexity warning | |
2616 ppep_heatmap <- | 2720 ppep_heatmap <- |
2721 # nolint end | |
2617 function( | 2722 function( |
2618 m, # matrix with rownames already formatted | 2723 m, # matrix with rownames already formatted |
2619 cutoff, # cutoff used by hm_heading_function | 2724 cutoff, # cutoff used by hm_heading_function |
2620 hm_heading_function, # construct $ cat heading from m and cutoff | 2725 hm_heading_function, # construct $ cat heading from m and cutoff |
2621 hm_main_title, # main title for plot (drawn below heading) | 2726 hm_main_title, # main title for plot (drawn below heading) |
2628 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right | 2733 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right |
2629 row_scaling = # should row-scaling be applied if possible? | 2734 row_scaling = # should row-scaling be applied if possible? |
2630 g_default_heatmap_row_scaling, | 2735 g_default_heatmap_row_scaling, |
2631 ... # passthru to hm2plus or heatmap.2 | 2736 ... # passthru to hm2plus or heatmap.2 |
2632 ) { | 2737 ) { |
2633 use_heatmap_1 <- FALSE | |
2634 peptide_count <- 0 | 2738 peptide_count <- 0 |
2635 # emit the heading for the heatmap | 2739 # emit the heading for the heatmap |
2636 if (hm_heading_function(m, cutoff)) { | 2740 if (hm_heading_function(m, cutoff)) { |
2637 nrow_m <- nrow(m) | 2741 nrow_m <- nrow(m) |
2638 peptide_count <- min(max_peptide_count, nrow_m) | 2742 peptide_count <- min(max_peptide_count, nrow_m) |
2641 # Margin was heuristically derived to accommodate the widest label | 2745 # Margin was heuristically derived to accommodate the widest label |
2642 row_mchar_max <- max(nchar(rownames(m_margin))) | 2746 row_mchar_max <- max(nchar(rownames(m_margin))) |
2643 col_mchar_max <- max(nchar(colnames(m_margin))) | 2747 col_mchar_max <- max(nchar(colnames(m_margin))) |
2644 row_margin <- master_cex * row_mchar_max * 2.6 | 2748 row_margin <- master_cex * row_mchar_max * 2.6 |
2645 col_margin <- master_cex * col_mchar_max * 2.6 | 2749 col_margin <- master_cex * col_mchar_max * 2.6 |
2646 if (print_trace_messages) cat(sprintf("row_margin = %0.3f; ", row_margin)) | 2750 if (print_trace_messages) cat( |
2647 if (print_trace_messages) cat(sprintf("col_margin = %0.3f; ", col_margin)) | 2751 sprintf("row_margin = %0.3f; ", row_margin)) |
2752 if (print_trace_messages) cat( | |
2753 sprintf("col_margin = %0.3f; ", col_margin)) | |
2648 hm_call <- NULL | 2754 hm_call <- NULL |
2649 tryCatch( | 2755 tryCatch( |
2650 { | 2756 { |
2651 # set non-argument parameters for hm_call inner function | 2757 # set non-argument parameters for hm_call inner function |
2652 my_row_cex <- | 2758 my_row_cex <- |
2654 (max(nchar(rownames(m_margin)))^2) * g_intensity_hm_rows | 2760 (max(nchar(rownames(m_margin)))^2) * g_intensity_hm_rows |
2655 ) | 2761 ) |
2656 m_hm <- m[peptide_count:1, , drop = FALSE] | 2762 m_hm <- m[peptide_count:1, , drop = FALSE] |
2657 if (is.null(cellnote)) { | 2763 if (is.null(cellnote)) { |
2658 cellnote <- matrix("", nrow = nrow(m_hm), ncol = ncol(m_hm)) | 2764 cellnote <- matrix("", nrow = nrow(m_hm), ncol = ncol(m_hm)) |
2659 cellnote[is.na(m_hm)] <- "NA" | 2765 # nolint start |
2766 # squash un-actionable "no visible global for ..." | |
2767 cellnote[is.na(m_hm)] <- params$heatMapNAcellNote | |
2768 # nolint end | |
2660 } else { | 2769 } else { |
2661 cellnote <- cellnote[peptide_count:1, , drop = FALSE] | 2770 cellnote <- cellnote[peptide_count:1, , drop = FALSE] |
2662 } | 2771 } |
2663 m_hm[is.na(m_hm)] <- 0 | 2772 # nolint start |
2773 # squash un-actionable "no visible global for ..." | |
2774 if (params$heatMapNAsubstitute) m_hm[is.na(m_hm)] <- 0 | |
2775 # nolint end | |
2664 nrow_m_hm <- nrow(m_hm) | 2776 nrow_m_hm <- nrow(m_hm) |
2665 ncol_m_hm <- ncol(m_hm) | 2777 ncol_m_hm <- ncol(m_hm) |
2666 if (nrow_m_hm < 1 || ncol_m_hm < 1) | 2778 if (nrow_m_hm < 1 || ncol_m_hm < 1) |
2667 return(peptide_count) # return zero as initialized above | 2779 return(peptide_count) # return zero as initialized above |
2668 my_limit <- g_intensity_hm_rows | |
2669 | 2780 |
2670 | 2781 |
2671 my_row_cex <- master_cex * (100 / (2 + row_mchar_max)) | 2782 my_row_cex <- master_cex * (100 / (2 + row_mchar_max)) |
2672 my_col_cex <- master_cex * 6 * row_margin / col_margin | 2783 my_col_cex <- master_cex * 6 * row_margin / col_margin |
2673 my_col_adj <- min(my_col_cex, my_row_cex) / my_col_cex | 2784 my_col_adj <- min(my_col_cex, my_row_cex) / my_col_cex |
2674 my_col_cex <- min(my_col_cex, my_row_cex) | 2785 my_col_cex <- min(my_col_cex, my_row_cex) |
2675 col_margin <- sqrt(my_col_adj) * col_margin | 2786 col_margin <- sqrt(my_col_adj) * col_margin |
2676 if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex)) | 2787 if (print_trace_messages) cat( |
2677 if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex)) | 2788 sprintf("my_row_cex = %0.3f; ", my_row_cex)) |
2789 if (print_trace_messages) cat( | |
2790 sprintf("my_col_cex = %0.3f; ", my_col_cex)) | |
2678 if (is.null(margins)) my_margins <- | 2791 if (is.null(margins)) my_margins <- |
2679 c( | 2792 c( |
2680 (my_col_cex + col_margin), # col | 2793 (my_col_cex + col_margin), # col |
2681 (my_row_cex + row_margin) / my_row_cex # row | 2794 (my_row_cex + row_margin) / my_row_cex # row |
2682 ) | 2795 ) |
2688 "my_margins = c(%s)\n\n", | 2801 "my_margins = c(%s)\n\n", |
2689 paste(my_margins, collapse = ", ") | 2802 paste(my_margins, collapse = ", ") |
2690 ) | 2803 ) |
2691 ) | 2804 ) |
2692 my_hm2_cex <- 2 * master_cex | 2805 my_hm2_cex <- 2 * master_cex |
2693 my_key_cex <- 0.9 - 0.1 * (g_intensity_hm_rows + nrow_m_hm) / g_intensity_hm_rows | 2806 my_key_cex <- |
2807 0.9 - | |
2808 0.1 * (g_intensity_hm_rows + nrow_m_hm) / g_intensity_hm_rows | |
2694 my_key_warp <- 1.5 * 22.75 / row_margin | 2809 my_key_warp <- 1.5 * 22.75 / row_margin |
2695 my_key_cex <- min(1.10, my_key_warp * my_key_cex) | 2810 my_key_cex <- min(1.10, my_key_warp * my_key_cex) |
2696 my_hgt_scale <- 3.70 - 0.4 * (max(1, 0.9 * my_row_cex) - 1) | 2811 my_hgt_scale <- 3.70 - 0.4 * (max(1, 0.9 * my_row_cex) - 1) |
2697 my_hgt_scale <- 3.75 # 3.615 | 2812 my_hgt_scale <- 3.75 # 3.615 |
2698 my_hgt_scale <- 3.60 # 3.615 | 2813 my_hgt_scale <- 3.60 # 3.615 |
2703 cat_variable(my_warp, "\n\n", 3) | 2818 cat_variable(my_warp, "\n\n", 3) |
2704 # added 0.9 heuristically... | 2819 # added 0.9 heuristically... |
2705 my_plot_height <- | 2820 my_plot_height <- |
2706 (0.566 + 0.354 * (nrow_m / g_intensity_hm_rows)) * | 2821 (0.566 + 0.354 * (nrow_m / g_intensity_hm_rows)) * |
2707 min(my_hgt_scale, my_hgt_scale * my_warp) | 2822 min(my_hgt_scale, my_hgt_scale * my_warp) |
2708 my_plot_height <- min(3.65, my_plot_height * g_intensity_hm_rows / 50) | 2823 my_plot_height <- |
2824 min(3.65, my_plot_height * g_intensity_hm_rows / 50) | |
2709 my_heights <- c( | 2825 my_heights <- c( |
2710 0.3, # title and top dendrogram | 2826 0.3, # title and top dendrogram |
2711 my_plot_height, # plot and bottom margin | 2827 my_plot_height, # plot and bottom margin |
2712 4.15 - my_hgt_scale, # legend | 2828 4.15 - my_hgt_scale, # legend |
2713 0.05 + my_hgt_scale - my_plot_height # whitespace below legend | 2829 0.05 + my_hgt_scale - my_plot_height # whitespace below legend |
2768 scale = scaling, | 2884 scale = scaling, |
2769 srtcol = 90, | 2885 srtcol = 90, |
2770 srtrow = 0, | 2886 srtrow = 0, |
2771 xlab = "", | 2887 xlab = "", |
2772 cellnote = cellnote, | 2888 cellnote = cellnote, |
2889 # ref for na.color: | |
2890 # https://cran.r-project.org/web/packages/gplots/gplots.pdf | |
2891 # nolint start | |
2892 # squash un-actionable "no visible global for ..." | |
2893 na.color = params$heatMapNAcellColor, | |
2894 # nolint end | |
2773 notecex = my_note_cex, | 2895 notecex = my_note_cex, |
2774 ... | 2896 ... |
2775 ) | 2897 ) |
2776 ) | 2898 ) |
2777 ) { | 2899 ) { |
2831 }, | 2953 }, |
2832 error = function(r) { | 2954 error = function(r) { |
2833 cat( | 2955 cat( |
2834 sprintf( | 2956 sprintf( |
2835 "\n%s %s Internal message: %s\n\\newline\n\n", | 2957 "\n%s %s Internal message: %s\n\\newline\n\n", |
2836 "Failure drawing heatmap,", | 2958 "Failure drawing heatmap, possibly because of ", |
2837 "possibly because of too many missing values.\n\\newline\n\n", | 2959 "too many missing values.\n\\newline\n\n", |
2838 r$message | 2960 r$message |
2839 ) | 2961 ) |
2840 ) | 2962 ) |
2841 cat_margins() | 2963 cat_margins() |
2842 } | 2964 } |
2843 ) | 2965 ) |
2844 } else { | 2966 } else { |
2845 cat( | 2967 cat( |
2846 "\nFailure drawing heatmap, possibly because of too many missing values.\n" | 2968 "\nFailure drawing heatmap, ", |
2969 "possibly because of too many missing values.\n" | |
2847 ) | 2970 ) |
2848 } | 2971 } |
2849 } | 2972 } |
2850 ) | 2973 ) |
2851 } | 2974 } |
2853 # return value: | 2976 # return value: |
2854 peptide_count | 2977 peptide_count |
2855 } | 2978 } |
2856 | 2979 |
2857 # function drawing heatmap of correlations if they exist, else covariances | 2980 # function drawing heatmap of correlations if they exist, else covariances |
2981 # nolint start | |
2982 # squash un-actionable cyclomatic_complexity warning | |
2983 # squash un-actionable "no visible global for ..." | |
2858 cov_heatmap <- | 2984 cov_heatmap <- |
2859 function( | 2985 function( |
2986 # nolint end | |
2860 m, # matrix with rownames already formatted | 2987 m, # matrix with rownames already formatted |
2988 kinase_name, | |
2861 top_substrates = FALSE, | 2989 top_substrates = FALSE, |
2862 ... # passthru to hm2plus or heatmap.2 | 2990 ... # passthru to hm2plus or heatmap.2 |
2863 ) { | 2991 ) { |
2864 if (print_nb_messages) nbe(see_variable(m), " [", nrow(m), "x", ncol(m), "\n") | 2992 if (print_nb_messages) nbe( |
2865 #ACE nb(rowSums(m, na.rm = TRUE)) | 2993 see_variable(m), " [", nrow(m), "x", ncol(m), "\n") |
2866 #ACE bad_rows <- (rowSums(m, na.rm = TRUE) == 0) | |
2867 #ACE nb(see_variable(bad_rows)) | |
2868 #ACE m <- m[-bad_rows, , drop = FALSE] | |
2869 colnames_m <- colnames(m) | 2994 colnames_m <- colnames(m) |
2870 is_na_m <- is.na(m) | 2995 is_na_m <- is.na(m) |
2871 tmp <- m | 2996 tmp <- m |
2872 tmp[is_na_m] <- 0 | 2997 tmp[is_na_m] <- 0 |
2873 | 2998 |
2901 } | 3026 } |
2902 | 3027 |
2903 | 3028 |
2904 t_m <- t(tmp) | 3029 t_m <- t(tmp) |
2905 t_m[is.na(t_m)] <- 0 | 3030 t_m[is.na(t_m)] <- 0 |
2906 prefiltered_nrow <- ncol(t_m) | |
2907 | 3031 |
2908 my_corcov <- cov(t_m) | 3032 my_corcov <- cov(t_m) |
2909 did_filter_rows <- did_filter_cols <- FALSE | 3033 did_filter_rows <- did_filter_cols <- FALSE |
2910 if (g_correlate_substrates && !is_positive_definite(my_corcov)) { | 3034 if (g_correlate_substrates && !is_positive_definite(my_corcov)) { |
2911 my_correlate_substrates <- FALSE | 3035 my_correlate_substrates <- FALSE |
2942 | 3066 |
2943 f_omissions <- | 3067 f_omissions <- |
2944 function(is_corr) { | 3068 function(is_corr) { |
2945 cat( | 3069 cat( |
2946 sprintf( | 3070 sprintf( |
2947 "Below is the %s plot for %s substrates", | 3071 "Below is the %s plot for %s sites", |
2948 if (is_corr) "correlation" else "covariance", | 3072 if (is_corr) "correlation" else "covariance", |
2949 sprintf( | 3073 sprintf( |
2950 if (top_substrates) | 3074 if (top_substrates) |
2951 "%0.0f \"highest-quality\"" | 3075 "%0.0f \"highest-quality\"" |
2952 else | 3076 else |
3004 | 3128 |
3005 parjust <- par(adj = 0.5) | 3129 parjust <- par(adj = 0.5) |
3006 on.exit(par(parjust)) | 3130 on.exit(par(parjust)) |
3007 my_corcov <- my_corcov[order(rownames(my_corcov)), ] | 3131 my_corcov <- my_corcov[order(rownames(my_corcov)), ] |
3008 my_main <- | 3132 my_main <- |
3009 sprintf("%s among %s substrates %s", | 3133 sprintf("%s among %s sites %s", |
3010 if (my_correlate_substrates) "Correlation" | 3134 if (my_correlate_substrates) "Correlation" |
3011 else "Covariance", | 3135 else "Covariance", |
3012 kinase_name, | 3136 kinase_name, |
3013 if (!my_correlate_substrates && | 3137 if (!my_correlate_substrates && |
3014 g_filter_cov_var_gt_1 && | 3138 g_filter_cov_var_gt_1 && |
3023 my_plot_height, | 3147 my_plot_height, |
3024 my_legend_height # was 4.0 - my_hgt_scale # was 4.19 | 3148 my_legend_height # was 4.0 - my_hgt_scale # was 4.19 |
3025 ) | 3149 ) |
3026 if (print_trace_messages) cat(sprintf("max_nchar = %0.3f; ", max_nchar)) | 3150 if (print_trace_messages) cat(sprintf("max_nchar = %0.3f; ", max_nchar)) |
3027 if (print_trace_messages) cat(sprintf("my_margin = %0.3f; ", my_margin)) | 3151 if (print_trace_messages) cat(sprintf("my_margin = %0.3f; ", my_margin)) |
3028 if (print_trace_messages) cat(sprintf("my_plot_height = %0.3f\n\n", my_plot_height)) | 3152 if (print_trace_messages) cat( |
3153 sprintf("my_plot_height = %0.3f\n\n", my_plot_height)) | |
3029 if (print_trace_messages) cat(sprintf("master_cex = %0.3f; ", master_cex)) | 3154 if (print_trace_messages) cat(sprintf("master_cex = %0.3f; ", master_cex)) |
3030 if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex)) | 3155 if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex)) |
3031 if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex)) | 3156 if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex)) |
3032 if (print_trace_messages) cat(sprintf("my_key_cex = %0.3f\n\n", my_key_cex)) | 3157 if (print_trace_messages) cat( |
3033 if (print_trace_messages) cat(sprintf("my_hgt_scale = %0.3f\n\n", my_hgt_scale)) | 3158 sprintf("my_key_cex = %0.3f\n\n", my_key_cex)) |
3034 if (print_trace_messages) cat(sprintf("legend height = %0.3f\n\n", my_legend_height)) | 3159 if (print_trace_messages) cat( |
3160 sprintf("my_hgt_scale = %0.3f\n\n", my_hgt_scale)) | |
3161 if (print_trace_messages) cat( | |
3162 sprintf("legend height = %0.3f\n\n", my_legend_height)) | |
3035 if (print_trace_messages) cat( | 3163 if (print_trace_messages) cat( |
3036 sprintf( | 3164 sprintf( |
3037 "my_heights = c(%s); sum = %0.3f\n\n", | 3165 "my_heights = c(%s); sum = %0.3f\n\n", |
3038 paste( | 3166 paste( |
3039 sprintf("%0.3f", my_heights), | 3167 sprintf("%0.3f", my_heights), |
3671 "median peptide-intensity across all sample classes.\n" | 3799 "median peptide-intensity across all sample classes.\n" |
3672 ) | 3800 ) |
3673 # Take the accurate ln(x+1) because the data are log-normally distributed | 3801 # Take the accurate ln(x+1) because the data are log-normally distributed |
3674 # and because median can involve an average of two measurements. | 3802 # and because median can involve an average of two measurements. |
3675 quant_data_imp <- log1p(quant_data_imp) | 3803 quant_data_imp <- log1p(quant_data_imp) |
3676 quant_data_imp[ind] <- row_apply(quant_data_imp, median, na.rm = TRUE)[ind[, 1]] | 3804 quant_data_imp[ind] <- |
3805 row_apply(quant_data_imp, median, na.rm = TRUE)[ind[, 1]] | |
3677 # Take the accurate e^x - 1 to match scaling of original input. | 3806 # Take the accurate e^x - 1 to match scaling of original input. |
3678 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp)) | 3807 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp)) |
3679 good_rows <- !is.nan(rowMeans(quant_data_imp)) | 3808 good_rows <- !is.nan(rowMeans(quant_data_imp)) |
3680 } | 3809 } |
3681 , "mean" = { | 3810 , "mean" = { |
3687 # Take the accurate ln(x+1) because the data are log-normally distributed, | 3816 # Take the accurate ln(x+1) because the data are log-normally distributed, |
3688 # so arguments to mean should be previously transformed. | 3817 # so arguments to mean should be previously transformed. |
3689 # this will have to be | 3818 # this will have to be |
3690 quant_data_imp <- log1p(quant_data_imp) | 3819 quant_data_imp <- log1p(quant_data_imp) |
3691 # Assign to NA cells the mean for the row | 3820 # Assign to NA cells the mean for the row |
3692 quant_data_imp[ind] <- row_apply(quant_data_imp, mean, na.rm = TRUE)[ind[, 1]] | 3821 quant_data_imp[ind] <- |
3822 row_apply(quant_data_imp, mean, na.rm = TRUE)[ind[, 1]] | |
3693 # Take the accurate e^x - 1 to match scaling of original input. | 3823 # Take the accurate e^x - 1 to match scaling of original input. |
3694 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp)) | 3824 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp)) |
3695 good_rows <- !is.nan(rowMeans(quant_data_imp)) | 3825 good_rows <- !is.nan(rowMeans(quant_data_imp)) |
3696 } | 3826 } |
3697 , "random" = { | 3827 , "random" = { |
3698 quant_data_imp <- quant_data | 3828 quant_data_imp <- quant_data |
3699 m1 <- median(sds, na.rm = TRUE) * sd_percentile #sd to be used is the median sd | 3829 # sd to be used is the median sd |
3830 m1 <- median(sds, na.rm = TRUE) * sd_percentile | |
3700 # If you want results to be reproducible, you will want to call | 3831 # If you want results to be reproducible, you will want to call |
3701 # base::set.seed before calling stats::rnorm | 3832 # base::set.seed before calling stats::rnorm |
3702 imputation_method_description <- | 3833 imputation_method_description <- |
3703 paste("Substitute each missing value with random intensity", | 3834 paste("Substitute each missing value with random intensity", |
3704 sprintf( | 3835 sprintf( |
3733 imp_smry_pot_peptides_after <- sum(good_rows) | 3864 imp_smry_pot_peptides_after <- sum(good_rows) |
3734 imp_smry_rejected_after <- sum(!good_rows) | 3865 imp_smry_rejected_after <- sum(!good_rows) |
3735 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ])) | 3866 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ])) |
3736 | 3867 |
3737 # From ?`%in%`: | 3868 # From ?`%in%`: |
3738 # %in% is currently defined as function(x, table) match(x, table, nomatch = 0) > 0 | 3869 # %in% is currently defined |
3870 # as function(x, table) match(x, table, nomatch = 0) > 0 | |
3739 | 3871 |
3740 stock_in <- | 3872 stock_in <- |
3741 names(good_rows) %in% | 3873 names(good_rows) %in% |
3742 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count]) | 3874 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count]) |
3743 if (print_nb_messages) nbe(see_variable(stock_in), "\n") | 3875 if (print_nb_messages) nbe(see_variable(stock_in), "\n") |
3796 cat(tabular_lines) | 3928 cat(tabular_lines) |
3797 ``` | 3929 ``` |
3798 | 3930 |
3799 ```{r filter_good_rows, echo = FALSE} | 3931 ```{r filter_good_rows, echo = FALSE} |
3800 | 3932 |
3801 if (print_nb_messages) nbe("before name extraction, ", see_variable(length(good_rows)), " ", see_variable(good_rows), "\n") | 3933 if (print_nb_messages) nbe( |
3934 "before name extraction, ", see_variable(length(good_rows)), " ", | |
3935 see_variable(good_rows), "\n") | |
3802 good_rows <- names(good_rows[names(great_enough_row_names)]) | 3936 good_rows <- names(good_rows[names(great_enough_row_names)]) |
3803 if (print_nb_messages) nbe("after name extraction, ", see_variable(length(good_rows)), see_variable(good_rows), "\n") | 3937 if (print_nb_messages) nbe( |
3804 | 3938 "after name extraction, ", see_variable(length(good_rows)), |
3805 #ACE min_group_obs_count <- min_group_obs_count[names(great_enough_row_names)] | 3939 see_variable(good_rows), "\n") |
3806 #ACE nbe("good_rows") | 3940 |
3807 #ACE nbe(see_variable(good_rows)) | |
3808 #ACE nbe("names(min_group_obs_count) before filter for good rows") | |
3809 #ACE nbe(see_variable(names(min_group_obs_count))) | |
3810 min_group_obs_count <- min_group_obs_count[good_rows] | 3941 min_group_obs_count <- min_group_obs_count[good_rows] |
3811 #ACE nbe("min_group_obs_count after filter for good rows") | |
3812 #ACE nbe(see_variable(names(min_group_obs_count))) | |
3813 | 3942 |
3814 # Zap rows where imputation was insufficiently effective | 3943 # Zap rows where imputation was insufficiently effective |
3815 full_data <- full_data [good_rows, ] | 3944 full_data <- full_data [good_rows, ] |
3816 quant_data <- quant_data [good_rows, ] | 3945 quant_data <- quant_data [good_rows, ] |
3817 quant_data_log <- quant_data_log [good_rows, ] | 3946 quant_data_log <- quant_data_log [good_rows, ] |
3818 | 3947 |
3819 if (print_nb_messages) nbe("before row filter, ", see_variable(nrow(quant_data_imp)), "\n") | 3948 if (print_nb_messages) nbe( |
3949 "before row filter, ", see_variable(nrow(quant_data_imp)), "\n") | |
3820 quant_data_imp <- quant_data_imp[good_rows, ] | 3950 quant_data_imp <- quant_data_imp[good_rows, ] |
3821 if (print_nb_messages) nbe("after row filter, ", see_variable(nrow(quant_data_imp)), "\n") | 3951 if (print_nb_messages) nbe( |
3952 "after row filter, ", see_variable(nrow(quant_data_imp)), "\n") | |
3822 write_debug_file(quant_data_imp) | 3953 write_debug_file(quant_data_imp) |
3823 quant_data_imp_good_rows <- quant_data_imp | 3954 quant_data_imp_good_rows <- quant_data_imp |
3824 | 3955 |
3825 write_debug_file(quant_data_imp_good_rows) | 3956 write_debug_file(quant_data_imp_good_rows) |
3826 ``` | 3957 ``` |
3945 my_ppep_distrib_bxp( | 4076 my_ppep_distrib_bxp( |
3946 x = blue_dots | 4077 x = blue_dots |
3947 , sample_name_grow = sample_name_grow | 4078 , sample_name_grow = sample_name_grow |
3948 , main = "Peptide intensities after eliminating unusable peptides" | 4079 , main = "Peptide intensities after eliminating unusable peptides" |
3949 , varwidth = boxplot_varwidth | 4080 , varwidth = boxplot_varwidth |
3950 , sub = paste(boxplot_sub, "Box widths reflect number of peptides for sample") | 4081 , sub = paste(boxplot_sub, |
4082 "Box widths reflect number of peptides for sample") | |
3951 , xlab = "Sample" | 4083 , xlab = "Sample" |
3952 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") | 4084 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
3953 , col = const_boxplot_fill | 4085 , col = const_boxplot_fill |
3954 , notch = FALSE | 4086 , notch = FALSE |
3955 , ylim = ylim | 4087 , ylim = ylim |
4281 | 4413 |
4282 ## Assignment of $p$-value and quality score | 4414 ## Assignment of $p$-value and quality score |
4283 | 4415 |
4284 For each phosphopeptide, ANOVA analysis was performed to compute a $p$-value representing the evidence against the "null hypothesis" ($H_0$) that the intensity does not vary significantly among sample groups. | 4416 For each phosphopeptide, ANOVA analysis was performed to compute a $p$-value representing the evidence against the "null hypothesis" ($H_0$) that the intensity does not vary significantly among sample groups. |
4285 | 4417 |
4286 However, because as more and more phosphopeptides are tested, there is increasd probability that, by random chance, a given peptide will have a $p$-value that appears to indicate significance. For this reason, the $p$-values were adjusted by applying the False Discovery Rate (FDR) correction from Benjamini and Hochberg (1995) [doi:10.1111/j.2517-6161.1995.tb02031.x](https:/doi.org/10.1111/j.2517-6161.1995.tb02031.x). | 4418 However, because as more and more phosphopeptides are tested, there is increasd probability that, by random chance, a given peptide will have a $p$-value that appears to indicate significance. For this reason, the $p$-values were adjusted by applying the False Discovery Rate (FDR) correction from Benjamini and Hochberg (1995) [doi:10.1111/j.2517-6161.1995.tb02031.x](https://doi.org/10.1111/j.2517-6161.1995.tb02031.x). |
4287 | 4419 |
4288 Furthermore, to give more weight to phosphopeptides having fewer missing values, an (arbitrarily defined) quality score was assigned to each, defined as: | 4420 Furthermore, to give more weight to phosphopeptides having fewer missing values, an (arbitrarily defined) quality score was assigned to each, defined as: |
4289 | 4421 |
4290 $$ | 4422 $$ |
4291 \textit{quality}_j | 4423 \textit{quality}_j |
4311 ``` | 4443 ``` |
4312 | 4444 |
4313 ```{r anova, echo = FALSE, fig.dim = c(10, 12), results = 'asis'} | 4445 ```{r anova, echo = FALSE, fig.dim = c(10, 12), results = 'asis'} |
4314 g_can_run_ksea <- FALSE | 4446 g_can_run_ksea <- FALSE |
4315 old_oma <- par("oma") | 4447 old_oma <- par("oma") |
4448 | |
4449 # nolint start | |
4450 # squash un-actionable cyclomatic_complexity warning | |
4316 if (count_of_treatment_levels < 2) { | 4451 if (count_of_treatment_levels < 2) { |
4452 # nolint end | |
4317 cat( | 4453 cat( |
4318 "ERROR!!!! Cannot perform ANOVA analysis", | 4454 "ERROR!!!! Cannot perform ANOVA analysis", |
4319 "(see next page)\\newpage\n" | 4455 "(see next page)\\newpage\n" |
4320 ) | 4456 ) |
4321 cat( | 4457 cat( |
4365 } else { | 4501 } else { |
4366 | 4502 |
4367 if (print_nb_messages) nbe("computing p_value_data_anova_ps\n") | 4503 if (print_nb_messages) nbe("computing p_value_data_anova_ps\n") |
4368 if (print_nb_messages) nbe(see_variable(nrow(quant_data_imp_qn_log)), "\n") | 4504 if (print_nb_messages) nbe(see_variable(nrow(quant_data_imp_qn_log)), "\n") |
4369 if (print_nb_messages) nbe(see_variable(ncol(quant_data_imp_qn_log)), "\n") | 4505 if (print_nb_messages) nbe(see_variable(ncol(quant_data_imp_qn_log)), "\n") |
4370 if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log[, ".NE.7C"]), "\n") | |
4371 if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log), "\n") | 4506 if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log), "\n") |
4372 if (print_nb_messages) nbe(see_variable(anova_func), "\n") | 4507 if (print_nb_messages) nbe(see_variable(anova_func), "\n") |
4373 if (print_nb_messages) nbe(see_variable(smpl_trt), "\n") | 4508 if (print_nb_messages) nbe(see_variable(smpl_trt), "\n") |
4374 if (print_nb_messages) nbe(see_variable(one_way_all_categories), "\n") | 4509 if (print_nb_messages) nbe(see_variable(one_way_all_categories), "\n") |
4375 tryCatch( | 4510 tryCatch( |
4475 if (number_of_peptides_found > g_intensity_hm_rows - 1) { | 4610 if (number_of_peptides_found > g_intensity_hm_rows - 1) { |
4476 cat("\\leavevmode\n\n\n") | 4611 cat("\\leavevmode\n\n\n") |
4477 break | 4612 break |
4478 } | 4613 } |
4479 | 4614 |
4615 if (print_trace_messages) { | |
4616 cat_variable(cutoff) | |
4617 cat("\n\n") | |
4618 } | |
4619 | |
4480 bool_1 <- (p_value_data$fdr_adjusted_anova_p < cutoff) | 4620 bool_1 <- (p_value_data$fdr_adjusted_anova_p < cutoff) |
4481 bool_2 <- (p_value_data$min_group_obs_count >= g_intensity_min_per_class) | 4621 bool_2 <- (p_value_data$min_group_obs_count >= g_intensity_min_per_class) |
4482 g_can_run_ksea <- g_can_run_ksea || (sum(bool_2) > 0) | 4622 g_can_run_ksea <- g_can_run_ksea || (sum(bool_2) > 0) |
4483 bool_4 <- (p_value_data$quality >= params$minQuality) | 4623 bool_4 <- (p_value_data$quality >= params$minQuality) |
4484 bool_3 <- as.logical( | 4624 bool_3 <- as.logical( |
4507 | 4647 |
4508 if (print_trace_messages) | 4648 if (print_trace_messages) |
4509 cat_variable(filtered_p, force_str = TRUE) | 4649 cat_variable(filtered_p, force_str = TRUE) |
4510 | 4650 |
4511 have_remaining_peptides <- sum(bool_3, na.rm = TRUE) > 0 | 4651 have_remaining_peptides <- sum(bool_3, na.rm = TRUE) > 0 |
4652 | |
4512 filter_result_string <- | 4653 filter_result_string <- |
4513 sprintf( | 4654 sprintf( |
4514 "%s, %s of %0.0f peptides remained having both %s and %s.\n\n", | 4655 "%s, %s of %0.0f peptides remained having both %s and %s.\n\n", |
4515 "After filtering for ANOVA results", | 4656 "After filtering for ANOVA results", |
4516 if (have_remaining_peptides) | 4657 if (have_remaining_peptides) |
4517 as.character(sum(bool_3, na.rm = TRUE)) | 4658 as.character(sum(bool_3, na.rm = TRUE)) |
4518 else | 4659 else |
4519 "none", | 4660 "none", |
4520 length(bool_3), | 4661 length(bool_3), |
4521 sprintf("adjusted p-value < %s", as.character(signif(cutoff, 2))), | 4662 adj_p_string <- |
4663 sprintf("adjusted p-value < %s", as.character(signif(cutoff, 2))), | |
4522 sprintf( | 4664 sprintf( |
4523 "more than %0.0f observations in some groups", | 4665 "more than %0.0f observations in some groups", |
4524 max(0, g_intensity_min_per_class - 1) | 4666 max(0, g_intensity_min_per_class - 1) |
4525 ) | 4667 ) |
4526 ) | 4668 ) |
4535 filtered_data_filtered[ | 4677 filtered_data_filtered[ |
4536 order(filtered_p$fdr_adjusted_anova_p), | 4678 order(filtered_p$fdr_adjusted_anova_p), |
4537 , drop = FALSE | 4679 , drop = FALSE |
4538 ] | 4680 ] |
4539 | 4681 |
4540 if (have_remaining_peptides && nrow(filtered_p) > 0 && nrow(filtered_data_filtered) > 0) { | 4682 if (have_remaining_peptides |
4683 && nrow(filtered_p) > 0 | |
4684 && nrow(filtered_data_filtered) > 0 | |
4685 ) { | |
4541 if (first_page_suppress == 1) { | 4686 if (first_page_suppress == 1) { |
4542 first_page_suppress <- 0 | 4687 first_page_suppress <- 0 |
4543 } else { | 4688 } else { |
4544 cat("\\newpage\n") | 4689 cat("\\newpage\n") |
4545 } | 4690 } |
4546 latex_samepage({ | 4691 latex_samepage({ |
4547 cat(filter_result_string) | 4692 cat(filter_result_string) |
4548 if (nrow(filtered_data_filtered) > 1) { | 4693 if (nrow(filtered_data_filtered) > 1) { |
4549 cat(subsection_header(sprintf( | 4694 cat(subsection_header(sprintf( |
4550 "Intensity distributions for %d phosphopeptides\n", | 4695 "Intensity distributions for %d phosphopeptides, %s\n", |
4551 nrow(filtered_data_filtered) | 4696 nrow(filtered_data_filtered), |
4697 adj_p_string | |
4552 ))) | 4698 ))) |
4553 } else { | 4699 } else { |
4554 cat(subsection_header(sprintf( | 4700 cat( |
4555 "Intensity distribution for one phosphopeptide (%s)\n", | 4701 subsection_header( |
4556 rownames(filtered_data_filtered)[1] | 4702 sprintf( |
4557 ))) | 4703 "Intensity distribution for one phosphopeptide, %s\n", |
4704 adj_p_string | |
4705 ) | |
4706 ), | |
4707 rownames(filtered_data_filtered)[1], | |
4708 "\n" | |
4709 ) | |
4558 } | 4710 } |
4559 }) # end latex_samepage | 4711 }) # end latex_samepage |
4560 | 4712 |
4561 old_par <- par( | 4713 old_par <- par( |
4562 mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1), | 4714 mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1), |
4635 g_intensity_hm_rows), | 4787 g_intensity_hm_rows), |
4636 sprintf("whose adjusted p-value < %0.2f\n", cutoff) | 4788 sprintf("whose adjusted p-value < %0.2f\n", cutoff) |
4637 ) | 4789 ) |
4638 )) | 4790 )) |
4639 } else { | 4791 } else { |
4640 if (nrow(m) == 0) { | 4792 if (nrow(m) < 2) { |
4641 return(FALSE) | 4793 return(FALSE) |
4642 } else { | 4794 } else { |
4643 cat(subsection_header( | 4795 cat(subsection_header( |
4644 paste( | 4796 paste( |
4645 sprintf("Heatmap for %d usable peptide genes whose", nrow(m)), | 4797 sprintf("Heatmap for %d usable peptide genes whose", nrow(m)), |
4659 nrow_m <- nrow(m) | 4811 nrow_m <- nrow(m) |
4660 ncol_m <- ncol(m) | 4812 ncol_m <- ncol(m) |
4661 if (nrow_m > 0) { | 4813 if (nrow_m > 0) { |
4662 rownames_m <- rownames(m) | 4814 rownames_m <- rownames(m) |
4663 q <- data.frame(pepname = rownames_m) | 4815 q <- data.frame(pepname = rownames_m) |
4664 g <- sqldf(" | 4816 g <- sqldf::sqldf(" |
4665 SELECT q.pepname, substr(met.Gene_Name, 1, 30) AS gene_name | 4817 SELECT q.pepname, substr(met.Gene_Name, 1, 30) AS gene_name |
4666 FROM q, metadata_plus_p AS met | 4818 FROM q, metadata_plus_p AS met |
4667 WHERE q.pepname = met.Phosphopeptide | 4819 WHERE q.pepname = met.Phosphopeptide |
4668 ORDER BY q.rowid | 4820 ORDER BY q.rowid |
4669 ") | 4821 ") |
4716 } | 4868 } |
4717 } | 4869 } |
4718 } | 4870 } |
4719 } | 4871 } |
4720 } | 4872 } |
4721 cat(filter_result_string) | 4873 |
4722 cat("\\leavevmode\n") | 4874 cat("\\leavevmode\n") |
4723 | 4875 |
4724 if (!g_can_run_ksea) { | 4876 if (!g_can_run_ksea) { |
4725 errmsg <- paste("Cannot proceed with KSEA analysis", | 4877 errmsg <- paste("Cannot proceed with KSEA analysis", |
4726 "because too many values are missing.") | 4878 "because too many values are missing.") |
4745 sprintf("%0.3g", display_p_value_data$fdr_adjusted_anova_p) | 4897 sprintf("%0.3g", display_p_value_data$fdr_adjusted_anova_p) |
4746 display_p_value_data$quality <- | 4898 display_p_value_data$quality <- |
4747 sprintf("%0.3g", display_p_value_data$quality) | 4899 sprintf("%0.3g", display_p_value_data$quality) |
4748 | 4900 |
4749 headers_1st_line <- | 4901 headers_1st_line <- |
4750 c("", "Raw ANOVA", "FDR-adj.", "Missing", "Min. #", "", "") | 4902 c("", "Raw ANOVA", "FDR-adj.", "Missing", "Min. #", |
4903 "", "") | |
4751 headers_2nd_line <- | 4904 headers_2nd_line <- |
4752 c("Phosphopeptide", "p-value", "p-value", "values", "group-obs", "Quality", "Ranking") | 4905 c("Phosphopeptide", "p-value", "p-value", "values", "group-obs", |
4906 "Quality", "Ranking") | |
4753 data_frame_tabbing_latex( | 4907 data_frame_tabbing_latex( |
4754 x = display_p_value_data, | 4908 x = display_p_value_data, |
4755 tabstops = c(2.75, 0.80, 0.80, 0.5, 0.6, 0.60), | 4909 tabstops = c(2.75, 0.80, 0.80, 0.5, 0.6, 0.60), |
4756 use_subsubsection_header = FALSE, | 4910 use_subsubsection_header = FALSE, |
4757 headings = c(headers_1st_line, headers_2nd_line), | 4911 headings = c(headers_1st_line, headers_2nd_line), |
5333 | 5487 |
5334 # KSEA Analysis Summaries | 5488 # KSEA Analysis Summaries |
5335 | 5489 |
5336 Results of Kinase-Substrate Enrichment Analysis are presented here, if the substrates for any kinases are relatively enriched. Enrichments are found by the CRAN `KSEAapp` package: | 5490 Results of Kinase-Substrate Enrichment Analysis are presented here, if the substrates for any kinases are relatively enriched. Enrichments are found by the CRAN `KSEAapp` package: |
5337 | 5491 |
5338 - The package is available on CRAN, at https:/cran.r-project.org/package=KSEAapp | 5492 - The package is available on CRAN, at https://cran.r-project.org/package=KSEAapp |
5339 - The method used is described in Casado et al. (2013) [doi:10.1126/scisignal.2003573](https:/doi.org/10.1126/scisignal.2003573) and Wiredja et al (2017) [doi:10.1093/bioinformatics/btx415](https:/doi.org/10.1093/bioinformatics/btx415). | 5493 - The method used is described in Casado et al. (2013) [doi:10.1126/scisignal.2003573](https://doi.org/10.1126/scisignal.2003573) and Wiredja et al (2017) [doi:10.1093/bioinformatics/btx415](https://doi.org/10.1093/bioinformatics/btx415). |
5340 - An online alternative (supporting only analysis of human data) is available at [https:/casecpb.shinyapps.io/ksea/](https:/casecpb.shinyapps.io/ksea/). | 5494 - An online alternative (supporting only analysis of human data) is available at [https://casecpb.shinyapps.io/ksea/](https://casecpb.shinyapps.io/ksea/). |
5341 | 5495 |
5342 For each kinase, $i$, and each two-way contrast of treatments, $j$, an enrichment $z$-score is computed as: | 5496 For each kinase, $i$, and each two-way contrast of treatments, $j$, an enrichment $z$-score is computed as: |
5343 | 5497 |
5344 $$ | 5498 $$ |
5345 \text{kinase enrichment }z\text{-score}_{j,i} = \frac{(\overline{`r sfc`}_{j,i} - \overline{`r pfc`}_j)\sqrt{m_{j,i}}}{\delta_j} | 5499 \text{kinase enrichment }z\text{-score}_{j,i} = \frac{(\overline{`r sfc`}_{j,i} - \overline{`r pfc`}_j)\sqrt{m_{j,i}}}{\delta_j} |
5459 ) | 5613 ) |
5460 ) | 5614 ) |
5461 kseaapp_input <- | 5615 kseaapp_input <- |
5462 sqldf::sqldf( | 5616 sqldf::sqldf( |
5463 x = sprintf(" | 5617 x = sprintf(" |
5464 SELECT `Protein`, `Gene`, `Peptide`, phosphopep AS `Residue.Both`, `p`, `FC` | 5618 SELECT `Protein`,`Gene`,`Peptide`,phosphopep AS `Residue.Both`,`p`,`FC` |
5465 FROM v_kseaapp_input | 5619 FROM v_kseaapp_input |
5466 WHERE contrast = %d | 5620 WHERE contrast = %d |
5467 ", | 5621 ", |
5468 i_cntrst | 5622 i_cntrst |
5469 ), | 5623 ), |
5501 drop = FALSE | 5655 drop = FALSE |
5502 ] | 5656 ] |
5503 } | 5657 } |
5504 | 5658 |
5505 if (FALSE) { | 5659 if (FALSE) { |
5660 if (print_nb_messages) nbe( | |
5661 "Output contents of `ksea_scores_rslt` table\n") | |
5662 cat_variable(ksea_scores_rslt) | |
5663 cat("\n\\clearpage\n") | |
5506 data_frame_tabbing_latex( | 5664 data_frame_tabbing_latex( |
5507 x = ksea_scores_rslt, | 5665 x = ksea_scores_rslt, |
5508 tabstops = c(0.8, 0.8, 0.8, 0.8, 0.8, 0.8), | 5666 tabstops = c(1.8, 0.8, 0.8, 0.3, 0.8, 0.8), |
5509 caption = paste("KSEA scores for contrast ", | 5667 caption = paste("KSEA scores for contrast ", |
5510 cntrst_b_level, "to", cntrst_a_level), | 5668 cntrst_b_level, "to", cntrst_a_level), |
5511 use_subsubsection_header = FALSE | 5669 use_subsubsection_header = FALSE |
5512 ) | 5670 ) |
5513 } | |
5514 | |
5515 if (FALSE) { | |
5516 if (print_nb_messages) nbe("Output contents of `ksea_scores_rslt` table\n") | |
5517 cat_variable(ksea_scores_rslt) | |
5518 cat("\n\\clearpage\n") | |
5519 } | 5671 } |
5520 | 5672 |
5521 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) { | 5673 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) { |
5522 next_index <- 1 + next_index | 5674 next_index <- 1 + next_index |
5523 rslt$score_list[[next_index]] <- ksea_scores_rslt | 5675 rslt$score_list[[next_index]] <- ksea_scores_rslt |
5625 ) | 5777 ) |
5626 sub_title <- contrast_longlabel | 5778 sub_title <- contrast_longlabel |
5627 tryCatch( | 5779 tryCatch( |
5628 expr = { | 5780 expr = { |
5629 ksea_scores_rslt <- rslt$score_list[[next_index]] | 5781 ksea_scores_rslt <- rslt$score_list[[next_index]] |
5630 if (print_nb_messages) nbe(see_variable(ksea_scores_rslt)) #ACE | 5782 if (print_nb_messages) nbe(see_variable(ksea_scores_rslt)) |
5631 | 5783 |
5632 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) { | 5784 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) { |
5633 sink(deferred <- file()) | 5785 sink(deferred <- file()) |
5634 ksea_low_fdr_print( | 5786 ksea_low_fdr_print( |
5635 rslt = rslt, | 5787 rslt = rslt, |
5758 loaded_packages_df <- data.frame( | 5910 loaded_packages_df <- data.frame( |
5759 package = loaded_packages_df$package, | 5911 package = loaded_packages_df$package, |
5760 version = loaded_packages_df$loadedversion, | 5912 version = loaded_packages_df$loadedversion, |
5761 date = loaded_packages_df$date | 5913 date = loaded_packages_df$date |
5762 ) | 5914 ) |
5763 #ACE cat("\\clearpage\n\\section{R package versions}\n") | |
5764 #ACE data_frame_tabbing_latex( | |
5765 #ACE x = loaded_packages_df, | |
5766 #ACE tabstops = c(2.5, 1.25), | |
5767 #ACE caption = "R package versions" | |
5768 #ACE ) | |
5769 cat("\\clearpage\n\\section{Input parameter settings}\n") | 5915 cat("\\clearpage\n\\section{Input parameter settings}\n") |
5770 data_frame_tabbing_latex( | 5916 data_frame_tabbing_latex( |
5771 x = param_df, | 5917 x = param_df, |
5772 tabstops = c(1.75), | 5918 tabstops = c(1.75), |
5773 underscore_whack = TRUE, | 5919 underscore_whack = TRUE, |
5784 knitr::knit_exit() | 5930 knitr::knit_exit() |
5785 } | 5931 } |
5786 ``` | 5932 ``` |
5787 | 5933 |
5788 ```{r kseabar, echo = FALSE, fig.dim = c(9.5, 12.3), results = 'asis'} | 5934 ```{r kseabar, echo = FALSE, fig.dim = c(9.5, 12.3), results = 'asis'} |
5935 # nolint start | |
5936 # squash un-actionable cyclomatic_complexity warning | |
5789 if (have_kinase_descriptions) { | 5937 if (have_kinase_descriptions) { |
5938 # nolint end | |
5790 my_section_header <- | 5939 my_section_header <- |
5791 sprintf( | 5940 sprintf( |
5792 "inases whose KSEA %s < %s\n", | 5941 "inases whose KSEA %s < %s\n", |
5793 ksea_cutoff_statistic, | 5942 ksea_cutoff_statistic, |
5794 signif(ksea_cutoff_threshold, 2) | 5943 signif(ksea_cutoff_threshold, 2) |
5824 caption = paste0("Descriptions of k", my_section_header) | 5973 caption = paste0("Descriptions of k", my_section_header) |
5825 ) | 5974 ) |
5826 } | 5975 } |
5827 | 5976 |
5828 if (FALSE) { | 5977 if (FALSE) { |
5829 cat_variable(sqldf("SELECT kinase FROM enriched_kinases")) | 5978 cat_variable(sqldf::sqldf("SELECT kinase FROM enriched_kinases")) |
5830 cat_variable(sqldf(" | 5979 cat_variable(sqldf::sqldf(" |
5831 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep | 5980 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep |
5832 FROM pseudo_ksdata | 5981 FROM pseudo_ksdata |
5833 WHERE gene IN (SELECT kinase FROM enriched_kinases) | 5982 WHERE gene IN (SELECT kinase FROM enriched_kinases) |
5834 ")) | 5983 ")) |
5835 data_frame_table_latex( | 5984 data_frame_table_latex( |
5836 x = sqldf(" | 5985 x = sqldf::sqldf(" |
5837 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep | 5986 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep |
5838 FROM pseudo_ksdata | 5987 FROM pseudo_ksdata |
5839 WHERE gene IN (SELECT kinase FROM enriched_kinases) | 5988 WHERE gene IN (SELECT kinase FROM enriched_kinases) |
5840 "), | 5989 "), |
5841 justification = "l l l", | 5990 justification = "l l l", |
5843 caption = "substrates of enriched kinases", | 5992 caption = "substrates of enriched kinases", |
5844 anchor = c(const_table_anchor_p, const_table_anchor_t), | 5993 anchor = c(const_table_anchor_p, const_table_anchor_t), |
5845 underscore_whack = TRUE | 5994 underscore_whack = TRUE |
5846 ) | 5995 ) |
5847 data_frame_table_latex( | 5996 data_frame_table_latex( |
5848 x = sqldf(" | 5997 x = sqldf::sqldf(" |
5849 SELECT | 5998 SELECT |
5850 gene AS kinase, | 5999 gene AS kinase, |
5851 ppep, | 6000 ppep, |
5852 sub_gene, | 6001 sub_gene, |
5853 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label, | 6002 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label, |
5869 caption = "labeled substrates of enriched kinases", | 6018 caption = "labeled substrates of enriched kinases", |
5870 anchor = c(const_table_anchor_p, const_table_anchor_t), | 6019 anchor = c(const_table_anchor_p, const_table_anchor_t), |
5871 underscore_whack = TRUE | 6020 underscore_whack = TRUE |
5872 ) | 6021 ) |
5873 } | 6022 } |
5874 all_enriched_substrates <- sqldf(" | 6023 all_enriched_substrates <- sqldf::sqldf(" |
5875 SELECT | 6024 SELECT |
5876 gene AS kinase, | 6025 gene AS kinase, |
5877 ppep, | 6026 ppep, |
5878 sub_gene, | 6027 sub_gene, |
5879 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label, | 6028 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label, |
5897 , | 6046 , |
5898 drop = FALSE | 6047 drop = FALSE |
5899 ] | 6048 ] |
5900 | 6049 |
5901 all_enriched_substrates$sub_gene <- | 6050 all_enriched_substrates$sub_gene <- |
5902 sub( | 6051 sub(" ///.*", "...", |
5903 " ///.*", | |
5904 " ...", | |
5905 all_enriched_substrates$sub_gene | 6052 all_enriched_substrates$sub_gene |
5906 ) | 6053 ) |
5907 | 6054 |
5908 all_enriched_substrates$label <- | 6055 all_enriched_substrates$label <- |
5909 with( | 6056 with( |
5926 if (g_neednewpage) cat("\\newpage\n") | 6073 if (g_neednewpage) cat("\\newpage\n") |
5927 g_neednewpage <- TRUE | 6074 g_neednewpage <- TRUE |
5928 if (nrow(m) > g_intensity_hm_rows) { | 6075 if (nrow(m) > g_intensity_hm_rows) { |
5929 cat(subsection_header( | 6076 cat(subsection_header( |
5930 sprintf( | 6077 sprintf( |
5931 "Highest-quality %d (of %d) enriched %s-substrates", | 6078 "Highest-quality %d (of %d) enriched %s-sites", |
5932 g_intensity_hm_rows, | 6079 g_intensity_hm_rows, |
5933 nrow(m), | 6080 nrow(m), |
5934 kinase | 6081 kinase |
5935 ) | 6082 ) |
5936 )) | 6083 )) |
5939 return(FALSE) | 6086 return(FALSE) |
5940 } else { | 6087 } else { |
5941 nrow_m <- nrow(m) | 6088 nrow_m <- nrow(m) |
5942 cat(subsection_header( | 6089 cat(subsection_header( |
5943 sprintf( | 6090 sprintf( |
5944 "%d enriched %s-substrate%s", | 6091 "%d enriched %s-site%s", |
5945 nrow_m, | 6092 nrow_m, |
5946 kinase, | 6093 kinase, |
5947 if (nrow_m > 1) "s" else "" | 6094 if (nrow_m > 1) "s" else "" |
5948 ) | 6095 ) |
5949 )) | 6096 )) |
6023 | 6170 |
6024 } | 6171 } |
6025 ``` | 6172 ``` |
6026 | 6173 |
6027 ```{r enriched, echo = FALSE, fig.dim = c(12, 13.7), results = 'asis'} | 6174 ```{r enriched, echo = FALSE, fig.dim = c(12, 13.7), results = 'asis'} |
6175 # nolint start | |
6176 # squash un-actionable cyclomatic_complexity warning | |
6028 if (g_can_run_ksea) { | 6177 if (g_can_run_ksea) { |
6178 # nolint end | |
6029 g_did_enriched_header <- FALSE | 6179 g_did_enriched_header <- FALSE |
6030 for (kinase_name in sort(enriched_kinases$kinase)) { | 6180 for (kinase_name in sort(enriched_kinases$kinase)) { |
6031 enriched_substrates <- | 6181 enriched_substrates <- |
6032 all_enriched_substrates[ | 6182 all_enriched_substrates[ |
6033 all_enriched_substrates$kinase == kinase_name, | 6183 all_enriched_substrates$kinase == kinase_name, |
6046 ten_trunc_ppep | 6196 ten_trunc_ppep |
6047 ) | 6197 ) |
6048 ) | 6198 ) |
6049 # Get the intensity values for the heatmap | 6199 # Get the intensity values for the heatmap |
6050 enriched_intensities <- | 6200 enriched_intensities <- |
6051 as.matrix(unimputed_quant_data_log[enriched_substrates$ppep, , drop = FALSE]) | 6201 as.matrix(unimputed_quant_data_log[ |
6202 enriched_substrates$ppep, , drop = FALSE | |
6203 ]) | |
6052 | 6204 |
6053 # Remove rows having too many NA values to be relevant | 6205 # Remove rows having too many NA values to be relevant |
6054 good_rows <- (rowSums(enriched_intensities, na.rm = TRUE) != 0) | 6206 good_rows <- (rowSums(enriched_intensities, na.rm = TRUE) != 0) |
6055 #ACE nbe(see_variable(good_rows), "\n") | 6207 |
6056 enriched_substrates <- enriched_substrates[good_rows, , drop = FALSE] | 6208 enriched_substrates <- enriched_substrates[good_rows, , drop = FALSE] |
6057 enriched_intensities <- enriched_intensities[good_rows, , drop = FALSE] | 6209 enriched_intensities <- enriched_intensities[good_rows, , drop = FALSE] |
6058 | 6210 |
6059 # Rename the rows with the display-name for the heatmap | 6211 # Rename the rows with the display-name for the heatmap |
6060 short_row_names <- sub( | 6212 short_row_names <- sub( |
6074 ) | 6226 ) |
6075 rownames(enriched_intensities) <- long_row_names | 6227 rownames(enriched_intensities) <- long_row_names |
6076 # Format as matrix for heatmap | 6228 # Format as matrix for heatmap |
6077 m <- as.matrix(enriched_intensities) | 6229 m <- as.matrix(enriched_intensities) |
6078 rownames(m) <- trunc_enriched_substrate(rownames(m)) | 6230 rownames(m) <- trunc_enriched_substrate(rownames(m)) |
6079 | |
6080 #ACE nb("m with bad rows: ", see_variable(m), "\n") | |
6081 #ACE good_rows <- (rowSums(m, na.rm = TRUE) != 0) | |
6082 #ACE nb(see_variable(good_rows), "\n") | |
6083 #ACE m <- m[good_rows, , drop = FALSE] | |
6084 #ACE nb("m without(?) bad rows: ", see_variable(m), "\n") | |
6085 #ACE nb(see_variable(short_row_names), "\n") | |
6086 #ACE local_short_row_names <- short_row_names[good_rows] | |
6087 #ACE local_long_row_names <- long_row_names[good_rows] | |
6088 #ACE local_enriched_intensities <- enriched_intensities[local_long_row_names, ] | |
6089 | 6231 |
6090 # Draw the heading and heatmap | 6232 # Draw the heading and heatmap |
6091 nrow_m <- nrow(m) | 6233 nrow_m <- nrow(m) |
6092 if (nrow_m > 0) { | 6234 if (nrow_m > 0) { |
6093 if (!g_did_enriched_header) { | 6235 if (!g_did_enriched_header) { |
6096 g_did_enriched_header <- TRUE | 6238 g_did_enriched_header <- TRUE |
6097 } | 6239 } |
6098 is_na_m <- is.na(m) | 6240 is_na_m <- is.na(m) |
6099 cellnote_m <- is_na_m | 6241 cellnote_m <- is_na_m |
6100 cellnote_m[!is_na_m] <- "" | 6242 cellnote_m[!is_na_m] <- "" |
6101 cellnote_m[is_na_m] <- "NA" | 6243 cellnote_m[is_na_m] <- params$heatMapNAcellNote |
6102 cut_args <- new_env() | 6244 cut_args <- new_env() |
6103 cut_args$cutoff <- cutoff | 6245 cut_args$cutoff <- cutoff |
6104 cut_args$kinase <- kinase_name | 6246 cut_args$kinase <- kinase_name |
6105 cut_args$statistic <- ksea_cutoff_statistic | 6247 cut_args$statistic <- ksea_cutoff_statistic |
6106 cut_args$threshold <- ksea_cutoff_threshold | 6248 cut_args$threshold <- ksea_cutoff_threshold |
6108 ppep_heatmap( | 6250 ppep_heatmap( |
6109 m = m, | 6251 m = m, |
6110 cellnote = cellnote_m, | 6252 cellnote = cellnote_m, |
6111 cutoff = cut_args, | 6253 cutoff = cut_args, |
6112 hm_heading_function = cat_enriched_heading, | 6254 hm_heading_function = cat_enriched_heading, |
6113 hm_main_title | 6255 hm_main_title = paste( |
6114 = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates", | 6256 "Unnormalized (zero-imputed)", |
6257 "intensities of enriched kinase-substrates"), | |
6115 suppress_row_dendrogram = FALSE, | 6258 suppress_row_dendrogram = FALSE, |
6116 master_cex = 0.35, | 6259 master_cex = 0.35, |
6117 sepcolor = "black", | 6260 sepcolor = "black", |
6118 colsep = sample_colsep, | 6261 colsep = sample_colsep, |
6119 row_scaling = "none" | 6262 row_scaling = "none" |
6121 if (number_of_peptides_found > 1) { | 6264 if (number_of_peptides_found > 1) { |
6122 | 6265 |
6123 tryCatch( | 6266 tryCatch( |
6124 { | 6267 { |
6125 rownames(m) <- short_row_names | 6268 rownames(m) <- short_row_names |
6126 cov_heatmap(m, nrow_m > g_intensity_hm_rows) | 6269 cov_heatmap( |
6270 m = m, | |
6271 kinase_name = kinase_name, | |
6272 top_substrates = nrow_m > g_intensity_hm_rows | |
6273 ) | |
6127 }, | 6274 }, |
6128 error = function(e) { | 6275 error = function(e) { |
6129 cat( | 6276 cat( |
6130 sprintf( | 6277 sprintf( |
6131 "ERROR: %s\n\\newline\n", | 6278 "ERROR: %s\n\\newline\n", |
6160 headers_1st_line <- c("", "", "ANOVA", "min(values", "") | 6307 headers_1st_line <- c("", "", "ANOVA", "min(values", "") |
6161 if (1 < nrow(enriched_substrates)) | 6308 if (1 < nrow(enriched_substrates)) |
6162 cat("\n\\newpage\n") | 6309 cat("\n\\newpage\n") |
6163 cat(subsubsection_header( | 6310 cat(subsubsection_header( |
6164 sprintf( | 6311 sprintf( |
6165 "Details for %s%s-substrates", | 6312 "Details for %s%s-sites", |
6166 if (excess_substrates) | 6313 if (excess_substrates) |
6167 sprintf( | 6314 sprintf( |
6168 "%s \"highest quality\" ", | 6315 "%s \"highest quality\" ", |
6169 g_intensity_hm_rows | 6316 g_intensity_hm_rows |
6170 ) | 6317 ) |
6188 | 6335 |
6189 # get kinase, ppep, concat(kinase) tuples for enriched kinases | 6336 # get kinase, ppep, concat(kinase) tuples for enriched kinases |
6190 | 6337 |
6191 if (print_nb_messages) nb("kinase_ppep_label <- ...\n") | 6338 if (print_nb_messages) nb("kinase_ppep_label <- ...\n") |
6192 if (print_nb_messages) nbe("kinase_ppep_label <- ...\n") | 6339 if (print_nb_messages) nbe("kinase_ppep_label <- ...\n") |
6193 kinase_ppep_label <- sqldf(" | 6340 kinase_ppep_label <- sqldf::sqldf(" |
6194 WITH | 6341 WITH |
6195 t(ppep, label) AS | 6342 t(ppep, label) AS |
6196 ( | 6343 ( |
6197 SELECT DISTINCT | 6344 SELECT DISTINCT |
6198 SUB_MOD_RSD AS ppep, | 6345 SUB_MOD_RSD AS ppep, |
6228 ON f.Phosphopeptide = k.ppep, | 6375 ON f.Phosphopeptide = k.ppep, |
6229 impish q | 6376 impish q |
6230 WHERE | 6377 WHERE |
6231 f.Phosphopeptide = q.Phosphopeptide | 6378 f.Phosphopeptide = q.Phosphopeptide |
6232 " | 6379 " |
6233 data_table_imputed <- sqldf(data_table_imputed_sql) | 6380 data_table_imputed <- sqldf::sqldf(data_table_imputed_sql) |
6234 # Zap the duplicated 'Phosphopeptide' column named 'ppep' | 6381 # Zap the duplicated 'Phosphopeptide' column named 'ppep' |
6235 data_table_imputed <- | 6382 data_table_imputed <- |
6236 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))] | 6383 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))] |
6237 | 6384 |
6238 # Output imputed, un-normalized data | 6385 # Output imputed, un-normalized data |
6239 if (print_nb_messages) nb("Output imputed, un-normalized data tabular file\n") | 6386 if (print_nb_messages) nb("Output imputed, un-normalized data tabular file\n") |
6240 if (print_nb_messages) nbe("Output imputed, un-normalized data tabular file\n") | 6387 if (print_nb_messages) nbe( |
6388 "Output imputed, un-normalized data tabular file\n") | |
6241 write.table( | 6389 write.table( |
6242 data_table_imputed | 6390 data_table_imputed |
6243 , file = imputed_data_filename | 6391 , file = imputed_data_filename |
6244 , sep = "\t" | 6392 , sep = "\t" |
6245 , col.names = TRUE | 6393 , col.names = TRUE |
6249 | 6397 |
6250 | 6398 |
6251 #output quantile normalized data | 6399 #output quantile normalized data |
6252 impish <- cbind(rownames(quant_data_imp_qn_log), quant_data_imp_qn_log) | 6400 impish <- cbind(rownames(quant_data_imp_qn_log), quant_data_imp_qn_log) |
6253 colnames(impish)[1] <- "Phosphopeptide" | 6401 colnames(impish)[1] <- "Phosphopeptide" |
6254 data_table_imputed <- sqldf(data_table_imputed_sql) | 6402 data_table_imputed <- sqldf::sqldf(data_table_imputed_sql) |
6255 # Zap the duplicated 'Phosphopeptide' column named 'ppep' | 6403 # Zap the duplicated 'Phosphopeptide' column named 'ppep' |
6256 data_table_imputed <- | 6404 data_table_imputed <- |
6257 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))] | 6405 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))] |
6258 if (print_nb_messages) nb("Output quantile normalized data tabular file\n") | 6406 if (print_nb_messages) nb("Output quantile normalized data tabular file\n") |
6259 if (print_nb_messages) nbe("Output quantile normalized data tabular file\n") | 6407 if (print_nb_messages) nbe("Output quantile normalized data tabular file\n") |
6264 col.names = TRUE, | 6412 col.names = TRUE, |
6265 row.names = FALSE, | 6413 row.names = FALSE, |
6266 quote = FALSE | 6414 quote = FALSE |
6267 ) | 6415 ) |
6268 | 6416 |
6269 ppep_kinase <- sqldf(" | 6417 ppep_kinase <- sqldf::sqldf(" |
6270 SELECT DISTINCT k.ppep, k.kinase | 6418 SELECT DISTINCT k.ppep, k.kinase |
6271 FROM ( | 6419 FROM ( |
6272 SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep | 6420 SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep |
6273 FROM pseudo_ksdata | 6421 FROM pseudo_ksdata |
6274 WHERE GENE IN (SELECT kinase FROM enriched_kinases) | 6422 WHERE GENE IN (SELECT kinase FROM enriched_kinases) |
6312 ON m.phospho_peptide = kek.ppep | 6460 ON m.phospho_peptide = kek.ppep |
6313 ; | 6461 ; |
6314 " | 6462 " |
6315 ) | 6463 ) |
6316 | 6464 |
6317 if (print_nb_messages) nb("Output contents of `stats_metadata_v` table to tabular file\n") | 6465 if (print_nb_messages) nb( |
6318 if (print_nb_messages) nbe("Output contents of `stats_metadata_v` table to tabular file\n") | 6466 "Output contents of `stats_metadata_v` table to tabular file\n") |
6467 if (print_nb_messages) nbe( | |
6468 "Output contents of `stats_metadata_v` table to tabular file\n") | |
6469 | |
6319 write.table( | 6470 write.table( |
6320 dbReadTable(db, "stats_metadata_v"), | 6471 dbReadTable(db, "stats_metadata_v"), |
6321 file = anova_ksea_mtdt_file, | 6472 file = anova_ksea_mtdt_file, |
6322 sep = "\t", | 6473 sep = "\t", |
6323 col.names = TRUE, | 6474 col.names = TRUE, |