comparison mqppep_anova_script.Rmd @ 3:bae3a23461c9 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3dcf0d08f006b888061ff83eadc65e550d751869
author galaxyp
date Tue, 31 Jan 2023 22:27:00 +0000
parents a5e7469dfdfa
children 5c2beb4eb41c
comparison
equal deleted inserted replaced
2:a5e7469dfdfa 3:bae3a23461c9
46 meanPercentile: 50 46 meanPercentile: 50
47 #meanPercentile: 1 47 #meanPercentile: 1
48 # for small random value imputation, what should `s / mean(x)` ratio be? 48 # for small random value imputation, what should `s / mean(x)` ratio be?
49 sdPercentile: 1.0 49 sdPercentile: 1.0
50 # output path for imputed data file 50 # output path for imputed data file
51 imputedDataFilename: "test-data/limbo/imputedDataFilename.txt" 51 imputedDataFilename: "test-data/imputedDataFilename.txt"
52 # output path for imputed/quantile-normalized/log-transformed data file 52 # output path for imputed/quantile-normalized/log-transformed data file
53 imputedQNLTDataFile: "test-data/limbo/imputedQNLTDataFile.txt" 53 imputedQNLTDataFile: "test-data/imputedQNLTDataFile.txt"
54 # output path for contents of `stats_metadata_v` table 54 # output path for contents of `stats_metadata_v` table
55 anovaKseaMetadata: "test-data/limbo/anovaKseaMetadata.txt" 55 anovaKseaMetadata: "test-data/anovaKseaMetadata.txt"
56 # how to test one variable with > 2 categories (e.g., aov or kruskal.test) 56 # how to test one variable with > 2 categories (e.g., aov or kruskal.test)
57 oneWayManyCategories: !r c("aov", "kruskal.test", "oneway.test")[1] 57 oneWayManyCategories: !r c("aov", "kruskal.test", "oneway.test")[1]
58 # how to test one variable with 2 categories (e.g., oneway.test) 58 # how to test one variable with 2 categories (e.g., oneway.test)
59 oneWayTwoCategories: !r c("aov", "kruskal.test", "oneway.test")[3] 59 oneWayTwoCategories: !r c("aov", "kruskal.test", "oneway.test")[3]
60 # what should be the minimum quality for consideration in both 60 # what should be the minimum quality for consideration in both
97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2" 97 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2"
98 # should debugging trace messages be printed? 98 # should debugging trace messages be printed?
99 showEnrichedSubstrates: FALSE 99 showEnrichedSubstrates: FALSE
100 # should debugging nb/nbe messages be printed? 100 # should debugging nb/nbe messages be printed?
101 printNBMsgs: FALSE 101 printNBMsgs: FALSE
102 # showld row-scaling be applied to heatmaps: "none" or "row" 102 #printNBMsgs: TRUE
103 defaultHeatMapRowScaling: "none" 103 # should row-scaling be applied to heatmaps: "none" or "row"
104 defaultHeatMapRowScaling: !r c("none", "row")[1]
105 # how missing values be displayed on heat maps: "NA" or " "
106 heatMapNAcellNote: !r c(" ", "NA")[1]
107 # how missing values be displayed on heat maps: "NA" or " "
108 heatMapNAgrey: "#D8D8D8"
109 # temporary hack
110 heatMapNAsubstitute: TRUE
111 # what color should be used for missing values be displayed on heat maps
112 heatMapNAcellColor: "grey15"
104 # should debugging trace messages be printed? 113 # should debugging trace messages be printed?
105 printTraceMsgs: FALSE 114 printTraceMsgs: FALSE
106 # when debugging files are needed, set debugFileBasePath to the path 115 # when debugging files are needed, set debugFileBasePath to the path
107 # to the directory where they should be written 116 # to the directory where they should be written
108 debugFileBasePath: !r if (TRUE) NULL else "test-data" 117 debugFileBasePath: !r if (TRUE) NULL else "test-data"
122 nbe <- if (!print_nb_messages) { 131 nbe <- if (!print_nb_messages) {
123 function(...) invisible() 132 function(...) invisible()
124 } else { 133 } else {
125 function(..., f = cat, file = stderr()) { 134 function(..., f = cat, file = stderr()) {
126 cat( 135 cat(
127 stringi::stri_unescape_unicode("\nNBE \\u2203\\u2283\\u2200"), 136 stringi::stri_unescape_unicode("\nN.B. \\u2203\\u2283\\u2200"),
128 ..., 137 ...,
129 file = file 138 file = file
130 ) 139 )
131 } 140 }
132 } 141 }
146 155
147 if (print_nb_messages) nbe("library(gplots)") 156 if (print_nb_messages) nbe("library(gplots)")
148 library(gplots) 157 library(gplots)
149 if (print_nb_messages) nbe("library(caret)") 158 if (print_nb_messages) nbe("library(caret)")
150 # load caret for nearZeroVar 159 # load caret for nearZeroVar
151 if (print_nb_messages) nbe("Please ignore the messages about systemd, if any.\n") 160 if (print_nb_messages) nbe("Please ignore any messages about systemd.\n")
152 library(caret) 161 library(caret)
153 if (print_nb_messages) nbe("library(DBI)") 162 if (print_nb_messages) nbe("library(DBI)")
154 library(DBI) 163 library(DBI)
155 if (print_nb_messages) nbe("library(RSQLite)") 164 if (print_nb_messages) nbe("library(RSQLite)")
156 library(RSQLite) 165 library(RSQLite)
340 msg, "\n" 349 msg, "\n"
341 ) 350 )
342 } 351 }
343 } 352 }
344 353
354 divert_warnings <-
355 function(expr, classes = "warning") {
356 withCallingHandlers(
357 expr,
358 warning = function(w) {
359 cat(" divert_warnings: ", w$message, "\n", file = stderr())
360 if (inherits(w, classes))
361 tryInvokeRestart("muffleWarning")
362 }
363 )
364 }
365
345 # ref: https://tug.org/texinfohtml/latex2e.html 366 # ref: https://tug.org/texinfohtml/latex2e.html
346 # LaTeX sets aside the following characters for special purposes. 367 # LaTeX sets aside the following characters for special purposes.
347 # For example, the percent sign % is for comments. 368 # For example, the percent sign % is for comments.
348 # They are called reserved characters or special characters. 369 # They are called reserved characters or special characters.
349 # They are all discussed elsewhere in this manual. 370 # They are all discussed elsewhere in this manual.
358 # As to the last three characters, to get a tilde in the text body font, 379 # As to the last three characters, to get a tilde in the text body font,
359 # use \~{} (omitting the curly braces would result in the next character 380 # use \~{} (omitting the curly braces would result in the next character
360 # receiving a tilde accent). 381 # receiving a tilde accent).
361 # Similarly, to get a text body font circumflex use \^{}. 382 # Similarly, to get a text body font circumflex use \^{}.
362 # To get a backslash in the font of the text body enter \textbackslash{}. 383 # To get a backslash in the font of the text body enter \textbackslash{}.
363 whack_math <- 384 whack_math <- if (TRUE) {
364 function(v) { 385 function(v) {
365 v <- as.character(v) 386 v <- as.character(v)
387 w <- v
366 w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE) 388 w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE)
367 w <- Reduce( 389 w <- Reduce(
368 f = function(l, r) { 390 f = function(l, r) {
369 gsub(r, paste0("\\", r), l, fixed = TRUE) 391 ptrn <- paste0("\\", r)
392 for (i in seq_along(l)) {
393 if (!grepl(ptrn, l[i], fixed = TRUE)) {
394 l[i] <- gsub(r, ptrn, l[i], fixed = TRUE)
395 }
396 }
397 l
370 }, 398 },
371 x = c("#", "$", "%", "&", "{", "}", "_"), 399 x = c("#", "$", "%", "&", "{", "}", "_"),
372 init = w 400 init = w
373 ) 401 )
374 w <- gsub("^", "\\^{}", w, fixed = TRUE) 402 w <- gsub("^", "\\^{}", w, fixed = TRUE)
403 w <- gsub("\\textbackslash \\_", "\\_", w, fixed = TRUE)
404 return(w)
405 }
406 } else {
407 function(v) {
408 v <- as.character(v)
409 w <- v
410 w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE)
411 w <- Reduce(
412 f = function(l, r) {
413 if (length(l) > 1) {
414 cat("whack_math: deparse1(v) = `", deparse1(v), "`\n",
415 file = stderr())
416 cat("whack_math: v = `", v, "`\n", file = stderr())
417 cat(" Reduce f: l = `", l,
418 "` r = `", r,
419 "`\n", file = stderr()
420 )
421 divert_warnings(grepl(r, l, fixed = TRUE))
422 }
423 ptrn <- paste0("\\", r)
424 cat("ptrn: `", ptrn, "`\n", file = stderr())
425 for (i in seq_along(l)) {
426 cat(" before l[i] = ", l[i], "\n", file = stderr())
427 if (!grepl(ptrn, l[i], fixed = TRUE)) {
428 l[i] <- gsub(r, ptrn, l[i], fixed = TRUE)
429 }
430 cat(" after l[i] = ", l[i], "\n", file = stderr())
431 }
432 l
433 },
434 x = c("#", "$", "%", "&", "{", "}", "_"),
435 init = w
436 )
437 w <- gsub("^", "\\^{}", w, fixed = TRUE)
438 w <- gsub("\\textbackslash \\_", "\\_", w, fixed = TRUE)
375 return(w) 439 return(w)
376 if (all(v == w)) 440 if (all(v == w))
377 v 441 v
378 else 442 else
379 paste0("\\texttt{", w, "}") 443 paste0("\\texttt{", w, "}")
380 } 444 }
445 }
381 whack_underscores <- whack_math 446 whack_underscores <- whack_math
382 447
383 ## dump params to stderr (remove this eventually) 448 ## dump params to stderr (remove this eventually)
384 449
385 if (FALSE) nbe(see_variable(params)) 450 if (FALSE) nbe(see_variable(params))
578 g_intensity_min_per_class <- params$intensityMinValuesPerGroup 643 g_intensity_min_per_class <- params$intensityMinValuesPerGroup
579 if (!is.integer(g_intensity_min_per_class) || g_intensity_min_per_class < 0) { 644 if (!is.integer(g_intensity_min_per_class) || g_intensity_min_per_class < 0) {
580 cat("invalid intensityMinValuesPerGroup (must be integer > -1") 645 cat("invalid intensityMinValuesPerGroup (must be integer > -1")
581 knitr::knit_exit() 646 knitr::knit_exit()
582 } 647 }
583 648 g_correlate_substrates <- params$correlateSubstrates
584 if (is.na(as.logical(g_correlate_substrates <- params$correlateSubstrates))) { 649 if (is.na(as.logical(g_correlate_substrates))) {
585 cat("invalid correlateSubstrates (must be TRUE or FALSE)") 650 cat("invalid correlateSubstrates (must be TRUE or FALSE)")
586 knitr::knit_exit() 651 knitr::knit_exit()
587 } 652 }
588 653 g_filter_cov_var_gt_1 <- params$filterCovVarGT1
589 if (is.na(as.logical(g_filter_cov_var_gt_1 <- params$filterCovVarGT1))) { 654 if (is.na(as.logical(g_filter_cov_var_gt_1))) {
590 cat("invalid filterCovVarGT1 parameter (must be TRUE or FALSE)") 655 cat("invalid filterCovVarGT1 parameter (must be TRUE or FALSE)")
591 knitr::knit_exit() 656 knitr::knit_exit()
592 } 657 }
593 658
594 # TODO Validate >> 0 < 30 659 # TODO Validate >> 0 < 30
617 value = rvalue, 682 value = rvalue,
618 pos = pf 683 pos = pf
619 ) 684 )
620 invisible(rvalue) 685 invisible(rvalue)
621 } 686 }
687 # Infix concatenation
688 `%||%` <- function(lvalue, rvalue) paste0(lvalue, rvalue)
689
622 690
623 ### FUNCTIONS 691 ### FUNCTIONS
624 692
625 no_op <- 693 no_op <-
626 function() { 694 function() {
703 }, 771 },
704 simplify = "array" 772 simplify = "array"
705 ) 773 )
706 grpl_rslt <- grpl_rslt + grpl_rslt_v 774 grpl_rslt <- grpl_rslt + grpl_rslt_v
707 } 775 }
708 rslt <- unname(grpl_rslt > 0) 776 unname(grpl_rslt > 0)
709 } 777 }
710 778
711 ##' Produce positions in a vector where succeeding value != current valus 779 ##' Produce positions in a vector where succeeding value != current valus
712 ##' 780 ##'
713 ##' @title Search vector for neighboring positions having different values 781 ##' @title Search vector for neighboring positions having different values
826 pvalue 894 pvalue
827 } 895 }
828 896
829 # This code adapted from matrixcalc::is.positive.definite 897 # This code adapted from matrixcalc::is.positive.definite
830 # Notably, this simply tests without calling stop() 898 # Notably, this simply tests without calling stop()
899 # nolint start
900 # squash un-actionable cyclomatic_complexity warning
831 is_positive_definite <- function(x, tol = 1e-08) { 901 is_positive_definite <- function(x, tol = 1e-08) {
902 # nolint end
832 if (!is.matrix(x)) 903 if (!is.matrix(x))
833 return(FALSE) 904 return(FALSE)
834 if (!is.numeric(x)) 905 if (!is.numeric(x))
835 return(FALSE) 906 return(FALSE)
836 if (nrow(x) < 1) 907 if (nrow(x) < 1)
970 } 1041 }
971 invisible(x) 1042 invisible(x)
972 } 1043 }
973 1044
974 # Use this like print.data.frame, from which it is adapted: 1045 # Use this like print.data.frame, from which it is adapted:
1046 # nolint start
1047 # squash un-actionable cyclomatic_complexity warning
1048 # squash un-actionable "no visible global for ..."
975 data_frame_tabbing_latex <- 1049 data_frame_tabbing_latex <-
976 function( 1050 function(
1051 # nolint end
977 x, 1052 x,
978 # vector of tab stops, in inches 1053 # vector of tab stops, in inches
979 tabstops, 1054 tabstops,
980 # vector of headings, registered with tab-stops 1055 # vector of headings, registered with tab-stops
981 headings = colnames(x), 1056 headings = colnames(x),
1149 return(s) 1224 return(s)
1150 } 1225 }
1151 nolatex_verbatim <- 1226 nolatex_verbatim <-
1152 function(expr) eval(expr) 1227 function(expr) eval(expr)
1153 1228
1229 error_knitrexit_stop <-
1230 function(e) {
1231 cat("Caught error:\n\n")
1232 str(e)
1233 knitr::knit_exit()
1234 stop(e)
1235 }
1236
1154 latex_verbatim <- 1237 latex_verbatim <-
1155 function(expr) { 1238 function(expr) {
1156 arg_string <- deparse1(substitute(expr))
1157 cat("\n\\begin{verbatim}\n___\n") 1239 cat("\n\\begin{verbatim}\n___\n")
1158 tryCatch( 1240 tryCatch(
1159 expr = expr, 1241 expr = expr,
1160 error = param_df_exit, 1242 error = param_df_exit,
1161 #ACE error =
1162 #ACE function(e) {
1163 #ACE cat("Caught error:\n\n")
1164 #ACE str(e)
1165 #ACE knitr::knit_exit()
1166 #ACE stop(e)
1167 #ACE },
1168 finally = cat("...\n\\end{verbatim}\n") 1243 finally = cat("...\n\\end{verbatim}\n")
1169 ) 1244 )
1170 } 1245 }
1171 1246
1172 latex_samepage <- 1247 latex_samepage <-
1173 function(expr) { 1248 function(expr) {
1174 arg_string <- deparse1(substitute(expr))
1175 cat("\n\\begin{samepage}\n") 1249 cat("\n\\begin{samepage}\n")
1176 tryCatch( 1250 tryCatch(
1177 expr = expr, 1251 expr = expr,
1178 error = param_df_exit, 1252 error = param_df_exit,
1179 #ACE error =
1180 #ACE function(e) {
1181 #ACE cat("Caught error:\n\n")
1182 #ACE str(e)
1183 #ACE knitr::knit_exit()
1184 #ACE stop(e)
1185 #ACE },
1186 finally = cat("\n\\end{samepage}\n") 1253 finally = cat("\n\\end{samepage}\n")
1187 ) 1254 )
1188 } 1255 }
1189 1256
1190 # return the result of invocation after showing parameters 1257 # return the result of invocation after showing parameters
1191 # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/ 1258 # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/
1192 latex_show_invocation <- 1259 latex_show_invocation <-
1193 function(f, f_name = deparse1(substitute(f)), head_patch = FALSE) { 1260 function(f, f_name = deparse1(substitute(f)), head_patch = FALSE) {
1194 function(...) { 1261 function(...) {
1195 my_env <- (as.list(environment()))
1196 va <- list(...) 1262 va <- list(...)
1197 my_rslt <- new_env() 1263 my_rslt <- new_env()
1198 my_rslt$rslt <- NULL 1264 my_rslt$rslt <- NULL
1199 latex_verbatim( 1265 latex_verbatim(
1200 expr = { 1266 expr = {
1201 cat(sprintf("\n .. Local variables for '%s':\n\n", f_name)) 1267 cat(sprintf("\n .. Local variables for '%s':\n\n", f_name))
1202 str(va) 1268 str(va)
1203 if (!head_patch) { 1269 if (!head_patch) {
1204 # return this result 1270 # return this result
1205 # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/ 1271 # ref:
1272 # https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/
1206 cat(sprintf("\n .. Invoking '%s'\n", f_name)) 1273 cat(sprintf("\n .. Invoking '%s'\n", f_name))
1207 tryCatch( 1274 tryCatch(
1208 { 1275 {
1209 cat("\n\\end{verbatim}\n") 1276 cat("\n\\end{verbatim}\n")
1210 rslt <- do.call(f, va) 1277 rslt <- do.call(f, va)
1211 }, 1278 },
1212 error = param_df_exit, 1279 error = param_df_exit,
1213 #ACE error = function(e) {
1214 #ACE cat("\n\\begin{verbatim}\n")
1215 #ACE str(e)
1216 #ACE cat("\n\\end{verbatim}\n")
1217 #ACE knitr::knit_exit()
1218 #ACE stop(e)
1219 #ACE },
1220 finally = cat("\n\\begin{verbatim}\n") 1280 finally = cat("\n\\begin{verbatim}\n")
1221 ) 1281 )
1222 cat(sprintf("\n .. '%s' returned:\n", f_name)) 1282 cat(sprintf("\n .. '%s' returned:\n", f_name))
1223 str(rslt) 1283 str(rslt)
1224 my_rslt$rslt <- rslt 1284 my_rslt$rslt <- rslt
1246 collapse = collapse_string 1306 collapse = collapse_string
1247 ) 1307 )
1248 ) 1308 )
1249 } 1309 }
1250 1310
1251 latex_itemized_collapsed <- function(collapse_string, v, underscore_whack = TRUE) { 1311 latex_itemized_collapsed <-
1312 function(collapse_string, v, underscore_whack = TRUE) {
1252 cat("\\begin{itemize}\n\\item ") 1313 cat("\\begin{itemize}\n\\item ")
1253 latex_collapsed_vector(collapse_string, v, underscore_whack) 1314 latex_collapsed_vector(collapse_string, v, underscore_whack)
1254 cat("\n\\end{itemize}\n") 1315 cat("\n\\end{itemize}\n")
1255 } 1316 }
1256 1317
1257 latex_itemized_list <- function(v, underscore_whack = TRUE) { 1318 latex_itemized_list <- function(v, underscore_whack = TRUE) {
1258 latex_itemized_collapsed("\n\\item ", v, underscore_whack) 1319 latex_itemized_collapsed("\n\\item ", v, underscore_whack)
1259 } 1320 }
1260 1321
1261 latex_enumerated_collapsed <- function(collapse_string, v, underscore_whack = TRUE) { 1322 latex_enumerated_collapsed <-
1323 function(collapse_string, v, underscore_whack = TRUE) {
1262 cat("\\begin{enumerate}\n\\item ") 1324 cat("\\begin{enumerate}\n\\item ")
1263 latex_collapsed_vector(collapse_string, v, underscore_whack) 1325 latex_collapsed_vector(collapse_string, v, underscore_whack)
1264 cat("\n\\end{enumerate}\n") 1326 cat("\n\\end{enumerate}\n")
1265 } 1327 }
1266 1328
1300 } 1362 }
1301 1363
1302 # N.B. use con = "" to emulate regular cat 1364 # N.B. use con = "" to emulate regular cat
1303 fcat0 <- 1365 fcat0 <-
1304 function(..., sprtr = " ", cnnctn = file()) { 1366 function(..., sprtr = " ", cnnctn = file()) {
1305 cat0(..., sep = sprtr, file = cnnctn) 1367 gsubfn::cat0(..., sep = sprtr, file = cnnctn)
1306 invisible(cnnctn) 1368 invisible(cnnctn)
1307 } 1369 }
1308 1370
1309 hypersub <- 1371 hypersub <-
1310 function(s) { 1372 function(s) {
1426 #' 1488 #'
1427 #' Hornbeck et al. (2015) Nucleic Acids Res. 43:D512-20 1489 #' Hornbeck et al. (2015) Nucleic Acids Res. 43:D512-20
1428 #' 1490 #'
1429 #' Horn et al. (2014) Nature Methods 11(6):603-4 1491 #' Horn et al. (2014) Nature Methods 11(6):603-4
1430 #' 1492 #'
1493 # nolint start
1494 # squash un-actionable cyclomatic_complexity warning
1431 ksea_scores <- function( 1495 ksea_scores <- function(
1496 # nolint end
1432 # For human data, typically, ksdata = KSEAapp::ksdata 1497 # For human data, typically, ksdata = KSEAapp::ksdata
1433 ksdata, 1498 ksdata,
1434 1499
1435 # Input data file having columns: 1500 # Input data file having columns:
1436 # - Protein : abbreviated protein name 1501 # - Protein : abbreviated protein name
1454 1519
1455 # Minimum substrate count, necessary to adjust the p-value appropriately. 1520 # Minimum substrate count, necessary to adjust the p-value appropriately.
1456 minimum_substrate_count 1521 minimum_substrate_count
1457 1522
1458 ) { 1523 ) {
1524 if (FALSE) {
1525 if (print_nb_messages) nbe("Output contents of `ksdata` table\n")
1526 cat_variable(ksdata)
1527 cat("\n\\clearpage\n")
1528 data_frame_tabbing_latex(
1529 x = ksdata[order(ksdata$GENE, ksdata$SUB_GENE), ],
1530 tabstops = c(0.3, 0.3, 1.2, 0.3, 0.3, 0.3, 0.3,
1531 0.8, 0.3, 0.8, 0.3, 0.3, 0.3),
1532 caption = "ksdata dump",
1533 use_subsubsection_header = FALSE
1534 )
1535 }
1459 # no px$FC should be <= 0, but abs(px$FC) is used below as a precaution. 1536 # no px$FC should be <= 0, but abs(px$FC) is used below as a precaution.
1460 if (length(grep(";", px$Residue.Both)) == 0) { 1537 if (length(grep(";", px$Residue.Both)) == 0) {
1461 # There are no Residue.Both entries having semicolons, so new is 1538 # There are no Residue.Both entries having semicolons, so new is
1462 # simply px except two columns are renamed and a column is added 1539 # simply px except two columns are renamed and a column is added
1463 # for log2(abs(fold-change)) 1540 # for log2(abs(fold-change))
1507 # To take the magnitude into account without taking the direction into 1584 # To take the magnitude into account without taking the direction into
1508 # account, set params$kseaUseAbsoluteLog2FC to TRUE 1585 # account, set params$kseaUseAbsoluteLog2FC to TRUE
1509 # 1586 #
1510 # Should KSEA be performed aggregating signed log2FC or absolute? 1587 # Should KSEA be performed aggregating signed log2FC or absolute?
1511 # FALSE use raw log2FC for KSEA as for KSEAapp::KSEA.Scores 1588 # FALSE use raw log2FC for KSEA as for KSEAapp::KSEA.Scores
1589 # nolint start
1590 # squash un-actionable "no visible global for ..."
1512 if (params$kseaUseAbsoluteLog2FC) { 1591 if (params$kseaUseAbsoluteLog2FC) {
1592 # nolint end
1513 # TRUE use abs(log2FC) for KSEA as Justin requested; this is a 1593 # TRUE use abs(log2FC) for KSEA as Justin requested; this is a
1514 # justifiable deviation from the KSEAapp::KSEA.Scores algorithm. 1594 # justifiable deviation from the KSEAapp::KSEA.Scores algorithm.
1515 new$log2_fc <- abs(new$log2_fc) 1595 new$log2_fc <- abs(new$log2_fc)
1516 } 1596 }
1517 1597
1520 # set to TRUE to monitor filtration on stderr 1600 # set to TRUE to monitor filtration on stderr
1521 sink(stderr()) 1601 sink(stderr())
1522 cat(see_variable(networkin, "\n")) 1602 cat(see_variable(networkin, "\n"))
1523 } 1603 }
1524 ksdata_filtered <- 1604 ksdata_filtered <-
1525 sqldf( 1605 sqldf::sqldf(
1526 sprintf("%s %s", 1606 sprintf("%s %s",
1527 "select * from ksdata where not Source = 'NetworKIN'", 1607 "select * from ksdata where not Source = 'NetworKIN'",
1528 if (networkin) 1608 if (networkin)
1529 sprintf("or networkin_score >= %d", networkin_cutoff) 1609 sprintf("or networkin_score >= %d", networkin_cutoff)
1530 else 1610 else
1531 "" 1611 ""
1532 ) 1612 )
1533 ) 1613 )
1614 if (FALSE) write.csv(x = ksdata_filtered, file = "ksdata_filtered.csv")
1534 if (monitor_filtration_on_stderr) { 1615 if (monitor_filtration_on_stderr) {
1535 cat(see_variable(sqldf( 1616 cat(see_variable(sqldf::sqldf(
1536 "select count(*), Source from ksdata group by Source"), "\n")) 1617 "select count(*), Source from ksdata group by Source"), "\n"))
1537 cat(see_variable(sqldf( 1618 cat(see_variable(sqldf::sqldf(
1538 "select count(*), Source from ksdata_filtered group by Source"), "\n")) 1619 "select count(*), Source from ksdata_filtered group by Source"), "\n"))
1539 sink() 1620 sink()
1540 } 1621 }
1541 1622
1542 ############################################################################ 1623 ############################################################################
1602 ksdata_dataset_abbrev$Substrate.Gene, 1683 ksdata_dataset_abbrev$Substrate.Gene,
1603 ksdata_dataset_abbrev$Substrate.Mod, 1684 ksdata_dataset_abbrev$Substrate.Mod,
1604 ksdata_dataset_abbrev$p), 1685 ksdata_dataset_abbrev$p),
1605 ] 1686 ]
1606 if (print_nb_messages) nbe(see_variable(ksdata_dataset_abbrev)) 1687 if (print_nb_messages) nbe(see_variable(ksdata_dataset_abbrev))
1688 if (FALSE) write.csv(
1689 x = ksdata_dataset_abbrev,
1690 file = "ksdata_dataset_abbrev_no_aggregation.csv",
1691 row.names = FALSE
1692 )
1693
1694 # For some kinases, the only difference between kinase names in two databases
1695 # is the case of the the letters; dedup to eliminate this false distinction
1696 # The only imperfection with this approach is when the NetworKIN-only fliter
1697 # is in force and there is a duplication between NetworKIN and HPRD (which
1698 # outranks NetworKIN). Presently, the script does not invoke with this
1699 # function with the NetworKIN-only filter active.
1700 dedup_sql <- "
1701 select
1702 r.'Kinase.Gene', r.'Substrate.Gene', r.'Substrate.Mod',
1703 r.Peptide, r.p, r.FC, r.log2FC, r.Source
1704 from
1705 ( select
1706 rowid,
1707 rank() over
1708 (partition by
1709 lower(k.'Kinase.Gene'), k.p, k.FC
1710 order by
1711 k.Source
1712 ) as rank,
1713 *
1714 from
1715 ksdata_dataset_abbrev k
1716 ) as r
1717 where r.rank = 1
1718 "
1719 ksdata_dataset_abbrev <- sqldf::sqldf(x = dedup_sql)
1720 if (FALSE) write.csv(
1721 x = ksdata_dataset_abbrev,
1722 file = "ksdata_dataset_abbrev_no_aggregation_dedup.csv",
1723 row.names = FALSE
1724 )
1725
1607 # First aggregation step to account for multiply phosphorylated peptides 1726 # First aggregation step to account for multiply phosphorylated peptides
1608 # and differing peptide sequences; the goal here is to combine results 1727 # and differing peptide sequences; the goal here is to combine results
1609 # for all measurements of the same substrate. 1728 # for all measurements of the same substrate.
1610 # SELECT `Kinase.Gene`, `Substrate.Gene`, `Substrate.Mod`, 1729 # SELECT `Kinase.Gene`, `Substrate.Gene`, `Substrate.Mod`,
1611 # `Source`, avg(log2FC) AS log2FC 1730 # `Source`, avg(log2FC) AS log2FC
1654 log2FC ~ Kinase.Gene, 1773 log2FC ~ Kinase.Gene,
1655 data = ksdata_dataset_abbrev, 1774 data = ksdata_dataset_abbrev,
1656 FUN = mean 1775 FUN = mean
1657 ) 1776 )
1658 if (print_nb_messages) nbe(see_variable(mean_fc), "\n") 1777 if (print_nb_messages) nbe(see_variable(mean_fc), "\n")
1778 if (FALSE) write.csv(x = mean_fc, file = "mean_fc.csv")
1659 1779
1660 # for contrast j 1780 # for contrast j
1661 # for each kinase i 1781 # for each kinase i
1662 # extract log2 of fold-change (from `new` above) 1782 # extract log2 of fold-change (from `new` above)
1663 # (used in KSEA.Scores.R lines # 130 & 132) 1783 # (used in KSEA.Scores.R lines # 130 & 132)
1679 mean(log2_fc_j_each_i, na.rm = TRUE) 1799 mean(log2_fc_j_each_i, na.rm = TRUE)
1680 1800
1681 # Reorder mean_fc, although I don't see why 1801 # Reorder mean_fc, although I don't see why
1682 # (KSEA.Scores.R line 128 1802 # (KSEA.Scores.R line 128
1683 mean_fc <- mean_fc[order(mean_fc[, 1]), ] 1803 mean_fc <- mean_fc[order(mean_fc[, 1]), ]
1804 if (FALSE) write.csv(x = mean_fc, file = "mean_fc.csv")
1684 # mean_fc columns so far: "Kinase.Gene", "log2FC" 1805 # mean_fc columns so far: "Kinase.Gene", "log2FC"
1685 # - Kinase.Gene 1806 # - Kinase.Gene
1686 # indicates the gene name for each kinase. 1807 # indicates the gene name for each kinase.
1687 1808
1688 # Create column 3: mS 1809 # Create column 3: mS
1734 , 1855 ,
1735 drop = FALSE 1856 drop = FALSE
1736 ] 1857 ]
1737 } 1858 }
1738 1859
1739 #ACE nb(see_variable(nrow(mean_fc)), "\n")
1740 # Create column 8: FDR, deduced by Benjamini-Hochberg adustment from p-value 1860 # Create column 8: FDR, deduced by Benjamini-Hochberg adustment from p-value
1741 # - FDR 1861 # - FDR
1742 # is the p-value adjusted for multiple hypothesis testing 1862 # is the p-value adjusted for multiple hypothesis testing
1743 # using the Benjamini & Hochberg method." 1863 # using the Benjamini & Hochberg method."
1744 # (KSEA.Scores.R line # 134) 1864 # (KSEA.Scores.R line # 134)
1745 mean_fc$fdr <- 1865 mean_fc$fdr <-
1746 p.adjust(mean_fc$p_value, method = "fdr") 1866 p.adjust(mean_fc$p_value, method = "fdr")
1747 1867
1748 # It makes no sense to leave Z-scores negative when using 1868 # It makes no sense to leave Z-scores negative when using
1749 # absolute value of fold-change 1869 # absolute value of fold-change
1870 # nolint start
1871 # squash un-actionable "no visible global for ..."
1750 if (params$kseaUseAbsoluteLog2FC) { 1872 if (params$kseaUseAbsoluteLog2FC) {
1873 # nolint end
1751 mean_fc$z_score <- abs(mean_fc$z_score) 1874 mean_fc$z_score <- abs(mean_fc$z_score)
1752 } 1875 }
1753 1876
1754 # Remove second column (log2FC), which is duplicated as mS 1877 # Remove second column (log2FC), which is duplicated as mS
1755 # (KSEA.Scores.R line # 136) 1878 # (KSEA.Scores.R line # 136)
1807 } 1930 }
1808 ) 1931 )
1809 1932
1810 k <- k[selector < ksea_cutoff_threshold, ] 1933 k <- k[selector < ksea_cutoff_threshold, ]
1811 nrow_k <- nrow(k) 1934 nrow_k <- nrow(k)
1812
1813 #ACE nbe(see_variable(fdr_barplot_dataframe <- k))
1814 1935
1815 if (nrow_k > 0) { 1936 if (nrow_k > 0) {
1816 max_nchar_rowname <- max(nchar(rownames(k))) 1937 max_nchar_rowname <- max(nchar(rownames(k)))
1817 my_cex_names <- 1.0 / (1 + nrow_k / 50) 1938 my_cex_names <- 1.0 / (1 + nrow_k / 50)
1818 1939
1976 "m_s", 2097 "m_s",
1977 "z_score", 2098 "z_score",
1978 "p_value", 2099 "p_value",
1979 "fdr" 2100 "fdr"
1980 ) 2101 )
1981 #ACE output_order <- with(output_df, order(fdr)) 2102
2103 # nolint start
1982 output_order <- with(output_df, order(p_value)) 2104 output_order <- with(output_df, order(p_value))
2105 # nolint end
1983 output_df <- output_df[output_order, ] 2106 output_df <- output_df[output_order, ]
1984 2107
1985 output_df[, 2] <- sprintf("%0.3g", output_df[, 2]) 2108 output_df[, 2] <- sprintf("%0.3g", output_df[, 2])
1986 output_df$fdr <- sprintf("%0.4f", output_df$fdr) 2109 output_df$fdr <- sprintf("%0.4f", output_df$fdr)
1987 output_df$p_value <- sprintf("%0.2e", output_df$p_value) 2110 output_df$p_value <- sprintf("%0.2e", output_df$p_value)
1988 output_df$z_score <- sprintf("%0.2f", output_df$z_score) 2111 output_df$z_score <- sprintf("%0.2f", output_df$z_score)
1989 output_df$m_s <- sprintf("%d", output_df$m_s) 2112 output_df$m_s <- sprintf("%d", output_df$m_s)
1990 output_df$enrichment <- sprintf("%0.3g", output_df$enrichment) 2113 output_df$enrichment <- sprintf("%0.3g", output_df$enrichment)
1991 output_ncol <- ncol(output_df)
1992 colnames(output_df) <- 2114 colnames(output_df) <-
1993 c( 2115 c(
1994 "Kinase", 2116 "Kinase or motif",
1995 "\\(\\overline{{\\lvert}\\log_2 (\\text{fold-change}){\\rvert}}\\)", 2117 "\\(\\overline{{\\lvert}\\log_2 (\\text{fold-change}){\\rvert}}\\)",
1996 "Enrichment", 2118 "Enrichment",
1997 "Substrates", 2119 "Substrates",
1998 "z-score", 2120 "z-score",
1999 "p-value", 2121 "p-value",
2020 ) 2142 )
2021 if (sum(selector < ksea_cutoff_threshold) > 0) { 2143 if (sum(selector < ksea_cutoff_threshold) > 0) {
2022 if (print_nb_messages) nbe(see_variable(output_df)) 2144 if (print_nb_messages) nbe(see_variable(output_df))
2023 math_caption <- gsub("{", "\\{", caption, fixed = TRUE) 2145 math_caption <- gsub("{", "\\{", caption, fixed = TRUE)
2024 math_caption <- gsub("}", "\\}", math_caption, fixed = TRUE) 2146 math_caption <- gsub("}", "\\}", math_caption, fixed = TRUE)
2025 # with (
2026 # output_df,
2027 # )
2028 if (TRUE) { 2147 if (TRUE) {
2029 output_df$Kinase <- whack_underscores(output_df$Kinase)
2030 data_frame_tabbing_latex( 2148 data_frame_tabbing_latex(
2031 x = output_df, 2149 x = output_df,
2032 # vector of tab stops, in inches 2150 # vector of tab stops, in inches
2033 tabstops = c(1.0, 1.2, 1.0, 1.0, 1.0, 1.0), 2151 tabstops = c(1.8, 1.2, 0.8, 0.8, 0.8, 0.8, 0.8),
2034 # vector of headings, registered with tab-stops 2152 # vector of headings, registered with tab-stops
2035 headings = colnames(output_df), 2153 headings = colnames(output_df),
2036 # digits to pass to format.data.frame 2154 # digits to pass to format.data.frame
2037 digits = NULL, 2155 digits = NULL,
2038 # maximumn number of rows to print 2156 # maximumn number of rows to print
2055 # https://tug.org/texinfohtml/latex2e.html#Font-sizes 2173 # https://tug.org/texinfohtml/latex2e.html#Font-sizes
2056 charactersize = "small", 2174 charactersize = "small",
2057 # set verbatim to TRUE to debug output 2175 # set verbatim to TRUE to debug output
2058 verbatim = FALSE 2176 verbatim = FALSE
2059 ) 2177 )
2060 } else {
2061 data_frame_table_latex(
2062 x = output_df,
2063 justification = "l c c c c c c",
2064 centered = TRUE,
2065 caption = sprintf(
2066 "\\text{%s}, KSEA %s < %s",
2067 math_caption,
2068 ksea_cutoff_statistic,
2069 ksea_cutoff_threshold
2070 ),
2071 anchor = anchor,
2072 underscore_whack = FALSE
2073 )
2074 } 2178 }
2075 } else { 2179 } else {
2076 cat( 2180 cat(
2077 sprintf( 2181 sprintf(
2078 "\\break 2182 "\\break
2130 mycol <- unique(append(mycol_neg, mycol_pos)) 2234 mycol <- unique(append(mycol_neg, mycol_pos))
2131 color_breaks <- list(breaks_all, mycol) 2235 color_breaks <- list(breaks_all, mycol)
2132 return(color_breaks) 2236 return(color_breaks)
2133 } 2237 }
2134 2238
2239 # nolint start
2240 # squash un-actionable cyclomatic_complexity warning
2135 hm2plus <- function( 2241 hm2plus <- function(
2242 # nolint end
2136 x, 2243 x,
2137 mat = matrix( 2244 mat = matrix(
2138 c( 2245 c(
2139 c(0, 4, 0), 2246 c(0, 4, 0),
2140 c(0, 3, 3), 2247 c(0, 3, 3),
2159 key_par = list(), 2266 key_par = list(),
2160 hclustfun = hclust, 2267 hclustfun = hclust,
2161 ... 2268 ...
2162 ) { 2269 ) {
2163 2270
2164 varargs <- list(...)
2165 if (FALSE) # this is to avoid commenting out code to pass linting... 2271 if (FALSE) # this is to avoid commenting out code to pass linting...
2166 my_hm2 <- latex_show_invocation(heatmap.2, head_patch = FALSE) 2272 my_hm2 <- latex_show_invocation(gplots::heatmap.2, head_patch = FALSE)
2167 else 2273 else
2168 my_hm2 <- heatmap.2 2274 my_hm2 <- gplots::heatmap.2
2169 2275
2170 x <- as.matrix(x) 2276 x <- as.matrix(x)
2171 if (sum(!is.na(x)) < 1) 2277 if (sum(!is.na(x)) < 1)
2172 return(NULL) 2278 return(NULL)
2173 color_count <- 1 + max(64, length(as.vector(x))) # 8 was not enough 2279 color_count <- 1 + max(64, length(as.vector(x))) # 8 was not enough
2175 min_nonax <- min(x, na.rm = TRUE) 2281 min_nonax <- min(x, na.rm = TRUE)
2176 max_nonax <- max(x, na.rm = TRUE) 2282 max_nonax <- max(x, na.rm = TRUE)
2177 if (print_nb_messages) nb("within hm2plus", see_variable(divergent), "\n") 2283 if (print_nb_messages) nb("within hm2plus", see_variable(divergent), "\n")
2178 if (divergent) { 2284 if (divergent) {
2179 zlim <- max(abs(min_nonax), abs(max_nonax)) 2285 zlim <- max(abs(min_nonax), abs(max_nonax))
2180 if (print_nb_messages) nb(see_variable(pre_zlim <- zlim, "\n")) 2286 if (print_nb_messages) nb(see_variable(zlim, "\n"))
2181 breaks <- (zlim) / (break_count:1) 2287 breaks <- (zlim) / (break_count:1)
2182 if (print_nb_messages) nb(see_variable(breaks, "\n")) 2288 if (print_nb_messages) nb(see_variable(breaks, "\n"))
2183 breaks <- breaks - median(breaks) 2289 breaks <- breaks - median(breaks)
2184 zlim <- c(-zlim, zlim) 2290 zlim <- c(-zlim, zlim)
2185 if (print_nb_messages) nb(see_variable(zlim, "\n")) 2291 if (print_nb_messages) nb(see_variable(zlim, "\n"))
2186 } else { 2292 } else {
2187 zlim <- max(abs(min_nonax), abs(max_nonax)) 2293 zlim <- max(abs(min_nonax), abs(max_nonax))
2188 if (print_nb_messages) nb(see_variable(pre_zlim <- zlim, "\n")) 2294 if (print_nb_messages) nb(see_variable(zlim, "\n"))
2189 breaks <- zlim / (break_count:1) 2295 breaks <- zlim / (break_count:1)
2190 if (print_nb_messages) nb(see_variable(breaks, "\n")) 2296 if (print_nb_messages) nb(see_variable(breaks, "\n"))
2191 if (max_nonax < 0) { 2297 if (max_nonax < 0) {
2192 breaks <- breaks - zlim 2298 breaks <- breaks - zlim
2193 zlim <- c(-zlim, 0) 2299 zlim <- c(-zlim, 0)
2195 zlim <- c(0, zlim) 2301 zlim <- c(0, zlim)
2196 } 2302 }
2197 if (print_nb_messages) nb(see_variable(zlim, "\n")) 2303 if (print_nb_messages) nb(see_variable(zlim, "\n"))
2198 } 2304 }
2199 nonax <- x 2305 nonax <- x
2200 nonax[is.na(x)] <- min_nonax 2306 # nolint start
2307 # squash un-actionable "no visible global for ..."
2308 if (params$heatMapNAsubstitute) nonax[is.na(x)] <- min_nonax
2309 # nolint end
2201 if (is.null(widths)) widths <- c(denwid, 4 - denwid, 1.5) 2310 if (is.null(widths)) widths <- c(denwid, 4 - denwid, 1.5)
2202 if (is.null(heights)) heights <- c(0.4, denhgt, 4.0) 2311 if (is.null(heights)) heights <- c(0.4, denhgt, 4.0)
2203 colors <- 2312 colors <-
2204 if (divergent && min_nonax < 0) { 2313 if (divergent && min_nonax < 0) {
2205 # divergent colors on both sides of zero 2314 # divergent colors on both sides of zero
2210 } else if (divergent && max_nonax < 0) { 2319 } else if (divergent && max_nonax < 0) {
2211 # "divergent" colors < zero 2320 # "divergent" colors < zero
2212 colorRampPalette(c("red", "white"))(color_count) 2321 colorRampPalette(c("red", "white"))(color_count)
2213 } else { 2322 } else {
2214 # "non-divergent" colors including zero 2323 # "non-divergent" colors including zero
2215 hcl.colors(color_count, "YlOrRd", rev = TRUE) 2324 tmp <- hcl.colors(color_count, "YlOrRd", rev = TRUE)
2325 # nolint start
2326 # squash un-actionable "no visible global for ..."
2327 tmp[1] <- params$heatMapNAgrey
2328 # nolint end
2329 tmp
2216 } 2330 }
2217 2331
2218 #ACE if (print_nb_messages) nb("within hm2plus", see_variable(key_par), "\n") 2332 nbe("There are ", sum(is.na(x)), " NA values")
2219 #ACE if (print_nb_messages) nb(see_variable(colors, "\n")) 2333
2220 #ACE key_par$col = colors
2221 #ACE key_par$breaks = breaks
2222
2223 if (print_nb_messages) nb(see_variable(par(), "\n")) #ACE TODO remove me
2224 if (print_nb_messages) cat("\\leavevmode\n\\linebreak\n") #ACE TODO remove me
2225 suppressWarnings( 2334 suppressWarnings(
2226 my_hm2( 2335 my_hm2(
2227 x = x, 2336 x = x,
2228 col = colors, 2337 col = colors,
2229 #ACE symkey = FALSE,
2230 density.info = density_info, 2338 density.info = density_info,
2231 srtCol = srtcol, 2339 srtCol = srtcol,
2232 srtRow = srtrow, 2340 srtRow = srtrow,
2233 margins = margins, 2341 margins = margins,
2234 lwid = widths, 2342 lwid = widths,
2239 lmat = mat, 2347 lmat = mat,
2240 notecol = notecol, 2348 notecol = notecol,
2241 trace = trace, 2349 trace = trace,
2242 bg = "yellow", 2350 bg = "yellow",
2243 hclustfun = hclustfun, 2351 hclustfun = hclustfun,
2244 #ACE breaks = breaks,
2245 oldstyle = FALSE, 2352 oldstyle = FALSE,
2246 ... # varargs 2353 ... # varargs
2247 ) 2354 )
2248 ) 2355 )
2249 # implicitly returning value returned by heatmap.2 2356 # implicitly returning value returned by heatmap.2
2250 } 2357 }
2251 2358
2252 # draw_kseaapp_summary_heatmap is a helper function for ksea_heatmap 2359 # draw_kseaapp_summary_heatmap is a helper function for ksea_heatmap
2360 # nolint start
2361 # squash un-actionable cyclomatic_complexity warning
2253 draw_kseaapp_summary_heatmap <- function( 2362 draw_kseaapp_summary_heatmap <- function(
2363 # nolint end
2254 x, # matrix with row/col names already formatted 2364 x, # matrix with row/col names already formatted
2255 sample_cluster, # a binary input of TRUE or FALSE, 2365 sample_cluster, # a binary input of TRUE or FALSE,
2256 # indicating whether or not to perform 2366 # indicating whether or not to perform
2257 # hierarchical clustering of the sample columns 2367 # hierarchical clustering of the sample columns
2258 merged_asterisk, # matrix having dimensions of x, values "*" or "" 2368 merged_asterisk, # matrix having dimensions of x, values "*" or ""
2293 cat_variable(x) 2403 cat_variable(x)
2294 return(FALSE) 2404 return(FALSE)
2295 } 2405 }
2296 max_nchar_rowname <- max(nchar(rownames(x))) 2406 max_nchar_rowname <- max(nchar(rownames(x)))
2297 max_nchar_colname <- max(nchar(colnames(x))) 2407 max_nchar_colname <- max(nchar(colnames(x)))
2298 my_limit <- g_intensity_hm_rows
2299 2408
2300 my_row_cex_scale <- master_cex * 150 / nrow_x 2409 my_row_cex_scale <- master_cex * 150 / nrow_x
2301 #ACE row cex shrink hack begin 2410 # row cex shrink hack begin
2302 my_row_cex_scale <- master_cex * 150 / max(nrow_x, ncol_x) 2411 my_row_cex_scale <- master_cex * 150 / max(nrow_x, ncol_x)
2303 #ACE row cex shrink hack end 2412 # row cex shrink hack end
2304 2413
2305 my_col_cex_scale <- 3.0 2414 my_col_cex_scale <- 3.0
2306 #ACE col cex shrink hack begin 2415 # col cex shrink hack begin
2307 if (ncol_x > 40) 2416 if (ncol_x > 40)
2308 my_col_cex_scale <- 3.0 * 40 / ncol_x 2417 my_col_cex_scale <- 3.0 * 40 / ncol_x
2309 #ACE col cex shrink hack end 2418 # col cex shrink hack end
2310 2419
2311 my_asterisk_scale <- 0.4 * my_row_cex_scale 2420 my_asterisk_scale <- 0.4 * my_row_cex_scale
2312 my_row_warp <- 1 2421 my_row_warp <- 1
2313 my_note_warp <- 2 2422 my_note_warp <- 2
2314 my_row_warp <- 1 2423 my_row_warp <- 1
2336 ) 2445 )
2337 my_margins <- c(1, 1) + 2446 my_margins <- c(1, 1) +
2338 c( 2447 c(
2339 margins[1] * 0.08 * max_nchar_colname * my_col_cex, 2448 margins[1] * 0.08 * max_nchar_colname * my_col_cex,
2340 margins[2] * 0.04 * max_nchar_rowname * my_row_cex 2449 margins[2] * 0.04 * max_nchar_rowname * my_row_cex
2341 )
2342
2343 my_notecex <-
2344 my_scale *
2345 min(
2346 1.1,
2347 my_row_cex_asterisk * my_note_warp,
2348 my_col_cex * my_note_warp
2349 ) 2450 )
2350 2451
2351 if (print_trace_messages) { 2452 if (print_trace_messages) {
2352 cat_variable(my_heights, suffix = "; ") 2453 cat_variable(my_heights, suffix = "; ")
2353 cat_variable(my_margins, suffix = "\n\n") 2454 cat_variable(my_margins, suffix = "\n\n")
2401 return(TRUE) 2502 return(TRUE)
2402 } 2503 }
2403 2504
2404 # function drawing heatmap of contrast fold-change for each kinase, 2505 # function drawing heatmap of contrast fold-change for each kinase,
2405 # adapted from KSEAapp::KSEA.Heatmap 2506 # adapted from KSEAapp::KSEA.Heatmap
2507 # nolint start
2508 # squash un-actionable cyclomatic_complexity warning
2406 ksea_heatmap <- function( 2509 ksea_heatmap <- function(
2510 # nolint end
2407 # the data frame outputs from the KSEA.Scores() function, in list format 2511 # the data frame outputs from the KSEA.Scores() function, in list format
2408 score_list, 2512 score_list,
2409 # a character vector of all the sample names for heatmap annotation: 2513 # a character vector of all the sample names for heatmap annotation:
2410 # - the names must be in the same order as the data in score_list 2514 # - the names must be in the same order as the data in score_list
2411 # - please avoid long names, as they may get cropped in the final image 2515 # - please avoid long names, as they may get cropped in the final image
2427 # hierarchical clustering of the sample columns 2531 # hierarchical clustering of the sample columns
2428 sample_cluster, 2532 sample_cluster,
2429 # a binary input of TRUE or FALSE, indicating whether or not to export 2533 # a binary input of TRUE or FALSE, indicating whether or not to export
2430 # the heatmap as a .png image into the working directory 2534 # the heatmap as a .png image into the working directory
2431 export = FALSE, 2535 export = FALSE,
2432 # bottom and right margins; adjust as needehttps://tex.stackexchange.com/a/56795d if contrast names are too long 2536 # bottom and right margins; adjust as needed if contrast names are too long
2433 margins = c(6, 6), 2537 margins = c(6, 6),
2434 # print which kinases? 2538 # print which kinases?
2435 # - Mandatory argument, must be one of const_ksea_.*_kinases 2539 # - Mandatory argument, must be one of const_ksea_.*_kinases
2436 which_kinases, 2540 which_kinases,
2437 # additional arguments to gplots::heatmap.2, such as: 2541 # additional arguments to gplots::heatmap.2, such as:
2482 asterisk_rows <- rowSums(merged_asterisk == "*") > 0 2586 asterisk_rows <- rowSums(merged_asterisk == "*") > 0
2483 all_rows <- rownames(merged_stats) 2587 all_rows <- rownames(merged_stats)
2484 names(asterisk_rows) <- all_rows 2588 names(asterisk_rows) <- all_rows
2485 non_asterisk_rows <- names(asterisk_rows[asterisk_rows == FALSE]) 2589 non_asterisk_rows <- names(asterisk_rows[asterisk_rows == FALSE])
2486 asterisk_rows <- names(asterisk_rows[asterisk_rows == TRUE]) 2590 asterisk_rows <- names(asterisk_rows[asterisk_rows == TRUE])
2487 merged_scores_asterisk <- merged_scores[names(asterisk_rows), , drop = FALSE]
2488 merged_scores_non_asterisk <- merged_scores[names(non_asterisk_rows), , drop = FALSE]
2489 2591
2490 row_list <- list() 2592 row_list <- list()
2491 row_list[[const_ksea_astrsk_kinases]] <- asterisk_rows 2593 row_list[[const_ksea_astrsk_kinases]] <- asterisk_rows
2492 row_list[[const_ksea_all_kinases]] <- all_rows 2594 row_list[[const_ksea_all_kinases]] <- all_rows
2493 row_list[[const_ksea_nonastrsk_kinases]] <- non_asterisk_rows 2595 row_list[[const_ksea_nonastrsk_kinases]] <- non_asterisk_rows
2611 } 2713 }
2612 2714
2613 keep_var_gtr_0 <- keep_var_gtr_min(0) 2715 keep_var_gtr_0 <- keep_var_gtr_min(0)
2614 2716
2615 # function drawing heatmap of phosphopeptide intensities 2717 # function drawing heatmap of phosphopeptide intensities
2718 # nolint start
2719 # squash un-actionable cyclomatic_complexity warning
2616 ppep_heatmap <- 2720 ppep_heatmap <-
2721 # nolint end
2617 function( 2722 function(
2618 m, # matrix with rownames already formatted 2723 m, # matrix with rownames already formatted
2619 cutoff, # cutoff used by hm_heading_function 2724 cutoff, # cutoff used by hm_heading_function
2620 hm_heading_function, # construct $ cat heading from m and cutoff 2725 hm_heading_function, # construct $ cat heading from m and cutoff
2621 hm_main_title, # main title for plot (drawn below heading) 2726 hm_main_title, # main title for plot (drawn below heading)
2628 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right 2733 adj = 0.5, # adjust text: 0 left, 0.5 middle, 1 right
2629 row_scaling = # should row-scaling be applied if possible? 2734 row_scaling = # should row-scaling be applied if possible?
2630 g_default_heatmap_row_scaling, 2735 g_default_heatmap_row_scaling,
2631 ... # passthru to hm2plus or heatmap.2 2736 ... # passthru to hm2plus or heatmap.2
2632 ) { 2737 ) {
2633 use_heatmap_1 <- FALSE
2634 peptide_count <- 0 2738 peptide_count <- 0
2635 # emit the heading for the heatmap 2739 # emit the heading for the heatmap
2636 if (hm_heading_function(m, cutoff)) { 2740 if (hm_heading_function(m, cutoff)) {
2637 nrow_m <- nrow(m) 2741 nrow_m <- nrow(m)
2638 peptide_count <- min(max_peptide_count, nrow_m) 2742 peptide_count <- min(max_peptide_count, nrow_m)
2641 # Margin was heuristically derived to accommodate the widest label 2745 # Margin was heuristically derived to accommodate the widest label
2642 row_mchar_max <- max(nchar(rownames(m_margin))) 2746 row_mchar_max <- max(nchar(rownames(m_margin)))
2643 col_mchar_max <- max(nchar(colnames(m_margin))) 2747 col_mchar_max <- max(nchar(colnames(m_margin)))
2644 row_margin <- master_cex * row_mchar_max * 2.6 2748 row_margin <- master_cex * row_mchar_max * 2.6
2645 col_margin <- master_cex * col_mchar_max * 2.6 2749 col_margin <- master_cex * col_mchar_max * 2.6
2646 if (print_trace_messages) cat(sprintf("row_margin = %0.3f; ", row_margin)) 2750 if (print_trace_messages) cat(
2647 if (print_trace_messages) cat(sprintf("col_margin = %0.3f; ", col_margin)) 2751 sprintf("row_margin = %0.3f; ", row_margin))
2752 if (print_trace_messages) cat(
2753 sprintf("col_margin = %0.3f; ", col_margin))
2648 hm_call <- NULL 2754 hm_call <- NULL
2649 tryCatch( 2755 tryCatch(
2650 { 2756 {
2651 # set non-argument parameters for hm_call inner function 2757 # set non-argument parameters for hm_call inner function
2652 my_row_cex <- 2758 my_row_cex <-
2654 (max(nchar(rownames(m_margin)))^2) * g_intensity_hm_rows 2760 (max(nchar(rownames(m_margin)))^2) * g_intensity_hm_rows
2655 ) 2761 )
2656 m_hm <- m[peptide_count:1, , drop = FALSE] 2762 m_hm <- m[peptide_count:1, , drop = FALSE]
2657 if (is.null(cellnote)) { 2763 if (is.null(cellnote)) {
2658 cellnote <- matrix("", nrow = nrow(m_hm), ncol = ncol(m_hm)) 2764 cellnote <- matrix("", nrow = nrow(m_hm), ncol = ncol(m_hm))
2659 cellnote[is.na(m_hm)] <- "NA" 2765 # nolint start
2766 # squash un-actionable "no visible global for ..."
2767 cellnote[is.na(m_hm)] <- params$heatMapNAcellNote
2768 # nolint end
2660 } else { 2769 } else {
2661 cellnote <- cellnote[peptide_count:1, , drop = FALSE] 2770 cellnote <- cellnote[peptide_count:1, , drop = FALSE]
2662 } 2771 }
2663 m_hm[is.na(m_hm)] <- 0 2772 # nolint start
2773 # squash un-actionable "no visible global for ..."
2774 if (params$heatMapNAsubstitute) m_hm[is.na(m_hm)] <- 0
2775 # nolint end
2664 nrow_m_hm <- nrow(m_hm) 2776 nrow_m_hm <- nrow(m_hm)
2665 ncol_m_hm <- ncol(m_hm) 2777 ncol_m_hm <- ncol(m_hm)
2666 if (nrow_m_hm < 1 || ncol_m_hm < 1) 2778 if (nrow_m_hm < 1 || ncol_m_hm < 1)
2667 return(peptide_count) # return zero as initialized above 2779 return(peptide_count) # return zero as initialized above
2668 my_limit <- g_intensity_hm_rows
2669 2780
2670 2781
2671 my_row_cex <- master_cex * (100 / (2 + row_mchar_max)) 2782 my_row_cex <- master_cex * (100 / (2 + row_mchar_max))
2672 my_col_cex <- master_cex * 6 * row_margin / col_margin 2783 my_col_cex <- master_cex * 6 * row_margin / col_margin
2673 my_col_adj <- min(my_col_cex, my_row_cex) / my_col_cex 2784 my_col_adj <- min(my_col_cex, my_row_cex) / my_col_cex
2674 my_col_cex <- min(my_col_cex, my_row_cex) 2785 my_col_cex <- min(my_col_cex, my_row_cex)
2675 col_margin <- sqrt(my_col_adj) * col_margin 2786 col_margin <- sqrt(my_col_adj) * col_margin
2676 if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex)) 2787 if (print_trace_messages) cat(
2677 if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex)) 2788 sprintf("my_row_cex = %0.3f; ", my_row_cex))
2789 if (print_trace_messages) cat(
2790 sprintf("my_col_cex = %0.3f; ", my_col_cex))
2678 if (is.null(margins)) my_margins <- 2791 if (is.null(margins)) my_margins <-
2679 c( 2792 c(
2680 (my_col_cex + col_margin), # col 2793 (my_col_cex + col_margin), # col
2681 (my_row_cex + row_margin) / my_row_cex # row 2794 (my_row_cex + row_margin) / my_row_cex # row
2682 ) 2795 )
2688 "my_margins = c(%s)\n\n", 2801 "my_margins = c(%s)\n\n",
2689 paste(my_margins, collapse = ", ") 2802 paste(my_margins, collapse = ", ")
2690 ) 2803 )
2691 ) 2804 )
2692 my_hm2_cex <- 2 * master_cex 2805 my_hm2_cex <- 2 * master_cex
2693 my_key_cex <- 0.9 - 0.1 * (g_intensity_hm_rows + nrow_m_hm) / g_intensity_hm_rows 2806 my_key_cex <-
2807 0.9 -
2808 0.1 * (g_intensity_hm_rows + nrow_m_hm) / g_intensity_hm_rows
2694 my_key_warp <- 1.5 * 22.75 / row_margin 2809 my_key_warp <- 1.5 * 22.75 / row_margin
2695 my_key_cex <- min(1.10, my_key_warp * my_key_cex) 2810 my_key_cex <- min(1.10, my_key_warp * my_key_cex)
2696 my_hgt_scale <- 3.70 - 0.4 * (max(1, 0.9 * my_row_cex) - 1) 2811 my_hgt_scale <- 3.70 - 0.4 * (max(1, 0.9 * my_row_cex) - 1)
2697 my_hgt_scale <- 3.75 # 3.615 2812 my_hgt_scale <- 3.75 # 3.615
2698 my_hgt_scale <- 3.60 # 3.615 2813 my_hgt_scale <- 3.60 # 3.615
2703 cat_variable(my_warp, "\n\n", 3) 2818 cat_variable(my_warp, "\n\n", 3)
2704 # added 0.9 heuristically... 2819 # added 0.9 heuristically...
2705 my_plot_height <- 2820 my_plot_height <-
2706 (0.566 + 0.354 * (nrow_m / g_intensity_hm_rows)) * 2821 (0.566 + 0.354 * (nrow_m / g_intensity_hm_rows)) *
2707 min(my_hgt_scale, my_hgt_scale * my_warp) 2822 min(my_hgt_scale, my_hgt_scale * my_warp)
2708 my_plot_height <- min(3.65, my_plot_height * g_intensity_hm_rows / 50) 2823 my_plot_height <-
2824 min(3.65, my_plot_height * g_intensity_hm_rows / 50)
2709 my_heights <- c( 2825 my_heights <- c(
2710 0.3, # title and top dendrogram 2826 0.3, # title and top dendrogram
2711 my_plot_height, # plot and bottom margin 2827 my_plot_height, # plot and bottom margin
2712 4.15 - my_hgt_scale, # legend 2828 4.15 - my_hgt_scale, # legend
2713 0.05 + my_hgt_scale - my_plot_height # whitespace below legend 2829 0.05 + my_hgt_scale - my_plot_height # whitespace below legend
2768 scale = scaling, 2884 scale = scaling,
2769 srtcol = 90, 2885 srtcol = 90,
2770 srtrow = 0, 2886 srtrow = 0,
2771 xlab = "", 2887 xlab = "",
2772 cellnote = cellnote, 2888 cellnote = cellnote,
2889 # ref for na.color:
2890 # https://cran.r-project.org/web/packages/gplots/gplots.pdf
2891 # nolint start
2892 # squash un-actionable "no visible global for ..."
2893 na.color = params$heatMapNAcellColor,
2894 # nolint end
2773 notecex = my_note_cex, 2895 notecex = my_note_cex,
2774 ... 2896 ...
2775 ) 2897 )
2776 ) 2898 )
2777 ) { 2899 ) {
2831 }, 2953 },
2832 error = function(r) { 2954 error = function(r) {
2833 cat( 2955 cat(
2834 sprintf( 2956 sprintf(
2835 "\n%s %s Internal message: %s\n\\newline\n\n", 2957 "\n%s %s Internal message: %s\n\\newline\n\n",
2836 "Failure drawing heatmap,", 2958 "Failure drawing heatmap, possibly because of ",
2837 "possibly because of too many missing values.\n\\newline\n\n", 2959 "too many missing values.\n\\newline\n\n",
2838 r$message 2960 r$message
2839 ) 2961 )
2840 ) 2962 )
2841 cat_margins() 2963 cat_margins()
2842 } 2964 }
2843 ) 2965 )
2844 } else { 2966 } else {
2845 cat( 2967 cat(
2846 "\nFailure drawing heatmap, possibly because of too many missing values.\n" 2968 "\nFailure drawing heatmap, ",
2969 "possibly because of too many missing values.\n"
2847 ) 2970 )
2848 } 2971 }
2849 } 2972 }
2850 ) 2973 )
2851 } 2974 }
2853 # return value: 2976 # return value:
2854 peptide_count 2977 peptide_count
2855 } 2978 }
2856 2979
2857 # function drawing heatmap of correlations if they exist, else covariances 2980 # function drawing heatmap of correlations if they exist, else covariances
2981 # nolint start
2982 # squash un-actionable cyclomatic_complexity warning
2983 # squash un-actionable "no visible global for ..."
2858 cov_heatmap <- 2984 cov_heatmap <-
2859 function( 2985 function(
2986 # nolint end
2860 m, # matrix with rownames already formatted 2987 m, # matrix with rownames already formatted
2988 kinase_name,
2861 top_substrates = FALSE, 2989 top_substrates = FALSE,
2862 ... # passthru to hm2plus or heatmap.2 2990 ... # passthru to hm2plus or heatmap.2
2863 ) { 2991 ) {
2864 if (print_nb_messages) nbe(see_variable(m), " [", nrow(m), "x", ncol(m), "\n") 2992 if (print_nb_messages) nbe(
2865 #ACE nb(rowSums(m, na.rm = TRUE)) 2993 see_variable(m), " [", nrow(m), "x", ncol(m), "\n")
2866 #ACE bad_rows <- (rowSums(m, na.rm = TRUE) == 0)
2867 #ACE nb(see_variable(bad_rows))
2868 #ACE m <- m[-bad_rows, , drop = FALSE]
2869 colnames_m <- colnames(m) 2994 colnames_m <- colnames(m)
2870 is_na_m <- is.na(m) 2995 is_na_m <- is.na(m)
2871 tmp <- m 2996 tmp <- m
2872 tmp[is_na_m] <- 0 2997 tmp[is_na_m] <- 0
2873 2998
2901 } 3026 }
2902 3027
2903 3028
2904 t_m <- t(tmp) 3029 t_m <- t(tmp)
2905 t_m[is.na(t_m)] <- 0 3030 t_m[is.na(t_m)] <- 0
2906 prefiltered_nrow <- ncol(t_m)
2907 3031
2908 my_corcov <- cov(t_m) 3032 my_corcov <- cov(t_m)
2909 did_filter_rows <- did_filter_cols <- FALSE 3033 did_filter_rows <- did_filter_cols <- FALSE
2910 if (g_correlate_substrates && !is_positive_definite(my_corcov)) { 3034 if (g_correlate_substrates && !is_positive_definite(my_corcov)) {
2911 my_correlate_substrates <- FALSE 3035 my_correlate_substrates <- FALSE
2942 3066
2943 f_omissions <- 3067 f_omissions <-
2944 function(is_corr) { 3068 function(is_corr) {
2945 cat( 3069 cat(
2946 sprintf( 3070 sprintf(
2947 "Below is the %s plot for %s substrates", 3071 "Below is the %s plot for %s sites",
2948 if (is_corr) "correlation" else "covariance", 3072 if (is_corr) "correlation" else "covariance",
2949 sprintf( 3073 sprintf(
2950 if (top_substrates) 3074 if (top_substrates)
2951 "%0.0f \"highest-quality\"" 3075 "%0.0f \"highest-quality\""
2952 else 3076 else
3004 3128
3005 parjust <- par(adj = 0.5) 3129 parjust <- par(adj = 0.5)
3006 on.exit(par(parjust)) 3130 on.exit(par(parjust))
3007 my_corcov <- my_corcov[order(rownames(my_corcov)), ] 3131 my_corcov <- my_corcov[order(rownames(my_corcov)), ]
3008 my_main <- 3132 my_main <-
3009 sprintf("%s among %s substrates %s", 3133 sprintf("%s among %s sites %s",
3010 if (my_correlate_substrates) "Correlation" 3134 if (my_correlate_substrates) "Correlation"
3011 else "Covariance", 3135 else "Covariance",
3012 kinase_name, 3136 kinase_name,
3013 if (!my_correlate_substrates && 3137 if (!my_correlate_substrates &&
3014 g_filter_cov_var_gt_1 && 3138 g_filter_cov_var_gt_1 &&
3023 my_plot_height, 3147 my_plot_height,
3024 my_legend_height # was 4.0 - my_hgt_scale # was 4.19 3148 my_legend_height # was 4.0 - my_hgt_scale # was 4.19
3025 ) 3149 )
3026 if (print_trace_messages) cat(sprintf("max_nchar = %0.3f; ", max_nchar)) 3150 if (print_trace_messages) cat(sprintf("max_nchar = %0.3f; ", max_nchar))
3027 if (print_trace_messages) cat(sprintf("my_margin = %0.3f; ", my_margin)) 3151 if (print_trace_messages) cat(sprintf("my_margin = %0.3f; ", my_margin))
3028 if (print_trace_messages) cat(sprintf("my_plot_height = %0.3f\n\n", my_plot_height)) 3152 if (print_trace_messages) cat(
3153 sprintf("my_plot_height = %0.3f\n\n", my_plot_height))
3029 if (print_trace_messages) cat(sprintf("master_cex = %0.3f; ", master_cex)) 3154 if (print_trace_messages) cat(sprintf("master_cex = %0.3f; ", master_cex))
3030 if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex)) 3155 if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex))
3031 if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex)) 3156 if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex))
3032 if (print_trace_messages) cat(sprintf("my_key_cex = %0.3f\n\n", my_key_cex)) 3157 if (print_trace_messages) cat(
3033 if (print_trace_messages) cat(sprintf("my_hgt_scale = %0.3f\n\n", my_hgt_scale)) 3158 sprintf("my_key_cex = %0.3f\n\n", my_key_cex))
3034 if (print_trace_messages) cat(sprintf("legend height = %0.3f\n\n", my_legend_height)) 3159 if (print_trace_messages) cat(
3160 sprintf("my_hgt_scale = %0.3f\n\n", my_hgt_scale))
3161 if (print_trace_messages) cat(
3162 sprintf("legend height = %0.3f\n\n", my_legend_height))
3035 if (print_trace_messages) cat( 3163 if (print_trace_messages) cat(
3036 sprintf( 3164 sprintf(
3037 "my_heights = c(%s); sum = %0.3f\n\n", 3165 "my_heights = c(%s); sum = %0.3f\n\n",
3038 paste( 3166 paste(
3039 sprintf("%0.3f", my_heights), 3167 sprintf("%0.3f", my_heights),
3671 "median peptide-intensity across all sample classes.\n" 3799 "median peptide-intensity across all sample classes.\n"
3672 ) 3800 )
3673 # Take the accurate ln(x+1) because the data are log-normally distributed 3801 # Take the accurate ln(x+1) because the data are log-normally distributed
3674 # and because median can involve an average of two measurements. 3802 # and because median can involve an average of two measurements.
3675 quant_data_imp <- log1p(quant_data_imp) 3803 quant_data_imp <- log1p(quant_data_imp)
3676 quant_data_imp[ind] <- row_apply(quant_data_imp, median, na.rm = TRUE)[ind[, 1]] 3804 quant_data_imp[ind] <-
3805 row_apply(quant_data_imp, median, na.rm = TRUE)[ind[, 1]]
3677 # Take the accurate e^x - 1 to match scaling of original input. 3806 # Take the accurate e^x - 1 to match scaling of original input.
3678 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp)) 3807 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp))
3679 good_rows <- !is.nan(rowMeans(quant_data_imp)) 3808 good_rows <- !is.nan(rowMeans(quant_data_imp))
3680 } 3809 }
3681 , "mean" = { 3810 , "mean" = {
3687 # Take the accurate ln(x+1) because the data are log-normally distributed, 3816 # Take the accurate ln(x+1) because the data are log-normally distributed,
3688 # so arguments to mean should be previously transformed. 3817 # so arguments to mean should be previously transformed.
3689 # this will have to be 3818 # this will have to be
3690 quant_data_imp <- log1p(quant_data_imp) 3819 quant_data_imp <- log1p(quant_data_imp)
3691 # Assign to NA cells the mean for the row 3820 # Assign to NA cells the mean for the row
3692 quant_data_imp[ind] <- row_apply(quant_data_imp, mean, na.rm = TRUE)[ind[, 1]] 3821 quant_data_imp[ind] <-
3822 row_apply(quant_data_imp, mean, na.rm = TRUE)[ind[, 1]]
3693 # Take the accurate e^x - 1 to match scaling of original input. 3823 # Take the accurate e^x - 1 to match scaling of original input.
3694 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp)) 3824 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp))
3695 good_rows <- !is.nan(rowMeans(quant_data_imp)) 3825 good_rows <- !is.nan(rowMeans(quant_data_imp))
3696 } 3826 }
3697 , "random" = { 3827 , "random" = {
3698 quant_data_imp <- quant_data 3828 quant_data_imp <- quant_data
3699 m1 <- median(sds, na.rm = TRUE) * sd_percentile #sd to be used is the median sd 3829 # sd to be used is the median sd
3830 m1 <- median(sds, na.rm = TRUE) * sd_percentile
3700 # If you want results to be reproducible, you will want to call 3831 # If you want results to be reproducible, you will want to call
3701 # base::set.seed before calling stats::rnorm 3832 # base::set.seed before calling stats::rnorm
3702 imputation_method_description <- 3833 imputation_method_description <-
3703 paste("Substitute each missing value with random intensity", 3834 paste("Substitute each missing value with random intensity",
3704 sprintf( 3835 sprintf(
3733 imp_smry_pot_peptides_after <- sum(good_rows) 3864 imp_smry_pot_peptides_after <- sum(good_rows)
3734 imp_smry_rejected_after <- sum(!good_rows) 3865 imp_smry_rejected_after <- sum(!good_rows)
3735 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ])) 3866 imp_smry_missing_values_after <- sum(is.na(quant_data_imp[good_rows, ]))
3736 3867
3737 # From ?`%in%`: 3868 # From ?`%in%`:
3738 # %in% is currently defined as function(x, table) match(x, table, nomatch = 0) > 0 3869 # %in% is currently defined
3870 # as function(x, table) match(x, table, nomatch = 0) > 0
3739 3871
3740 stock_in <- 3872 stock_in <-
3741 names(good_rows) %in% 3873 names(good_rows) %in%
3742 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count]) 3874 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count])
3743 if (print_nb_messages) nbe(see_variable(stock_in), "\n") 3875 if (print_nb_messages) nbe(see_variable(stock_in), "\n")
3796 cat(tabular_lines) 3928 cat(tabular_lines)
3797 ``` 3929 ```
3798 3930
3799 ```{r filter_good_rows, echo = FALSE} 3931 ```{r filter_good_rows, echo = FALSE}
3800 3932
3801 if (print_nb_messages) nbe("before name extraction, ", see_variable(length(good_rows)), " ", see_variable(good_rows), "\n") 3933 if (print_nb_messages) nbe(
3934 "before name extraction, ", see_variable(length(good_rows)), " ",
3935 see_variable(good_rows), "\n")
3802 good_rows <- names(good_rows[names(great_enough_row_names)]) 3936 good_rows <- names(good_rows[names(great_enough_row_names)])
3803 if (print_nb_messages) nbe("after name extraction, ", see_variable(length(good_rows)), see_variable(good_rows), "\n") 3937 if (print_nb_messages) nbe(
3804 3938 "after name extraction, ", see_variable(length(good_rows)),
3805 #ACE min_group_obs_count <- min_group_obs_count[names(great_enough_row_names)] 3939 see_variable(good_rows), "\n")
3806 #ACE nbe("good_rows") 3940
3807 #ACE nbe(see_variable(good_rows))
3808 #ACE nbe("names(min_group_obs_count) before filter for good rows")
3809 #ACE nbe(see_variable(names(min_group_obs_count)))
3810 min_group_obs_count <- min_group_obs_count[good_rows] 3941 min_group_obs_count <- min_group_obs_count[good_rows]
3811 #ACE nbe("min_group_obs_count after filter for good rows")
3812 #ACE nbe(see_variable(names(min_group_obs_count)))
3813 3942
3814 # Zap rows where imputation was insufficiently effective 3943 # Zap rows where imputation was insufficiently effective
3815 full_data <- full_data [good_rows, ] 3944 full_data <- full_data [good_rows, ]
3816 quant_data <- quant_data [good_rows, ] 3945 quant_data <- quant_data [good_rows, ]
3817 quant_data_log <- quant_data_log [good_rows, ] 3946 quant_data_log <- quant_data_log [good_rows, ]
3818 3947
3819 if (print_nb_messages) nbe("before row filter, ", see_variable(nrow(quant_data_imp)), "\n") 3948 if (print_nb_messages) nbe(
3949 "before row filter, ", see_variable(nrow(quant_data_imp)), "\n")
3820 quant_data_imp <- quant_data_imp[good_rows, ] 3950 quant_data_imp <- quant_data_imp[good_rows, ]
3821 if (print_nb_messages) nbe("after row filter, ", see_variable(nrow(quant_data_imp)), "\n") 3951 if (print_nb_messages) nbe(
3952 "after row filter, ", see_variable(nrow(quant_data_imp)), "\n")
3822 write_debug_file(quant_data_imp) 3953 write_debug_file(quant_data_imp)
3823 quant_data_imp_good_rows <- quant_data_imp 3954 quant_data_imp_good_rows <- quant_data_imp
3824 3955
3825 write_debug_file(quant_data_imp_good_rows) 3956 write_debug_file(quant_data_imp_good_rows)
3826 ``` 3957 ```
3945 my_ppep_distrib_bxp( 4076 my_ppep_distrib_bxp(
3946 x = blue_dots 4077 x = blue_dots
3947 , sample_name_grow = sample_name_grow 4078 , sample_name_grow = sample_name_grow
3948 , main = "Peptide intensities after eliminating unusable peptides" 4079 , main = "Peptide intensities after eliminating unusable peptides"
3949 , varwidth = boxplot_varwidth 4080 , varwidth = boxplot_varwidth
3950 , sub = paste(boxplot_sub, "Box widths reflect number of peptides for sample") 4081 , sub = paste(boxplot_sub,
4082 "Box widths reflect number of peptides for sample")
3951 , xlab = "Sample" 4083 , xlab = "Sample"
3952 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") 4084 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
3953 , col = const_boxplot_fill 4085 , col = const_boxplot_fill
3954 , notch = FALSE 4086 , notch = FALSE
3955 , ylim = ylim 4087 , ylim = ylim
4281 4413
4282 ## Assignment of $p$-value and quality score 4414 ## Assignment of $p$-value and quality score
4283 4415
4284 For each phosphopeptide, ANOVA analysis was performed to compute a $p$-value representing the evidence against the "null hypothesis" ($H_0$) that the intensity does not vary significantly among sample groups. 4416 For each phosphopeptide, ANOVA analysis was performed to compute a $p$-value representing the evidence against the "null hypothesis" ($H_0$) that the intensity does not vary significantly among sample groups.
4285 4417
4286 However, because as more and more phosphopeptides are tested, there is increasd probability that, by random chance, a given peptide will have a $p$-value that appears to indicate significance. For this reason, the $p$-values were adjusted by applying the False Discovery Rate (FDR) correction from Benjamini and Hochberg (1995) [doi:10.1111/j.2517-6161.1995.tb02031.x](https:/doi.org/10.1111/j.2517-6161.1995.tb02031.x). 4418 However, because as more and more phosphopeptides are tested, there is increasd probability that, by random chance, a given peptide will have a $p$-value that appears to indicate significance. For this reason, the $p$-values were adjusted by applying the False Discovery Rate (FDR) correction from Benjamini and Hochberg (1995) [doi:10.1111/j.2517-6161.1995.tb02031.x](https://doi.org/10.1111/j.2517-6161.1995.tb02031.x).
4287 4419
4288 Furthermore, to give more weight to phosphopeptides having fewer missing values, an (arbitrarily defined) quality score was assigned to each, defined as: 4420 Furthermore, to give more weight to phosphopeptides having fewer missing values, an (arbitrarily defined) quality score was assigned to each, defined as:
4289 4421
4290 $$ 4422 $$
4291 \textit{quality}_j 4423 \textit{quality}_j
4311 ``` 4443 ```
4312 4444
4313 ```{r anova, echo = FALSE, fig.dim = c(10, 12), results = 'asis'} 4445 ```{r anova, echo = FALSE, fig.dim = c(10, 12), results = 'asis'}
4314 g_can_run_ksea <- FALSE 4446 g_can_run_ksea <- FALSE
4315 old_oma <- par("oma") 4447 old_oma <- par("oma")
4448
4449 # nolint start
4450 # squash un-actionable cyclomatic_complexity warning
4316 if (count_of_treatment_levels < 2) { 4451 if (count_of_treatment_levels < 2) {
4452 # nolint end
4317 cat( 4453 cat(
4318 "ERROR!!!! Cannot perform ANOVA analysis", 4454 "ERROR!!!! Cannot perform ANOVA analysis",
4319 "(see next page)\\newpage\n" 4455 "(see next page)\\newpage\n"
4320 ) 4456 )
4321 cat( 4457 cat(
4365 } else { 4501 } else {
4366 4502
4367 if (print_nb_messages) nbe("computing p_value_data_anova_ps\n") 4503 if (print_nb_messages) nbe("computing p_value_data_anova_ps\n")
4368 if (print_nb_messages) nbe(see_variable(nrow(quant_data_imp_qn_log)), "\n") 4504 if (print_nb_messages) nbe(see_variable(nrow(quant_data_imp_qn_log)), "\n")
4369 if (print_nb_messages) nbe(see_variable(ncol(quant_data_imp_qn_log)), "\n") 4505 if (print_nb_messages) nbe(see_variable(ncol(quant_data_imp_qn_log)), "\n")
4370 if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log[, ".NE.7C"]), "\n")
4371 if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log), "\n") 4506 if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log), "\n")
4372 if (print_nb_messages) nbe(see_variable(anova_func), "\n") 4507 if (print_nb_messages) nbe(see_variable(anova_func), "\n")
4373 if (print_nb_messages) nbe(see_variable(smpl_trt), "\n") 4508 if (print_nb_messages) nbe(see_variable(smpl_trt), "\n")
4374 if (print_nb_messages) nbe(see_variable(one_way_all_categories), "\n") 4509 if (print_nb_messages) nbe(see_variable(one_way_all_categories), "\n")
4375 tryCatch( 4510 tryCatch(
4475 if (number_of_peptides_found > g_intensity_hm_rows - 1) { 4610 if (number_of_peptides_found > g_intensity_hm_rows - 1) {
4476 cat("\\leavevmode\n\n\n") 4611 cat("\\leavevmode\n\n\n")
4477 break 4612 break
4478 } 4613 }
4479 4614
4615 if (print_trace_messages) {
4616 cat_variable(cutoff)
4617 cat("\n\n")
4618 }
4619
4480 bool_1 <- (p_value_data$fdr_adjusted_anova_p < cutoff) 4620 bool_1 <- (p_value_data$fdr_adjusted_anova_p < cutoff)
4481 bool_2 <- (p_value_data$min_group_obs_count >= g_intensity_min_per_class) 4621 bool_2 <- (p_value_data$min_group_obs_count >= g_intensity_min_per_class)
4482 g_can_run_ksea <- g_can_run_ksea || (sum(bool_2) > 0) 4622 g_can_run_ksea <- g_can_run_ksea || (sum(bool_2) > 0)
4483 bool_4 <- (p_value_data$quality >= params$minQuality) 4623 bool_4 <- (p_value_data$quality >= params$minQuality)
4484 bool_3 <- as.logical( 4624 bool_3 <- as.logical(
4507 4647
4508 if (print_trace_messages) 4648 if (print_trace_messages)
4509 cat_variable(filtered_p, force_str = TRUE) 4649 cat_variable(filtered_p, force_str = TRUE)
4510 4650
4511 have_remaining_peptides <- sum(bool_3, na.rm = TRUE) > 0 4651 have_remaining_peptides <- sum(bool_3, na.rm = TRUE) > 0
4652
4512 filter_result_string <- 4653 filter_result_string <-
4513 sprintf( 4654 sprintf(
4514 "%s, %s of %0.0f peptides remained having both %s and %s.\n\n", 4655 "%s, %s of %0.0f peptides remained having both %s and %s.\n\n",
4515 "After filtering for ANOVA results", 4656 "After filtering for ANOVA results",
4516 if (have_remaining_peptides) 4657 if (have_remaining_peptides)
4517 as.character(sum(bool_3, na.rm = TRUE)) 4658 as.character(sum(bool_3, na.rm = TRUE))
4518 else 4659 else
4519 "none", 4660 "none",
4520 length(bool_3), 4661 length(bool_3),
4521 sprintf("adjusted p-value < %s", as.character(signif(cutoff, 2))), 4662 adj_p_string <-
4663 sprintf("adjusted p-value < %s", as.character(signif(cutoff, 2))),
4522 sprintf( 4664 sprintf(
4523 "more than %0.0f observations in some groups", 4665 "more than %0.0f observations in some groups",
4524 max(0, g_intensity_min_per_class - 1) 4666 max(0, g_intensity_min_per_class - 1)
4525 ) 4667 )
4526 ) 4668 )
4535 filtered_data_filtered[ 4677 filtered_data_filtered[
4536 order(filtered_p$fdr_adjusted_anova_p), 4678 order(filtered_p$fdr_adjusted_anova_p),
4537 , drop = FALSE 4679 , drop = FALSE
4538 ] 4680 ]
4539 4681
4540 if (have_remaining_peptides && nrow(filtered_p) > 0 && nrow(filtered_data_filtered) > 0) { 4682 if (have_remaining_peptides
4683 && nrow(filtered_p) > 0
4684 && nrow(filtered_data_filtered) > 0
4685 ) {
4541 if (first_page_suppress == 1) { 4686 if (first_page_suppress == 1) {
4542 first_page_suppress <- 0 4687 first_page_suppress <- 0
4543 } else { 4688 } else {
4544 cat("\\newpage\n") 4689 cat("\\newpage\n")
4545 } 4690 }
4546 latex_samepage({ 4691 latex_samepage({
4547 cat(filter_result_string) 4692 cat(filter_result_string)
4548 if (nrow(filtered_data_filtered) > 1) { 4693 if (nrow(filtered_data_filtered) > 1) {
4549 cat(subsection_header(sprintf( 4694 cat(subsection_header(sprintf(
4550 "Intensity distributions for %d phosphopeptides\n", 4695 "Intensity distributions for %d phosphopeptides, %s\n",
4551 nrow(filtered_data_filtered) 4696 nrow(filtered_data_filtered),
4697 adj_p_string
4552 ))) 4698 )))
4553 } else { 4699 } else {
4554 cat(subsection_header(sprintf( 4700 cat(
4555 "Intensity distribution for one phosphopeptide (%s)\n", 4701 subsection_header(
4556 rownames(filtered_data_filtered)[1] 4702 sprintf(
4557 ))) 4703 "Intensity distribution for one phosphopeptide, %s\n",
4704 adj_p_string
4705 )
4706 ),
4707 rownames(filtered_data_filtered)[1],
4708 "\n"
4709 )
4558 } 4710 }
4559 }) # end latex_samepage 4711 }) # end latex_samepage
4560 4712
4561 old_par <- par( 4713 old_par <- par(
4562 mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1), 4714 mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1),
4635 g_intensity_hm_rows), 4787 g_intensity_hm_rows),
4636 sprintf("whose adjusted p-value < %0.2f\n", cutoff) 4788 sprintf("whose adjusted p-value < %0.2f\n", cutoff)
4637 ) 4789 )
4638 )) 4790 ))
4639 } else { 4791 } else {
4640 if (nrow(m) == 0) { 4792 if (nrow(m) < 2) {
4641 return(FALSE) 4793 return(FALSE)
4642 } else { 4794 } else {
4643 cat(subsection_header( 4795 cat(subsection_header(
4644 paste( 4796 paste(
4645 sprintf("Heatmap for %d usable peptide genes whose", nrow(m)), 4797 sprintf("Heatmap for %d usable peptide genes whose", nrow(m)),
4659 nrow_m <- nrow(m) 4811 nrow_m <- nrow(m)
4660 ncol_m <- ncol(m) 4812 ncol_m <- ncol(m)
4661 if (nrow_m > 0) { 4813 if (nrow_m > 0) {
4662 rownames_m <- rownames(m) 4814 rownames_m <- rownames(m)
4663 q <- data.frame(pepname = rownames_m) 4815 q <- data.frame(pepname = rownames_m)
4664 g <- sqldf(" 4816 g <- sqldf::sqldf("
4665 SELECT q.pepname, substr(met.Gene_Name, 1, 30) AS gene_name 4817 SELECT q.pepname, substr(met.Gene_Name, 1, 30) AS gene_name
4666 FROM q, metadata_plus_p AS met 4818 FROM q, metadata_plus_p AS met
4667 WHERE q.pepname = met.Phosphopeptide 4819 WHERE q.pepname = met.Phosphopeptide
4668 ORDER BY q.rowid 4820 ORDER BY q.rowid
4669 ") 4821 ")
4716 } 4868 }
4717 } 4869 }
4718 } 4870 }
4719 } 4871 }
4720 } 4872 }
4721 cat(filter_result_string) 4873
4722 cat("\\leavevmode\n") 4874 cat("\\leavevmode\n")
4723 4875
4724 if (!g_can_run_ksea) { 4876 if (!g_can_run_ksea) {
4725 errmsg <- paste("Cannot proceed with KSEA analysis", 4877 errmsg <- paste("Cannot proceed with KSEA analysis",
4726 "because too many values are missing.") 4878 "because too many values are missing.")
4745 sprintf("%0.3g", display_p_value_data$fdr_adjusted_anova_p) 4897 sprintf("%0.3g", display_p_value_data$fdr_adjusted_anova_p)
4746 display_p_value_data$quality <- 4898 display_p_value_data$quality <-
4747 sprintf("%0.3g", display_p_value_data$quality) 4899 sprintf("%0.3g", display_p_value_data$quality)
4748 4900
4749 headers_1st_line <- 4901 headers_1st_line <-
4750 c("", "Raw ANOVA", "FDR-adj.", "Missing", "Min. #", "", "") 4902 c("", "Raw ANOVA", "FDR-adj.", "Missing", "Min. #",
4903 "", "")
4751 headers_2nd_line <- 4904 headers_2nd_line <-
4752 c("Phosphopeptide", "p-value", "p-value", "values", "group-obs", "Quality", "Ranking") 4905 c("Phosphopeptide", "p-value", "p-value", "values", "group-obs",
4906 "Quality", "Ranking")
4753 data_frame_tabbing_latex( 4907 data_frame_tabbing_latex(
4754 x = display_p_value_data, 4908 x = display_p_value_data,
4755 tabstops = c(2.75, 0.80, 0.80, 0.5, 0.6, 0.60), 4909 tabstops = c(2.75, 0.80, 0.80, 0.5, 0.6, 0.60),
4756 use_subsubsection_header = FALSE, 4910 use_subsubsection_header = FALSE,
4757 headings = c(headers_1st_line, headers_2nd_line), 4911 headings = c(headers_1st_line, headers_2nd_line),
5333 5487
5334 # KSEA Analysis Summaries 5488 # KSEA Analysis Summaries
5335 5489
5336 Results of Kinase-Substrate Enrichment Analysis are presented here, if the substrates for any kinases are relatively enriched. Enrichments are found by the CRAN `KSEAapp` package: 5490 Results of Kinase-Substrate Enrichment Analysis are presented here, if the substrates for any kinases are relatively enriched. Enrichments are found by the CRAN `KSEAapp` package:
5337 5491
5338 - The package is available on CRAN, at https:/cran.r-project.org/package=KSEAapp 5492 - The package is available on CRAN, at https://cran.r-project.org/package=KSEAapp
5339 - The method used is described in Casado et al. (2013) [doi:10.1126/scisignal.2003573](https:/doi.org/10.1126/scisignal.2003573) and Wiredja et al (2017) [doi:10.1093/bioinformatics/btx415](https:/doi.org/10.1093/bioinformatics/btx415). 5493 - The method used is described in Casado et al. (2013) [doi:10.1126/scisignal.2003573](https://doi.org/10.1126/scisignal.2003573) and Wiredja et al (2017) [doi:10.1093/bioinformatics/btx415](https://doi.org/10.1093/bioinformatics/btx415).
5340 - An online alternative (supporting only analysis of human data) is available at [https:/casecpb.shinyapps.io/ksea/](https:/casecpb.shinyapps.io/ksea/). 5494 - An online alternative (supporting only analysis of human data) is available at [https://casecpb.shinyapps.io/ksea/](https://casecpb.shinyapps.io/ksea/).
5341 5495
5342 For each kinase, $i$, and each two-way contrast of treatments, $j$, an enrichment $z$-score is computed as: 5496 For each kinase, $i$, and each two-way contrast of treatments, $j$, an enrichment $z$-score is computed as:
5343 5497
5344 $$ 5498 $$
5345 \text{kinase enrichment }z\text{-score}_{j,i} = \frac{(\overline{`r sfc`}_{j,i} - \overline{`r pfc`}_j)\sqrt{m_{j,i}}}{\delta_j} 5499 \text{kinase enrichment }z\text{-score}_{j,i} = \frac{(\overline{`r sfc`}_{j,i} - \overline{`r pfc`}_j)\sqrt{m_{j,i}}}{\delta_j}
5459 ) 5613 )
5460 ) 5614 )
5461 kseaapp_input <- 5615 kseaapp_input <-
5462 sqldf::sqldf( 5616 sqldf::sqldf(
5463 x = sprintf(" 5617 x = sprintf("
5464 SELECT `Protein`, `Gene`, `Peptide`, phosphopep AS `Residue.Both`, `p`, `FC` 5618 SELECT `Protein`,`Gene`,`Peptide`,phosphopep AS `Residue.Both`,`p`,`FC`
5465 FROM v_kseaapp_input 5619 FROM v_kseaapp_input
5466 WHERE contrast = %d 5620 WHERE contrast = %d
5467 ", 5621 ",
5468 i_cntrst 5622 i_cntrst
5469 ), 5623 ),
5501 drop = FALSE 5655 drop = FALSE
5502 ] 5656 ]
5503 } 5657 }
5504 5658
5505 if (FALSE) { 5659 if (FALSE) {
5660 if (print_nb_messages) nbe(
5661 "Output contents of `ksea_scores_rslt` table\n")
5662 cat_variable(ksea_scores_rslt)
5663 cat("\n\\clearpage\n")
5506 data_frame_tabbing_latex( 5664 data_frame_tabbing_latex(
5507 x = ksea_scores_rslt, 5665 x = ksea_scores_rslt,
5508 tabstops = c(0.8, 0.8, 0.8, 0.8, 0.8, 0.8), 5666 tabstops = c(1.8, 0.8, 0.8, 0.3, 0.8, 0.8),
5509 caption = paste("KSEA scores for contrast ", 5667 caption = paste("KSEA scores for contrast ",
5510 cntrst_b_level, "to", cntrst_a_level), 5668 cntrst_b_level, "to", cntrst_a_level),
5511 use_subsubsection_header = FALSE 5669 use_subsubsection_header = FALSE
5512 ) 5670 )
5513 }
5514
5515 if (FALSE) {
5516 if (print_nb_messages) nbe("Output contents of `ksea_scores_rslt` table\n")
5517 cat_variable(ksea_scores_rslt)
5518 cat("\n\\clearpage\n")
5519 } 5671 }
5520 5672
5521 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) { 5673 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) {
5522 next_index <- 1 + next_index 5674 next_index <- 1 + next_index
5523 rslt$score_list[[next_index]] <- ksea_scores_rslt 5675 rslt$score_list[[next_index]] <- ksea_scores_rslt
5625 ) 5777 )
5626 sub_title <- contrast_longlabel 5778 sub_title <- contrast_longlabel
5627 tryCatch( 5779 tryCatch(
5628 expr = { 5780 expr = {
5629 ksea_scores_rslt <- rslt$score_list[[next_index]] 5781 ksea_scores_rslt <- rslt$score_list[[next_index]]
5630 if (print_nb_messages) nbe(see_variable(ksea_scores_rslt)) #ACE 5782 if (print_nb_messages) nbe(see_variable(ksea_scores_rslt))
5631 5783
5632 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) { 5784 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) {
5633 sink(deferred <- file()) 5785 sink(deferred <- file())
5634 ksea_low_fdr_print( 5786 ksea_low_fdr_print(
5635 rslt = rslt, 5787 rslt = rslt,
5758 loaded_packages_df <- data.frame( 5910 loaded_packages_df <- data.frame(
5759 package = loaded_packages_df$package, 5911 package = loaded_packages_df$package,
5760 version = loaded_packages_df$loadedversion, 5912 version = loaded_packages_df$loadedversion,
5761 date = loaded_packages_df$date 5913 date = loaded_packages_df$date
5762 ) 5914 )
5763 #ACE cat("\\clearpage\n\\section{R package versions}\n")
5764 #ACE data_frame_tabbing_latex(
5765 #ACE x = loaded_packages_df,
5766 #ACE tabstops = c(2.5, 1.25),
5767 #ACE caption = "R package versions"
5768 #ACE )
5769 cat("\\clearpage\n\\section{Input parameter settings}\n") 5915 cat("\\clearpage\n\\section{Input parameter settings}\n")
5770 data_frame_tabbing_latex( 5916 data_frame_tabbing_latex(
5771 x = param_df, 5917 x = param_df,
5772 tabstops = c(1.75), 5918 tabstops = c(1.75),
5773 underscore_whack = TRUE, 5919 underscore_whack = TRUE,
5784 knitr::knit_exit() 5930 knitr::knit_exit()
5785 } 5931 }
5786 ``` 5932 ```
5787 5933
5788 ```{r kseabar, echo = FALSE, fig.dim = c(9.5, 12.3), results = 'asis'} 5934 ```{r kseabar, echo = FALSE, fig.dim = c(9.5, 12.3), results = 'asis'}
5935 # nolint start
5936 # squash un-actionable cyclomatic_complexity warning
5789 if (have_kinase_descriptions) { 5937 if (have_kinase_descriptions) {
5938 # nolint end
5790 my_section_header <- 5939 my_section_header <-
5791 sprintf( 5940 sprintf(
5792 "inases whose KSEA %s < %s\n", 5941 "inases whose KSEA %s < %s\n",
5793 ksea_cutoff_statistic, 5942 ksea_cutoff_statistic,
5794 signif(ksea_cutoff_threshold, 2) 5943 signif(ksea_cutoff_threshold, 2)
5824 caption = paste0("Descriptions of k", my_section_header) 5973 caption = paste0("Descriptions of k", my_section_header)
5825 ) 5974 )
5826 } 5975 }
5827 5976
5828 if (FALSE) { 5977 if (FALSE) {
5829 cat_variable(sqldf("SELECT kinase FROM enriched_kinases")) 5978 cat_variable(sqldf::sqldf("SELECT kinase FROM enriched_kinases"))
5830 cat_variable(sqldf(" 5979 cat_variable(sqldf::sqldf("
5831 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep 5980 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep
5832 FROM pseudo_ksdata 5981 FROM pseudo_ksdata
5833 WHERE gene IN (SELECT kinase FROM enriched_kinases) 5982 WHERE gene IN (SELECT kinase FROM enriched_kinases)
5834 ")) 5983 "))
5835 data_frame_table_latex( 5984 data_frame_table_latex(
5836 x = sqldf(" 5985 x = sqldf::sqldf("
5837 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep 5986 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep
5838 FROM pseudo_ksdata 5987 FROM pseudo_ksdata
5839 WHERE gene IN (SELECT kinase FROM enriched_kinases) 5988 WHERE gene IN (SELECT kinase FROM enriched_kinases)
5840 "), 5989 "),
5841 justification = "l l l", 5990 justification = "l l l",
5843 caption = "substrates of enriched kinases", 5992 caption = "substrates of enriched kinases",
5844 anchor = c(const_table_anchor_p, const_table_anchor_t), 5993 anchor = c(const_table_anchor_p, const_table_anchor_t),
5845 underscore_whack = TRUE 5994 underscore_whack = TRUE
5846 ) 5995 )
5847 data_frame_table_latex( 5996 data_frame_table_latex(
5848 x = sqldf(" 5997 x = sqldf::sqldf("
5849 SELECT 5998 SELECT
5850 gene AS kinase, 5999 gene AS kinase,
5851 ppep, 6000 ppep,
5852 sub_gene, 6001 sub_gene,
5853 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label, 6002 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label,
5869 caption = "labeled substrates of enriched kinases", 6018 caption = "labeled substrates of enriched kinases",
5870 anchor = c(const_table_anchor_p, const_table_anchor_t), 6019 anchor = c(const_table_anchor_p, const_table_anchor_t),
5871 underscore_whack = TRUE 6020 underscore_whack = TRUE
5872 ) 6021 )
5873 } 6022 }
5874 all_enriched_substrates <- sqldf(" 6023 all_enriched_substrates <- sqldf::sqldf("
5875 SELECT 6024 SELECT
5876 gene AS kinase, 6025 gene AS kinase,
5877 ppep, 6026 ppep,
5878 sub_gene, 6027 sub_gene,
5879 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label, 6028 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label,
5897 , 6046 ,
5898 drop = FALSE 6047 drop = FALSE
5899 ] 6048 ]
5900 6049
5901 all_enriched_substrates$sub_gene <- 6050 all_enriched_substrates$sub_gene <-
5902 sub( 6051 sub(" ///.*", "...",
5903 " ///.*",
5904 " ...",
5905 all_enriched_substrates$sub_gene 6052 all_enriched_substrates$sub_gene
5906 ) 6053 )
5907 6054
5908 all_enriched_substrates$label <- 6055 all_enriched_substrates$label <-
5909 with( 6056 with(
5926 if (g_neednewpage) cat("\\newpage\n") 6073 if (g_neednewpage) cat("\\newpage\n")
5927 g_neednewpage <- TRUE 6074 g_neednewpage <- TRUE
5928 if (nrow(m) > g_intensity_hm_rows) { 6075 if (nrow(m) > g_intensity_hm_rows) {
5929 cat(subsection_header( 6076 cat(subsection_header(
5930 sprintf( 6077 sprintf(
5931 "Highest-quality %d (of %d) enriched %s-substrates", 6078 "Highest-quality %d (of %d) enriched %s-sites",
5932 g_intensity_hm_rows, 6079 g_intensity_hm_rows,
5933 nrow(m), 6080 nrow(m),
5934 kinase 6081 kinase
5935 ) 6082 )
5936 )) 6083 ))
5939 return(FALSE) 6086 return(FALSE)
5940 } else { 6087 } else {
5941 nrow_m <- nrow(m) 6088 nrow_m <- nrow(m)
5942 cat(subsection_header( 6089 cat(subsection_header(
5943 sprintf( 6090 sprintf(
5944 "%d enriched %s-substrate%s", 6091 "%d enriched %s-site%s",
5945 nrow_m, 6092 nrow_m,
5946 kinase, 6093 kinase,
5947 if (nrow_m > 1) "s" else "" 6094 if (nrow_m > 1) "s" else ""
5948 ) 6095 )
5949 )) 6096 ))
6023 6170
6024 } 6171 }
6025 ``` 6172 ```
6026 6173
6027 ```{r enriched, echo = FALSE, fig.dim = c(12, 13.7), results = 'asis'} 6174 ```{r enriched, echo = FALSE, fig.dim = c(12, 13.7), results = 'asis'}
6175 # nolint start
6176 # squash un-actionable cyclomatic_complexity warning
6028 if (g_can_run_ksea) { 6177 if (g_can_run_ksea) {
6178 # nolint end
6029 g_did_enriched_header <- FALSE 6179 g_did_enriched_header <- FALSE
6030 for (kinase_name in sort(enriched_kinases$kinase)) { 6180 for (kinase_name in sort(enriched_kinases$kinase)) {
6031 enriched_substrates <- 6181 enriched_substrates <-
6032 all_enriched_substrates[ 6182 all_enriched_substrates[
6033 all_enriched_substrates$kinase == kinase_name, 6183 all_enriched_substrates$kinase == kinase_name,
6046 ten_trunc_ppep 6196 ten_trunc_ppep
6047 ) 6197 )
6048 ) 6198 )
6049 # Get the intensity values for the heatmap 6199 # Get the intensity values for the heatmap
6050 enriched_intensities <- 6200 enriched_intensities <-
6051 as.matrix(unimputed_quant_data_log[enriched_substrates$ppep, , drop = FALSE]) 6201 as.matrix(unimputed_quant_data_log[
6202 enriched_substrates$ppep, , drop = FALSE
6203 ])
6052 6204
6053 # Remove rows having too many NA values to be relevant 6205 # Remove rows having too many NA values to be relevant
6054 good_rows <- (rowSums(enriched_intensities, na.rm = TRUE) != 0) 6206 good_rows <- (rowSums(enriched_intensities, na.rm = TRUE) != 0)
6055 #ACE nbe(see_variable(good_rows), "\n") 6207
6056 enriched_substrates <- enriched_substrates[good_rows, , drop = FALSE] 6208 enriched_substrates <- enriched_substrates[good_rows, , drop = FALSE]
6057 enriched_intensities <- enriched_intensities[good_rows, , drop = FALSE] 6209 enriched_intensities <- enriched_intensities[good_rows, , drop = FALSE]
6058 6210
6059 # Rename the rows with the display-name for the heatmap 6211 # Rename the rows with the display-name for the heatmap
6060 short_row_names <- sub( 6212 short_row_names <- sub(
6074 ) 6226 )
6075 rownames(enriched_intensities) <- long_row_names 6227 rownames(enriched_intensities) <- long_row_names
6076 # Format as matrix for heatmap 6228 # Format as matrix for heatmap
6077 m <- as.matrix(enriched_intensities) 6229 m <- as.matrix(enriched_intensities)
6078 rownames(m) <- trunc_enriched_substrate(rownames(m)) 6230 rownames(m) <- trunc_enriched_substrate(rownames(m))
6079
6080 #ACE nb("m with bad rows: ", see_variable(m), "\n")
6081 #ACE good_rows <- (rowSums(m, na.rm = TRUE) != 0)
6082 #ACE nb(see_variable(good_rows), "\n")
6083 #ACE m <- m[good_rows, , drop = FALSE]
6084 #ACE nb("m without(?) bad rows: ", see_variable(m), "\n")
6085 #ACE nb(see_variable(short_row_names), "\n")
6086 #ACE local_short_row_names <- short_row_names[good_rows]
6087 #ACE local_long_row_names <- long_row_names[good_rows]
6088 #ACE local_enriched_intensities <- enriched_intensities[local_long_row_names, ]
6089 6231
6090 # Draw the heading and heatmap 6232 # Draw the heading and heatmap
6091 nrow_m <- nrow(m) 6233 nrow_m <- nrow(m)
6092 if (nrow_m > 0) { 6234 if (nrow_m > 0) {
6093 if (!g_did_enriched_header) { 6235 if (!g_did_enriched_header) {
6096 g_did_enriched_header <- TRUE 6238 g_did_enriched_header <- TRUE
6097 } 6239 }
6098 is_na_m <- is.na(m) 6240 is_na_m <- is.na(m)
6099 cellnote_m <- is_na_m 6241 cellnote_m <- is_na_m
6100 cellnote_m[!is_na_m] <- "" 6242 cellnote_m[!is_na_m] <- ""
6101 cellnote_m[is_na_m] <- "NA" 6243 cellnote_m[is_na_m] <- params$heatMapNAcellNote
6102 cut_args <- new_env() 6244 cut_args <- new_env()
6103 cut_args$cutoff <- cutoff 6245 cut_args$cutoff <- cutoff
6104 cut_args$kinase <- kinase_name 6246 cut_args$kinase <- kinase_name
6105 cut_args$statistic <- ksea_cutoff_statistic 6247 cut_args$statistic <- ksea_cutoff_statistic
6106 cut_args$threshold <- ksea_cutoff_threshold 6248 cut_args$threshold <- ksea_cutoff_threshold
6108 ppep_heatmap( 6250 ppep_heatmap(
6109 m = m, 6251 m = m,
6110 cellnote = cellnote_m, 6252 cellnote = cellnote_m,
6111 cutoff = cut_args, 6253 cutoff = cut_args,
6112 hm_heading_function = cat_enriched_heading, 6254 hm_heading_function = cat_enriched_heading,
6113 hm_main_title 6255 hm_main_title = paste(
6114 = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates", 6256 "Unnormalized (zero-imputed)",
6257 "intensities of enriched kinase-substrates"),
6115 suppress_row_dendrogram = FALSE, 6258 suppress_row_dendrogram = FALSE,
6116 master_cex = 0.35, 6259 master_cex = 0.35,
6117 sepcolor = "black", 6260 sepcolor = "black",
6118 colsep = sample_colsep, 6261 colsep = sample_colsep,
6119 row_scaling = "none" 6262 row_scaling = "none"
6121 if (number_of_peptides_found > 1) { 6264 if (number_of_peptides_found > 1) {
6122 6265
6123 tryCatch( 6266 tryCatch(
6124 { 6267 {
6125 rownames(m) <- short_row_names 6268 rownames(m) <- short_row_names
6126 cov_heatmap(m, nrow_m > g_intensity_hm_rows) 6269 cov_heatmap(
6270 m = m,
6271 kinase_name = kinase_name,
6272 top_substrates = nrow_m > g_intensity_hm_rows
6273 )
6127 }, 6274 },
6128 error = function(e) { 6275 error = function(e) {
6129 cat( 6276 cat(
6130 sprintf( 6277 sprintf(
6131 "ERROR: %s\n\\newline\n", 6278 "ERROR: %s\n\\newline\n",
6160 headers_1st_line <- c("", "", "ANOVA", "min(values", "") 6307 headers_1st_line <- c("", "", "ANOVA", "min(values", "")
6161 if (1 < nrow(enriched_substrates)) 6308 if (1 < nrow(enriched_substrates))
6162 cat("\n\\newpage\n") 6309 cat("\n\\newpage\n")
6163 cat(subsubsection_header( 6310 cat(subsubsection_header(
6164 sprintf( 6311 sprintf(
6165 "Details for %s%s-substrates", 6312 "Details for %s%s-sites",
6166 if (excess_substrates) 6313 if (excess_substrates)
6167 sprintf( 6314 sprintf(
6168 "%s \"highest quality\" ", 6315 "%s \"highest quality\" ",
6169 g_intensity_hm_rows 6316 g_intensity_hm_rows
6170 ) 6317 )
6188 6335
6189 # get kinase, ppep, concat(kinase) tuples for enriched kinases 6336 # get kinase, ppep, concat(kinase) tuples for enriched kinases
6190 6337
6191 if (print_nb_messages) nb("kinase_ppep_label <- ...\n") 6338 if (print_nb_messages) nb("kinase_ppep_label <- ...\n")
6192 if (print_nb_messages) nbe("kinase_ppep_label <- ...\n") 6339 if (print_nb_messages) nbe("kinase_ppep_label <- ...\n")
6193 kinase_ppep_label <- sqldf(" 6340 kinase_ppep_label <- sqldf::sqldf("
6194 WITH 6341 WITH
6195 t(ppep, label) AS 6342 t(ppep, label) AS
6196 ( 6343 (
6197 SELECT DISTINCT 6344 SELECT DISTINCT
6198 SUB_MOD_RSD AS ppep, 6345 SUB_MOD_RSD AS ppep,
6228 ON f.Phosphopeptide = k.ppep, 6375 ON f.Phosphopeptide = k.ppep,
6229 impish q 6376 impish q
6230 WHERE 6377 WHERE
6231 f.Phosphopeptide = q.Phosphopeptide 6378 f.Phosphopeptide = q.Phosphopeptide
6232 " 6379 "
6233 data_table_imputed <- sqldf(data_table_imputed_sql) 6380 data_table_imputed <- sqldf::sqldf(data_table_imputed_sql)
6234 # Zap the duplicated 'Phosphopeptide' column named 'ppep' 6381 # Zap the duplicated 'Phosphopeptide' column named 'ppep'
6235 data_table_imputed <- 6382 data_table_imputed <-
6236 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))] 6383 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))]
6237 6384
6238 # Output imputed, un-normalized data 6385 # Output imputed, un-normalized data
6239 if (print_nb_messages) nb("Output imputed, un-normalized data tabular file\n") 6386 if (print_nb_messages) nb("Output imputed, un-normalized data tabular file\n")
6240 if (print_nb_messages) nbe("Output imputed, un-normalized data tabular file\n") 6387 if (print_nb_messages) nbe(
6388 "Output imputed, un-normalized data tabular file\n")
6241 write.table( 6389 write.table(
6242 data_table_imputed 6390 data_table_imputed
6243 , file = imputed_data_filename 6391 , file = imputed_data_filename
6244 , sep = "\t" 6392 , sep = "\t"
6245 , col.names = TRUE 6393 , col.names = TRUE
6249 6397
6250 6398
6251 #output quantile normalized data 6399 #output quantile normalized data
6252 impish <- cbind(rownames(quant_data_imp_qn_log), quant_data_imp_qn_log) 6400 impish <- cbind(rownames(quant_data_imp_qn_log), quant_data_imp_qn_log)
6253 colnames(impish)[1] <- "Phosphopeptide" 6401 colnames(impish)[1] <- "Phosphopeptide"
6254 data_table_imputed <- sqldf(data_table_imputed_sql) 6402 data_table_imputed <- sqldf::sqldf(data_table_imputed_sql)
6255 # Zap the duplicated 'Phosphopeptide' column named 'ppep' 6403 # Zap the duplicated 'Phosphopeptide' column named 'ppep'
6256 data_table_imputed <- 6404 data_table_imputed <-
6257 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))] 6405 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))]
6258 if (print_nb_messages) nb("Output quantile normalized data tabular file\n") 6406 if (print_nb_messages) nb("Output quantile normalized data tabular file\n")
6259 if (print_nb_messages) nbe("Output quantile normalized data tabular file\n") 6407 if (print_nb_messages) nbe("Output quantile normalized data tabular file\n")
6264 col.names = TRUE, 6412 col.names = TRUE,
6265 row.names = FALSE, 6413 row.names = FALSE,
6266 quote = FALSE 6414 quote = FALSE
6267 ) 6415 )
6268 6416
6269 ppep_kinase <- sqldf(" 6417 ppep_kinase <- sqldf::sqldf("
6270 SELECT DISTINCT k.ppep, k.kinase 6418 SELECT DISTINCT k.ppep, k.kinase
6271 FROM ( 6419 FROM (
6272 SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep 6420 SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep
6273 FROM pseudo_ksdata 6421 FROM pseudo_ksdata
6274 WHERE GENE IN (SELECT kinase FROM enriched_kinases) 6422 WHERE GENE IN (SELECT kinase FROM enriched_kinases)
6312 ON m.phospho_peptide = kek.ppep 6460 ON m.phospho_peptide = kek.ppep
6313 ; 6461 ;
6314 " 6462 "
6315 ) 6463 )
6316 6464
6317 if (print_nb_messages) nb("Output contents of `stats_metadata_v` table to tabular file\n") 6465 if (print_nb_messages) nb(
6318 if (print_nb_messages) nbe("Output contents of `stats_metadata_v` table to tabular file\n") 6466 "Output contents of `stats_metadata_v` table to tabular file\n")
6467 if (print_nb_messages) nbe(
6468 "Output contents of `stats_metadata_v` table to tabular file\n")
6469
6319 write.table( 6470 write.table(
6320 dbReadTable(db, "stats_metadata_v"), 6471 dbReadTable(db, "stats_metadata_v"),
6321 file = anova_ksea_mtdt_file, 6472 file = anova_ksea_mtdt_file,
6322 sep = "\t", 6473 sep = "\t",
6323 col.names = TRUE, 6474 col.names = TRUE,