mqppep_preproc: mqppep_anova_script.Rmd comparison

comparison mqppep_anova_script.Rmd @ 3:bae3a23461c9 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 3dcf0d08f006b888061ff83eadc65e550d751869

author	galaxyp
date	Tue, 31 Jan 2023 22:27:00 +0000
parents	a5e7469dfdfa
children	5c2beb4eb41c

comparison

equal deleted inserted replaced

-:a5e7469dfdfa
+:bae3a23461c9
 meanPercentile:       50
 #meanPercentile:       1
 # for small random value imputation, what should `s / mean(x)` ratio be?
 sdPercentile:         1.0
 # output path for imputed data file
-imputedDataFilename:  "test-data/limbo/imputedDataFilename.txt"
+imputedDataFilename:  "test-data/imputedDataFilename.txt"
 # output path for imputed/quantile-normalized/log-transformed data file
-imputedQNLTDataFile:  "test-data/limbo/imputedQNLTDataFile.txt"
+imputedQNLTDataFile:  "test-data/imputedQNLTDataFile.txt"
 # output path for contents of `stats_metadata_v` table
-anovaKseaMetadata:    "test-data/limbo/anovaKseaMetadata.txt"
+anovaKseaMetadata:    "test-data/anovaKseaMetadata.txt"
 # how to test one variable with > 2 categories (e.g., aov or kruskal.test)
 oneWayManyCategories: !r c("aov", "kruskal.test", "oneway.test")[1]
 # how to test one variable with 2 categories (e.g., oneway.test)
 oneWayTwoCategories:  !r c("aov", "kruskal.test", "oneway.test")[3]
 # what should be the minimum quality for consideration in both
 kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2"
 # should debugging trace messages be printed?
 showEnrichedSubstrates: FALSE
 # should debugging nb/nbe messages be printed?
 printNBMsgs:          FALSE
-# showld row-scaling be applied to heatmaps: "none" or "row"
+#printNBMsgs:          TRUE
-defaultHeatMapRowScaling: "none"
+# should row-scaling be applied to heatmaps: "none" or "row"
+defaultHeatMapRowScaling: !r c("none", "row")[1]
+# how missing values be displayed on heat maps: "NA" or " "
+heatMapNAcellNote:    !r c(" ", "NA")[1]
+# how missing values be displayed on heat maps: "NA" or " "
+heatMapNAgrey:        "#D8D8D8"
+# temporary hack
+heatMapNAsubstitute:  TRUE
+# what color should be used for missing values be displayed on heat maps
+heatMapNAcellColor:   "grey15"
 # should debugging trace messages be printed?
 printTraceMsgs:       FALSE
 # when debugging files are needed, set debugFileBasePath to the path
 #   to the directory where they should be written
 debugFileBasePath:    !r if (TRUE) NULL else "test-data"
 nbe <- if (!print_nb_messages) {
 function(...) invisible()
 } else {
 function(..., f = cat, file = stderr()) {
 cat(
-stringi::stri_unescape_unicode("\nNBE \\u2203\\u2283\\u2200"),
+stringi::stri_unescape_unicode("\nN.B. \\u2203\\u2283\\u2200"),
 ...,
 file = file
 )
 }
 }
 if (print_nb_messages) nbe("library(gplots)")
 library(gplots)
 if (print_nb_messages) nbe("library(caret)")
 # load caret for nearZeroVar
-if (print_nb_messages) nbe("Please ignore the messages about systemd, if any.\n")
+if (print_nb_messages) nbe("Please ignore any messages about systemd.\n")
 library(caret)
 if (print_nb_messages) nbe("library(DBI)")
 library(DBI)
 if (print_nb_messages) nbe("library(RSQLite)")
 library(RSQLite)
 msg, "\n"
 )
 }
 }
+divert_warnings <-
+function(expr, classes = "warning") {
+withCallingHandlers(
+expr,
+warning = function(w) {
+cat("  divert_warnings: ", w$message, "\n", file = stderr())
+if (inherits(w, classes))
+tryInvokeRestart("muffleWarning")
+}
+)
+}
 # ref: https://tug.org/texinfohtml/latex2e.html
 # LaTeX sets aside the following characters for special purposes.
 #   For example, the percent sign % is for comments.
 #   They are called reserved characters or special characters.
 #   They are all discussed elsewhere in this manual.
 # As to the last three characters, to get a tilde in the text body font,
 #   use \~{} (omitting the curly braces would result in the next character
 #   receiving a tilde accent).
 # Similarly, to get a text body font circumflex use \^{}.
 #   To get a backslash in the font of the text body enter \textbackslash{}.
-whack_math <-
+whack_math <- if (TRUE) {
 function(v) {
 v <- as.character(v)
+w <- v
 w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE)
 w <- Reduce(
 f = function(l, r) {
-gsub(r, paste0("\\", r), l, fixed = TRUE)
+ptrn <- paste0("\\", r)
+for (i in seq_along(l)) {
+if (!grepl(ptrn, l[i], fixed = TRUE)) {
+l[i] <- gsub(r, ptrn, l[i], fixed = TRUE)
+}
+}
+l
 },
 x = c("#", "$", "%", "&", "{", "}", "_"),
 init = w
 )
 w <- gsub("^", "\\^{}", w, fixed = TRUE)
+w <- gsub("\\textbackslash \\_", "\\_", w, fixed = TRUE)
+return(w)
+}
+} else {
+function(v) {
+v <- as.character(v)
+w <- v
+w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE)
+w <- Reduce(
+f = function(l, r) {
+if (length(l) > 1) {
+cat("whack_math: deparse1(v) = `", deparse1(v), "`\n",
+file = stderr())
+cat("whack_math: v = `", v, "`\n", file = stderr())
+cat("  Reduce f: l = `", l,
+"` r = `", r,
+"`\n", file = stderr()
+)
+divert_warnings(grepl(r, l, fixed = TRUE))
+}
+ptrn <- paste0("\\", r)
+cat("ptrn: `", ptrn, "`\n", file = stderr())
+for (i in seq_along(l)) {
+cat("  before l[i] = ", l[i], "\n", file = stderr())
+if (!grepl(ptrn, l[i], fixed = TRUE)) {
+l[i] <- gsub(r, ptrn, l[i], fixed = TRUE)
+}
+cat("   after l[i] = ", l[i], "\n", file = stderr())
+}
+l
+},
+x = c("#", "$", "%", "&", "{", "}", "_"),
+init = w
+)
+w <- gsub("^", "\\^{}", w, fixed = TRUE)
+w <- gsub("\\textbackslash \\_", "\\_", w, fixed = TRUE)
 return(w)
 if (all(v == w))
 v
 else
 paste0("\\texttt{", w, "}")
 }
+}
 whack_underscores <- whack_math
 ## dump params to stderr (remove this eventually)
 if (FALSE) nbe(see_variable(params))
 g_intensity_min_per_class <- params$intensityMinValuesPerGroup
 if (!is.integer(g_intensity_min_per_class) || g_intensity_min_per_class < 0) {
 cat("invalid intensityMinValuesPerGroup (must be integer > -1")
 knitr::knit_exit()
 }
+g_correlate_substrates  <- params$correlateSubstrates
-if (is.na(as.logical(g_correlate_substrates  <- params$correlateSubstrates))) {
+if (is.na(as.logical(g_correlate_substrates))) {
 cat("invalid correlateSubstrates (must be TRUE or FALSE)")
 knitr::knit_exit()
 }
+g_filter_cov_var_gt_1 <- params$filterCovVarGT1
-if (is.na(as.logical(g_filter_cov_var_gt_1   <- params$filterCovVarGT1))) {
+if (is.na(as.logical(g_filter_cov_var_gt_1))) {
 cat("invalid filterCovVarGT1 parameter (must be TRUE or FALSE)")
 knitr::knit_exit()
 }
 # TODO Validate >> 0 < 30
 value = rvalue,
 pos = pf
 )
 invisible(rvalue)
 }
+# Infix concatenation
+`%||%` <- function(lvalue, rvalue) paste0(lvalue, rvalue)
 ### FUNCTIONS
 no_op <-
 function() {
 },
 simplify = "array"
 )
 grpl_rslt <- grpl_rslt + grpl_rslt_v
 }
-rslt <- unname(grpl_rslt > 0)
+unname(grpl_rslt > 0)
 }
 ##' Produce positions in a vector where succeeding value != current valus
 ##'
 ##' @title Search vector for neighboring positions having different values
 pvalue
 }
 # This code adapted from matrixcalc::is.positive.definite
 #   Notably, this simply tests without calling stop()
+# nolint start
+#   squash un-actionable cyclomatic_complexity warning
 is_positive_definite <- function(x, tol = 1e-08) {
+# nolint end
 if (!is.matrix(x))
 return(FALSE)
 if (!is.numeric(x))
 return(FALSE)
 if (nrow(x) < 1)
 }
 invisible(x)
 }
 # Use this like print.data.frame, from which it is adapted:
+# nolint start
+#   squash un-actionable cyclomatic_complexity warning
+#   squash un-actionable "no visible global for ..."
 data_frame_tabbing_latex <-
 function(
+# nolint end
 x,
 # vector of tab stops, in inches
 tabstops,
 # vector of headings, registered with tab-stops
 headings = colnames(x),
 return(s)
 }
 nolatex_verbatim <-
 function(expr) eval(expr)
+error_knitrexit_stop <-
+function(e) {
+cat("Caught error:\n\n")
+str(e)
+knitr::knit_exit()
+stop(e)
+}
 latex_verbatim <-
 function(expr) {
-arg_string <- deparse1(substitute(expr))
 cat("\n\\begin{verbatim}\n___\n")
 tryCatch(
 expr = expr,
 error = param_df_exit,
-#ACE error =
-#ACE   function(e) {
-#ACE     cat("Caught error:\n\n")
-#ACE     str(e)
-#ACE     knitr::knit_exit()
-#ACE     stop(e)
-#ACE   },
 finally = cat("...\n\\end{verbatim}\n")
 )
 }
 latex_samepage <-
 function(expr) {
-arg_string <- deparse1(substitute(expr))
 cat("\n\\begin{samepage}\n")
 tryCatch(
 expr = expr,
 error = param_df_exit,
-#ACE error =
-#ACE   function(e) {
-#ACE     cat("Caught error:\n\n")
-#ACE     str(e)
-#ACE     knitr::knit_exit()
-#ACE     stop(e)
-#ACE   },
 finally = cat("\n\\end{samepage}\n")
 )
 }
 # return the result of invocation after showing parameters
 # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/
 latex_show_invocation <-
 function(f, f_name = deparse1(substitute(f)), head_patch = FALSE) {
 function(...) {
-my_env <- (as.list(environment()))
 va <- list(...)
 my_rslt <- new_env()
 my_rslt$rslt <- NULL
 latex_verbatim(
 expr = {
 cat(sprintf("\n ..  Local variables for '%s':\n\n", f_name))
 str(va)
 if (!head_patch) {
 # return this result
-# ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/
+# ref:
+# https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/
 cat(sprintf("\n ..  Invoking '%s'\n", f_name))
 tryCatch(
 {
 cat("\n\\end{verbatim}\n")
 rslt <- do.call(f, va)
 },
 error = param_df_exit,
-#ACE error = function(e) {
-#ACE    cat("\n\\begin{verbatim}\n")
-#ACE    str(e)
-#ACE    cat("\n\\end{verbatim}\n")
-#ACE    knitr::knit_exit()
-#ACE    stop(e)
-#ACE },
 finally = cat("\n\\begin{verbatim}\n")
 )
 cat(sprintf("\n ..  '%s' returned:\n", f_name))
 str(rslt)
 my_rslt$rslt <- rslt
 collapse = collapse_string
 )
 )
 }
-latex_itemized_collapsed <- function(collapse_string, v, underscore_whack = TRUE) {
+latex_itemized_collapsed <-
+function(collapse_string, v, underscore_whack = TRUE) {
 cat("\\begin{itemize}\n\\item ")
 latex_collapsed_vector(collapse_string, v, underscore_whack)
 cat("\n\\end{itemize}\n")
 }
 latex_itemized_list <- function(v, underscore_whack = TRUE) {
 latex_itemized_collapsed("\n\\item ", v, underscore_whack)
 }
-latex_enumerated_collapsed <- function(collapse_string, v, underscore_whack = TRUE) {
+latex_enumerated_collapsed <-
+function(collapse_string, v, underscore_whack = TRUE) {
 cat("\\begin{enumerate}\n\\item ")
 latex_collapsed_vector(collapse_string, v, underscore_whack)
 cat("\n\\end{enumerate}\n")
 }
 }
 # N.B. use con = "" to emulate regular cat
 fcat0 <-
 function(..., sprtr = " ", cnnctn = file()) {
-cat0(..., sep = sprtr, file = cnnctn)
+gsubfn::cat0(..., sep = sprtr, file = cnnctn)
 invisible(cnnctn)
 }
 hypersub <-
 function(s) {
 #'
 #' Hornbeck et al. (2015) Nucleic Acids Res. 43:D512-20
 #'
 #' Horn et al. (2014) Nature Methods 11(6):603-4
 #'
+# nolint start
+#   squash un-actionable cyclomatic_complexity warning
 ksea_scores <- function(
+# nolint end
 # For human data, typically, ksdata = KSEAapp::ksdata
 ksdata,
 # Input data file having columns:
 # - Protein     : abbreviated protein name
 # Minimum substrate count, necessary to adjust the p-value appropriately.
 minimum_substrate_count
 ) {
+if (FALSE) {
+if (print_nb_messages) nbe("Output contents of `ksdata` table\n")
+cat_variable(ksdata)
+cat("\n\\clearpage\n")
+data_frame_tabbing_latex(
+x = ksdata[order(ksdata$GENE, ksdata$SUB_GENE), ],
+tabstops = c(0.3, 0.3, 1.2, 0.3, 0.3, 0.3, 0.3,
+0.8, 0.3, 0.8, 0.3, 0.3, 0.3),
+caption = "ksdata dump",
+use_subsubsection_header = FALSE
+)
+}
 # no px$FC should be <= 0, but abs(px$FC) is used below as a precaution.
 if (length(grep(";", px$Residue.Both)) == 0) {
 # There are no Residue.Both entries having semicolons, so new is
 #   simply px except two columns are renamed and a column is added
 #   for log2(abs(fold-change))
 # To take the magnitude into account without taking the direction into
 #   account, set params$kseaUseAbsoluteLog2FC to TRUE
 #
 # Should KSEA be performed aggregating signed log2FC or absolute?
 # FALSE use raw log2FC for KSEA as for KSEAapp::KSEA.Scores
+# nolint start
+#   squash un-actionable "no visible global for ..."
 if (params$kseaUseAbsoluteLog2FC) {
+# nolint end
 # TRUE  use abs(log2FC) for KSEA as Justin requested; this is a
 #         justifiable deviation from the KSEAapp::KSEA.Scores algorithm.
 new$log2_fc <- abs(new$log2_fc)
 }
 # set to TRUE to monitor filtration on stderr
 sink(stderr())
 cat(see_variable(networkin, "\n"))
 }
 ksdata_filtered <-
-sqldf(
+sqldf::sqldf(
 sprintf("%s %s",
 "select * from ksdata where not Source = 'NetworKIN'",
 if (networkin)
 sprintf("or networkin_score >= %d", networkin_cutoff)
 else
 ""
 )
 )
+if (FALSE) write.csv(x = ksdata_filtered, file = "ksdata_filtered.csv")
 if (monitor_filtration_on_stderr) {
-cat(see_variable(sqldf(
+cat(see_variable(sqldf::sqldf(
 "select count(*), Source from ksdata group by Source"), "\n"))
-cat(see_variable(sqldf(
+cat(see_variable(sqldf::sqldf(
 "select count(*), Source from ksdata_filtered group by Source"), "\n"))
 sink()
 }
 ############################################################################
 ksdata_dataset_abbrev$Substrate.Gene,
 ksdata_dataset_abbrev$Substrate.Mod,
 ksdata_dataset_abbrev$p),
 ]
 if (print_nb_messages) nbe(see_variable(ksdata_dataset_abbrev))
+if (FALSE) write.csv(
+x = ksdata_dataset_abbrev,
+file = "ksdata_dataset_abbrev_no_aggregation.csv",
+row.names = FALSE
+)
+# For some kinases, the only difference between kinase names in two databases
+#   is the case of the the letters; dedup to eliminate this false distinction
+#   The only imperfection with this approach is when the NetworKIN-only fliter
+#   is in force and there is a duplication between NetworKIN and HPRD (which
+#   outranks NetworKIN).  Presently, the script does not invoke with this
+#   function with the NetworKIN-only filter active.
+dedup_sql <- "
+select
+r.'Kinase.Gene', r.'Substrate.Gene', r.'Substrate.Mod',
+r.Peptide, r.p, r.FC, r.log2FC, r.Source
+from
+( select
+rowid,
+rank() over
+(partition by
+lower(k.'Kinase.Gene'), k.p, k.FC
+order by
+k.Source
+) as rank,
+*
+from
+ksdata_dataset_abbrev k
+) as r
+where r.rank = 1
+"
+ksdata_dataset_abbrev <- sqldf::sqldf(x = dedup_sql)
+if (FALSE) write.csv(
+x = ksdata_dataset_abbrev,
+file = "ksdata_dataset_abbrev_no_aggregation_dedup.csv",
+row.names = FALSE
+)
 # First aggregation step to account for multiply phosphorylated peptides
 #   and differing peptide sequences; the goal here is to combine results
 #   for all measurements of the same substrate.
 # SELECT  `Kinase.Gene`, `Substrate.Gene`, `Substrate.Mod`,
 #         `Source`, avg(log2FC) AS log2FC
 log2FC ~ Kinase.Gene,
 data = ksdata_dataset_abbrev,
 FUN = mean
 )
 if (print_nb_messages) nbe(see_variable(mean_fc), "\n")
+if (FALSE) write.csv(x = mean_fc, file = "mean_fc.csv")
 # for contrast j
 #   for each kinase i
 #     extract log2 of fold-change (from `new` above)
 #     (used in KSEA.Scores.R lines # 130 & 132)
 mean(log2_fc_j_each_i, na.rm = TRUE)
 # Reorder mean_fc, although I don't see why
 #   (KSEA.Scores.R line 128
 mean_fc <- mean_fc[order(mean_fc[, 1]), ]
+if (FALSE) write.csv(x = mean_fc, file = "mean_fc.csv")
 # mean_fc columns so far: "Kinase.Gene", "log2FC"
 # - Kinase.Gene
 #               indicates the gene name for each kinase.
 # Create column 3: mS
 ,
 drop = FALSE
 ]
 }
-#ACE nb(see_variable(nrow(mean_fc)), "\n")
 # Create column 8: FDR, deduced by Benjamini-Hochberg adustment from p-value
 # - FDR
 #               is the p-value adjusted for multiple hypothesis testing
 #               using the Benjamini & Hochberg method."
 #   (KSEA.Scores.R line # 134)
 mean_fc$fdr <-
 p.adjust(mean_fc$p_value, method = "fdr")
 # It makes no sense to leave Z-scores negative when using
 #   absolute value of fold-change
+# nolint start
+#   squash un-actionable "no visible global for ..."
 if (params$kseaUseAbsoluteLog2FC) {
+# nolint end
 mean_fc$z_score <- abs(mean_fc$z_score)
 }
 # Remove second column (log2FC), which is duplicated as mS
 #   (KSEA.Scores.R line # 136)
 }
 )
 k <- k[selector < ksea_cutoff_threshold, ]
 nrow_k <- nrow(k)
-#ACE nbe(see_variable(fdr_barplot_dataframe <- k))
 if (nrow_k > 0) {
 max_nchar_rowname <- max(nchar(rownames(k)))
 my_cex_names <- 1.0 / (1 + nrow_k / 50)
 "m_s",
 "z_score",
 "p_value",
 "fdr"
 )
-#ACE output_order <- with(output_df, order(fdr))
+# nolint start
 output_order <- with(output_df, order(p_value))
+# nolint end
 output_df <- output_df[output_order, ]
 output_df[, 2] <- sprintf("%0.3g", output_df[, 2])
 output_df$fdr <- sprintf("%0.4f", output_df$fdr)
 output_df$p_value <- sprintf("%0.2e", output_df$p_value)
 output_df$z_score <- sprintf("%0.2f", output_df$z_score)
 output_df$m_s <- sprintf("%d", output_df$m_s)
 output_df$enrichment <- sprintf("%0.3g", output_df$enrichment)
-output_ncol <- ncol(output_df)
 colnames(output_df) <-
 c(
-"Kinase",
+"Kinase or motif",
 "\\(\\overline{{\\lvert}\\log_2 (\\text{fold-change}){\\rvert}}\\)",
 "Enrichment",
 "Substrates",
 "z-score",
 "p-value",
 )
 if (sum(selector < ksea_cutoff_threshold) > 0) {
 if (print_nb_messages) nbe(see_variable(output_df))
 math_caption <- gsub("{", "\\{", caption,      fixed = TRUE)
 math_caption <- gsub("}", "\\}", math_caption, fixed = TRUE)
-# with (
-#   output_df,
-#   )
 if (TRUE) {
-output_df$Kinase <- whack_underscores(output_df$Kinase)
 data_frame_tabbing_latex(
 x = output_df,
 # vector of tab stops, in inches
-tabstops = c(1.0, 1.2, 1.0, 1.0, 1.0, 1.0),
+tabstops = c(1.8, 1.2, 0.8, 0.8, 0.8, 0.8, 0.8),
 # vector of headings, registered with tab-stops
 headings = colnames(output_df),
 # digits to pass to format.data.frame
 digits = NULL,
 # maximumn number of rows to print
 #   https://tug.org/texinfohtml/latex2e.html#Font-sizes
 charactersize = "small",
 # set verbatim to TRUE to debug output
 verbatim = FALSE
 )
-} else {
-data_frame_table_latex(
-x = output_df,
-justification = "l c c c c c c",
-centered = TRUE,
-caption = sprintf(
-"\\text{%s}, KSEA %s < %s",
-math_caption,
-ksea_cutoff_statistic,
-ksea_cutoff_threshold
-),
-anchor = anchor,
-underscore_whack = FALSE
-)
 }
 } else {
 cat(
 sprintf(
 "\\break
 mycol <- unique(append(mycol_neg, mycol_pos))
 color_breaks <- list(breaks_all, mycol)
 return(color_breaks)
 }
+# nolint start
+#   squash un-actionable cyclomatic_complexity warning
 hm2plus <- function(
+# nolint end
 x,
 mat = matrix(
 c(
 c(0, 4, 0),
 c(0, 3, 3),
 key_par = list(),
 hclustfun = hclust,
 ...
 ) {
-varargs <- list(...)
 if (FALSE) # this is to avoid commenting out code to pass linting...
-my_hm2 <- latex_show_invocation(heatmap.2, head_patch = FALSE)
+my_hm2 <- latex_show_invocation(gplots::heatmap.2, head_patch = FALSE)
 else
-my_hm2 <- heatmap.2
+my_hm2 <- gplots::heatmap.2
 x <- as.matrix(x)
 if (sum(!is.na(x)) < 1)
 return(NULL)
 color_count <- 1 + max(64, length(as.vector(x))) # 8 was not enough
 min_nonax <- min(x, na.rm = TRUE)
 max_nonax <- max(x, na.rm = TRUE)
 if (print_nb_messages) nb("within hm2plus", see_variable(divergent), "\n")
 if (divergent) {
 zlim <- max(abs(min_nonax), abs(max_nonax))
-if (print_nb_messages) nb(see_variable(pre_zlim <- zlim, "\n"))
+if (print_nb_messages) nb(see_variable(zlim, "\n"))
 breaks <- (zlim) / (break_count:1)
 if (print_nb_messages) nb(see_variable(breaks, "\n"))
 breaks <- breaks - median(breaks)
 zlim <- c(-zlim, zlim)
 if (print_nb_messages) nb(see_variable(zlim, "\n"))
 } else {
 zlim <- max(abs(min_nonax), abs(max_nonax))
-if (print_nb_messages) nb(see_variable(pre_zlim <- zlim, "\n"))
+if (print_nb_messages) nb(see_variable(zlim, "\n"))
 breaks <- zlim / (break_count:1)
 if (print_nb_messages) nb(see_variable(breaks, "\n"))
 if  (max_nonax < 0) {
 breaks <- breaks - zlim
 zlim <- c(-zlim, 0)
 zlim <- c(0, zlim)
 }
 if (print_nb_messages) nb(see_variable(zlim, "\n"))
 }
 nonax <- x
-nonax[is.na(x)] <- min_nonax
+# nolint start
+#   squash un-actionable "no visible global for ..."
+if (params$heatMapNAsubstitute) nonax[is.na(x)] <- min_nonax
+# nolint end
 if (is.null(widths)) widths <- c(denwid, 4 - denwid, 1.5)
 if (is.null(heights)) heights <- c(0.4, denhgt, 4.0)
 colors <-
 if (divergent && min_nonax < 0) {
 # divergent colors on both sides of zero
 } else if (divergent && max_nonax < 0) {
 # "divergent" colors < zero
 colorRampPalette(c("red", "white"))(color_count)
 } else {
 # "non-divergent" colors including zero
-hcl.colors(color_count, "YlOrRd", rev = TRUE)
+tmp <- hcl.colors(color_count, "YlOrRd", rev = TRUE)
+# nolint start
+#   squash un-actionable "no visible global for ..."
+tmp[1] <- params$heatMapNAgrey
+# nolint end
+tmp
 }
-#ACE if (print_nb_messages) nb("within hm2plus", see_variable(key_par), "\n")
+nbe("There are ", sum(is.na(x)), " NA values")
-#ACE if (print_nb_messages) nb(see_variable(colors, "\n"))
-#ACE key_par$col = colors
-#ACE key_par$breaks = breaks
-if (print_nb_messages) nb(see_variable(par(), "\n")) #ACE TODO remove me
-if (print_nb_messages) cat("\\leavevmode\n\\linebreak\n") #ACE TODO remove me
 suppressWarnings(
 my_hm2(
 x = x,
 col = colors,
-#ACE symkey = FALSE,
 density.info = density_info,
 srtCol = srtcol,
 srtRow = srtrow,
 margins = margins,
 lwid = widths,
 lmat = mat,
 notecol = notecol,
 trace = trace,
 bg = "yellow",
 hclustfun = hclustfun,
-#ACE breaks = breaks,
 oldstyle = FALSE,
 ... # varargs
 )
 )
 # implicitly returning value returned by heatmap.2
 }
 # draw_kseaapp_summary_heatmap is a helper function for ksea_heatmap
+# nolint start
+#   squash un-actionable cyclomatic_complexity warning
 draw_kseaapp_summary_heatmap <- function(
+# nolint end
 x,                       # matrix with row/col names already formatted
 sample_cluster,          # a binary input of TRUE or FALSE,
 #   indicating whether or not to perform
 #   hierarchical clustering of the sample columns
 merged_asterisk,         # matrix having dimensions of x, values "*" or ""
 cat_variable(x)
 return(FALSE)
 }
 max_nchar_rowname <- max(nchar(rownames(x)))
 max_nchar_colname <- max(nchar(colnames(x)))
-my_limit <- g_intensity_hm_rows
 my_row_cex_scale <- master_cex * 150 / nrow_x
-#ACE row cex shrink hack begin
+# row cex shrink hack begin
 my_row_cex_scale <- master_cex * 150 / max(nrow_x, ncol_x)
-#ACE row cex shrink hack end
+# row cex shrink hack end
 my_col_cex_scale <- 3.0
-#ACE col cex shrink hack begin
+# col cex shrink hack begin
 if (ncol_x > 40)
 my_col_cex_scale <- 3.0 * 40 / ncol_x
-#ACE col cex shrink hack end
+# col cex shrink hack end
 my_asterisk_scale <- 0.4 * my_row_cex_scale
 my_row_warp <- 1
 my_note_warp <- 2
 my_row_warp <- 1
 )
 my_margins <- c(1, 1) +
 c(
 margins[1] * 0.08 * max_nchar_colname * my_col_cex,
 margins[2] * 0.04 * max_nchar_rowname * my_row_cex
-)
-my_notecex <-
-my_scale *
-min(
-1.1,
-my_row_cex_asterisk * my_note_warp,
-my_col_cex * my_note_warp
 )
 if (print_trace_messages) {
 cat_variable(my_heights,          suffix = "; ")
 cat_variable(my_margins,          suffix = "\n\n")
 return(TRUE)
 }
 # function drawing heatmap of contrast fold-change for each kinase,
 #   adapted from KSEAapp::KSEA.Heatmap
+# nolint start
+#   squash un-actionable cyclomatic_complexity warning
 ksea_heatmap <- function(
+# nolint end
 # the data frame outputs from the KSEA.Scores() function, in list format
 score_list,
 # a character vector of all the sample names for heatmap annotation:
 # - the names must be in the same order as the data in score_list
 # - please avoid long names, as they may get cropped in the final image
 #   hierarchical clustering of the sample columns
 sample_cluster,
 # a binary input of TRUE or FALSE, indicating whether or not to export
 #   the heatmap as a .png image into the working directory
 export = FALSE,
-# bottom and right margins; adjust as needehttps://tex.stackexchange.com/a/56795d if contrast names are too long
+# bottom and right margins; adjust as needed if contrast names are too long
 margins = c(6, 6),
 # print which kinases?
 # - Mandatory argument, must be one of const_ksea_.*_kinases
 which_kinases,
 # additional arguments to gplots::heatmap.2, such as:
 asterisk_rows <- rowSums(merged_asterisk == "*") > 0
 all_rows <- rownames(merged_stats)
 names(asterisk_rows) <- all_rows
 non_asterisk_rows <- names(asterisk_rows[asterisk_rows == FALSE])
 asterisk_rows <- names(asterisk_rows[asterisk_rows == TRUE])
-merged_scores_asterisk <- merged_scores[names(asterisk_rows), , drop = FALSE]
-merged_scores_non_asterisk <- merged_scores[names(non_asterisk_rows), , drop = FALSE]
 row_list <- list()
 row_list[[const_ksea_astrsk_kinases]] <- asterisk_rows
 row_list[[const_ksea_all_kinases]] <- all_rows
 row_list[[const_ksea_nonastrsk_kinases]] <- non_asterisk_rows
 }
 keep_var_gtr_0 <- keep_var_gtr_min(0)
 # function drawing heatmap of phosphopeptide intensities
+# nolint start
+#   squash un-actionable cyclomatic_complexity warning
 ppep_heatmap <-
+# nolint end
 function(
 m,                              # matrix with rownames already formatted
 cutoff,                         # cutoff used by hm_heading_function
 hm_heading_function,            # construct $ cat heading from m and cutoff
 hm_main_title,                  # main title for plot (drawn below heading)
 adj = 0.5,                      # adjust text: 0 left, 0.5 middle, 1 right
 row_scaling =                   # should row-scaling be applied if possible?
 g_default_heatmap_row_scaling,
 ...                             # passthru to hm2plus or heatmap.2
 ) {
-use_heatmap_1 <- FALSE
 peptide_count <- 0
 # emit the heading for the heatmap
 if (hm_heading_function(m, cutoff)) {
 nrow_m <- nrow(m)
 peptide_count <- min(max_peptide_count, nrow_m)
 # Margin was heuristically derived to accommodate the widest label
 row_mchar_max <- max(nchar(rownames(m_margin)))
 col_mchar_max <- max(nchar(colnames(m_margin)))
 row_margin <- master_cex * row_mchar_max * 2.6
 col_margin <- master_cex * col_mchar_max * 2.6
-if (print_trace_messages) cat(sprintf("row_margin = %0.3f; ", row_margin))
+if (print_trace_messages) cat(
-if (print_trace_messages) cat(sprintf("col_margin = %0.3f; ", col_margin))
+sprintf("row_margin = %0.3f; ", row_margin))
+if (print_trace_messages) cat(
+sprintf("col_margin = %0.3f; ", col_margin))
 hm_call <- NULL
 tryCatch(
 {
 # set non-argument parameters for hm_call inner function
 my_row_cex <-
 (max(nchar(rownames(m_margin)))^2) * g_intensity_hm_rows
 )
 m_hm <-  m[peptide_count:1, , drop = FALSE]
 if (is.null(cellnote)) {
 cellnote <-  matrix("", nrow = nrow(m_hm), ncol = ncol(m_hm))
-cellnote[is.na(m_hm)] <- "NA"
+# nolint start
+#   squash un-actionable "no visible global for ..."
+cellnote[is.na(m_hm)] <- params$heatMapNAcellNote
+# nolint end
 } else {
 cellnote <-  cellnote[peptide_count:1, , drop = FALSE]
 }
-m_hm[is.na(m_hm)] <- 0
+# nolint start
+#   squash un-actionable "no visible global for ..."
+if (params$heatMapNAsubstitute) m_hm[is.na(m_hm)] <- 0
+# nolint end
 nrow_m_hm <- nrow(m_hm)
 ncol_m_hm <- ncol(m_hm)
 if (nrow_m_hm < 1 || ncol_m_hm < 1)
 return(peptide_count) # return zero as initialized above
-my_limit <- g_intensity_hm_rows
 my_row_cex <- master_cex * (100 / (2 + row_mchar_max))
 my_col_cex <- master_cex * 6 * row_margin / col_margin
 my_col_adj <- min(my_col_cex, my_row_cex) / my_col_cex
 my_col_cex <- min(my_col_cex, my_row_cex)
 col_margin <- sqrt(my_col_adj) * col_margin
-if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex))
+if (print_trace_messages) cat(
-if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex))
+sprintf("my_row_cex = %0.3f; ", my_row_cex))
+if (print_trace_messages) cat(
+sprintf("my_col_cex = %0.3f; ", my_col_cex))
 if (is.null(margins)) my_margins <-
 c(
 (my_col_cex + col_margin), # col
 (my_row_cex + row_margin) / my_row_cex  # row
 )
 "my_margins = c(%s)\n\n",
 paste(my_margins, collapse = ", ")
 )
 )
 my_hm2_cex <- 2 * master_cex
-my_key_cex <- 0.9 - 0.1 * (g_intensity_hm_rows + nrow_m_hm) / g_intensity_hm_rows
+my_key_cex <-
+0.9 -
+0.1 * (g_intensity_hm_rows + nrow_m_hm) / g_intensity_hm_rows
 my_key_warp <- 1.5 * 22.75 / row_margin
 my_key_cex <- min(1.10, my_key_warp * my_key_cex)
 my_hgt_scale <- 3.70 - 0.4 * (max(1, 0.9 * my_row_cex) - 1)
 my_hgt_scale <- 3.75 # 3.615
 my_hgt_scale <- 3.60 # 3.615
 cat_variable(my_warp, "\n\n", 3)
 # added 0.9 heuristically...
 my_plot_height <-
 (0.566 + 0.354 * (nrow_m / g_intensity_hm_rows)) *
 min(my_hgt_scale, my_hgt_scale * my_warp)
-my_plot_height <- min(3.65, my_plot_height * g_intensity_hm_rows / 50)
+my_plot_height <-
+min(3.65, my_plot_height * g_intensity_hm_rows / 50)
 my_heights <- c(
 0.3,                                 # title and top dendrogram
 my_plot_height,                      # plot and bottom margin
 4.15 - my_hgt_scale,                 # legend
 0.05 + my_hgt_scale - my_plot_height # whitespace below legend
 scale = scaling,
 srtcol = 90,
 srtrow = 0,
 xlab = "",
 cellnote = cellnote,
+# ref for na.color:
+# https://cran.r-project.org/web/packages/gplots/gplots.pdf
+# nolint start
+#   squash un-actionable "no visible global for ..."
+na.color = params$heatMapNAcellColor,
+# nolint end
 notecex = my_note_cex,
 ...
 )
 )
 ) {
 },
 error = function(r) {
 cat(
 sprintf(
 "\n%s %s Internal message: %s\n\\newline\n\n",
-"Failure drawing heatmap,",
+"Failure drawing heatmap, possibly because of ",
-"possibly because of too many missing values.\n\\newline\n\n",
+"too many missing values.\n\\newline\n\n",
 r$message
 )
 )
 cat_margins()
 }
 )
 } else {
 cat(
-"\nFailure drawing heatmap, possibly because of too many missing values.\n"
+"\nFailure drawing heatmap, ",
+"possibly because of too many missing values.\n"
 )
 }
 }
 )
 }
 # return value:
 peptide_count
 }
 # function drawing heatmap of correlations if they exist, else covariances
+# nolint start
+#   squash un-actionable cyclomatic_complexity warning
+#   squash un-actionable "no visible global for ..."
 cov_heatmap <-
 function(
+# nolint end
 m,                              # matrix with rownames already formatted
+kinase_name,
 top_substrates = FALSE,
 ...                             # passthru to hm2plus or heatmap.2
 ) {
-if (print_nb_messages) nbe(see_variable(m), " [", nrow(m), "x", ncol(m), "\n")
+if (print_nb_messages) nbe(
-#ACE nb(rowSums(m, na.rm = TRUE))
+see_variable(m), " [", nrow(m), "x", ncol(m), "\n")
-#ACE bad_rows <- (rowSums(m, na.rm = TRUE) == 0)
-#ACE nb(see_variable(bad_rows))
-#ACE m <- m[-bad_rows, , drop = FALSE]
 colnames_m <- colnames(m)
 is_na_m <- is.na(m)
 tmp <- m
 tmp[is_na_m] <- 0
 }
 t_m <- t(tmp)
 t_m[is.na(t_m)] <- 0
-prefiltered_nrow <- ncol(t_m)
 my_corcov <- cov(t_m)
 did_filter_rows <- did_filter_cols <- FALSE
 if (g_correlate_substrates && !is_positive_definite(my_corcov)) {
 my_correlate_substrates <- FALSE
 f_omissions <-
 function(is_corr) {
 cat(
 sprintf(
-"Below is the %s plot for %s substrates",
+"Below is the %s plot for %s sites",
 if (is_corr) "correlation" else "covariance",
 sprintf(
 if (top_substrates)
 "%0.0f \"highest-quality\""
 else
 parjust <- par(adj = 0.5)
 on.exit(par(parjust))
 my_corcov <- my_corcov[order(rownames(my_corcov)), ]
 my_main <-
-sprintf("%s among %s substrates %s",
+sprintf("%s among %s sites %s",
 if (my_correlate_substrates) "Correlation"
 else "Covariance",
 kinase_name,
 if (!my_correlate_substrates &&
 g_filter_cov_var_gt_1 &&
 my_plot_height,
 my_legend_height # was 4.0 - my_hgt_scale # was 4.19
 )
 if (print_trace_messages) cat(sprintf("max_nchar = %0.3f; ", max_nchar))
 if (print_trace_messages) cat(sprintf("my_margin = %0.3f; ", my_margin))
-if (print_trace_messages) cat(sprintf("my_plot_height = %0.3f\n\n", my_plot_height))
+if (print_trace_messages) cat(
+sprintf("my_plot_height = %0.3f\n\n", my_plot_height))
 if (print_trace_messages) cat(sprintf("master_cex = %0.3f; ", master_cex))
 if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex))
 if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex))
-if (print_trace_messages) cat(sprintf("my_key_cex = %0.3f\n\n", my_key_cex))
+if (print_trace_messages) cat(
-if (print_trace_messages) cat(sprintf("my_hgt_scale = %0.3f\n\n", my_hgt_scale))
+sprintf("my_key_cex = %0.3f\n\n", my_key_cex))
-if (print_trace_messages) cat(sprintf("legend height = %0.3f\n\n", my_legend_height))
+if (print_trace_messages) cat(
+sprintf("my_hgt_scale = %0.3f\n\n", my_hgt_scale))
+if (print_trace_messages) cat(
+sprintf("legend height = %0.3f\n\n", my_legend_height))
 if (print_trace_messages) cat(
 sprintf(
 "my_heights = c(%s); sum = %0.3f\n\n",
 paste(
 sprintf("%0.3f", my_heights),
 "median peptide-intensity across all sample classes.\n"
 )
 # Take the accurate ln(x+1) because the data are log-normally distributed
 #   and because median can involve an average of two measurements.
 quant_data_imp <- log1p(quant_data_imp)
-quant_data_imp[ind] <- row_apply(quant_data_imp, median, na.rm = TRUE)[ind[, 1]]
+quant_data_imp[ind] <-
+row_apply(quant_data_imp, median, na.rm = TRUE)[ind[, 1]]
 # Take the accurate e^x - 1 to match scaling of original input.
 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp))
 good_rows <- !is.nan(rowMeans(quant_data_imp))
 }
 , "mean" = {
 # Take the accurate ln(x+1) because the data are log-normally distributed,
 #   so arguments to mean should be previously transformed.
 #   this will have to be
 quant_data_imp <- log1p(quant_data_imp)
 # Assign to NA cells the mean for the row
-quant_data_imp[ind] <- row_apply(quant_data_imp, mean, na.rm = TRUE)[ind[, 1]]
+quant_data_imp[ind] <-
+row_apply(quant_data_imp, mean, na.rm = TRUE)[ind[, 1]]
 # Take the accurate e^x - 1 to match scaling of original input.
 quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp))
 good_rows <- !is.nan(rowMeans(quant_data_imp))
 }
 , "random" = {
 quant_data_imp <- quant_data
-m1 <- median(sds, na.rm = TRUE) * sd_percentile #sd to be used is the median sd
+# sd to be used is the median sd
+m1 <- median(sds, na.rm = TRUE) * sd_percentile
 # If you want results to be reproducible, you will want to call
 #   base::set.seed before calling stats::rnorm
 imputation_method_description <-
 paste("Substitute each missing value with random intensity",
 sprintf(
 imp_smry_pot_peptides_after <- sum(good_rows)
 imp_smry_rejected_after  <- sum(!good_rows)
 imp_smry_missing_values_after   <- sum(is.na(quant_data_imp[good_rows, ]))
 # From ?`%in%`:
-#   %in% is currently defined as function(x, table) match(x, table, nomatch = 0) > 0
+#   %in% is currently defined
+#   as function(x, table) match(x, table, nomatch = 0) > 0
 stock_in <-
 names(good_rows) %in%
 names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count])
 if (print_nb_messages) nbe(see_variable(stock_in), "\n")
 cat(tabular_lines)
 ```
 ```{r filter_good_rows, echo = FALSE}
-if (print_nb_messages) nbe("before name extraction, ", see_variable(length(good_rows)), " ", see_variable(good_rows), "\n")
+if (print_nb_messages) nbe(
+"before name extraction, ", see_variable(length(good_rows)), " ",
+see_variable(good_rows), "\n")
 good_rows <- names(good_rows[names(great_enough_row_names)])
-if (print_nb_messages) nbe("after name extraction, ", see_variable(length(good_rows)), see_variable(good_rows), "\n")
+if (print_nb_messages) nbe(
+"after name extraction, ", see_variable(length(good_rows)),
-#ACE min_group_obs_count <- min_group_obs_count[names(great_enough_row_names)]
+see_variable(good_rows), "\n")
-#ACE nbe("good_rows")
-#ACE nbe(see_variable(good_rows))
-#ACE nbe("names(min_group_obs_count) before filter for good rows")
-#ACE nbe(see_variable(names(min_group_obs_count)))
 min_group_obs_count <- min_group_obs_count[good_rows]
-#ACE nbe("min_group_obs_count after filter for good rows")
-#ACE nbe(see_variable(names(min_group_obs_count)))
 # Zap rows where imputation was insufficiently effective
 full_data         <- full_data        [good_rows, ]
 quant_data        <- quant_data       [good_rows, ]
 quant_data_log    <- quant_data_log   [good_rows, ]
-if (print_nb_messages) nbe("before row filter, ", see_variable(nrow(quant_data_imp)), "\n")
+if (print_nb_messages) nbe(
+"before row filter, ", see_variable(nrow(quant_data_imp)), "\n")
 quant_data_imp <- quant_data_imp[good_rows, ]
-if (print_nb_messages) nbe("after row filter, ", see_variable(nrow(quant_data_imp)), "\n")
+if (print_nb_messages) nbe(
+"after row filter, ", see_variable(nrow(quant_data_imp)), "\n")
 write_debug_file(quant_data_imp)
 quant_data_imp_good_rows <- quant_data_imp
 write_debug_file(quant_data_imp_good_rows)
 ```
 my_ppep_distrib_bxp(
 x = blue_dots
 , sample_name_grow = sample_name_grow
 , main = "Peptide intensities after eliminating unusable peptides"
 , varwidth = boxplot_varwidth
-, sub = paste(boxplot_sub, "Box widths reflect number of peptides for sample")
+, sub = paste(boxplot_sub,
+"Box widths reflect number of peptides for sample")
 , xlab = "Sample"
 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
 , col = const_boxplot_fill
 , notch = FALSE
 , ylim = ylim
 ## Assignment of $p$-value and quality score
 For each phosphopeptide, ANOVA analysis was performed to compute a $p$-value representing the evidence against the "null hypothesis" ($H_0$) that the intensity does not vary significantly among sample groups.
-However, because as more and more phosphopeptides are tested, there is increasd probability that, by random chance, a given peptide will have a $p$-value that appears to indicate significance.  For this reason, the $p$-values were adjusted by applying the False Discovery Rate (FDR) correction from Benjamini and Hochberg (1995) [doi:10.1111/j.2517-6161.1995.tb02031.x](https:/doi.org/10.1111/j.2517-6161.1995.tb02031.x).
+However, because as more and more phosphopeptides are tested, there is increasd probability that, by random chance, a given peptide will have a $p$-value that appears to indicate significance.  For this reason, the $p$-values were adjusted by applying the False Discovery Rate (FDR) correction from Benjamini and Hochberg (1995) [doi:10.1111/j.2517-6161.1995.tb02031.x](https://doi.org/10.1111/j.2517-6161.1995.tb02031.x).
 Furthermore, to give more weight to phosphopeptides having fewer missing values, an (arbitrarily defined) quality score was assigned to each, defined as:
 $$
 \textit{quality}_j
 ```
 ```{r anova, echo = FALSE, fig.dim = c(10, 12), results = 'asis'}
 g_can_run_ksea <- FALSE
 old_oma <- par("oma")
+# nolint start
+#   squash un-actionable cyclomatic_complexity warning
 if (count_of_treatment_levels < 2) {
+# nolint end
 cat(
 "ERROR!!!! Cannot perform ANOVA analysis",
 "(see next page)\\newpage\n"
 )
 cat(
 } else {
 if (print_nb_messages) nbe("computing p_value_data_anova_ps\n")
 if (print_nb_messages) nbe(see_variable(nrow(quant_data_imp_qn_log)), "\n")
 if (print_nb_messages) nbe(see_variable(ncol(quant_data_imp_qn_log)), "\n")
-if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log[, ".NE.7C"]), "\n")
 if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log), "\n")
 if (print_nb_messages) nbe(see_variable(anova_func), "\n")
 if (print_nb_messages) nbe(see_variable(smpl_trt), "\n")
 if (print_nb_messages) nbe(see_variable(one_way_all_categories), "\n")
 tryCatch(
 if (number_of_peptides_found > g_intensity_hm_rows - 1) {
 cat("\\leavevmode\n\n\n")
 break
 }
+if (print_trace_messages) {
+cat_variable(cutoff)
+cat("\n\n")
+}
 bool_1 <- (p_value_data$fdr_adjusted_anova_p < cutoff)
 bool_2 <- (p_value_data$min_group_obs_count >= g_intensity_min_per_class)
 g_can_run_ksea <- g_can_run_ksea || (sum(bool_2) > 0)
 bool_4 <- (p_value_data$quality >= params$minQuality)
 bool_3 <- as.logical(
 if (print_trace_messages)
 cat_variable(filtered_p, force_str = TRUE)
 have_remaining_peptides <- sum(bool_3, na.rm = TRUE) > 0
 filter_result_string <-
 sprintf(
 "%s, %s of %0.0f peptides remained having both %s and %s.\n\n",
 "After filtering for ANOVA results",
 if (have_remaining_peptides)
 as.character(sum(bool_3, na.rm = TRUE))
 else
 "none",
 length(bool_3),
-sprintf("adjusted p-value < %s", as.character(signif(cutoff, 2))),
+adj_p_string <-
+sprintf("adjusted p-value < %s", as.character(signif(cutoff, 2))),
 sprintf(
 "more than %0.0f observations in some groups",
 max(0, g_intensity_min_per_class - 1)
 )
 )
 filtered_data_filtered[
 order(filtered_p$fdr_adjusted_anova_p),
 , drop = FALSE
 ]
-if (have_remaining_peptides && nrow(filtered_p) > 0 && nrow(filtered_data_filtered) > 0) {
+if (have_remaining_peptides
+&& nrow(filtered_p) > 0
+&& nrow(filtered_data_filtered) > 0
+) {
 if (first_page_suppress == 1) {
 first_page_suppress <- 0
 } else {
 cat("\\newpage\n")
 }
 latex_samepage({
 cat(filter_result_string)
 if (nrow(filtered_data_filtered) > 1) {
 cat(subsection_header(sprintf(
-"Intensity distributions for %d phosphopeptides\n",
+"Intensity distributions for %d phosphopeptides, %s\n",
-nrow(filtered_data_filtered)
+nrow(filtered_data_filtered),
+adj_p_string
 )))
 } else {
-cat(subsection_header(sprintf(
+cat(
-"Intensity distribution for one phosphopeptide (%s)\n",
+subsection_header(
-rownames(filtered_data_filtered)[1]
+sprintf(
-)))
+"Intensity distribution for one phosphopeptide, %s\n",
+adj_p_string
+)
+),
+rownames(filtered_data_filtered)[1],
+"\n"
+)
 }
 }) # end latex_samepage
 old_par <- par(
 mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1),
 g_intensity_hm_rows),
 sprintf("whose adjusted p-value < %0.2f\n", cutoff)
 )
 ))
 } else {
-if (nrow(m) == 0) {
+if (nrow(m) < 2) {
 return(FALSE)
 } else {
 cat(subsection_header(
 paste(
 sprintf("Heatmap for %d usable peptide genes whose", nrow(m)),
 nrow_m <- nrow(m)
 ncol_m <- ncol(m)
 if (nrow_m > 0) {
 rownames_m <- rownames(m)
 q <- data.frame(pepname = rownames_m)
-g <- sqldf("
+g <- sqldf::sqldf("
 SELECT q.pepname, substr(met.Gene_Name, 1, 30) AS gene_name
 FROM q, metadata_plus_p AS met
 WHERE q.pepname = met.Phosphopeptide
 ORDER BY q.rowid
 ")
 }
 }
 }
 }
 }
-cat(filter_result_string)
 cat("\\leavevmode\n")
 if (!g_can_run_ksea) {
 errmsg <- paste("Cannot proceed with KSEA analysis",
 "because too many values are missing.")
 sprintf("%0.3g", display_p_value_data$fdr_adjusted_anova_p)
 display_p_value_data$quality <-
 sprintf("%0.3g", display_p_value_data$quality)
 headers_1st_line <-
-c("",               "Raw ANOVA", "FDR-adj.", "Missing", "Min. #", "", "")
+c("",               "Raw ANOVA", "FDR-adj.", "Missing", "Min. #",
+"",        "")
 headers_2nd_line <-
-c("Phosphopeptide", "p-value",   "p-value",  "values",  "group-obs", "Quality", "Ranking")
+c("Phosphopeptide", "p-value",   "p-value",  "values",  "group-obs",
+"Quality", "Ranking")
 data_frame_tabbing_latex(
 x = display_p_value_data,
 tabstops = c(2.75, 0.80, 0.80, 0.5, 0.6, 0.60),
 use_subsubsection_header = FALSE,
 headings = c(headers_1st_line, headers_2nd_line),
 # KSEA Analysis Summaries
 Results of Kinase-Substrate Enrichment Analysis are presented here, if the substrates for any kinases are relatively enriched.   Enrichments are found by the CRAN `KSEAapp` package:
-- The package is available on CRAN, at https:/cran.r-project.org/package=KSEAapp
+- The package is available on CRAN, at https://cran.r-project.org/package=KSEAapp
-- The method used is described in Casado et al. (2013) [doi:10.1126/scisignal.2003573](https:/doi.org/10.1126/scisignal.2003573) and Wiredja et al (2017) [doi:10.1093/bioinformatics/btx415](https:/doi.org/10.1093/bioinformatics/btx415).
+- The method used is described in Casado et al. (2013) [doi:10.1126/scisignal.2003573](https://doi.org/10.1126/scisignal.2003573) and Wiredja et al (2017) [doi:10.1093/bioinformatics/btx415](https://doi.org/10.1093/bioinformatics/btx415).
-- An online alternative (supporting only analysis of human data) is available at [https:/casecpb.shinyapps.io/ksea/](https:/casecpb.shinyapps.io/ksea/).
+- An online alternative (supporting only analysis of human data) is available at [https://casecpb.shinyapps.io/ksea/](https://casecpb.shinyapps.io/ksea/).
 For each kinase, $i$, and each two-way contrast of treatments, $j$, an enrichment $z$-score is computed as:
 $$
 \text{kinase enrichment }z\text{-score}_{j,i} = \frac{(\overline{`r sfc`}_{j,i} - \overline{`r pfc`}_j)\sqrt{m_{j,i}}}{\delta_j}
 )
 )
 kseaapp_input <-
 sqldf::sqldf(
 x = sprintf("
-SELECT `Protein`, `Gene`, `Peptide`, phosphopep AS `Residue.Both`, `p`, `FC`
+SELECT `Protein`,`Gene`,`Peptide`,phosphopep AS `Residue.Both`,`p`,`FC`
 FROM v_kseaapp_input
 WHERE contrast = %d
 ",
 i_cntrst
 ),
 drop = FALSE
 ]
 }
 if (FALSE) {
+if (print_nb_messages) nbe(
+"Output contents of `ksea_scores_rslt` table\n")
+cat_variable(ksea_scores_rslt)
+cat("\n\\clearpage\n")
 data_frame_tabbing_latex(
 x = ksea_scores_rslt,
-tabstops = c(0.8, 0.8, 0.8, 0.8, 0.8, 0.8),
+tabstops = c(1.8, 0.8, 0.8, 0.3, 0.8, 0.8),
 caption = paste("KSEA scores for contrast ",
 cntrst_b_level, "to",  cntrst_a_level),
 use_subsubsection_header = FALSE
 )
-}
-if (FALSE) {
-if (print_nb_messages) nbe("Output contents of `ksea_scores_rslt` table\n")
-cat_variable(ksea_scores_rslt)
-cat("\n\\clearpage\n")
 }
 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) {
 next_index <- 1 + next_index
 rslt$score_list[[next_index]] <- ksea_scores_rslt
 )
 sub_title <- contrast_longlabel
 tryCatch(
 expr = {
 ksea_scores_rslt <- rslt$score_list[[next_index]]
-if (print_nb_messages) nbe(see_variable(ksea_scores_rslt)) #ACE
+if (print_nb_messages) nbe(see_variable(ksea_scores_rslt))
 if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) {
 sink(deferred <- file())
 ksea_low_fdr_print(
 rslt = rslt,
 loaded_packages_df <- data.frame(
 package = loaded_packages_df$package,
 version = loaded_packages_df$loadedversion,
 date    = loaded_packages_df$date
 )
-#ACE cat("\\clearpage\n\\section{R package versions}\n")
-#ACE data_frame_tabbing_latex(
-#ACE   x = loaded_packages_df,
-#ACE   tabstops = c(2.5, 1.25),
-#ACE   caption = "R package versions"
-#ACE   )
 cat("\\clearpage\n\\section{Input parameter settings}\n")
 data_frame_tabbing_latex(
 x = param_df,
 tabstops = c(1.75),
 underscore_whack = TRUE,
 knitr::knit_exit()
 }
 ```
 ```{r kseabar, echo = FALSE, fig.dim = c(9.5, 12.3), results = 'asis'}
+# nolint start
+#   squash un-actionable cyclomatic_complexity warning
 if (have_kinase_descriptions) {
+# nolint end
 my_section_header <-
 sprintf(
 "inases whose KSEA %s < %s\n",
 ksea_cutoff_statistic,
 signif(ksea_cutoff_threshold, 2)
 caption = paste0("Descriptions of k", my_section_header)
 )
 }
 if (FALSE) {
-cat_variable(sqldf("SELECT kinase FROM enriched_kinases"))
+cat_variable(sqldf::sqldf("SELECT kinase FROM enriched_kinases"))
-cat_variable(sqldf("
+cat_variable(sqldf::sqldf("
 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep
 FROM pseudo_ksdata
 WHERE gene IN (SELECT kinase FROM enriched_kinases)
 "))
 data_frame_table_latex(
-x = sqldf("
+x = sqldf::sqldf("
 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep
 FROM pseudo_ksdata
 WHERE gene IN (SELECT kinase FROM enriched_kinases)
 "),
 justification = "l l l",
 caption = "substrates of enriched kinases",
 anchor = c(const_table_anchor_p, const_table_anchor_t),
 underscore_whack = TRUE
 )
 data_frame_table_latex(
-x = sqldf("
+x = sqldf::sqldf("
 SELECT
 gene AS kinase,
 ppep,
 sub_gene,
 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label,
 caption = "labeled substrates of enriched kinases",
 anchor = c(const_table_anchor_p, const_table_anchor_t),
 underscore_whack = TRUE
 )
 }
-all_enriched_substrates <- sqldf("
+all_enriched_substrates <- sqldf::sqldf("
 SELECT
 gene AS kinase,
 ppep,
 sub_gene,
 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label,
 ,
 drop = FALSE
 ]
 all_enriched_substrates$sub_gene <-
-sub(
+sub(" ///.*", "...",
-" ///.*",
-" ...",
 all_enriched_substrates$sub_gene
 )
 all_enriched_substrates$label <-
 with(
 if (g_neednewpage) cat("\\newpage\n")
 g_neednewpage <- TRUE
 if (nrow(m) > g_intensity_hm_rows) {
 cat(subsection_header(
 sprintf(
-"Highest-quality %d (of %d) enriched %s-substrates",
+"Highest-quality %d (of %d) enriched %s-sites",
 g_intensity_hm_rows,
 nrow(m),
 kinase
 )
 ))
 return(FALSE)
 } else {
 nrow_m <- nrow(m)
 cat(subsection_header(
 sprintf(
-"%d enriched %s-substrate%s",
+"%d enriched %s-site%s",
 nrow_m,
 kinase,
 if (nrow_m > 1) "s" else ""
 )
 ))
 }
 ```
 ```{r enriched, echo = FALSE, fig.dim = c(12, 13.7), results = 'asis'}
+# nolint start
+#   squash un-actionable cyclomatic_complexity warning
 if (g_can_run_ksea) {
+# nolint end
 g_did_enriched_header <- FALSE
 for (kinase_name in sort(enriched_kinases$kinase)) {
 enriched_substrates <-
 all_enriched_substrates[
 all_enriched_substrates$kinase == kinase_name,
 ten_trunc_ppep
 )
 )
 # Get the intensity values for the heatmap
 enriched_intensities <-
-as.matrix(unimputed_quant_data_log[enriched_substrates$ppep, , drop = FALSE])
+as.matrix(unimputed_quant_data_log[
+enriched_substrates$ppep, , drop = FALSE
+])
 # Remove rows having too many NA values to be relevant
 good_rows <- (rowSums(enriched_intensities, na.rm = TRUE) != 0)
-#ACE nbe(see_variable(good_rows), "\n")
 enriched_substrates <- enriched_substrates[good_rows, , drop = FALSE]
 enriched_intensities <- enriched_intensities[good_rows, , drop = FALSE]
 # Rename the rows with the display-name for the heatmap
 short_row_names <- sub(
 )
 rownames(enriched_intensities) <- long_row_names
 # Format as matrix for heatmap
 m <- as.matrix(enriched_intensities)
 rownames(m) <- trunc_enriched_substrate(rownames(m))
-#ACE nb("m with bad rows: ", see_variable(m), "\n")
-#ACE good_rows <- (rowSums(m, na.rm = TRUE) != 0)
-#ACE nb(see_variable(good_rows), "\n")
-#ACE m <- m[good_rows, , drop = FALSE]
-#ACE nb("m without(?) bad rows: ", see_variable(m), "\n")
-#ACE nb(see_variable(short_row_names), "\n")
-#ACE local_short_row_names <- short_row_names[good_rows]
-#ACE local_long_row_names  <- long_row_names[good_rows]
-#ACE local_enriched_intensities <- enriched_intensities[local_long_row_names, ]
 # Draw the heading and heatmap
 nrow_m <- nrow(m)
 if (nrow_m > 0) {
 if (!g_did_enriched_header) {
 g_did_enriched_header <- TRUE
 }
 is_na_m <- is.na(m)
 cellnote_m <- is_na_m
 cellnote_m[!is_na_m] <- ""
-cellnote_m[is_na_m] <- "NA"
+cellnote_m[is_na_m] <- params$heatMapNAcellNote
 cut_args <- new_env()
 cut_args$cutoff <- cutoff
 cut_args$kinase <- kinase_name
 cut_args$statistic <- ksea_cutoff_statistic
 cut_args$threshold <- ksea_cutoff_threshold
 ppep_heatmap(
 m                       = m,
 cellnote                = cellnote_m,
 cutoff                  = cut_args,
 hm_heading_function     = cat_enriched_heading,
-hm_main_title
+hm_main_title           = paste(
-= "Unnormalized (zero-imputed) intensities of enriched kinase-substrates",
+"Unnormalized (zero-imputed)",
+"intensities of enriched kinase-substrates"),
 suppress_row_dendrogram = FALSE,
 master_cex              = 0.35,
 sepcolor                = "black",
 colsep                  = sample_colsep,
 row_scaling             = "none"
 if (number_of_peptides_found > 1) {
 tryCatch(
 {
 rownames(m) <- short_row_names
-cov_heatmap(m, nrow_m > g_intensity_hm_rows)
+cov_heatmap(
+m = m,
+kinase_name = kinase_name,
+top_substrates = nrow_m > g_intensity_hm_rows
+)
 },
 error = function(e) {
 cat(
 sprintf(
 "ERROR: %s\n\\newline\n",
 headers_1st_line <- c("", "", "ANOVA", "min(values", "")
 if (1 < nrow(enriched_substrates))
 cat("\n\\newpage\n")
 cat(subsubsection_header(
 sprintf(
-"Details for %s%s-substrates",
+"Details for %s%s-sites",
 if (excess_substrates)
 sprintf(
 "%s \"highest quality\" ",
 g_intensity_hm_rows
 )
 # get kinase, ppep, concat(kinase) tuples for enriched kinases
 if (print_nb_messages) nb("kinase_ppep_label <- ...\n")
 if (print_nb_messages) nbe("kinase_ppep_label <- ...\n")
-kinase_ppep_label <- sqldf("
+kinase_ppep_label <- sqldf::sqldf("
 WITH
 t(ppep, label) AS
 (
 SELECT DISTINCT
 SUB_MOD_RSD AS ppep,
 ON f.Phosphopeptide = k.ppep,
 impish q
 WHERE
 f.Phosphopeptide = q.Phosphopeptide
 "
-data_table_imputed <- sqldf(data_table_imputed_sql)
+data_table_imputed <- sqldf::sqldf(data_table_imputed_sql)
 # Zap the duplicated 'Phosphopeptide' column named 'ppep'
 data_table_imputed <-
 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))]
 # Output imputed, un-normalized data
 if (print_nb_messages) nb("Output imputed, un-normalized data tabular file\n")
-if (print_nb_messages) nbe("Output imputed, un-normalized data tabular file\n")
+if (print_nb_messages) nbe(
+"Output imputed, un-normalized data tabular file\n")
 write.table(
 data_table_imputed
 , file = imputed_data_filename
 , sep = "\t"
 , col.names = TRUE
 #output quantile normalized data
 impish <- cbind(rownames(quant_data_imp_qn_log), quant_data_imp_qn_log)
 colnames(impish)[1] <- "Phosphopeptide"
-data_table_imputed <- sqldf(data_table_imputed_sql)
+data_table_imputed <- sqldf::sqldf(data_table_imputed_sql)
 # Zap the duplicated 'Phosphopeptide' column named 'ppep'
 data_table_imputed <-
 data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))]
 if (print_nb_messages) nb("Output quantile normalized data tabular file\n")
 if (print_nb_messages) nbe("Output quantile normalized data tabular file\n")
 col.names = TRUE,
 row.names = FALSE,
 quote = FALSE
 )
-ppep_kinase <- sqldf("
+ppep_kinase <- sqldf::sqldf("
 SELECT DISTINCT k.ppep, k.kinase
 FROM (
 SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep
 FROM pseudo_ksdata
 WHERE GENE IN (SELECT kinase FROM enriched_kinases)
 ON m.phospho_peptide = kek.ppep
 ;
 "
 )
-if (print_nb_messages) nb("Output contents of `stats_metadata_v` table to tabular file\n")
+if (print_nb_messages) nb(
-if (print_nb_messages) nbe("Output contents of `stats_metadata_v` table to tabular file\n")
+"Output contents of `stats_metadata_v` table to tabular file\n")
+if (print_nb_messages) nbe(
+"Output contents of `stats_metadata_v` table to tabular file\n")
 write.table(
 dbReadTable(db, "stats_metadata_v"),
 file = anova_ksea_mtdt_file,
 sep = "\t",
 col.names = TRUE,

Mercurial > repos > galaxyp > mqppep_preproc

comparison mqppep_anova_script.Rmd @ 3:bae3a23461c9 draft