Mercurial > repos > galaxyp > mqppep_preproc

diff mqppep_anova_script.Rmd @ 1:b76c75521d91 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 43e7a43b545c24b2dc33d039198551c032aa79be
author: galaxyp
date: Fri, 28 Oct 2022 18:26:42 +0000
parents: 8dfd5d2b5903
children: a5e7469dfdfa
--- a/mqppep_anova_script.Rmd	Mon Jul 11 19:22:54 2022 +0000
+++ b/mqppep_anova_script.Rmd	Fri Oct 28 18:26:42 2022 +0000
@@ -7,81 +7,153 @@
 date:
 - "May 28, 2018"
 - "; revised June 23, 2022"
+lot: true
 output:
   pdf_document:
     toc: true
-    toc_depth: 3
+    toc_depth: 2
     keep_tex: true
-header-includes:
-  - \usepackage{longtable}
-  - \newcommand\T{\rule{0pt}{2.6ex}}       % Top strut
-  - \newcommand\B{\rule[-1.2ex]{0pt}{0pt}} % Bottom strut
+    dev: pdf
+    includes:
+      in_header: mqppep_anova_preamble.tex
+latex_macros: false
+raw_tex: true
+urlcolor: blue
 params:
   alphaFile:            "test-data/alpha_levels.tabular"
   inputFile:            "test-data/test_input_for_anova.tabular"
   preprocDb:            "test-data/test_input_for_anova.sqlite"
   kseaAppPrepDb:        !r c(":memory:", "test-data/mqppep.sqlite")[2]
-  show_toc:             true
-  firstDataColumn:      "^Intensity[^_]"
-  imputationMethod:     !r c("group-median", "median", "mean", "random")[1]
-  meanPercentile:       1
-  sdPercentile:         1.0
   regexSampleNames:     "\\.\\d+[A-Z]$"
   regexSampleGrouping:  "\\d+"
+  groupFilterPatterns:  ".+"
+  groupFilter:    !r c("none", "exclude", "include")[1]
+  imputationMethod:     !r c("group-median", "median", "mean", "random")[4]
+  kseaCutoffThreshold:  !r c(0.05, 0.1, 0.25, 0.5, 0.9)[5]
+  #imputationMethod:     !r c("group-median", "median", "mean", "random")[1]
+
+  # how should sample groups be interpreted?
+  #  - "f": fixed patterns (like `grep -F`)
+  #  - "p": PERL-compatible (like `grep -P`)
+  #  - "r": extended grep patterns (like `grep -E`)
+  # use what case sensitivity?
+  #  - "i": case insensitive matching (like `grep -i`)
+  groupFilterMode: !r c("r", "ri", "p", "pi", "f", "fi")[1]
+  # what pattern should be used for the first column
+  #   (extended grep pattern, case sensitive)
+  firstDataColumn:      "^Intensity[^_]"
+  # for small random value imputation, what percentile should be center?
+  meanPercentile:       50
+  #meanPercentile:       1
+  # for small random value imputation, what should `s / mean(x)` ratio be?
+  sdPercentile:         1.0
+  # output path for imputed data file
   imputedDataFilename:  "test-data/limbo/imputedDataFilename.txt"
+  # output path for imputed/quantile-normalized/log-transformed data file
   imputedQNLTDataFile:  "test-data/limbo/imputedQNLTDataFile.txt"
+  # output path for contents of `stats_metadata_v` table
   anovaKseaMetadata:    "test-data/limbo/anovaKseaMetadata.txt"
+  # how to test one variable with > 2 categories (e.g., aov or kruskal.test)
   oneWayManyCategories: !r c("aov", "kruskal.test", "oneway.test")[1]
+  # how to test one variable with 2 categories (e.g., oneway.test)
   oneWayTwoCategories:  !r c("aov", "kruskal.test", "oneway.test")[3]
-  kseaCutoffStatistic:  !r c("p.value", "FDR")[2]
-  kseaCutoffThreshold:  !r c( 0.1, 0.05)[2]
-  kseaMinKinaseCount:   1
-  intensityHeatmapRows: 75
+  # what should be the minimum quality for consideration in both
+  minQuality:           0
+  # correct KSEA with FDR (recommended) or raw p-value
+  kseaCutoffStatistic:  !r c("FDR", "p.value")[1]
+  # correct KSEA threshold 0.05 (conventional) or higher (perhaps better)
+  #   "perhaps better" meaning that KSEA is an hypothesis-generator, not -test
+  #kseaCutoffThreshold:  !r c(0.05, 0.1, 0.25, 0.5)[1]
+  # minimum number of substrates required for a kinase to be considered in KSEA
+  kseaMinSubstrateCount: 1
+  # Should KSEA be performed aggregating signed log2FC or absolute?
+  # FALSE use raw log2FC for KSEA as for KSEAapp::KSEA.Scores
+  # TRUE  use abs(log2FC) for KSEA as Justin Drake requested; this is a
+  #         justifiable deviation from the KSEAapp::KSEA.Scores algorithm.
+  kseaUseAbsoluteLog2FC: TRUE
+  #kseaUseAbsoluteLog2FC: FALSE
+  # minimum number of observed values per sample-group
+  intensityMinValuesPerGroup: 1
+  # maximum number of heatmap rows (result are poor when > 50)
+  intensityHeatmapRows: 50
+  # what should be the primary criterion to eliminate excessive heatmap rows
+  intensityHeatmapCriteria: !r c("quality", "na_count", "p_value")[1]
+  # should correlation among substrates be used (rather than covariance)
+  correlateSubstrates:  TRUE
+  # only show covariance among variables having variance > 1
+  filterCovVarGT1:      TRUE
+  # maximum number of residues to display for ppeps in rownames or columnames
+  ppepTruncN:           10
+  # maximum number of characters of subgenes to display in rownames or columnames
+  subgeneTruncN:        10
+  # maximum number of characters for paste(subgene, ppep) for enrichment plots
+  substTruncN:          20
+  # should boxplots use variable-width boxes to reflect # of samples
+  boxPlotVarWidth:      TRUE
+  # should boxplots use notched boxes to reflect difference between samples
+  boxPlotNotch:         TRUE
+  # look-up tables for kinase descriptions
+  kinaseNameUprtLutBz2: "./kinase_name_uniprot_lut.tabular.bz2"
+  kinaseUprtDescLutBz2: "./kinase_uniprot_description_lut.tabular.bz2"
+  # should debugging trace messages be printed?
+  showEnrichedSubstrates: FALSE
+
+  # should debugging nb/nbe messages be printed?
+  printNBMsgs:          FALSE
+  # should debugging trace messages be printed?
+  printTraceMsgs:       FALSE
+  # when debugging files are needed, set debugFileBasePath to the path
+  #   to the directory where they should be writtn
+  debugFileBasePath:    !r if (TRUE) NULL else "test-data"
 ---
-<!--
-  kseaCutoffStatistic:  !r c("p.value", "FDR")[2]
-  kseaCutoffThreshold:  !r c(0.05, 0.1)[1]
-
-  alphaFile:            "test-data/alpha_levels.tabular"
-  inputFile:            "test-data/test_input_for_anova.tabular"
-  preprocDb:            "test-data/test_input_for_anova.sqlite"
-  kseaAppPrepDb:        !r c(":memory:", "test-data/mqppep.sqlite")[2]
-
-  alphaFile:            "test-data/alpha_levels.tabular"
-  inputFile:            "test-data/UT_phospho_ST_sites.preproc.tabular"
-  preprocDb:            "test-data/UT_phospho_ST_sites.preproc.sqlite"
-  kseaAppPrepDb:        !r c(":memory:", "test-data/UT_phospho_ST_sites.ksea.sqlite")[2]
-
-  alphaFile:            "test-data/alpha_levels.tabular"
-  inputFile:            "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.tabular"
-  preprocDb:            "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.sqlite"
-  kseaAppPrepDb:        !r c(":memory:", "test-data/pST_Sites_NancyDu.ksea.sqlite")[2]
-
-  alphaFile:            "test-data/alpha_levels.tabular"
-  inputFile:            "test-data/pST_Sites_NancyDu.txt.preproc.tabular"
-  preprocDb:            "test-data/pST_Sites_NancyDu.txt.preproc.sqlite"
-  kseaAppPrepDb:        !r c(":memory:", "test-data/pST_Sites_NancyDu.ksea.sqlite")[2]
-
-  inputFile:            "test-data/density_failure.preproc_tab.tabular"
-  kseaAppPrepDb:        !r c(":memory:", "mqppep.sqlite")[2]
-  latex_document: default
--->
-```{r setup, include = FALSE}
+
+```{r setup, include = FALSE, results = 'asis'}
+
+# simple debug messaging
+print_nb_messages <- params$printNBMsgs
+
+nb <- if (!print_nb_messages) {
+    function(...) invisible()
+  } else {
+    function(..., f = cat) f("\n$\\exists{}\\supset\\forall{}$", ...)
+  }
+
+nbe <- if (!print_nb_messages) {
+    function(...) invisible()
+  } else {
+    function(..., f = cat, file = stderr()) {
+      cat(
+        stringi::stri_unescape_unicode("\nNBE \\u2203\\u2283\\u2200"),
+        ...,
+        file = file
+      )
+    }
+  }
+
 #ref for debugging: https://yihui.org/tinytex/r/#debugging
 options(tinytex.verbose = TRUE)
 
 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
 # ref for top and bottom struts: https://tex.stackexchange.com/a/50355
-knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10))
+knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10), dpi = 300)
 
 # freeze the random number generator so the same results will be produced
 #  from run to run
 set.seed(28571)
 
 ### LIBRARIES
+
+if (print_nb_messages) nbe("library(gplots)")
 library(gplots)
+if (print_nb_messages) nbe("library(caret)")
+# load caret for nearZeroVar
+if (print_nb_messages) nbe("Please ignore the messages about systemd, if any.\n")
+library(caret)
+if (print_nb_messages) nbe("library(DBI)")
 library(DBI)
+if (print_nb_messages) nbe("library(RSQLite)")
 library(RSQLite)
+if (print_nb_messages) nbe("library(sqldf)\n")
 # Suppress "Warning: no DISPLAY variable so Tk is not available"
 suppressWarnings(suppressMessages(library(sqldf)))
 
@@ -97,30 +169,42 @@
 
 ### CONSTANTS
 
-const_parfin <- par("fin")
-const_boxplot_fill <- "grey94"
-const_stripchart_cex <- 0.5
-const_stripsmall_cex <-
-  sqrt(const_stripchart_cex * const_stripchart_cex / 2)
-const_stripchart_jitter <- 0.3
-const_write_debug_files <- FALSE
-const_table_anchor_bp <- "bp"
-const_table_anchor_ht <- "ht"
-const_table_anchor_p <- "p"
-const_table_anchor_tbp <- "tbp"
-
-
+const_boxplot_fill           <- "grey94"
 const_ksea_astrsk_kinases    <- 1
 const_ksea_nonastrsk_kinases <- 2
 const_ksea_all_kinases       <- 3
-
-const_log10_e <- log10(exp(1))
-
-### FUNCTIONS
-
-# from `demo(error.catching)`
+const_log10_e                <- log10(exp(1))
+const_stripchart_cex         <- 0.5
+const_stripchart_jitter      <- 0.3
+const_table_anchor_bp        <- "bp"
+const_table_anchor_ht        <- "ht"
+const_table_anchor_p         <- "p"
+const_table_anchor_t         <- "t"
+const_table_anchor_tbp       <- "tbp"
+
+
+### GLOBAL VARIABLES (params)
+
+## functions to process params
+
+is_string_null_or_empty <- function(x) {
+  # N. B. non-strings are intentionally treated as NULL
+  if (is.null(x))
+    TRUE
+  else if (!is.character(x))
+    TRUE
+  else  x == ""
+}
+
 ##' Catch *and* save both errors and warnings, and in the case of
 ##' a warning, also keep the computed result.
+##' return result as list(value = ..., warning = ...)
+##' - value will be:
+##'   - the result if no exception is thrown
+##'   - the exception if an exception is caught
+##' - warning will be a string except perhaps when warning argument is not NULL
+##'
+##' adapted from `demo(error.catching)`
 ##'
 ##' @title tryCatch both warnings (with value) and errors
 ##' @param expr an \R expression to evaluate
@@ -128,32 +212,428 @@
 ##'   'value' may be an error caught.
 ##' @author Martin Maechler;
 ##' Copyright (C) 2010-2012  The R Core Team
-try_catch_w_e <- function(expr) {
-  wrn <- NULL
-  # warning handler
-  w_handler <- function(w) {
-    wrn <<- w
-    invokeRestart("muffleWarning")
+try_catch_w_e <-
+  function(expr, error = function(e) e, warning = NULL) {
+    wrn <- NULL
+    # warning handler
+    w_handler <-
+      if (is.function(warning))
+        warning
+      else
+        function(w) {
+          wrn <<- w
+          invokeRestart("muffleWarning")
+        }
+    e_handler <-
+      if (is.function(error))
+        error
+      else
+        function(e) e
+    # return result as list(value = ..., warning = ...)
+    # - value will be:
+    #   - the result if no exception is thrown
+    #   - the exception if an exception is caught
+    list(
+      value = withCallingHandlers(
+        tryCatch(
+          expr,
+          error = e_handler
+        ),
+        warning = w_handler
+      ),
+      warning = wrn
+    )
+  }
+
+see_kvp <-
+  function(format, key, value, suffix = "") {
+    if (
+      !all(
+        is.character(format),
+        is.character(key),
+        is.character(value),
+        is.character(suffix)
+      )
+    ) {
+      cat("all arguments to see_kvp should be character")
+      knitr::knit_exit()
+    }
+    result <- sprintf(format, value)
+    if (length(result) > 1) {
+      sprintf(
+        "%s = c(%s)%s",
+        whack_underscores(key),
+        paste(result, collapse = ", "),
+        suffix
+      )
+    } else {
+       sprintf(
+         "%s = %s%s",
+         key,
+         result,
+         suffix
+       )
+    }
+  }
+
+see_logical <-
+  function(x, suffix = "", xprssn = deparse1(substitute(x))) {
+      result <- as.character(as.logical(x))
+      # handle NAs and NaNs
+      result[is.na(result)] <- "NA"
+      see_kvp(
+        format = "%s",
+        key    = xprssn,
+        value  = result,
+        suffix = suffix
+      )
+  }
+
+see_numeric <-
+  function(x, suffix = "", digits = 3, xprssn = deparse1(substitute(x))) {
+    if (is.numeric(digits) && is.numeric(x)) {
+      digits <- as.integer(digits)
+      digits <- min(16, max(0, digits))
+      format <- paste0("%0.", as.character(digits), "g")
+      result <- sprintf(format, x)
+      see_kvp(
+        format = "%s",
+        key    = xprssn,
+        value  = result,
+        suffix = suffix
+      )
+    }
+  }
+
+see_character <-
+  function(x, suffix = "", xprssn = deparse1(substitute(x))) {
+    if (is.character(x)) {
+      see_kvp(
+        format = "%s",
+        key    = xprssn,
+        value  = sprintf("\"%s\"", x),
+        suffix = suffix
+      )
+    }
+  }
+
+see_variable <-
+  function(x, suffix = "", digits = 3, xprssn = deparse1(substitute(x))) {
+    if (is.character(x)) {
+      see_character(x, suffix, xprssn)
+    } else if (is.numeric(x)) {
+      see_numeric(x, suffix, digits, xprssn)
+    } else if (is.logical(x)) {
+      see_logical(x, suffix, xprssn)
+    } else {
+      f <- file("")
+      sink(f)
+      str(x)
+      msg <- paste(readLines(f), collapse = "\n")
+      sink()
+      close(f)
+      paste0(
+        "see_variable - str(",
+        xprssn,
+        "):\n",
+        msg, "\n"
+      )
+    }
+  }
+
+# ref: https://tug.org/texinfohtml/latex2e.html
+# LaTeX sets aside the following characters for special purposes.
+#   For example, the percent sign % is for comments.
+#   They are called reserved characters or special characters.
+#   They are all discussed elsewhere in this manual.
+#
+#   $ % & { } _ ~ ^ \ #
+#
+# If you want a reserved character to be printed as itself, in the text body
+#   font, for all but the final three characters in that list simply put
+#   a backslash \ in front of the character.
+#   Thus, typing \$1.23 will produce $1.23 in your output.
+#
+# As to the last three characters, to get a tilde in the text body font,
+#   use \~{} (omitting the curly braces would result in the next character
+#   receiving a tilde accent).
+# Similarly, to get a text body font circumflex use \^{}.
+#   To get a backslash in the font of the text body enter \textbackslash{}.
+whack_math <-
+  function(v) {
+    v <- as.character(v)
+    w <- gsub("\\", "\\textbackslash ", v, fixed = TRUE)
+    w <- Reduce(
+          f = function(l, r) {
+            gsub(r, paste0("\\", r), l, fixed = TRUE)
+          },
+          x = c("#", "$", "%", "&", "{", "}", "_"),
+          init = w
+        )
+    w <- gsub("^", "\\^{}", w, fixed = TRUE)
+    return(w)
+    if (all(v == w))
+      v
+    else
+      paste0("\\texttt{", w, "}")
   }
-  list(
-    value = withCallingHandlers(
-      tryCatch(
-        expr,
-        error = function(e) e
-      ),
-      warning = w_handler
-    ),
-    warning = wrn
+whack_underscores <- whack_math
+
+## dump params to stderr (remove this eventually)
+
+if (FALSE) nbe(see_variable(params))
+
+## unlist params for eventual output
+
+param_unlist <- unlist(as.list(params))
+
+# no need to whack underscores and dollars because this is verbatim
+param_df <- data.frame(
+  parameter = paste0("\\verb@", names(param_unlist), "@"),
+  value = paste0(
+    "\n\\begin{tiny}\n\\verb@",
+    param_unlist,
+    "@\n\\end{tiny}"
+    )
+  )
+param_df <- data.frame(
+  parameter = names(param_unlist),
+  value = param_unlist
+  )
+param_df <- param_df[order(param_df$parameter), ]
+
+## general output control
+
+debug_file_base_path     <- params$debugFileBasePath
+print_trace_messages     <- params$printTraceMsgs
+show_enriched_substrates <- params$showEnrichedSubstrates
+boxplot_varwidth         <- params$boxPlotVarWidth
+boxplot_notch            <- params$boxPlotNotch
+
+## parameters for static data
+
+kinase_name_uprt_lut_bz2 <- params$kinaseNameUprtLutBz2
+kinase_uprt_desc_lut_bz2 <- params$kinaseUprtDescLutBz2
+
+## parameters for input file
+
+preproc_db        <- params$preprocDb
+alpha_file        <- params$alphaFile
+input_file        <- params$inputFile
+
+# First data column - ideally, this could be detected via
+#   regexSampleNames, but for now leave it as is.
+first_data_column <- params$firstDataColumn
+fdc_is_integer    <- is.integer(first_data_column)
+if (fdc_is_integer) {
+  first_data_column <- as.integer(params$firstDataColumn)
+}
+
+## parameters for output files
+
+ksea_app_prep_db      <- params$kseaAppPrepDb
+imputed_data_filename <- params$imputedDataFilename
+imp_qn_lt_data_filenm <- params$imputedQNLTDataFile
+anova_ksea_mtdt_file  <- params$anovaKseaMetadata
+
+## parameters for imputation
+
+# Imputation method, should be one of
+#   "random", "group-median", "median", or "mean"
+imputation_method <- params$imputationMethod
+
+# Selection of percentile of logvalue data to set the mean for random number
+#   generation when using random imputation
+mean_percentile <- params$meanPercentile / 100.0
+
+# deviation adjustment-factor for random values; real number.
+sd_percentile <- params$sdPercentile
+
+## parameters for group parsing and filtering
+
+# Regular expression of Sample Names, e.g., "\\.(\\d+)[A-Z]$"
+regex_sample_names <- params$regexSampleNames
+# Regular expression to extract Sample Grouping from Sample Name;
+#   if error occurs, compare smpl_trt vs. sample_name_matches
+#   to see if groupings/pairs line up
+#   e.g., "(\\d+)"
+
+regex_sample_grouping <- params$regexSampleGrouping
+# What are the patterns for filtering sample groups?
+# How should sample groups be filtered?
+#   - none:    do not filter
+#   - include: include sample groups matching filter
+#   - exclude: include sample groups not matching filter
+
+sample_group_filter <- params$groupFilter
+if (grepl("f", params$groupFilterMode, fixed = TRUE)) {
+  sample_group_filter_perl <- FALSE
+  sample_group_filter_fixed <- TRUE
+} else if (grepl("p", params$groupFilterMode, fixed = TRUE)) {
+  sample_group_filter_perl <- TRUE
+  sample_group_filter_fixed <- FALSE
+} else { # normal regex
+  sample_group_filter_perl <- FALSE
+  sample_group_filter_fixed <- FALSE
+}
+
+sample_group_filter_nocase <-
+  grepl("i", params$groupFilterMode, fixed = TRUE)
+
+# What PCRE patterns should be included or excluded
+group_filter_patterns_csv <- params$groupFilterPatterns
+sample_group_filter_patterns <- strsplit(
+    x = group_filter_patterns_csv,
+    split = ",",
+    fixed = TRUE
+  )[[1]]
+
+## parameters for hypothesis testing
+
+one_way_all_categories_fname <- params$oneWayManyCategories
+
+one_way_all_categories <- try_catch_w_e(
+  match.fun(one_way_all_categories_fname))
+
+if (!is.function(one_way_all_categories$value)) {
+  write("fatal error for parameter oneWayManyCategories:", stderr())
+  write(one_way_all_categories$value$message,             stderr())
+  if (sys.nframe() > 0) {
+    cat("Cannot continue and quit() failed. Goodbye.")
+    knitr::knit_exit()
+    quit(save = "no", status = 1)
+  }
+}
+
+one_way_all_categories <- one_way_all_categories$value
+
+one_way_two_categories_fname <- params$oneWayManyCategories
+one_way_two_categories <- try_catch_w_e(
+  match.fun(one_way_two_categories_fname))
+if (!is.function(one_way_two_categories$value)) {
+  cat("fatal error for parameter oneWayTwoCategories: \n")
+  cat(one_way_two_categories$value$message, fill = TRUE)
+  if (sys.nframe() > 0) {
+    cat("Cannot continue and quit() failed. Goodbye.")
+    knitr::knit_exit()
+    quit(save = "no", status = 1)
+  }
+}
+one_way_two_categories <- one_way_two_categories$value
+
+## parameters for KSEA
+
+ksea_cutoff_statistic <- params$kseaCutoffStatistic
+ksea_cutoff_threshold <- params$kseaCutoffThreshold
+ksea_min_substrate_count <- params$kseaMinSubstrateCount
+
+## parameters for global variables consumed by functions
+
+# intensityHeatmapCriteria: !r c("na_count", "p_value")[2] # TODO switch to 1
+# TODO Validate within list
+g_intensity_hm_criteria <- params$intensityHeatmapCriteria
+if (is_string_null_or_empty(g_intensity_hm_criteria)) {
+  cat("invalid intensityHeatmapCriteria parameter (must be string)")
+  knitr::knit_exit()
+}
+switch(
+  g_intensity_hm_criteria,
+  "quality" = NULL,
+  "na_count" = NULL,
+  "p_value" = NULL,
+  {
+    with(
+      params,
+      cat(
+        sprintf(
+        "invalid %s (must be %s)",
+        see_variable(intensityHeatmapCriteria),
+        "one of quality or na_count or p_value"
+        )
+      )
+    )
+    knitr::knit_exit()
+  }
+)
+
+# intensityHeatmapRows: 50
+# TODO Validate >> 0 < 75
+g_intensity_hm_rows     <- params$intensityHeatmapRows
+if (!is.integer(g_intensity_hm_rows) || g_intensity_hm_rows < 1) {
+  cat("invalid intensityHeatmapRows (must be integer > 0)")
+  knitr::knit_exit()
+}
+
+g_intensity_min_per_class <- params$intensityMinValuesPerGroup
+if (!is.integer(g_intensity_min_per_class) || g_intensity_min_per_class < 0) {
+  cat("invalid intensityMinValuesPerGroup (must be integer > -1")
+  knitr::knit_exit()
+}
+
+if (is.na(as.logical(g_correlate_substrates  <- params$correlateSubstrates))) {
+  cat("invalid correlateSubstrates (must be TRUE or FALSE)")
+  knitr::knit_exit()
+}
+
+if (is.na(as.logical(g_filter_cov_var_gt_1   <- params$filterCovVarGT1))) {
+  cat("invalid filterCovVarGT1 parameter (must be TRUE or FALSE)")
+  knitr::knit_exit()
+}
+
+# TODO Validate >> 0 < 30
+g_ppep_trunc_n          <- params$ppepTruncN
+
+# TODO Validate >> 0 < 30
+g_subgene_trunc_n       <- params$subgeneTruncN
+
+# TODO Validate >> 0 < 30
+g_sbstr_trunc_n         <- params$substTruncN
+
+
+### OPERATORS
+
+# Test for exclusion
+# ref: https://www.reneshbedre.com/blog/in-operator-r.html
+`%notin%` <- Negate(`%in%`)
+
+# Augmented assignment
+# ref: https://www2.cs.arizona.edu/icon/refernce/infix2.htm#aug_assign
+`%||:=%` <- function(lvalue, ...) {
+  pf <- parent.frame()
+  rvalue <- Reduce(paste0, x = ..., init = lvalue)
+  assign(
+    x = as.character(substitute(lvalue)),
+    value = rvalue,
+    pos = pf
+  )
+  invisible(rvalue)
+}
+
+### FUNCTIONS
+
+no_op <-
+  function() {
+  }
+# this function is not used in this file and should be removed while
+#   factoring out reusable code
+all_apply <- function(f, v, na_rm = TRUE, ...) {
+  Reduce(
+    f = function(l, r) if (na_rm && is.na(r)) TRUE else l && r,
+    x = sapply(X = v, FUN = f, ...),
+    init = TRUE
   )
 }
 
-
-write_debug_file <- function(s) {
-  if (const_write_debug_files) {
-    s_path <- sprintf("test-data/%s.txt", deparse(substitute(s)))
-    print(sprintf("DEBUG writing file %s", spath))
+write_debug_file <- function(data_frame) {
+  if (!is.null(debug_file_base_path)) {
+    s_path <-
+      sprintf(
+        "%s/%s.txt",
+        debug_file_base_path,
+        deparse(substitute(data_frame))
+      )
     write.table(
-      s,
+      data_frame,
       file = s_path,
       sep = "\t",
       col.names = TRUE,
@@ -174,6 +654,137 @@
   new.env(parent = emptyenv())
 }
 
+# make apply readable for rows
+row_apply <- function(x, fun, ..., simplify = TRUE) {
+  apply(x, MARGIN = 1, fun, ..., simplify = TRUE)
+}
+
+# make apply readable for columns
+column_apply <- function(x, fun, ..., simplify = TRUE) {
+  apply(x, MARGIN = 2, fun, ..., simplify = TRUE)
+}
+
+##' Produce a vector of boolean values whose i-th value is TRUE when any
+##'   member of v matches the i-th membr of s, where i in 1:seq_len(length(s))
+##'
+##' @title Search multiple strings for matches of multiple substrings
+##' @param v a vector of substrings to match
+##' @param s a vector of strings to search for matches
+##' @param ... additional arguments to grepl
+##' @return a list with keys in s and valuse that are vectors of elements of v
+##' @author Art Eschenlauer
+##' Copyright (C) 2022 Art Eschenlauer;
+##' MIT License; https://en.wikipedia.org/wiki/MIT_License#License_terms
+mgrepl <- function(v, s, ...) {
+  grpl_rslt <- rep_len(0, length(s))
+  for (vi in v) {
+    grpl_rslt_v <- sapply(
+      X = s,
+      FUN = function(t) {
+        Reduce(
+          f = function(l, r) if (is.null(l)) r else c(l, r),
+          x = sapply(
+            X = vi,
+            FUN = function(f) grepl(f, t, ...)
+          ),
+          init = c()
+        )
+      },
+      simplify = "array"
+    )
+    grpl_rslt <- grpl_rslt + grpl_rslt_v
+  }
+  rslt <- unname(grpl_rslt > 0)
+}
+
+##' Produce positions in a vector where succeeding value != current valus
+##'
+##' @title Search vector for neighboring positions having different values
+##' @param v a vector of comparable numeric values (e.g. integers)
+##' @return a vector of positions i where v[i] != v[i + 1]
+##' @author Art Eschenlauer
+##' Copyright (C) 2022 Art Eschenlauer;
+##' MIT License; https://en.wikipedia.org/wiki/MIT_License#License_terms
+transition_positions <- function(v) {
+  Reduce(
+    f = function(l, i) if ((i != 1) && (v[i - 1] != v[i])) c(l, i - 1) else l,
+    x = seq_along(v)[-1:0],
+    init = c()
+  )
+}
+
+### figure debug functions
+
+cat_par_vector <- function(par_name, lbl = "", newlines = TRUE) {
+  cat(
+    sprintf(
+      "%spar(%s) = c(%s)%s",
+      lbl,
+      par_name,
+      paste(par(par_name), collapse = ", "),
+      if (newlines) "\n\n" else ""
+    )
+  )
+}
+
+cat_margins <- function(lbl = NULL) {
+  for (p in c("fig", "fin", "mar", "mai", "omd", "omi", "oma"))
+    cat_par_vector(p, if (!is.null(lbl)) paste0(lbl, " ") else NULL)
+}
+
+cat_variable <-
+  function(x, suffix = "", digits = 3, force_str = FALSE) {
+    xprssn <- deparse1(substitute(x))
+    if (force_str || is.matrix(x) || is.list(x) || is.data.frame(x)) {
+      cat(
+        paste0(
+          "\n\\texttt{\\textbf{",
+          whack_underscores(xprssn),
+          "}} [",
+          typeof(x),
+          ",",
+          mode(x),
+          "] =\n"
+        )
+      )
+      cat("\n\\begin{verbatim}\n")
+      str(x)
+      cat("\n\\end{verbatim}\n")
+    } else {
+      cat("\n", see_variable(x, suffix, digits, xprssn))
+    }
+  }
+
+### structure helper functions
+
+# ref: staque.R - Icon-oriented stack and queue operations
+# - https://gist.github.com/eschen42/917690355e53918b9e7ba7138a02d1f8
+#
+# sq_get(v):x produces the leftmost element of v and removes it from v,
+#   but produces NA if v is empty
+sq_get <- function(v) {
+  if (length(v) == 0) return(NA)
+  assign(as.character(substitute(v)), v[-1], parent.frame())
+  return(v[1])
+}
+#
+# sq_put(v,x1,...,xn):v puts x1, x2, ..., xn onto the right end of v,
+#   producing v.
+#   Values are pushed in order from left to right,
+#   so xn becomes the last (rightmost) value on v.
+#   sq_put(v) with no second argument does nothing.
+sq_put <- function(v, x = NA, ...) {
+  pf <- parent.frame()
+  if (is.null(x)) return(pf$v)
+  if (
+    !(length(x) > 1) &&
+    !rlang::is_closure(x) &&
+    is.na(x)
+    ) return(pf$v)
+  assign(as.character(substitute(v)), c(v, x, ...), pf)
+  pf[[as.character(substitute(v))]]
+}
+
 ### numerical/statistical helper functions
 
 any_nan <- function(x) {
@@ -186,6 +797,7 @@
   sd(x[ok])
 }
 
+# compute anova raw p-value
 anova_func <- function(x, grouping_factor, one_way_f) {
   subject <- data.frame(
     intensity = x
@@ -203,12 +815,421 @@
   pvalue
 }
 
+# This code adapted from matrixcalc::is.positive.definite
+#   Notably, this simply tests without calling stop()
+is_positive_definite <- function(x, tol = 1e-08) {
+  if (!is.matrix(x))
+    return(FALSE)
+  if (!is.numeric(x))
+    return(FALSE)
+  if (nrow(x) < 1)
+    return(FALSE)
+  if (ncol(x) < 1)
+    return(FALSE)
+  if (nrow(x) != ncol(x))
+    return(FALSE)
+  sum_symm <- sum(x == t(x), na.rm = TRUE)
+  value_count <- Reduce("*", dim(x))
+  if (sum_symm != value_count)
+    return(FALSE)
+  eigenvalues <- eigen(x, only.values = TRUE)$values
+  n <- nrow(x)
+  for (i in 1:n) {
+    if (abs(eigenvalues[i]) < tol) {
+      eigenvalues[i] <- 0
+    }
+  }
+  if (any(eigenvalues <= 0)) {
+    return(FALSE)
+  }
+  return(TRUE)
+}
 
 ### LaTeX functions
 
-latex_collapsed_vector <- function(collapse_string, v, underscore_whack = TRUE) {
-  v_sub <- if (underscore_whack) gsub("_", "\\\\_", v) else v
-  cat(
+# Use this like print.data.frame, from which it is adapted:
+data_frame_table_latex <-
+  function(
+    x,
+    # digits to pass to format.data.frame
+    digits = NULL,
+    # TRUE -> right-justify columns; FALSE -> left-justify
+    right = TRUE,
+    # maximumn number of rows to print
+    max = NULL,
+    # string with justification of each column
+    justification = NULL,
+    # TRUE to center on page
+    centered = TRUE,
+    # optional caption
+    caption = NULL,
+    # h(inline); b(bottom); t (top) or p (separate page)
+    anchor = "h",
+    # set underscore_whack to TRUE to escape underscores
+    underscore_whack = TRUE,
+    # how to emit results
+    emit = cat
+  ) {
+    if (is.null(justification))
+      justification <-
+        Reduce(
+          f = paste,
+          x = rep_len(if (right) "r" else "l", length(colnames(x)))
+          )
+    n <- length(rownames(x))
+    if (length(x) == 0L) {
+      emit(
+        sprintf(
+          # if n is one, use singular 'row', else use plural 'rows'
+          ngettext(
+            n,
+            "data frame with 0 columns and %d row",
+            "data frame with 0 columns and %d rows"
+            ),
+          n
+          ),
+        "\n",
+        sep = ""
+        )
+    } else if (n == 0L) {
+      emit("0 rows for:\n")
+      latex_itemized_list(
+        v = names(x),
+        underscore_whack = underscore_whack
+        )
+    } else {
+      if (is.null(max))
+        max <- getOption("max.print", 99999L)
+      if (!is.finite(max)) {
+        cat("Abend because: invalid 'max' / getOption(\"max.print\"): ", max)
+        knitr::knit_exit()
+      }
+      omit <- (n0 <- max %/% length(x)) < n
+      m <- as.matrix(
+        format.data.frame(
+          if (omit) x[seq_len(n0), , drop = FALSE] else x,
+          digits = digits,
+          na.encode = FALSE
+          )
+        )
+      emit(
+        # h(inline); b(bottom); t (top) or p (separate page)
+        paste0("\\begin{table}[", anchor, "]"),
+        "\\leavevmode",
+        sep = "\n"
+        )
+      if (!is.null(caption))
+        emit(paste0(" \\caption{", caption, "}"))
+      if (centered) emit("\\centering\n")
+      emit(
+        paste(
+          " \\begin{tabular}{",
+          justification,
+          "}\n",
+          sep = ""
+          )
+        )
+
+      # ref for top and bottom struts (\T and \B):
+      #   https://tex.stackexchange.com/a/50355
+      if (!is.null(caption))
+        emit("\\B \\\\\n")
+      latex_table_row(
+        v = colnames(m),
+        extra = " \\T \\B",
+        underscore_whack = underscore_whack
+        )
+      emit("\\hline \\\\\n")
+      for (i in seq_len(length(m[, 1]))) {
+        latex_table_row(
+        v = m[i, ],
+        underscore_whack = underscore_whack
+        )
+      }
+      emit(
+        paste(
+          " \\end{tabular}",
+          "\\end{table}",
+          sep = "\n"
+          )
+        )
+      if (omit)
+        emit(" [ reached 'max' / getOption(\"max.print\") -- omitted",
+          n - n0, "rows ]\n")
+    }
+    invisible(x)
+  }
+
+# Use this like print.data.frame, from which it is adapted:
+data_frame_tabbing_latex <-
+  function(
+    x,
+    # vector of tab stops, in inches
+    tabstops,
+    # vector of headings, registered with tab-stops
+    headings = colnames(x),
+    # digits to pass to format.data.frame
+    digits = NULL,
+    # maximumn number of rows to print
+    max = NULL,
+    # optional caption
+    caption = NULL,
+    # set underscore_whack to TRUE to escape underscores
+    underscore_whack = TRUE,
+    # flag for landscape mode
+    landscape = FALSE,
+    # flag indicating that subsubsection should be used for caption
+    #   rather than subsection
+    use_subsubsection_header = TRUE,
+    # character-size indicator; for possible values, see:
+    #   https://tug.org/texinfohtml/latex2e.html#Font-sizes
+    charactersize = "small",
+    # set verbatim to TRUE to debug output
+    verbatim = FALSE
+  ) {
+
+    hlinport <- if (landscape) {
+        function() cat("\\hlinlscp \\\\\n")
+      } else {
+        function() cat("\\hlinport \\\\\n")
+      }
+
+    tabstops_tex <-
+      Reduce(
+        f    = function(l, r) paste0(l, r),
+        x    = sprintf("\\hspace{%0.2fin}\\=", tabstops),
+        init = ""
+      )
+
+    n <- length(rownames(x))
+    if (length(x) == 0L) {
+      cat(
+        sprintf(
+          # if n is one, use singular 'row', else use plural 'rows'
+          ngettext(
+            n,
+            "data frame with 0 columns and %d row",
+            "data frame with 0 columns and %d rows"
+            ),
+          n
+          ),
+        "\n",
+        sep = ""
+        )
+    } else if (n == 0L) {
+      cat("0 rows for:\n")
+      latex_itemized_list(
+        v = names(x),
+        underscore_whack = underscore_whack
+        )
+    } else {
+      if (is.null(max))
+        max <- getOption("max.print", 99999L)
+      if (!is.finite(max)) {
+        cat("Abend because: invalid 'max' / getOption(\"max.print\"): ", max)
+        knitr::knit_exit()
+      }
+      omit <- (n0 <- max %/% length(x)) < n
+      m <- as.matrix(
+        format.data.frame(
+          if (omit) x[seq_len(n0), , drop = FALSE] else x,
+          digits = digits,
+          na.encode = FALSE
+          )
+        )
+      if (landscape)
+        cat("\n\\begin{landscape}")
+      tex_caption <-
+        if (!is.null(caption)) sprintf("\\captionof{table}{%s}\n", caption)
+          else "\n"
+      # build the column names, which have multiple lines when
+      #   length(headings) is a multiple of the number of columns
+      column_names <- ""
+      while (length(headings) > 0) {
+        my_row <- c()
+        for (i in 1:(1 + length(tabstops))) {
+          my_field <- sq_get(headings)
+          sq_put(my_row, if (is.na(my_field)) "" else my_field)
+        }
+        column_names %||:=% latex_tabbing_row(
+          v = my_row,
+          underscore_whack = underscore_whack,
+          action = paste0
+        )
+      }
+
+      # Begin tabbing environment after beginning charactersize environment
+      if (verbatim) cat("\n\\begin{verbatim}")
+      cat(
+        paste0(
+          "\n\\begin{", charactersize, "}", tex_caption,
+          "\\begin{tabwrap}{", tabstops_tex, "}\n"
+        )
+      )
+      # emit column names
+      cat(column_names)
+      # emit hline
+      hlinport()
+      for (i in seq_len(length(m[, 1]))) {
+        my_row <- latex_tabbing_row(
+          v = m[i, ],
+          underscore_whack = underscore_whack,
+          action = paste0
+        )
+        if (FALSE)
+          cat(my_row)
+        else
+          cat(my_row)
+      }
+      hlinport()
+      if (omit)
+        cat(" [ reached 'max' / getOption(\"max.print\") -- omitted",
+          n - n0, "rows ]\n")
+      # End charactersize environment after ending tabbing environment
+      cat(paste0("\\end{tabwrap}\n\\end{", charactersize, "}\n"))
+      if (verbatim) cat("\\end{verbatim}\n")
+      if (landscape)
+        cat("\\end{landscape}\n")
+    }
+    invisible(x)
+  }
+
+param_df_noexit <-
+  function(e = NULL) {
+    data_frame_tabbing_latex(
+      x = param_df,
+      tabstops = c(1.75),
+      underscore_whack = TRUE,
+      caption = "Input parameters",
+      verbatim = FALSE
+      )
+    if (!is.null(e)) {
+      sink(stderr())
+      cat("Caught fatal error:\n\n")
+      str(e)
+      sink()
+    }
+  }
+
+param_df_exit <-
+  function(e = NULL) {
+    param_df_noexit(e)
+    knitr::knit_exit()
+    exit(-1)
+  }
+
+# exit with exit code (default 0) and optional msg
+exit <-
+  function(code = 0, msg = NULL, use_stderr = FALSE) {
+    if (!is.null(msg)) {
+      if (use_stderr) sink(stderr())
+      cat("\n\n", msg, "\n\n")
+      if (use_stderr) sink()
+    }
+    q(save = "no", status = code)
+  }
+
+# make control sequences into printable latex sequences
+latex_printable_control_seqs <-
+  function(s) {
+    s <- gsub("[\\]", "xyzzy_plugh", s)
+    s <- gsub("[$]", "\\\\$", s)
+    s <- gsub("xyzzy_plugh", "$\\\\backslash$", s)
+    return(s)
+  }
+nolatex_verbatim <-
+  function(expr) eval(expr)
+
+latex_verbatim <-
+  function(expr) {
+    arg_string <- deparse1(substitute(expr))
+    cat("\n\\begin{verbatim}\n___\n")
+    tryCatch(
+      expr = expr,
+      error = param_df_exit,
+      #ACE error =
+      #ACE   function(e) {
+      #ACE     cat("Caught error:\n\n")
+      #ACE     str(e)
+      #ACE     knitr::knit_exit()
+      #ACE     stop(e)
+      #ACE   },
+      finally = cat("...\n\\end{verbatim}\n")
+    )
+  }
+
+latex_samepage <-
+  function(expr) {
+    arg_string <- deparse1(substitute(expr))
+    cat("\n\\begin{samepage}\n")
+    tryCatch(
+      expr = expr,
+      error = param_df_exit,
+      #ACE error =
+      #ACE   function(e) {
+      #ACE     cat("Caught error:\n\n")
+      #ACE     str(e)
+      #ACE     knitr::knit_exit()
+      #ACE     stop(e)
+      #ACE   },
+      finally = cat("\n\\end{samepage}\n")
+    )
+  }
+
+# return the result of invocation after showing parameters
+# ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/
+latex_show_invocation <-
+  function(f, f_name = deparse1(substitute(f)), head_patch = FALSE) {
+    function(...) {
+      my_env <- (as.list(environment()))
+      va <- list(...)
+      my_rslt <- new_env()
+      my_rslt$rslt <- NULL
+      latex_verbatim(
+        expr = {
+          cat(sprintf("\n ..  Local variables for '%s':\n\n", f_name))
+          str(va)
+          if (!head_patch) {
+            # return this result
+            # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/
+            cat(sprintf("\n ..  Invoking '%s'\n", f_name))
+            tryCatch(
+              {
+                cat("\n\\end{verbatim}\n")
+                rslt <- do.call(f, va)
+              },
+              error = param_df_exit,
+              #ACE error = function(e) {
+              #ACE    cat("\n\\begin{verbatim}\n")
+              #ACE    str(e)
+              #ACE    cat("\n\\end{verbatim}\n")
+              #ACE    knitr::knit_exit()
+              #ACE    stop(e)
+              #ACE },
+              finally = cat("\n\\begin{verbatim}\n")
+            )
+            cat(sprintf("\n ..  '%s' returned:\n", f_name))
+            str(rslt)
+            my_rslt$rslt <- rslt
+          }
+        }
+      )
+      # return the result of invocation with the shown parameters
+      # ref: https://www.r-bloggers.com/2013/08/a-new-r-trick-for-me-at-least/
+      if (head_patch) my_rslt$rslt <- do.call(f, va)
+      (my_rslt$rslt)
+    }
+  }
+
+latex_collapsed_vector <- function(
+    collapse_string,
+    v,
+    underscore_whack = TRUE,
+    action = cat0
+  ) {
+  v_sub <-
+    if (underscore_whack) whack_underscores(v) else v
+  action(
     paste0(
       v_sub,
       collapse = collapse_string
@@ -242,113 +1263,36 @@
   cat(" \\\\\n")
 }
 
-# Use this like print.data.frame, from which it is adapted:
-data_frame_latex <-
-  function(
-    x,
-    ...,
-    # digits to pass to format.data.frame
-    digits = NULL,
-    # TRUE -> right-justify columns; FALSE -> left-justify
-    right = TRUE,
-    # maximumn number of rows to print
-    max = NULL,
-    # string with justification of each column
-    justification = NULL,
-    # TRUE to center on page
-    centered = TRUE,
-    # optional caption
-    caption = NULL,
-    # h(inline); b(bottom); t (top) or p (separate page)
-    anchor = "h",
-    # set underscore_whack to TRUE to escape underscores
-    underscore_whack = TRUE
+latex_tabbing_row <- function(
+    v,
+    extra = "",
+    underscore_whack = TRUE,
+    action = cat0
   ) {
-    if (is.null(justification))
-      justification <-
-        Reduce(
-          f = paste,
-          x = rep_len(if (right) "r" else "l", length(colnames(x)))
-          )
-    n <- length(rownames(x))
-    if (length(x) == 0L) {
-      cat(
-        sprintf(
-          # if n is one, use singular 'row', else use plural 'rows'
-          ngettext(
-            n,
-            "data frame with 0 columns and %d row",
-            "data frame with 0 columns and %d rows"
-            ),
-          n
-          ),
-        "\n",
-        sep = ""
-        )
-    } else if (n == 0L) {
-      cat("0 rows for:\n")
-      latex_itemized_list(
-        v = names(x),
-        underscore_whack = underscore_whack
-        )
-    } else {
-      if (is.null(max))
-        max <- getOption("max.print", 99999L)
-      if (!is.finite(max))
-        stop("invalid 'max' / getOption(\"max.print\"): ",
-          max)
-      omit <- (n0 <- max %/% length(x)) < n
-      m <- as.matrix(
-        format.data.frame(
-          if (omit) x[seq_len(n0), , drop = FALSE] else x,
-          digits = digits,
-          na.encode = FALSE
-          )
-        )
-      cat(
-        # h(inline); b(bottom); t (top) or p (separate page)
-        paste0("\\begin{table}[", anchor, "]\n")
-        )
-      if (!is.null(caption))
-        cat(paste0(" \\caption{", caption, "}"))
-      if (centered) cat("\\centering\n")
-      cat(
-        paste(
-          " \\begin{tabular}{",
-          justification,
-          "}\n",
-          sep = ""
-          )
-        )
-      # ref: https://tex.stackexchange.com/a/50353
-      #   Describes use of \rule{0pt}{3ex}
-      if (!is.null(caption))
-        cat("\\B \\\\ \\hline\\hline\n")
-      # ref for top and bottom struts: https://tex.stackexchange.com/a/50355
-      latex_table_row(
-        v = colnames(m),
-        extra = "\\T\\B",
-        underscore_whack = underscore_whack
-        )
-      cat("\\hline\n")
-      for (i in seq_len(length(m[, 1]))) {
-        latex_table_row(
-        v = m[i, ],
-        underscore_whack = underscore_whack
-        )
-      }
-      cat(
-        paste(
-          " \\end{tabular}",
-          "\\end{table}",
-          sep = "\n"
-          )
-        )
-      if (omit)
-        cat(" [ reached 'max' / getOption(\"max.print\") -- omitted",
-          n - n0, "rows ]\n")
-    }
-    invisible(x)
+    # latex_collapsed_vector applies action to result of paste0;
+    #   by default, action = cat;
+    #   hence, a scalar string is assigned to v_collapsed
+    v_collapsed <-
+      latex_collapsed_vector(
+        "} \\> \\tabfill{",
+        v,
+        underscore_whack,
+        action = paste0
+      )
+    action(
+      "\\tabfill{",
+      v_collapsed,
+      "}",
+      extra,
+      " \\\\\n"
+    )
+  }
+
+# N.B. use con = "" to emulate regular cat
+fcat0 <-
+  function(..., sprtr = " ", cnnctn = file()) {
+    cat0(..., sep = sprtr, file = cnnctn)
+    invisible(cnnctn)
   }
 
 hypersub <-
@@ -356,32 +1300,39 @@
     hyper <- tolower(s)
     hyper <- gsub("[^a-z0-9]+", "-", hyper)
     hyper <- gsub("[-]+",       "-", hyper)
+    hyper <- gsub("[_]+",       "-", hyper)
     hyper <- sub("^[-]",        "",  hyper)
     hyper <- sub("[-]$",        "",  hyper)
     return(hyper)
   }
 
-subsection_header <-
-  function(s) {
-    hyper <- hypersub(s)
-    cat(
-      sprintf(
-        "\\hypertarget{%s}\n{\\subsection{%s}\\label{%s}}\n",
-        hyper, s, hyper
-        )
-      )
+table_href <- function(s = "offset", caption = "") {
+    paste0("\\hyperlink{table.\\arabic{", s, "}}{Table \\arabic{", s, "}}")
+  }
+
+table_offset <- function(i = 0, s = "offset", new = FALSE) {
+    paste0(
+      if (new) paste0("\\newcounter{", s, "}\n") else "",
+      "\\setcounter{", s, "}{\\value{table}}\n",
+      paste0(if (i > 0) rep(paste0("\\stepcounter{", s, "}"), i), "\n")
+    )
   }
 
-subsubsection_header <-
-  function(s) {
+a_section_header <-
+  function(s, prefix = "") {
     hyper <- hypersub(s)
-    cat(
-      sprintf(
-        "\\hypertarget{%s}\n{\\subsubsection{%s}\\label{%s}}\n",
-        hyper, s, hyper
-        )
+    my_subsection_header <- sprintf(
+      "\\hypertarget{%s}{\\%ssection{%s}\\label{%s}}\n",
+      hyper,
+      prefix,
+      gsub("_", "\\_", s, fixed = TRUE),
+      hyper
       )
+    my_subsection_header
   }
+section_header <- function(s) a_section_header(s, "")
+subsection_header <- function(s) a_section_header(s, "sub")
+subsubsection_header <- function(s) a_section_header(s, "subsub")
 
 ### SQLite functions
 
@@ -419,10 +1370,54 @@
 
 ### KSEA functions and helpers
 
-# Adapted from KSEAapp::KSEA.Scores to allow retrieval of:
-# - maximum log2(FC)
+#' The KSEA App Analysis (KSEA Kinase Scores Only)
+#'
+#' Compute KSEA kinase scores and statistics from phoshoproteomics data input
+#' Adapted from KSEAapp::KSEA.Scores to allow retrieval of maximum log2(FC)
+#'
+#' Result is an R data.frame with column names
+#'   "Kinase.Gene", "mS", "Enrichment", "m", "z.score", "p.value", "FDR"
+#' "Please refer to the original Casado et al. publication for detailed
+#'   description of these columns and what they represent:
+#'
+#'   - Kinase.Gene indicates the gene name for each kinase.
+#'   - mS          represents the mean log2(fold change) of all the
+#'                   kinase's substrates.
+#'   - Enrichment  is the background-adjusted value of the kinase's mS.
+#'   - m           is the total number of detected substrates
+#'                   from the experimental dataset for each kinase.
+#'   - z.score     is the normalized score for each kinase, weighted by
+#'                   the number of identified substrates.
+#'   - p.value     represents the statistical assessment for the z.score.
+#'   - FDR         is the p-value adjusted for multiple hypothesis testing
+#'                   using the Benjamini & Hochberg method."
+#'
+#' @param ksdata           the Kinase-Substrate dataset uploaded from the file
+#'                           prefaced with "PSP&NetworKIN_"
+#'                           available from github.com/casecpb/KSEA/
+#' @param px               the experimental data file formatted as described in
+#'                           the KSEA.Complete() documentation
+#' @param networkin        a binary input of TRUE or FALSE, indicating whether
+#'                           or not to include NetworKIN predictions, where
+#'                           \code{NetworKIN = TRUE}
+#'                           means include NetworKIN predictions
+#' @param networkin_cutoff a numeric value between 1 and infinity setting
+#'                           the minimum NetworKIN score
+#'                           (this can be omitted if NetworKIN = FALSE)
+#'
+#' @return                 creates a new R data.frame with all the KSEA kinase
+#'                           scores, along with each one's statistical
+#'                           assessment, as described herein.
+#'
+#' @references
+#'
+#' Casado et al. (2013) Sci Signal. 6(268):rs6
+#'
+#' Hornbeck et al. (2015) Nucleic Acids Res. 43:D512-20
+#'
+#' Horn et al. (2014) Nature Methods 11(6):603-4
+#'
 ksea_scores <- function(
-
   # For human data, typically, ksdata = KSEAapp::ksdata
   ksdata,
 
@@ -444,9 +1439,13 @@
 
   # A numeric value between 1 and infinity setting the minimum NetworKIN
   #   score (can be left out if networkin = FALSE)
-  networkin_cutoff
+  networkin_cutoff,
+
+  # Minimum substrate count, necessary to adjust the p-value appropriately.
+  minimum_substrate_count
 
 ) {
+  # no px$FC should be <= 0, but abs(px$FC) is used below as a precaution.
   if (length(grep(";", px$Residue.Both)) == 0) {
     # There are no Residue.Both entries having semicolons, so new is
     #   simply px except two columns are renamed and a column is added
@@ -492,18 +1491,53 @@
     # Eliminate rows having missing values (e.g., non-imputed data)
     new <- new[complete.cases(new$log2_fc), ]
   }
-  if (networkin == TRUE) {
-    # When NetworKIN is true, filter on NetworKIN.cutoff which includes
-    #   PhosphoSitePlus data *because its networkin_score is set to Inf*
-    ksdata_filtered <- ksdata[grep("[a-z]", ksdata$Source), ]
-    ksdata_filtered <- ksdata_filtered[
-      (ksdata_filtered$networkin_score >= networkin_cutoff), ]
-  } else {
-    # Otherwise, simply use PhosphSitePlus rows
-    ksdata_filtered <- ksdata[
-      grep("PhosphoSitePlus", ksdata$Source), ]
+  # At this point, new$log2_fc is signed according to which contrast has
+  #   the greater intensity
+  # To take the magnitude into account without taking the direction into
+  #   account, set params$kseaUseAbsoluteLog2FC to TRUE
+  #
+  # Should KSEA be performed aggregating signed log2FC or absolute?
+  # FALSE use raw log2FC for KSEA as for KSEAapp::KSEA.Scores
+  if (params$kseaUseAbsoluteLog2FC) {
+    # TRUE  use abs(log2FC) for KSEA as Justin requested; this is a
+    #         justifiable deviation from the KSEAapp::KSEA.Scores algorithm.
+    new$log2_fc <- abs(new$log2_fc)
+  }
+
+  monitor_filtration_on_stderr <- TRUE
+  if (monitor_filtration_on_stderr) {
+    # set to TRUE to monitor filtration on stderr
+    sink(stderr())
+    cat(see_variable(networkin, "\n"))
   }
-  # Join the two data.frames on common columns SUB_GENE and SUB_MOD_RSD
+  ksdata_filtered <-
+    sqldf(
+      sprintf("%s %s",
+        "select * from ksdata where not Source = 'NetworKIN'",
+        if (networkin)
+          sprintf("or networkin_score >= %d", networkin_cutoff)
+        else
+          ""
+        )
+      )
+  if (monitor_filtration_on_stderr) {
+    cat(see_variable(sqldf(
+      "select count(*), Source from ksdata group by Source"), "\n"))
+    cat(see_variable(sqldf(
+      "select count(*), Source from ksdata_filtered group by Source"), "\n"))
+    sink()
+  }
+
+  ############################################################################
+  # Line numbers below refer to lines of:
+  #   https://github.com/casecpb/KSEAapp/blob/master/R/KSEA.Scores.R
+  # I would put the original line in a comment but then lint would complain...
+  # - Indeed, I had to rename all the variables because lint didn't like names
+  #   containing periods or capital letters.
+  # ACE
+  ############################################################################
+  #
+  # (1) Join the two data.frames on common columns SUB_GENE and SUB_MOD_RSD
   #   colnames of ksdata_filtered:
   #     "KINASE" "KIN_ACC_ID" "GENE" "KIN_ORGANISM" "SUBSTRATE" "SUB_GENE_ID"
   #     "SUB_ACC_ID" "SUB_GENE" "SUB_ORGANISM" "SUB_MOD_RSD" "SITE_GRP_ID"
@@ -516,6 +1550,8 @@
   #       INNER JOIN new b
   #         ON a.SUB_GENE = b.SUB_GENE
   #           AND a.SUB_MOD_RSD = b.SUB_MOD_RSD
+  #   (KSEA.Scores.R line # 105)
+  #   "Extract KSData.filtered annotations that are only found in new"
   ksdata_dataset <- base::merge(ksdata_filtered, new)
   #   colnames of ksdata_dataset:
   #     "KINASE"      "KIN_ACC_ID"   "GENE"       "KIN_ORGANISM" "SUBSTRATE"
@@ -523,24 +1559,31 @@
   #     "SITE_GRP_ID" "SITE_...7_AA" "networkin_score"  "Source" "Protein"
   #     "Peptide"     "p"            "FC"         "log2_fc" (uniprot_no_isoform)
   # Re-order dataset; prior to accounting for isoforms
+  #   (KSEA.Scores.R line # 106)
   ksdata_dataset <- ksdata_dataset[order(ksdata_dataset$GENE), ]
   # Extract non-isoform accession in UniProtKB
+  #   (KSEA.Scores.R line # 107)
   ksdata_dataset$uniprot_no_isoform <- sapply(
     ksdata_dataset$KIN_ACC_ID,
     function(x) unlist(strsplit(as.character(x), split = "-"))[1]
     )
+  #   "last expression collapses isoforms ... for easy processing"
   # Discard previous results while selecting interesting columns ...
+  #   (KSEA.Scores.R line # 110)
   ksdata_dataset_abbrev <- ksdata_dataset[, c(5, 1, 2, 16:19, 14)]
   # Column names are now:
   #   "GENE"       "SUB_GENE"        "SUB_MOD_RSD"    "Peptide" "p"
   #   "FC" "log2_fc" "Source"
   # Make column names human-readable
+  #   (KSEA.Scores.R line # 111)
   colnames(ksdata_dataset_abbrev) <- c(
     "Kinase.Gene", "Substrate.Gene", "Substrate.Mod", "Peptide", "p",
     "FC", "log2FC", "Source"
     )
   # SELECT * FROM ksdata_dataset_abbrev
   #   ORDER BY Kinase.Gene, Substrate.Gene, Substrate.Mod, p
+  #   (KSEA.Scores.R line # 112)
+  #   "Extract KSData.filtered annotations that are only found in new"
   ksdata_dataset_abbrev <-
     ksdata_dataset_abbrev[
       order(
@@ -549,6 +1592,7 @@
         ksdata_dataset_abbrev$Substrate.Mod,
         ksdata_dataset_abbrev$p),
       ]
+  if (print_nb_messages) nbe(see_variable(ksdata_dataset_abbrev))
   # First aggregation step to account for multiply phosphorylated peptides
   #   and differing peptide sequences; the goal here is to combine results
   #   for all measurements of the same substrate.
@@ -560,12 +1604,16 @@
   #   ORDER BY `Kinase.Gene`;
   # in two steps:
   # (1) compute average log_2(fold-change)
+  #   "take the mean of the log2FC amongst phosphosite duplicates"
+  #   (KSEA.Scores.R line # 115)
   ksdata_dataset_abbrev <- aggregate(
     log2FC ~ Kinase.Gene + Substrate.Gene + Substrate.Mod + Source,
     data = ksdata_dataset_abbrev,
     FUN = mean
     )
+  if (print_nb_messages) nbe(see_variable(ksdata_dataset_abbrev))
   # (2) order by Kinase.Gene
+  #   (KSEA.Scores.R line # 117)
   ksdata_dataset_abbrev <-
     ksdata_dataset_abbrev[order(ksdata_dataset_abbrev$Kinase.Gene), ]
   # SELECT  `Kinase.Gene`, count(*)
@@ -573,9 +1621,14 @@
   #  GROUP BY `Kinase.Gene`;
   # in two steps:
   # (1) Extract the list of Kinase.Gene names
+  #   "@@@@@@@@@@@@@@@@@@@@"
+  #   "Do analysis for KSEA"
+  #   "@@@@@@@@@@@@@@@@@@@@"
+  #   (KSEA.Scores.R line # 124)
   kinase_list <- as.vector(ksdata_dataset_abbrev$Kinase.Gene)
   # (2) Convert to a named list of counts of kinases in ksdata_dataset_abrev,
   #   named by Kinase.Gene
+  #   (KSEA.Scores.R line # 125)
   kinase_list <- as.matrix(table(kinase_list))
   # Second aggregation step to account for all substrates per kinase
   # CREATE TABLE mean_fc
@@ -583,50 +1636,123 @@
   # SELECT  `Kinase.Gene`, avg(log2FC) AS log2FC
   #   FROM  ksdata_dataset_abbrev
   #   GROUP BY `Kinase.Gene`
-  mean_fc <- aggregate(
-    log2FC ~ Kinase.Gene,
-    data = ksdata_dataset_abbrev,
-    FUN = mean
-    )
-  # mean_fc columns: "Kinase.Gene", "log2FC"
-  if (FALSE) {
-    # I need to re-think this; I was trying to find the most-represented
-    #   peptide, but that horse has already left the barn
-    # SELECT  `Kinase.Gene`, max(abs(log2FC)) AS log2FC
-    #   FROM  ksdata_dataset_abbrev
-    #   GROUP BY `Kinase.Gene`
-    max_fc <- aggregate(
+  #   (KSEA.Scores.R line # 127)
+  if (print_nb_messages) nb(see_variable(ksdata_dataset_abbrev), "\n")
+  mean_fc <-
+    aggregate(
       log2FC ~ Kinase.Gene,
       data = ksdata_dataset_abbrev,
-      FUN = function(r) max(abs(r))
+      FUN = mean
       )
-  }
+  if (print_nb_messages) nbe(see_variable(mean_fc), "\n")
+
+  # for contrast j
+  #   for each kinase i
+  #     extract log2 of fold-change (from `new` above)
+  #     (used in KSEA.Scores.R lines # 130 & 132)
+  log2_fc_j_each_i <-
+    new$log2_fc
+
+  # for contrast j
+  #   for all kinases i
+  #     compute mean of abs(log2 of fold-change)
+  #     (used in KSEA.Scores.R lines # 130)
+  mean_abs_log2_fc_j_all_i <-
+    mean(abs(log2_fc_j_each_i), na.rm = TRUE)
+
+  # for contrast j
+  #   for all kinases i
+  #     compute mean of log2 of fold-change
+  #     (used in KSEA.Scores.R lines # 132)
+  mean_log2_fc_j_all_i <-
+    mean(log2_fc_j_each_i, na.rm = TRUE)
+
+  # Reorder mean_fc, although I don't see why
+  #   (KSEA.Scores.R line 128
+  mean_fc <- mean_fc[order(mean_fc[, 1]), ]
+  # mean_fc columns so far: "Kinase.Gene", "log2FC"
+  # - Kinase.Gene
+  #               indicates the gene name for each kinase.
 
   # Create column 3: mS
-  mean_fc$m_s <- mean_fc[, 2]
+  # - mS
+  #               represents the mean log2(fold change) of all the
+  #               kinase's substrates.
+  #   (KSEA.Scores.R line # 129)
+  mean_fc$m_s <-
+    mean_fc_m_s <- mean_fc[, 2]
+
   # Create column 4: Enrichment
-  mean_fc$enrichment <- mean_fc$m_s / abs(mean(new$log2_fc, na.rm = TRUE))
-  # Create column 5: m, count of substrates
-  mean_fc$m <- kinase_list
+  # - Enrichment
+  #               is the background-adjusted value of the kinase's mS.
+  #   (KSEA.Scores.R line # 130)
+  mean_fc$enrichment <-
+    mean_fc_m_s / mean_abs_log2_fc_j_all_i
+
+  # Create column 5: m, count of substrates of kinase (count of j for i)
+  # - m
+  #               is the total number of detected substrates
+  #               from the experimental dataset for each kinase.
+  #   (KSEA.Scores.R line # 131)
+  mean_fc$m <-
+    mean_fc_m <- kinase_list
+
+
   # Create column 6: z-score
-  mean_fc$z_score <- (
-    (mean_fc$m_s - mean(new$log2_fc, na.rm = TRUE)) *
-      sqrt(mean_fc$m)) / sd(new$log2_fc, na.rm = TRUE)
+  # - z.score
+  #               is the normalized score for each kinase, weighted by
+  #               the number of identified substrates.
+  #   (KSEA.Scores.R line # 132)
+  mean_fc$z_score <-
+    (mean_fc_m_s - mean_log2_fc_j_all_i) * sqrt(mean_fc_m) /
+      sd(log2_fc_j_each_i, na.rm = TRUE)
+
   # Create column 7: p-value, deduced from z-score
-  mean_fc$p_value <- pnorm(-abs(mean_fc$z_score))
+  # - p.value
+  #               represents the statistical assessment for the z.score.
+  #   (KSEA.Scores.R line # 133)
+  # "one-tailed p-value"
+  mean_fc$p_value <-
+    pnorm(-abs(mean_fc$z_score))
+
+  # zap excluded kinases; this must be done before adjusting p-value
+  if (TRUE) {
+    mean_fc <-
+      mean_fc[
+        mean_fc$m >= minimum_substrate_count,
+        ,
+        drop = FALSE
+      ]
+  }
+
+  #ACE nb(see_variable(nrow(mean_fc)), "\n")
   # Create column 8: FDR, deduced by Benjamini-Hochberg adustment from p-value
-  mean_fc$fdr <- p.adjust(mean_fc$p_value, method = "fdr")
-
-  # Remove log2FC column, which is duplicated as mS
-  mean_fc <- mean_fc[order(mean_fc$Kinase.Gene), -2]
+  # - FDR
+  #               is the p-value adjusted for multiple hypothesis testing
+  #               using the Benjamini & Hochberg method."
+  #   (KSEA.Scores.R line # 134)
+  mean_fc$fdr <-
+    p.adjust(mean_fc$p_value, method = "fdr")
+
+  # It makes no sense to leave Z-scores negative when using
+  #   absolute value of fold-change
+  if (params$kseaUseAbsoluteLog2FC) {
+    mean_fc$z_score <- abs(mean_fc$z_score)
+  }
+
+  # Remove second column (log2FC), which is duplicated as mS
+  #   (KSEA.Scores.R line # 136)
+  mean_fc <-
+    mean_fc[order(mean_fc$Kinase.Gene), -2]
   # Correct the column names which we had to hack because of the linter...
   colnames(mean_fc) <- c(
     "Kinase.Gene", "mS", "Enrichment", "m", "z.score", "p.value", "FDR"
     )
+  #   (KSEA.Scores.R line # 138)
   return(mean_fc)
 }
 
-low_fdr_barplot <- function(
+ksea_low_fdr_barplot_factory <- function(
   rslt,
   i_cntrst,
   i,
@@ -658,57 +1784,94 @@
       "p.value" = {
         k$p_value
         },
-      stop(
-        sprintf(
-          "Unexpected cutoff statistic %s rather than 'FDR' or 'p.value'",
-          ksea_cutoff_statistic
+      {
+        cat(
+          sprintf(
+            "Unexpected cutoff statistic %s rather than 'FDR' or 'p.value'",
+            ksea_cutoff_statistic
+            )
           )
-        )
-      )
+        param_df_exit()
+        knitr::knit_exit()
+      }
+    )
 
     k <- k[selector < ksea_cutoff_threshold, ]
-
-    if (nrow(k) > 1) {
-      op <- par(mai = c(1, 1.5, 0.4, 0.4))
+    nrow_k <- nrow(k)
+
+    #ACE nbe(see_variable(fdr_barplot_dataframe <- k))
+
+    if (nrow_k > 0) {
+      max_nchar_rowname <- max(nchar(rownames(k)))
+      my_cex_names <- 1.0 / (1 + nrow_k / 50)
+
+      if (print_trace_messages) cat_margins("Initially")
+      if (print_trace_messages) cat_variable(nrow_k,            "\n\n", 0)
+      if (print_trace_messages) cat_variable(my_cex_names,      "\n\n", 0)
+      if (print_trace_messages) cat_variable(max_nchar_rowname, "\n\n", 0)
+
+      # fin: The figure region dimensions, (width, height), in inches.
+      # mar: A numerical vector of the form c(bottom, left, top, right)
+      #      that gives the number of lines of margin to be specified
+      #      on the four sides of the plot; default: c(5, 4, 4, 2) + 0.1
+
+# mar: The figure region dimensions, (width, height), in inches.
       numeric_z_score <- as.numeric(k$z_score)
-      z_score_order <- order(numeric_z_score)
+      bar_order <- order(-as.numeric(k$p_value))
       kinase_name <- k$kinase_gene
       long_caption <-
         sprintf(
-          "Kinase z-score, %s < %s, %s",
+          "Kinase z-score, %s, KSEA %s < %s",
+          caption,
           ksea_cutoff_statistic,
-          ksea_cutoff_threshold,
-          caption
+          ksea_cutoff_threshold
           )
       my_cex_caption <- 65.0 / max(65.0, nchar(long_caption))
-      cat("\n\\clearpage\n")
-      barplot(
-        height = numeric_z_score[z_score_order],
-        border = NA,
-        xpd = FALSE,
-        cex.names = 1.0,
-        cex.axis = 1.0,
-        main = long_caption,
-        cex.main = my_cex_caption,
-        names.arg = kinase_name[z_score_order],
-        horiz = TRUE,
-        srt = 45,
-        las = 1)
-      par(op)
+      # return a function that draws the plot
+      function() {
+        par_fin <- par("fin") # vector of width_in_inches and height_in_inches)
+        op <- par(
+          bg = if (print_trace_messages) "yellow" else "white",
+          fin = c(par_fin[1], min(par_fin[2], 2.5 + nrow_k / 6)),
+          mar = par("mar") +
+            c(3 / nrow_k, (1 + max_nchar_rowname * my_cex_names) / 2, 0, 0)
+            # bottom,     left,                                     top, right
+        )
+        on.exit(par(op))
+        if (print_trace_messages) cat_margins("Eventually")
+
+        barplot(
+          height = numeric_z_score[bar_order],
+          border = NA,
+          xpd = FALSE,
+          cex.names = my_cex_names,
+          main = long_caption,
+          cex.main = my_cex_caption,
+          names.arg = kinase_name[bar_order],
+          horiz = TRUE,
+          srt = 45,
+          las = 1,
+          cex.axis = 0.9
+        )
+      }
     }
+  } else {
+    no_op
   }
 }
 
 # note that this adds elements to the global variable `ksea_asterisk_hash`
 
-low_fdr_print <- function(
+ksea_low_fdr_print <- function(
   rslt,
   i_cntrst,
   i,
   a_level,
   b_level,
   fold_change,
-  caption
+  caption,
+  write_db = TRUE, # if TRUE, write to DB, else print table
+  anchor = c(const_table_anchor_p, const_table_anchor_t)
 ) {
   rslt_score_list_i <- rslt$score_list[[i]]
   if (!is.null(rslt_score_list_i)) {
@@ -734,13 +1897,17 @@
       "p.value" = {
         k$p_value
         },
-      stop(
-        sprintf(
-          "Unexpected cutoff statistic %s rather than 'FDR' or 'p.value'",
-          ksea_cutoff_statistic
+      {
+        cat(
+          sprintf(
+            "Unexpected cutoff statistic %s rather than 'FDR' or 'p.value'",
+            ksea_cutoff_statistic
+            )
           )
-        )
-      )
+        param_df_exit()
+        knitr::knit_exit()
+      }
+    )
 
     k <- k[selector < ksea_cutoff_threshold, ]
     # save kinase names to ksea_asterisk_hash
@@ -748,110 +1915,175 @@
       ksea_asterisk_hash[[kinase_name]] <- 1
     }
 
-    db_write_table_overwrite <- (i_cntrst < 2)
-    db_write_table_append    <- !db_write_table_overwrite
-    RSQLite::dbWriteTable(
-      conn = db,
-      name = "contrast_ksea_scores",
-      value = contrast_ksea_scores,
-      append = db_write_table_append
+    if (write_db) {
+      db_write_table_overwrite <- (i_cntrst < 2)
+      db_write_table_append    <- !db_write_table_overwrite
+      RSQLite::dbWriteTable(
+        conn = db,
+        name = "contrast_ksea_scores",
+        value = contrast_ksea_scores,
+        append = db_write_table_append
+        )
+      ""
+    } else {
+      selector <- switch(
+        ksea_cutoff_statistic,
+        "FDR" = {
+          contrast_ksea_scores$fdr
+          },
+        "p.value" = {
+          contrast_ksea_scores$p_value
+          },
+        {
+          cat(
+            sprintf(
+              "Unexpected cutoff statistic %s rather than 'FDR' or 'p.value'",
+              ksea_cutoff_statistic
+              )
+            )
+          param_df_exit()
+          knitr::knit_exit()
+        }
       )
-    selector <- switch(
-      ksea_cutoff_statistic,
-      "FDR" = {
-        contrast_ksea_scores$fdr
-        },
-      "p.value" = {
-        contrast_ksea_scores$p_value
-        },
-      stop(
-        sprintf(
-          "Unexpected cutoff statistic %s rather than 'FDR' or 'p.value'",
-          ksea_cutoff_statistic
+      if (print_nb_messages) nbe(see_variable(contrast_ksea_scores))
+      output_df <- contrast_ksea_scores[
+        selector < ksea_cutoff_threshold,
+        c("kinase_gene", "mean_log2_fc", "enrichment", "substrate_count",
+          "z_score", "p_value", "fdr")
+        ]
+      output_df$kinase_gene <-
+        gsub(
+          "_",
+          "\\\\_",
+          output_df$kinase_gene
+        )
+      colnames(output_df) <-
+        c(
+          colnames(output_df)[1],
+          colnames(output_df)[2],
+          "enrichment",
+          "m_s",
+          "z_score",
+          "p_value",
+          "fdr"
+        )
+      #ACE output_order <- with(output_df, order(fdr))
+      output_order <- with(output_df, order(p_value))
+      output_df <- output_df[output_order, ]
+
+      output_df[, 2] <- sprintf("%0.3g", output_df[, 2])
+      output_df$fdr <- sprintf("%0.4f", output_df$fdr)
+      output_df$p_value <- sprintf("%0.2e", output_df$p_value)
+      output_df$z_score <- sprintf("%0.2f", output_df$z_score)
+      output_df$m_s <- sprintf("%d", output_df$m_s)
+      output_df$enrichment <- sprintf("%0.3g", output_df$enrichment)
+      output_ncol <- ncol(output_df)
+      colnames(output_df) <-
+        c(
+          "Kinase",
+          "\\(\\overline{{\\lvert}\\log_2 (\\text{fold-change}){\\rvert}}\\)",
+          "Enrichment",
+          "Substrates",
+          "z-score",
+          "p-value",
+          "FDR"
+        )
+      selector <- switch(
+        ksea_cutoff_statistic,
+        "FDR" = {
+          rslt$score_list[[i]]$FDR
+          },
+        "p.value" = {
+          rslt$score_list[[i]]$p.value
+          },
+        {
+          cat(
+            sprintf(
+              "Unexpected cutoff statistic %s rather than 'FDR' or 'p.value'",
+              ksea_cutoff_statistic
+              )
+            )
+          param_df_exit()
+          knitr::knit_exit()
+        }
+      )
+      if (sum(selector < ksea_cutoff_threshold) > 0) {
+        if (print_nb_messages) nbe(see_variable(output_df))
+        math_caption <- gsub("{", "\\{", caption,      fixed = TRUE)
+        math_caption <- gsub("}", "\\}", math_caption, fixed = TRUE)
+        # with (
+        #   output_df,
+        #   )
+        if (TRUE) {
+          output_df$Kinase <- whack_underscores(output_df$Kinase)
+          data_frame_tabbing_latex(
+            x = output_df,
+            # vector of tab stops, in inches
+            tabstops = c(1.0, 1.2, 1.0, 1.0, 1.0, 1.0),
+            # vector of headings, registered with tab-stops
+            headings = colnames(output_df),
+            # digits to pass to format.data.frame
+            digits = NULL,
+            # maximumn number of rows to print
+            max = NULL,
+            # optional caption
+            caption = sprintf(
+              "\\text{%s}, KSEA %s < %s",
+              math_caption,
+              ksea_cutoff_statistic,
+              ksea_cutoff_threshold
+              ),
+            # set underscore_whack to TRUE to escape underscores
+            underscore_whack = FALSE,
+            # flag for landscape mode
+            landscape = FALSE,
+            # flag indicating that subsubsection should be used for caption
+            #   rather than subsection
+            use_subsubsection_header = TRUE,
+            # character-size indicator; for possible values, see:
+            #   https://tug.org/texinfohtml/latex2e.html#Font-sizes
+            charactersize = "small",
+            # set verbatim to TRUE to debug output
+            verbatim = FALSE
+            )
+        } else {
+          data_frame_table_latex(
+            x = output_df,
+            justification = "l c c c c c c",
+            centered = TRUE,
+            caption = sprintf(
+              "\\text{%s}, KSEA %s < %s",
+              math_caption,
+              ksea_cutoff_statistic,
+              ksea_cutoff_threshold
+              ),
+            anchor = anchor,
+            underscore_whack = FALSE
+            )
+        }
+      } else {
+        cat(
+          sprintf(
+            "\\break
+            No kinases had
+            \\(\\text{KSEA %s}_\\text{enrichment} < %s\\)
+            for contrast %s\\hfill\\break\n",
+            ksea_cutoff_statistic,
+            ksea_cutoff_threshold,
+            caption
           )
         )
-      )
-    output_df <- contrast_ksea_scores[
-      selector < ksea_cutoff_threshold,
-      c("kinase_gene", "mean_log2_fc", "enrichment", "substrate_count",
-        "z_score", "p_value", "fdr")
-      ]
-    output_order <- with(output_df, order(mean_log2_fc, kinase_gene))
-    output_df <- output_df[output_order, ]
-    colnames(output_df) <-
-      c(
-        colnames(output_df)[1],
-        colnames(output_df)[2],
-        "enrichment",
-        "m_s",
-        "z_score",
-        "p_value",
-        "fdr"
-      )
-    output_df$fdr <- sprintf("%0.4f", output_df$fdr)
-    output_df$p_value <- sprintf("%0.2e", output_df$p_value)
-    output_df$z_score <- sprintf("%0.2f", output_df$z_score)
-    output_df$m_s <- sprintf("%d", output_df$m_s)
-    output_df$enrichment <- sprintf("%0.2f", output_df$enrichment)
-    output_ncol <- ncol(output_df)
-    colnames(output_df) <-
-      c(
-        "Kinase",
-        "\\(\\overline{\\log_2 (|\\text{fold-change}|)}\\)",
-        "Enrichment",
-        "Substrates",
-        "z-score",
-        "p-value",
-        "FDR"
-      )
-    selector <- switch(
-      ksea_cutoff_statistic,
-      "FDR" = {
-        rslt$score_list[[i]]$FDR
-        },
-      "p.value" = {
-        rslt$score_list[[i]]$p.value
-        },
-      stop(
-        sprintf(
-          "Unexpected cutoff statistic %s rather than 'FDR' or 'p.value'",
-          ksea_cutoff_statistic
-          )
-        )
-      )
-    if (sum(selector < ksea_cutoff_threshold) > 0) {
-      math_caption <- gsub("{", "\\{", caption,      fixed = TRUE)
-      math_caption <- gsub("}", "\\}", math_caption, fixed = TRUE)
-      data_frame_latex(
-        x = output_df,
-        justification = "l c c c c c c",
-        centered = TRUE,
-        caption = sprintf(
-          "\\text{%s}, %s < %s",
-          math_caption,
-          ksea_cutoff_statistic,
-          ksea_cutoff_threshold
-          ),
-        anchor = const_table_anchor_p
-        )
-    } else {
-      cat(
-        sprintf(
-          "\\break
-          No kinases had
-          \\(\\text{%s}_\\text{enrichment} < %s\\)
-          for contrast %s\\hfill\\break\n",
-          ksea_cutoff_statistic,
-          ksea_cutoff_threshold,
-          caption
-        )
-      )
+      }
     }
+  } else {
+    ""
   }
 }
 
 # create_breaks is a helper for ksea_heatmap
 create_breaks <- function(merged_scores) {
+  if (sum(!is.na(merged_scores)) < 2)
+    return(NULL)
   if (min(merged_scores, na.rm = TRUE) < -1.6) {
     breaks_neg <- seq(-1.6, 0, length.out = 30)
     breaks_neg <-
@@ -889,14 +2121,135 @@
   return(color_breaks)
 }
 
+hm2plus <- function(
+  x,
+  mat = matrix(
+          c(
+            c(0, 4, 0),
+            c(0, 3, 3),
+            c(2, 1, 1)
+          ),
+          nrow = 3,
+          ncol = 3,
+          byrow = TRUE
+        ),
+  denwid = 0.5,
+  denhgt = 0.15,
+  widths = c(0.5, 2.5, 1.5),
+  heights = c(0.4, 0.15, 3.95),
+  divergent = FALSE,
+  notecol = "grey50",
+  trace = "none",
+  margins = c(6, 20),
+  srtcol       = 90,
+  srtrow       = 0,
+  density_info = "none",
+  key_xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"),
+  key_par = list(),
+  hclustfun = hclust,
+  ...
+) {
+
+  varargs <- list(...)
+  if (FALSE) # this is to avoid commenting out code to pass linting...
+    my_hm2 <- latex_show_invocation(heatmap.2, head_patch = FALSE)
+  else
+    my_hm2 <- heatmap.2
+
+  x <- as.matrix(x)
+  if (sum(!is.na(x)) < 1)
+    return(NULL)
+  color_count <- 1 + max(64, length(as.vector(x))) # 8 was not enough
+  break_count <- 1 + color_count
+  min_nonax <- min(x, na.rm = TRUE)
+  max_nonax <- max(x, na.rm = TRUE)
+  if (print_nb_messages) nb("within hm2plus", see_variable(divergent), "\n")
+  if (divergent) {
+    zlim <- max(abs(min_nonax), abs(max_nonax))
+    if (print_nb_messages) nb(see_variable(pre_zlim <- zlim, "\n"))
+    breaks <- (zlim) / (break_count:1)
+    if (print_nb_messages) nb(see_variable(breaks, "\n"))
+    breaks <- breaks - median(breaks)
+    zlim <- c(-zlim, zlim)
+    if (print_nb_messages) nb(see_variable(zlim, "\n"))
+  } else {
+    zlim <- max(abs(min_nonax), abs(max_nonax))
+    if (print_nb_messages) nb(see_variable(pre_zlim <- zlim, "\n"))
+    breaks <- zlim / (break_count:1)
+    if (print_nb_messages) nb(see_variable(breaks, "\n"))
+    if  (max_nonax < 0) {
+      breaks <- breaks - zlim
+      zlim <- c(-zlim, 0)
+    } else {
+      zlim <- c(0, zlim)
+    }
+    if (print_nb_messages) nb(see_variable(zlim, "\n"))
+  }
+  nonax <- x
+  nonax[is.na(x)] <- min_nonax
+  if (is.null(widths)) widths <- c(denwid, 4 - denwid, 1.5)
+  if (is.null(heights)) heights <- c(0.4, denhgt, 4.0)
+  colors <-
+    if (divergent && min_nonax < 0) {
+      # divergent colors on both sides of zero
+      colorRampPalette(c("red", "white", "blue"))(color_count)
+    } else if (divergent && min_nonax > 0) {
+      # "divergent" colors > zero
+      colorRampPalette(c("white", "blue"))(color_count)
+    } else if (divergent && max_nonax < 0) {
+      # "divergent" colors < zero
+      colorRampPalette(c("red", "white"))(color_count)
+    } else {
+      # "non-divergent" colors including zero
+      hcl.colors(color_count, "YlOrRd", rev = TRUE)
+    }
+
+  #ACE if (print_nb_messages) nb("within hm2plus", see_variable(key_par), "\n")
+  #ACE if (print_nb_messages) nb(see_variable(colors, "\n"))
+  #ACE key_par$col = colors
+  #ACE key_par$breaks = breaks
+
+  if (print_nb_messages) nb(see_variable(par(), "\n")) #ACE TODO remove me
+  if (print_nb_messages) cat("\\leavevmode\n\\linebreak\n") #ACE TODO remove me
+  suppressWarnings(
+    my_hm2(
+      x = x,
+      col = colors,
+      #ACE symkey = FALSE,
+      density.info = density_info,
+      srtCol = srtcol,
+      srtRow = srtrow,
+      margins = margins,
+      lwid = widths,
+      lhei = heights,
+      key.title = NA,
+      key.xlab = key_xlab,
+      key.par = key_par,
+      lmat = mat,
+      notecol = notecol,
+      trace = trace,
+      bg = "yellow",
+      hclustfun = hclustfun,
+      #ACE breaks = breaks,
+      oldstyle = FALSE,
+      ... # varargs
+    )
+  )
+  # implicitly returning value returned by heatmap.2
+}
+
 # draw_kseaapp_summary_heatmap is a helper function for ksea_heatmap
 draw_kseaapp_summary_heatmap <- function(
-    x,
-    sample_cluster,
-    merged_asterisk,
-    my_cex_row,
-    color_breaks,
-    margins,
+    x,                       # matrix with row/col names already formatted
+    sample_cluster,          # a binary input of TRUE or FALSE,
+                             #   indicating whether or not to perform
+                             #   hierarchical clustering of the sample columns
+    merged_asterisk,         # matrix having dimensions of x, values "*" or ""
+    color_breaks,            # breaks for color gradation, from create_breaks
+                             #   passed to `breaks` argument of `image`
+    margins = c(8, 15),      # two integers setting the bottom and right margins
+                             #   to accommodate row and column labels
+    master_cex = 0.7,        # basis for text sizes
     ...
 ) {
   merged_scores <- x
@@ -908,44 +2261,128 @@
         "' rather than 'matrix'.\n\n"
         )
       )
-  } else if (nrow(x) < 2) {
-    cat("No plot because matrix x has ", nrow(x), " rows.\n\n")
-    cat("\\begin{verbatim}\n")
-    str(x)
-    cat("\\end{verbatim}\n")
-  } else if (ncol(x) < 2) {
-    cat("No plot because matrix x has ", ncol(x), " columns.\n\n")
-    cat("\\begin{verbatim}\n")
-    str(x)
-    cat("\\end{verbatim}\n")
-  } else {
-    gplots::heatmap.2(
-      x            = merged_scores,
-      Colv         = sample_cluster,
-      scale        = "none",
-      cellnote     = merged_asterisk,
-      notecol      = "white",
-      cexCol       = 0.9,
-      # Heuristically assign size of row labels
-      cexRow       = min(1.0, ((3 * my_cex_row) ^ 1.7) / 2.25),
-      srtCol       = 45,
-      srtRow       = 45,
-      notecex      = 3 * my_cex_row,
-      col          = color_breaks[[2]],
-      density.info = "none",
-      trace        = "none",
-      breaks       = color_breaks[[1]],
-      lmat         = rbind(c(0, 3), c(2, 1), c(0, 4)),
-      lhei         = c(0.4, 8.0, 1.1),
-      lwid         = c(0.5, 3),
-      key          = FALSE,
-      margins      = margins,
-      ...
-    )
+    cat_variable(x)
+    return(FALSE)
+  }
+  if (print_trace_messages) cat(sprintf("master_cex = %03f\n\n", master_cex))
+  nrow_x <- nrow(x)
+  ncol_x <- ncol(x)
+  #if (nrow_x < 2) {
+  if (nrow_x < 1) {
+    cat("No plot because matrix has no rows.\n\n")
+    return(FALSE)
+  } else if (nrow_x < 2) {
+    cat("No plot because matrix has one row.  Matrix looks like this:\n\n")
+    cat("\n\\begin{verbatim}\n")
+    print(x)
+    cat("\n\\end{verbatim}\n")
+    return(FALSE)
+  } else if (ncol_x < 2) {
+    cat("No plot because matrix x has ", ncol_x, " columns.\n\n")
+    cat_variable(x)
+    return(FALSE)
   }
+  max_nchar_rowname <- max(nchar(rownames(x)))
+  max_nchar_colname <- max(nchar(colnames(x)))
+  my_limit <- g_intensity_hm_rows
+
+  my_row_cex_scale <- master_cex * 150 / nrow_x
+  my_col_cex_scale <- 3.0
+  my_asterisk_scale <- 0.4 * my_row_cex_scale
+  my_row_warp <- 1
+  my_note_warp <- 2
+  my_row_warp <- 1
+  my_row_cex_asterisk <-
+    master_cex * my_row_warp * my_asterisk_scale
+
+
+  my_col_cex <- my_col_cex_scale * master_cex
+  my_row_cex <- min(3.5 * my_row_cex_asterisk, my_col_cex)
+  my_key_cex <- 1.286
+  my_hm2_cex <- 1 * master_cex
+  my_offset <- (4.8 / (9 + nrow_x / 10)) - 0.4
+  if (print_trace_messages) cat(sprintf("nrow_x = %03f\n\n", nrow_x))
+  if (print_trace_messages) cat(sprintf("my_offset = %03f\n\n", my_offset))
+  my_offset <- 0.05
+  if (print_trace_messages) cat(sprintf("my_offset = %03f\n\n", my_offset))
+  my_scale <- 3.0
+  if (ncol_x < 10 && nrow_x < 10)
+    my_scale <- my_scale * 10 / (10 - nrow_x) * 10 / (10 - ncol_x)
+
+  my_heights <- c(
+               0.15,
+               3.85 - my_offset,
+               0.5  + my_offset
+             )
+  my_margins <- c(1, 1) +
+      c(
+        margins[1] * 0.08 * max_nchar_colname * my_col_cex,
+        margins[2] * 0.04 * max_nchar_rowname * my_row_cex
+      )
+
+  my_notecex <-
+    my_scale *
+      min(
+        1.1,
+        my_row_cex_asterisk * my_note_warp,
+        my_col_cex * my_note_warp
+      )
+
+  if (print_trace_messages) {
+    cat_variable(my_heights,          suffix = "; ")
+    cat_variable(my_margins,          suffix = "\n\n")
+    cat_variable(my_row_cex_scale,    suffix = "; ")
+    cat_variable(my_col_cex_scale,    suffix = "\n\n")
+    cat_variable(my_row_cex_asterisk, suffix = "\n\n")
+    cat_variable(my_row_cex,          suffix = "; ")
+    cat_variable(my_col_cex,          suffix = "\n\n")
+    cat_variable(my_row_cex,          suffix = "; ")
+    cat_variable(my_col_cex,          suffix = "\n\n")
+  }
+
+  hm2plus(
+    x            = merged_scores,
+    Colv         = sample_cluster,
+    cellnote     = merged_asterisk,
+    cex          = my_hm2_cex,
+    cexCol       = my_col_cex,
+    cexRow       = my_row_cex,
+    denhgt       = 0.15,
+    density_info = "none",
+    denwid       = 0.5,
+    divergent    = TRUE,
+    key_par      = list(cex = my_key_cex),
+    key_xlab     = "Z-score",
+    margins      = my_margins,
+    notecex      = my_scale * min(
+                     1.5,
+                     my_row_cex_asterisk * my_note_warp,
+                     my_col_cex * my_note_warp
+                   ),
+    notecol      = "white",
+    scale        = "none",
+    srtcol       = 90,
+    srtrow       = 0,
+    trace        = "none",
+    mat          = matrix(
+                     c(
+                       c(0, 3, 3),
+                       c(2, 1, 1),
+                       c(0, 4, 0)
+                     ),
+                     nrow = 3,
+                     ncol = 3,
+                     byrow = TRUE
+                   ),
+    widths       = c(0.5, 3.1, 0.9),
+    heights      = my_heights,
+    ...
+  )
+  return(TRUE)
 }
 
-# Adapted from KSEAapp::KSEA.Heatmap
+# function drawing heatmap of contrast fold-change for each kinase,
+#   adapted from KSEAapp::KSEA.Heatmap
 ksea_heatmap <- function(
   # the data frame outputs from the KSEA.Scores() function, in list format
   score_list,
@@ -961,16 +2398,19 @@
   m_cutoff,
   # a numeric value between 0 and 1 indicating the p-value/FDR cutoff
   #   for indicating significant kinases in the heatmap
-  p_cutoff =
-    stop("argument 'p_cutoff' is required for function 'ksea_heatmap'"),
+  p_cutoff = {
+    cat("argument 'p_cutoff' is required for function 'ksea_heatmap'")
+    param_df_exit()
+    knitr::knit_exit()
+  },
   # a binary input of TRUE or FALSE, indicating whether or not to perform
   #   hierarchical clustering of the sample columns
   sample_cluster,
   # a binary input of TRUE or FALSE, indicating whether or not to export
   #   the heatmap as a .png image into the working directory
   export = FALSE,
-  # bottom and right margins; adjust as needed if contrast names are too long
-  margins = c(6, 20),
+  # bottom and right margins; adjust as needehttps://tex.stackexchange.com/a/56795d if contrast names are too long
+  margins = c(6, 6),
   # print which kinases?
   # - Mandatory argument, must be one of const_ksea_.*_kinases
   which_kinases,
@@ -993,7 +2433,7 @@
   master <-
     Reduce(
       f = function(...) {
-        base::merge(..., by = "Kinase.Gene", all = FALSE)
+        base::merge(..., by = "Kinase.Gene", all = TRUE)
       },
       x = score_list_m
       )
@@ -1019,15 +2459,13 @@
   }
   merged_asterisk <- as.matrix(asterisk(merged_stats, p_cutoff))
 
-  # begin hack to print only significant rows
   asterisk_rows <- rowSums(merged_asterisk == "*") > 0
   all_rows <- rownames(merged_stats)
   names(asterisk_rows) <- all_rows
   non_asterisk_rows <- names(asterisk_rows[asterisk_rows == FALSE])
   asterisk_rows <- names(asterisk_rows[asterisk_rows == TRUE])
-  merged_scores_asterisk <- merged_scores[names(asterisk_rows), ]
-  merged_scores_non_asterisk <- merged_scores[names(non_asterisk_rows), ]
-  # end hack to print only significant rows
+  merged_scores_asterisk <- merged_scores[names(asterisk_rows), , drop = FALSE]
+  merged_scores_non_asterisk <- merged_scores[names(non_asterisk_rows), , drop = FALSE]
 
   row_list <- list()
   row_list[[const_ksea_astrsk_kinases]] <- asterisk_rows
@@ -1036,14 +2474,17 @@
 
   i <- which_kinases
   my_row_names <- row_list[[i]]
-  scrs <- merged_scores[my_row_names, ]
-  stts <- merged_stats[my_row_names, ]
+  scrs <- merged_scores[my_row_names, , drop = FALSE]
+  stts <- merged_stats[my_row_names, , drop = FALSE]
   merged_asterisk <- as.matrix(asterisk(stts, p_cutoff))
 
   color_breaks <- create_breaks(scrs)
+  if (is.null(color_breaks)) {
+    cat("No plot because matrix has too few rows.\n\n")
+    return(NULL)
+  }
   plot_height <- nrow(scrs) ^ 0.55
   plot_width <- ncol(scrs) ^ 0.7
-  my_cex_row <- 0.25 * 16 / plot_height
   if (export == "TRUE") {
     png(
       "KSEA.Merged.Heatmap.png",
@@ -1053,96 +2494,600 @@
       pointsize = 14
     )
   }
-  draw_kseaapp_summary_heatmap(
+  did_draw <- draw_kseaapp_summary_heatmap(
     x               = scrs,
     sample_cluster  = sample_cluster,
     merged_asterisk = merged_asterisk,
-    my_cex_row      = my_cex_row,
     color_breaks    = color_breaks,
     margins         = margins
   )
   if (export == "TRUE") {
     dev.off()
   }
+  if (!did_draw)
+    return(NULL)
   return(my_row_names)
 }
 
-# helper for heatmaps of phosphopeptide intensities
-
-draw_intensity_heatmap <-
+# helpers for heatmaps of phosphopeptide intensities
+
+# factory producing function to truncate string after n characters
+trunc_n <- function(n) {
+    function(x) {
+      sapply(
+        X = x,
+        FUN = function(s) {
+          if (is.na(s))
+            return("NA")
+          cond <- try_catch_w_e(nchar(s) > n)
+          if (!is.logical(cond$value)) {
+            return(cond$value$message)
+          } else if (cond$value) {
+            paste0(
+              strtrim(s, n),
+              "..."
+            )
+          } else {
+            s
+          }
+        },
+        USE.NAMES = FALSE
+      )
+    }
+  }
+trunc_long_ppep <- function(x) trunc_n(40)(x)
+trunc_ppep <- function(x) trunc_n(g_ppep_trunc_n)(x)
+trunc_subgene <- function(x) trunc_n(g_subgene_trunc_n)(x)
+trunc_enriched_substrate <- function(x) trunc_n(g_sbstr_trunc_n)(x)
+
+# factory producing a function that returns a covariance
+#   matrix's rows (and columns) having variance > v_min
+keep_cov_w_var_gtr_min <- function(v_min) {
+  function(x) {
+    if (!is.matrix(x))
+      return(NULL)
+    keepers <- sapply(
+        X = seq_len(nrow(x)),
+        FUN = function(i) {
+          if (x[i, i] < v_min)
+            NA
+          else
+            x[i, i]
+        }
+      )
+    names(keepers) <- rownames(x)
+    keepers <- keepers[!is.na(keepers)]
+    keepers <- names(keepers)
+    if (length(keepers) == 0)
+      return(NULL)
+    x[keepers, keepers]
+  }
+}
+# function that returns a matrix's rows having variance > 1
+keep_cov_w_var_gtr_1 <- keep_cov_w_var_gtr_min(1)
+
+# factory producing a function that returns
+#   - either a matrix's rows (rows = TRUE)
+#   - or a matrix's columns (rows = FALSE)
+#   having variance > v_min
+keep_var_gtr_min <- function(v_min) {
+  function(x, rows = TRUE) {
+    nrowcol <- if (rows) nrow else ncol
+    if (!is.matrix(x))
+      return(NULL)
+    keepers <- sapply(
+        X = seq_len(nrowcol(x)),
+        FUN = function(i) {
+          row_var <- var(
+              if (rows) x[i, ] else  x[, i],
+              na.rm = TRUE
+            )
+          if (is.na(row_var) || row_var <= v_min) NA else i
+        }
+      )
+    keepers <- keepers[!is.na(keepers)]
+    if (rows) x[keepers, ] else  x[, keepers]
+  }
+}
+
+keep_var_gtr_0 <- keep_var_gtr_min(0)
+
+# function drawing heatmap of phosphopeptide intensities
+ppep_heatmap <-
   function(
     m,                              # matrix with rownames already formatted
     cutoff,                         # cutoff used by hm_heading_function
-    hm_heading_function,            # construct and cat heading from m and cutoff
+    hm_heading_function,            # construct $ cat heading from m and cutoff
     hm_main_title,                  # main title for plot (drawn below heading)
     suppress_row_dendrogram = TRUE, # set to false to show dendrogram
-    max_peptide_count               # experimental:
-          = intensity_hm_rows,      #   values of 50 and 75 worked well
-    ...                             # passthru parameters for heatmap
+    max_peptide_count =             # experimental:
+            g_intensity_hm_rows,    #   values of 50 and 75 worked well
+    master_cex = 1.0,               # basis for text sizes
+    margins = NULL,                 # optional margins (bottom, right)
+    cellnote = NULL,                # optional matrix of character; dim = dim(m)
+    adj = 0.5,                      # adjust text: 0 left, 0.5 middle, 1 right
+    ...                             # passthru to hm2plus or heatmap.2
   ) {
+    use_heatmap_1 <- FALSE
     peptide_count <- 0
     # emit the heading for the heatmap
     if (hm_heading_function(m, cutoff)) {
-      peptide_count <- min(max_peptide_count, nrow(m))
-      if (nrow(m) > 1) {
+      nrow_m <- nrow(m)
+      peptide_count <- min(max_peptide_count, nrow_m)
+      if (nrow_m > 1) {
         m_margin <- m[peptide_count:1, ]
-        # Margin setting was heuristically derived
-        margins <-
-          c(0.5, # col
-            max(80, sqrt(nchar(rownames(m_margin)))) * 5 / 16  # row
-          )
-        }
-      if (nrow(m) > 1) {
+        # Margin was heuristically derived to accommodate the widest label
+        row_mchar_max <- max(nchar(rownames(m_margin)))
+        col_mchar_max <- max(nchar(colnames(m_margin)))
+        row_margin <- master_cex * row_mchar_max * 2.6
+        col_margin <- master_cex * col_mchar_max * 2.6
+        if (print_trace_messages) cat(sprintf("row_margin = %0.3f; ", row_margin))
+        if (print_trace_messages) cat(sprintf("col_margin = %0.3f; ", col_margin))
+        hm_call <- NULL
         tryCatch(
           {
-            old_oma <- par("oma")
-            par(cex.main = 0.6)
-            # Heuristically determined character size adjustment formula
-            char_contractor <-
-              250000 / (
-                max(4500, (nchar(rownames(m_margin)))^2) * intensity_hm_rows
+            # set non-argument parameters for hm_call inner function
+            my_row_cex <-
+              master_cex * 200000 / (
+                (max(nchar(rownames(m_margin)))^2) * g_intensity_hm_rows
                 )
-            heatmap(
-              m[peptide_count:1, ],
-              Rowv = if (suppress_row_dendrogram) NA else NULL,
-              Colv = NA,
-              cexRow = char_contractor,
-              cexCol = char_contractor * 50 / max_peptide_count,
-              scale = "row",
-              margins = margins,
-              main =
-                "Unimputed, unnormalized log(intensities)",
-              xlab = "",
-              las = 1,
-              ...
+            m_hm <-  m[peptide_count:1, , drop = FALSE]
+            if (is.null(cellnote)) {
+              cellnote <-  matrix("", nrow = nrow(m_hm), ncol = ncol(m_hm))
+              cellnote[is.na(m_hm)] <- "NA"
+            } else {
+              cellnote <-  cellnote[peptide_count:1, , drop = FALSE]
+            }
+            m_hm[is.na(m_hm)] <- 0
+            nrow_m_hm <- nrow(m_hm)
+            ncol_m_hm <- ncol(m_hm)
+            if (nrow_m_hm < 1 || ncol_m_hm < 1)
+              return(peptide_count) # return zero as initialized above
+            my_limit <- g_intensity_hm_rows
+
+
+            my_row_cex <- master_cex * (100 / (2 + row_mchar_max))
+            my_col_cex <- master_cex * 6 * row_margin / col_margin
+            my_col_adj <- min(my_col_cex, my_row_cex) / my_col_cex
+            my_col_cex <- min(my_col_cex, my_row_cex)
+            col_margin <- sqrt(my_col_adj) * col_margin
+            if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex))
+            if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex))
+            if (is.null(margins)) my_margins <-
+              c(
+                (my_col_cex + col_margin), # col
+                (my_row_cex + row_margin) / my_row_cex  # row
+              )
+            else
+              my_margins <- margins
+
+            if (print_trace_messages) cat(
+              sprintf(
+                "my_margins = c(%s)\n\n",
+                paste(my_margins, collapse = ", ")
+              )
+            )
+            my_hm2_cex <- 2 * master_cex
+            my_key_cex <- 0.9 - 0.1 * (g_intensity_hm_rows + nrow_m_hm) / g_intensity_hm_rows
+            my_key_warp <- 1.5 * 22.75 / row_margin
+            my_key_cex <- min(1.10, my_key_warp * my_key_cex)
+            my_hgt_scale <- 3.70 - 0.4 * (max(1, 0.9 * my_row_cex) - 1)
+            my_hgt_scale <- 3.75 # 3.615
+            my_hgt_scale <- 3.60 # 3.615
+            if (print_trace_messages)
+              cat_variable(my_hgt_scale, "\n\n", 3)
+            my_warp <- max(0.1, 1.4 * (7.5 + nrow_m) / g_intensity_hm_rows)
+            if (print_trace_messages)
+              cat_variable(my_warp, "\n\n", 3)
+            # added 0.9 heuristically...
+            my_plot_height <-
+              (0.566 + 0.354 * (nrow_m / g_intensity_hm_rows)) *
+                min(my_hgt_scale, my_hgt_scale * my_warp)
+            my_plot_height <- min(3.65, my_plot_height * g_intensity_hm_rows / 50)
+            my_heights <- c(
+                0.3,                                 # title and top dendrogram
+                my_plot_height,                      # plot and bottom margin
+                4.15 - my_hgt_scale,                 # legend
+                0.05 + my_hgt_scale - my_plot_height # whitespace below legend
               )
+            my_note_cex <- min(0.8, my_row_cex, my_col_cex)
+            if (print_trace_messages) {
+              cat_variable(my_plot_height, "\n\n", 3)
+              cat_variable(4.19 - my_hgt_scale, "\n\n", 3)
+              cat_variable(nrow_m_hm, "; ", 0)
+              cat_variable(ncol_m_hm, "; ", 0)
+              cat_variable(my_row_cex, "; ", 3)
+              cat_variable(my_col_cex, "; ", 3)
+              cat_variable(my_note_cex, "; ", 3)
+              cat_variable(my_key_cex, "\n\n", 3)
+              cat_variable(my_hgt_scale, "; ", 3)
+              cat_variable(my_plot_height, "; ", 3)
+              cat_variable(my_warp, "\n\n", 3)
+              cat_variable(my_heights, "; ", 2)
+              cat_variable(sum(my_heights), "\n\n", 3)
+            }
+
+            # define hm_call inner function
+            hm_call <- function(x, scaling, title) {
+              my_cex_main <- min(5.0, 220 / nchar(title))
+              op <- par(
+                cex.main = my_cex_main * master_cex,
+                adj = adj
+              )
+              if (
+                !is.null(
+                  hm2plus(
+                    x,
+                    Colv = NA,
+                    Rowv = TRUE,
+                    cexRow = my_row_cex,
+                    cexCol = my_col_cex,
+                    dendrogram = "row",
+                    las = 1,
+                    main = title,
+                    key_xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"),
+                    cex = my_hm2_cex,
+                    key_par = list(cex = my_key_cex),
+                    margins = my_margins,
+                    widths =  c(0.4, 2.6, 1.5),
+                    heights = my_heights,
+                    mat = matrix(
+                            c(
+                              c(0, 3, 3),
+                              c(2, 1, 1),
+                              c(0, 4, 0),
+                              c(0, 0, 0)
+                            ),
+                            nrow = 4,
+                            ncol = 3,
+                            byrow = TRUE
+                          ),
+                    na.rm = TRUE,
+                    scale = scaling,
+                    srtcol = 90,
+                    srtrow = 0,
+                    xlab = "",
+                    cellnote = cellnote,
+                    notecex = my_note_cex,
+                    ...
+                  )
+                )
+              ) {
+                if (print_trace_messages) cat(
+                  sprintf(
+                    "my_heights = c(%s); sum = %0.3f\n\n",
+                    paste(
+                      sprintf("%0.3f", my_heights),
+                      collapse = ", "
+                    ),
+                    sum(my_heights)
+                  )
+                )
+                if (print_trace_messages) cat(
+                      sprintf("my_key_cex = %0.3f\n\n",
+                      my_key_cex)
+                    )
+                if (print_trace_messages) cat(
+                      sprintf("my_key_cex/my_heights[3] = %0.3f\n\n",
+                      my_key_cex / my_heights[3])
+                    )
+                if (print_trace_messages) cat(
+                      sprintf("my_heights[2]/my_heights[3] = %0.3f\n\n",
+                      my_heights[2] / my_heights[3])
+                    )
+              }
+              par(op)
+            }
+
+            # invoke hm_call inner function
+            if (sum(rowSums(!is.na(m_hm)) < 2))
+              hm_call(
+                m_hm,
+                "none",
+                "log(intensities), unscaled, unimputed, and unnormalized"
+                )
+            else
+              hm_call(
+                m_hm,
+                "row",
+                "log(intensities), row-scaled, unimputed, and unnormalized"
+                )
           },
           error = function(e) {
-            cat(
-              sprintf(
-                "\nCould not draw heatmap, possibly because of too many missing values.  Internal message: %s\n",
-                e$message
+            if (!is.null(hm_call)) {
+              m_hm[is.na(m_hm)] <- 0
+              tryCatch(
+                {
+                  if (nrow(m_hm) > 1)
+                    hm_call(
+                      m_hm,
+                      "none",
+                      paste(
+                        "log(intensities), unscaled,",
+                        "zero-imputed, unnormalized"
+                        )
+                      )
+                  else
+                    cat("\nThere are too few peptides to produce a heatmap.\n")
+                },
+                error = function(r) {
+                  cat(
+                    sprintf(
+                      "\n%s %s Internal message: %s\n\\newline\n\n",
+                      "Failure drawing heatmap,",
+                      "possibly because of too many missing values.\n\\newline\n\n",
+                      r$message
+                      )
+                    )
+                  cat_margins()
+                }
+              )
+            } else {
+              cat(
+                "\nFailure drawing heatmap, possibly because of too many missing values.\n"
                 )
-              )
-            },
-          finally = par(old_oma)
+            }
+          }
         )
       }
     }
-    return(peptide_count)
+    # return value:
+    peptide_count
   }
+
+# function drawing heatmap of correlations if they exist, else covariances
+cov_heatmap <-
+  function(
+    m,                              # matrix with rownames already formatted
+    top_substrates = FALSE,
+    ...                             # passthru to hm2plus or heatmap.2
+  ) {
+      if (print_nb_messages) nbe(see_variable(m), " [", nrow(m), "x", ncol(m), "\n")
+      #ACE nb(rowSums(m, na.rm = TRUE))
+      #ACE bad_rows <- (rowSums(m, na.rm = TRUE) == 0)
+      #ACE nb(see_variable(bad_rows))
+      #ACE m <- m[-bad_rows, , drop = FALSE]
+      colnames_m <- colnames(m)
+      is_na_m <- is.na(m)
+      tmp <- m
+      tmp[is_na_m] <- 0
+
+      tmp <- m[, 0 < colSums(x = tmp)] # by default, na.rm is FALSE
+
+      colnames_tmp <- colnames(tmp)
+
+      my_low_p_seq <- seq(
+          from = min(g_intensity_hm_rows, nrow(m)),
+          to = 1,
+          by = -1
+        )
+
+      if (g_correlate_substrates) {
+        # zap samples having zero or near-zero variance
+        tmp[is.na(tmp)] <- 0
+        nzv <- caret::nearZeroVar(
+          tmp,           # matrix of values, samples x variables
+          freqCut = 1.01, # min(freq most prevalent value /
+                         #     freq second most prevalent)
+          uniqueCut = 99 # max(number of unique values /
+                         #     total number of samples)
+        )
+        tmp <- if (length(nzv) > 0) {
+            m[, -nzv, drop = FALSE]
+          } else {
+            m
+          }
+      } else {
+        tmp <- m[my_low_p_seq, , drop = FALSE]
+      }
+
+
+      t_m <- t(tmp)
+      t_m[is.na(t_m)] <- 0
+      prefiltered_nrow <- ncol(t_m)
+
+      my_corcov <- cov(t_m)
+      did_filter_rows <- did_filter_cols <- FALSE
+      if (g_correlate_substrates && !is_positive_definite(my_corcov)) {
+        my_correlate_substrates <- FALSE
+        t_m <- t(m[my_low_p_seq, , drop = FALSE])
+        t_m[is.na(t_m)] <- 0
+        unfiltered_row_count <- ncol(t_m)
+        unfiltered_col_count <- nrow(t_m)
+
+        # zap empty samples
+        t_m <- t_m[0 < rowSums(x = t_m), ]
+        # zap substrates present in fewer than two samples
+        foo <- t_m > 0
+        foo <- colSums(x = foo) > 1
+        t_m <- t_m[, foo]
+
+        did_filter_rows <- unfiltered_row_count > ncol(t_m)
+        did_filter_cols <- unfiltered_col_count > nrow(t_m)
+
+        colnames_tmp <- rownames(t_m)
+        my_corcov <- cov(t_m)
+        if (g_filter_cov_var_gt_1) {
+          my_corcov <- keep_cov_w_var_gtr_1(my_corcov)
+        }
+      } else if (g_correlate_substrates) {
+        my_corcov <- cov2cor(my_corcov)
+        my_correlate_substrates <- TRUE
+      } else {
+        my_correlate_substrates <- FALSE
+        if (g_filter_cov_var_gt_1) my_corcov <- keep_cov_w_var_gtr_1(my_corcov)
+      }
+
+      omitted_samples <- colnames_m[colnames_m %notin% colnames_tmp]
+      suffix <- if (length(omitted_samples) > 1) "s" else ""
+
+      f_omissions <-
+        function(is_corr) {
+          cat(
+            sprintf(
+              "Below is the %s plot for %s substrates",
+              if (is_corr) "correlation" else "covariance",
+              sprintf(
+                if (top_substrates)
+                  "%0.0f \"highest-quality\""
+                else
+                  "%0.0f",
+                ncol(t_m)
+              )
+            )
+          )
+          if (did_filter_cols) {
+            cat(sprintf(", omitting sample%s ", suffix))
+            latex_collapsed_vector(", ", omitted_samples)
+          }
+          cat(".\n\n")
+        }
+
+      if (is.null(my_corcov) || sum(!is.na(t_m)) < 2) {
+        cat(
+          sprintf(
+            "\\newline\n%s %s plot.\n",
+            "Insufficient covariance to produce",
+            if (my_correlate_substrates)
+              "correlation"
+            else
+              "covariance"
+          ),
+          "\\newpage\n"
+        )
+        return(NULL)
+      }
+
+      cat("\\leavevmode\n", "\\newpage\n")
+      f_omissions(my_correlate_substrates)
+
+      master_cex <- 0.4
+      max_nchar <- max(nchar(rownames(t_m)))
+      my_limit <- g_intensity_hm_rows
+      diminution <- sqrt(my_limit  / (my_limit + ncol(t_m)))
+      my_row_cex <-
+        my_col_cex <-
+          min(1.75, master_cex * 9 * diminution ^ 1.5)
+      my_margin <- 3 + my_row_cex * 64 / (8 + max_nchar)
+      my_key_cex <- 1.4
+      my_hm2_cex <- 1.0 * master_cex
+      my_hgt_scale <- 3.50 - 0.26 * (max(0.4, my_key_cex) - 0.4)
+      my_hgt_scale <- 2.7
+
+      my_legend_height <- 4.0 - my_hgt_scale
+      my_legend_height <- 0.5 * my_key_cex
+      my_warp <- 0.65 * (my_limit + ncol(t_m)) / my_limit
+      my_warp <- 0.8
+      my_legend_height <- 0.77
+      my_legend_height <- 0.67
+      my_plot_height <- my_hgt_scale + (1 - my_warp) * my_legend_height
+      my_legend_height <- my_warp * my_legend_height
+
+      parjust <- par(adj = 0.5)
+      on.exit(par(parjust))
+      my_corcov <- my_corcov[order(rownames(my_corcov)), ]
+      my_main <-
+        sprintf("%s among %s substrates %s",
+            if (my_correlate_substrates) "Correlation"
+              else "Covariance",
+            kinase_name,
+            if (!my_correlate_substrates &&
+                g_filter_cov_var_gt_1 &&
+                did_filter_rows
+            )
+              "having variance > 1"
+            else ""
+          )
+      my_main_nchar <- nchar(my_main)
+      my_heights <- c(
+            0.3,
+            my_plot_height,
+            my_legend_height # was 4.0 - my_hgt_scale # was 4.19
+          )
+      if (print_trace_messages) cat(sprintf("max_nchar = %0.3f; ", max_nchar))
+      if (print_trace_messages) cat(sprintf("my_margin = %0.3f; ", my_margin))
+      if (print_trace_messages) cat(sprintf("my_plot_height = %0.3f\n\n", my_plot_height))
+      if (print_trace_messages) cat(sprintf("master_cex = %0.3f; ", master_cex))
+      if (print_trace_messages) cat(sprintf("my_row_cex = %0.3f; ", my_row_cex))
+      if (print_trace_messages) cat(sprintf("my_col_cex = %0.3f; ", my_col_cex))
+      if (print_trace_messages) cat(sprintf("my_key_cex = %0.3f\n\n", my_key_cex))
+      if (print_trace_messages) cat(sprintf("my_hgt_scale = %0.3f\n\n", my_hgt_scale))
+      if (print_trace_messages) cat(sprintf("legend height = %0.3f\n\n", my_legend_height))
+      if (print_trace_messages) cat(
+        sprintf(
+          "my_heights = c(%s); sum = %0.3f\n\n",
+          paste(
+            sprintf("%0.3f", my_heights),
+            collapse = ", "
+          ),
+          sum(my_heights)
+        )
+      )
+      op <- par(cex.main = (30 + my_main_nchar) / my_main_nchar)
+      on.exit(par(op))
+      hm2plus(
+        x            = my_corcov,
+        cex          = my_hm2_cex,
+        cexCol       = my_col_cex,
+        cexRow       = my_row_cex,
+        density_info = "none",
+        denhgt       = 0.15,
+        denwid       = 0.5,
+        divergent    = TRUE,
+        key_par      = list(cex = my_key_cex),
+        key_xlab     = if (my_correlate_substrates) "Correlation"
+                         else "Covariance",
+        main         = my_main,
+        mat          = matrix(
+                         c(
+                           c(0, 3, 3),
+                           c(2, 1, 1),
+                           c(0, 4, 0)
+                         ),
+                         nrow = 3,
+                         ncol = 3,
+                         byrow = TRUE
+                       ),
+        heights = my_heights,
+        margins      = c(my_margin, my_margin),
+        widths       = c(0.5, 3.1, 0.9),
+        scale        = "none",
+        symkey       = TRUE,
+        symbreaks    = TRUE,
+        symm         = FALSE #TODO evaluate TRUE
+        # ...
+      )
+  } # end cov_heatmap
+
+### FILE IMPORT
+
+# function reading bzipped file to data.frame
+bzip2df <- function(d, f, ctor = bzfile) {
+  # read.delim file (by default, compressed by bzip2)
+  if (file.exists(f)) {
+    conn <- NULL
+    pf <- parent.frame()
+    tryCatch(
+      assign(
+        as.character(substitute(d)),
+        read.delim(conn <- bzfile(f, open = "r")),
+        pf
+      ),
+      finally = if (!is.null(conn)) close(conn)
+      )
+  }
+}
+
 ```
 
-```{r, echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
-cat("\\listoftables\n")
-```
 # Purpose
 
-To perform for phosphopeptides:
+The purpose of this analysis is to perform for phosphopeptides:
 
 - imputation of missing values,
 - quantile normalization,
-- ANOVA (using the R stats::`r params$oneWayManyCategories` function), and
+- ANOVA (using the R stats::`r params$oneWayManyCategories` function),
+- assignment of an FDR-adjusted $p$-value and a "quality score" to each phosphopeptide, and
 - KSEA (Kinase-Substrate Enrichment Analysis) using code adapted from the CRAN `KSEAapp` package to search for kinase substrates from the following databases:
   - PhosphoSitesPlus [https://www.phosphosite.org](https://www.phosphosite.org)
   - The Human Proteome Database [http://hprd.org](http://hprd.org)
@@ -1151,13 +3096,15 @@
 
 ```{r include = FALSE}
 
-### GLOBAL VARIABLES
-
-# parameters for KSEA
-
-ksea_cutoff_statistic <- params$kseaCutoffStatistic
-ksea_cutoff_threshold <- params$kseaCutoffThreshold
-ksea_min_kinase_count <- params$kseaMinKinaseCount
+if (params$kseaUseAbsoluteLog2FC) {
+  sfc <- "|s|"
+  pfc <- "|p|"
+  pfc_txt <- "$\\text{absolute value}({\\log_2 (\\text{fold-change})})$"
+} else {
+  sfc <- "s"
+  pfc <- "p"
+  pfc_txt <- "${\\log_2 (\\text{fold-change}})$"
+}
 
 ksea_heatmap_titles <- list()
 ksea_heatmap_titles[[const_ksea_astrsk_kinases]] <-
@@ -1177,24 +3124,11 @@
 # hash to hold names of significantly enriched kinases
 ksea_asterisk_hash <- new_env()
 
-# READ PARAMETERS (mostly)
-
-intensity_hm_rows <- params$intensityHeatmapRows
-# Input Filename
-input_file <- params$inputFile
-
-# First data column - ideally, this could be detected via regexSampleNames,
-#   but for now leave it as is.
-first_data_column <- params$firstDataColumn
-fdc_is_integer <- is.integer(first_data_column)
-if (fdc_is_integer) {
-  first_data_column <- as.integer(params$firstDataColumn)
-}
+# PROCESS (mostly read) PARAMETERS
 
 # False discovery rate adjustment for ANOVA
 #  Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05
-val_fdr <-
-  read.table(file = params$alphaFile, sep = "\t", header = FALSE, quote = "")
+val_fdr <- read.table(file = alpha_file, sep = "\t", header = FALSE, quote = "")
 
 if (
   ncol(val_fdr) != 1 ||
@@ -1202,62 +3136,17 @@
   sum(val_fdr[, 1] < 0) ||
   sum(val_fdr[, 1] > 1)
 ) {
-  stop("alphaFile should be one column of numbers within the range [0.0,1.0]")
+  cat("alphaFile should be one column of numbers within the range [0.0,1.0]")
+  param_df_exit()
+  knitr::knit_exit()
 }
 val_fdr <- val_fdr[, 1]
 
-#Imputed Data filename
-imputed_data_filename <- params$imputedDataFilename
-imp_qn_lt_data_filenm <- params$imputedQNLTDataFile
-anova_ksea_mtdt_file  <- params$anovaKseaMetadata
-
 ```
 
-```{r echo = FALSE}
-# Imputation method, should be one of
-#   "random", "group-median", "median", or "mean"
-imputation_method <- params$imputationMethod
-
-# Selection of percentile of logvalue data to set the mean for random number
-#   generation when using random imputation
-mean_percentile <- params$meanPercentile / 100.0
-
-# deviation adjustment-factor for random values; real number.
-sd_percentile <- params$sdPercentile
-
-# Regular expression of Sample Names, e.g., "\\.(\\d+)[A-Z]$"
-regex_sample_names <- params$regexSampleNames
-
-# Regular expression to extract Sample Grouping from Sample Name;
-#   if error occurs, compare sample_treatment_levels vs. sample_name_matches
-#   to see if groupings/pairs line up
-#   e.g., "(\\d+)"
-regex_sample_grouping <- params$regexSampleGrouping
-
-one_way_all_categories_fname <- params$oneWayManyCategories
-one_way_all_categories <- try_catch_w_e(
-  match.fun(one_way_all_categories_fname))
-if (!is.function(one_way_all_categories$value)) {
-  write("fatal error for parameter oneWayManyCategories:", stderr())
-  write(one_way_all_categories$value$message,             stderr())
-  if (sys.nframe() > 0) quit(save = "no", status = 1)
-  stop("Cannot continue. Goodbye.")
-}
-one_way_all_categories <- one_way_all_categories$value
-
-one_way_two_categories_fname <- params$oneWayManyCategories
-one_way_two_categories <- try_catch_w_e(
-  match.fun(one_way_two_categories_fname))
-if (!is.function(one_way_two_categories$value)) {
-  cat("fatal error for parameter oneWayTwoCategories: \n")
-  cat(one_way_two_categories$value$message, fill = TRUE)
-  if (sys.nframe() > 0) quit(save = "no", status = 1)
-  stop("Cannot continue. Goodbye.")
-}
-one_way_two_categories <- one_way_two_categories$value
-
-preproc_db       <- params$preprocDb
-ksea_app_prep_db <- params$kseaAppPrepDb
+```{r echo = FALSE, results = 'asis'}
+
+
 result <- file.copy(
   from      = preproc_db,
   to        = ksea_app_prep_db,
@@ -1272,9 +3161,19 @@
     ),
     stderr()
   )
-  if (sys.nframe() > 0) quit(save = "no", status = 1)
-  stop("Cannot continue. Goodbye.")
+  if (sys.nframe() > 0) {
+    cat("Cannot continue and quit() failed. Goodbye.")
+    param_df_exit()
+    knitr::knit_exit()
+    # in case knit_exit doesn't exit
+    quit(save = "no", status = 1)
+  }
 }
+
+if (FALSE) {
+  write.table(x = param_df, file = "test-data/params.txt")
+}
+
 ```
 
 ```{r echo = FALSE}
@@ -1289,55 +3188,75 @@
   quote = "",
   check.names = FALSE
   )
+
 ```
 
-# Extract Sample Names and Treatment Levels
-
-Column names parsed from input file are shown in Table 1; sample names and treatment levels, in Table 2.
+
+# Extraction of Sample Classes and Names from Input Data
 
 ```{r echo = FALSE, results = 'asis'}
 
 data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE)
+my_column_names <- names(full_data)
 
 if (!fdc_is_integer) {
   if (length(data_column_indices) > 0) {
     first_data_column <- data_column_indices[1]
   } else {
-    stop(paste("failed to convert firstDataColumn:", first_data_column))
+    cat(paste("failed to convert firstDataColumn:", first_data_column))
+    param_df_exit()
+    knitr::knit_exit()
   }
 }
 
 cat(
   sprintf(
     paste(
-      "\n\nThe input data file has peptide-intensity data for each sample",
-      "in one of columns %d through %d.\n\n"
+      "\n\nThe input data file has peptide-intensity data",
+      "in columns %d (\"%s\") through %d (\"%s\")."
       ),
-    min(data_column_indices),
-    max(data_column_indices)
+    tmp <- min(data_column_indices),
+    my_column_names[tmp],
+    tmp <- max(data_column_indices),
+    my_column_names[tmp]
     )
   )
 
-# Write column names as a LaTeX enumerated list.
-column_name_df <- data.frame(
-  column = seq_len(length(colnames(full_data))),
-  name = paste0("\\verb@", colnames(full_data), "@")
+if (TRUE) {
+  cat0(
+    table_offset(i = 1, new = TRUE),
+    "Sample classes and names are shown in ",
+    table_href(),
+    ".\n\n"
   )
-data_frame_latex(
-  x = column_name_df,
-  justification = "l l",
-  centered = TRUE,
-  caption = "Input data column names",
-  anchor = const_table_anchor_bp,
-  underscore_whack = FALSE
+} else {
+  cat0(
+    "\\newcounter{offset}\n",
+    "\\setcounter{offset}{\\value{table}}\n",
+    "\\stepcounter{offset}\n",
+    "Sample classes and names are shown in ",
+    table_href(),
+    ".\n\n"
   )
+}
+
+#TODO remove this unused variable and assignment
+if (FALSE) {
+  # Write column names as a LaTeX enumerated list.
+  column_name_df <- data.frame(
+    column = seq_len(length(colnames(full_data))),
+    name = paste0("\\verb@", colnames(full_data), "@")
+    )
+}
 
 ```
 
 ```{r echo = FALSE, results = 'asis'}
+# extract intensity columns from full_data to quant_data
 quant_data <- full_data[first_data_column:length(full_data)]
 quant_data[quant_data == 0] <- NA
 rownames(quant_data) <- rownames(full_data) <- full_data$Phosphopeptide
+full_data_names <- colnames(quant_data)
 # Extract factors and trt-replicates using regular expressions.
 # Typically:
 #   regex_sample_names    is "\\.\\d+[A-Z]$"
@@ -1355,20 +3274,133 @@
 write_debug_file(quant_data)
 
 rx_match <- regexpr(regex_sample_grouping, sample_name_matches, perl = TRUE)
-sample_treatment_levels <- as.factor(regmatches(sample_name_matches, rx_match))
-number_of_samples <- length(sample_name_matches)
+smpl_trt <- as.factor(regmatches(sample_name_matches, rx_match))
+
+if (print_nb_messages) nbe(see_variable(smpl_trt, "\n\n"))
+if (print_nb_messages) nbe(see_variable(sample_name_matches, "\n\n"))
+if (print_nb_messages) nbe(see_variable(full_data_names, "\n\n"))
+
+sample_treatment_df <-
+  save_sample_treatment_df <-
+    data.frame(
+      class = smpl_trt,
+      sample = sample_name_matches,
+      full_sample_names = full_data_names
+      )
+
+if (print_nb_messages) nbe(see_variable(sample_treatment_df, "\n\n"))
+
+# reorder data
+my_order <- with(sample_treatment_df, order(class, sample))
+quant_data <- quant_data[, my_order]
+sample_name_matches <- sample_name_matches[my_order]
+smpl_trt <- smpl_trt[my_order]
 sample_treatment_df <- data.frame(
-  level = sample_treatment_levels,
+  class = smpl_trt,
   sample = sample_name_matches
   )
-data_frame_latex(
+
+# filter smpl_trt as appropriate
+if (sample_group_filter %in% c("include", "exclude")) {
+  include_sample <-
+    mgrepl(
+      v = sample_group_filter_patterns,
+      s = as.character(smpl_trt),
+      fixed = sample_group_filter_fixed,
+      perl = sample_group_filter_perl,
+      ignore.case = sample_group_filter_nocase
+    )
+  if (sum(include_sample) < 2) {
+    errmsg <-
+      paste(
+        "ERROR:",
+        sum(include_sample),
+        "samples are too few for analysis;",
+        "check input parameters for sample-name parsing"
+        )
+    cat0(
+      errmsg,
+      "\\stepcounter{offset}\n",
+      " in ",
+      table_href(),
+      ".\n\n"
+    )
+    data_frame_tabbing_latex(
+      x = save_sample_treatment_df,
+      tabstops = c(1.25, 1.25),
+      caption = "Sample classes",
+      use_subsubsection_header = FALSE
+      )
+    data_frame_tabbing_latex(
+      x =
+        param_df[
+          c("regexSampleNames",
+            "regexSampleGrouping",
+            "groupFilterPatterns",
+            "groupFilter",
+            "groupFilterMode"
+          ),
+        ],
+      tabstops = c(1.75),
+      underscore_whack = TRUE,
+      caption = "Input parameters for sample-name parsing",
+      verbatim = FALSE
+      )
+    param_df_exit()
+    knitr::knit_exit()
+    return(invisible(-1))
+  }
+  sample_treatment_df <-
+    if (sample_group_filter == "include")
+      sample_treatment_df[include_sample, ]
+    else
+      sample_treatment_df[!include_sample, ]
+} else {
+  include_sample <- rep.int(TRUE, length(smpl_trt))
+}
+sample_name_matches <- sample_treatment_df$sample
+rx_match <- regexpr(regex_sample_grouping, sample_name_matches, perl = TRUE)
+smpl_trt <- as.factor(regmatches(sample_name_matches, rx_match))
+sample_treatment_df$class <- smpl_trt
+
+# filter quant_data as appropriate
+number_of_samples <- length(sample_name_matches)
+quant_data <- quant_data[, sample_name_matches]
+
+sample_level_integers <- as.integer(smpl_trt)
+sample_treatment_levels <- levels(smpl_trt)
+count_of_treatment_levels <- length(sample_treatment_levels)
+
+# for each phosphopeptide, across treatment levels, compute minimum
+#   count of observed (i.e., non-missing) values
+my_env <- new_env()
+for (l in sample_treatment_levels)
+  my_env[[as.character(l)]] <-
+    as.vector(rowSums(!is.na(quant_data[l == smpl_trt])))
+min_group_obs_count <- row_apply(
+    x = Reduce(
+      f = function(l, r) cbind(l, my_env[[r]]),
+      x = sample_treatment_levels,
+      init = c()
+    ),
+    fun = min
+  )
+names(min_group_obs_count) <- rownames(quant_data)
+rm(my_env)
+
+# display (possibly-filtered) results
+cat("\\newpage\n")
+
+if (sum(include_sample) > 1) {
+data_frame_tabbing_latex(
   x = sample_treatment_df,
-  justification = "rp{0.2\\linewidth} lp{0.3\\linewidth}",
-  centered = TRUE,
-  caption = "Treatment levels",
-  anchor = const_table_anchor_tbp,
-  underscore_whack = FALSE
+  tabstops = c(1.25),
+  caption = "Sample classes",
+  use_subsubsection_header = FALSE
   )
+}
+sample_name_grow <- 10 / (10 + max(nchar(sample_name_matches) + 6))
+sample_colsep <- transition_positions(as.integer(sample_treatment_df$class))
 ```
 
 ```{r echo = FALSE, results = 'asis'}
@@ -1377,7 +3409,7 @@
 
 ## Are the log-transformed sample distributions similar?
 
-```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}
+```{r echo = FALSE, fig.dim = c(9, 6.5), results = 'asis'}
 
 quant_data[quant_data == 0] <- NA  #replace 0 with NA
 quant_data_log <- log10(quant_data)
@@ -1387,25 +3419,81 @@
 
 write_debug_file(quant_data_log)
 
-# data visualization
-old_par <- par(
-  mai = par("mai") + c(0.5, 0, 0, 0)
-)
-# ref: https://r-charts.com/distribution/add-points-boxplot/
-# Vertical plot
-boxplot(
-  quant_data_log
-, las = 1
-, col = const_boxplot_fill
-, ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
-, xlab = "Sample"
-)
-par(old_par)
-
-
-
-cat("\n\n\n")
-cat("\n\n\n")
+g_ppep_distrib_ctl <- new_env()
+g_ppep_distrib_ctl$xlab_line <- 3.5 + 11.86 * (0.67 - sample_name_grow)
+g_ppep_distrib_ctl$mai_bottom <- (0.5 + 3.95 * (0.67 - sample_name_grow))
+g_ppep_distrib_ctl$axis <- (0.6 + 0.925 * (0.67 - sample_name_grow))
+
+my_ppep_distrib_bxp <- function(
+    x
+  , sample_name_grow = sample_name_grow
+  , main
+  , varwidth = FALSE
+  , sub = NULL
+  , xlab
+  , ylab
+  , col = const_boxplot_fill
+  , notch = FALSE
+  , ppep_distrib_ctl = g_ppep_distrib_ctl
+  , ...
+  ) {
+    my_xlab_line  <- g_ppep_distrib_ctl$xlab_line
+    my_mai_bottom <- g_ppep_distrib_ctl$mai_bottom
+    my_axis       <- g_ppep_distrib_ctl$axis
+
+    if (print_trace_messages) {
+      cat_variable(my_xlab_line, suffix = "; ")
+      cat_variable(my_mai_bottom, suffix = "; ")
+      cat_variable(my_axis, suffix = "\n\n")
+    }
+
+    old_par <- par(
+      mai = par("mai") + c(my_mai_bottom, 0, 0, 0),
+      cex.axis = my_axis,
+      cex.lab = 1.2
+    )
+    tryCatch(
+      {
+        # Vertical plot
+        boxplot(
+          x
+        , las = 2
+        , col = col
+        , main = main
+        , sub = NULL
+        , ylab = ylab
+        , xlab = NULL
+        , notch = notch
+        , varwidth = varwidth
+        , ...
+        )
+        title(
+          sub = sub
+        , cex.sub = 1.0
+        , line = my_xlab_line + 1
+        )
+        title(
+          xlab = xlab
+        , line = my_xlab_line
+        )
+      },
+      finally = par(old_par)
+    )
+  }
+
+my_ppep_distrib_bxp(
+    x = quant_data_log
+  , sample_name_grow = sample_name_grow
+  , main = "Peptide intensities for each sample"
+  , varwidth = boxplot_varwidth
+  , sub = "Box widths reflect number of peptides for sample"
+  , xlab = "Sample"
+  , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
+  , col = const_boxplot_fill
+  , notch = FALSE
+  )
+
+cat("\n\n\n\n")
 
 ```
 
@@ -1454,6 +3542,8 @@
 )
 ```
 
+# Characterization of Input Data
+
 ## Distribution of standard deviations of $log_{10}(\text{intensity})$, ignoring missing values
 
 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5), results = 'asis'}
@@ -1461,7 +3551,7 @@
 q1 <- quantile(logvalues, probs = mean_percentile)[1]
 
 # 1 = row of matrix (ie, phosphopeptide)
-sds <- apply(quant_data_log, 1, sd_finite)
+sds <- row_apply(quant_data_log, sd_finite)
 if (sum(!is.na(sds)) > 2) {
   plot(
     density(sds, na.rm = TRUE)
@@ -1495,7 +3585,7 @@
 # prep for trt-median based imputation
 
 ```
-# Impute Missing Values
+# Imputation of Missing Values
 
 ```{r echo = FALSE}
 
@@ -1526,11 +3616,10 @@
       paste("Substitute missing value with",
         "median peptide-intensity for sample group.\n"
         )
-    sample_level_integers <- as.integer(sample_treatment_levels)
     # Take the accurate ln(x+1) because the data are log-normally distributed
     #   and because median can involve an average of two measurements.
     quant_data_imp <- log1p(quant_data_imp)
-    for (i in seq_len(length(levels(sample_treatment_levels)))) {
+    for (i in seq_len(count_of_treatment_levels)) {
       # Determine the columns for this factor-level
       level_cols <- i == sample_level_integers
       # Extract those columns
@@ -1541,7 +3630,7 @@
       #   a given ppep has no measurement; otherwise, proceed.
       if (ncol(lvlsbst) > 1) {
         the_centers <-
-          apply(lvlsbst, 1, median, na.rm = TRUE)
+          row_apply(lvlsbst, median, na.rm = TRUE)
         for (j in seq_len(nrow(lvlsbst))) {
           for (k in seq_len(ncol(lvlsbst))) {
             if (is.na(lvlsbst[j, k])) {
@@ -1565,7 +3654,7 @@
     # Take the accurate ln(x+1) because the data are log-normally distributed
     #   and because median can involve an average of two measurements.
     quant_data_imp <- log1p(quant_data_imp)
-    quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = TRUE)[ind[, 1]]
+    quant_data_imp[ind] <- row_apply(quant_data_imp, median, na.rm = TRUE)[ind[, 1]]
     # Take the accurate e^x - 1 to match scaling of original input.
     quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp))
     good_rows <- !is.nan(rowMeans(quant_data_imp))
@@ -1581,7 +3670,7 @@
     #   this will have to be
     quant_data_imp <- log1p(quant_data_imp)
     # Assign to NA cells the mean for the row
-    quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = TRUE)[ind[, 1]]
+    quant_data_imp[ind] <- row_apply(quant_data_imp, mean, na.rm = TRUE)[ind[, 1]]
     # Take the accurate e^x - 1 to match scaling of original input.
     quant_data_imp <- round(expm1(quant_data_imp_ln <- quant_data_imp))
     good_rows <- !is.nan(rowMeans(quant_data_imp))
@@ -1625,11 +3714,40 @@
 imp_smry_pot_peptides_after <- sum(good_rows)
 imp_smry_rejected_after  <- sum(!good_rows)
 imp_smry_missing_values_after   <- sum(is.na(quant_data_imp[good_rows, ]))
+
+# From ?`%in%`, %in% is currently defined as function(x, table) match(x, table, nomatch = 0) > 0
+
+sink(stderr())
+print("`%in%`:")
+print(`%in%`)
+sink()
+
+stock_in <-
+  names(good_rows) %in%
+    names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count])
+if (print_nb_messages) nbe(see_variable(stock_in), "\n")
+
+explicit_in <-
+  0 < match(
+    names(good_rows),
+    names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count])
+    )
+if (print_nb_messages) nbe(see_variable(explicit_in), "\n")
+
+great_enough_row_names <- good_rows[
+  names(good_rows) %in%
+    names(min_group_obs_count[g_intensity_min_per_class <= min_group_obs_count])
+]
+if (print_nb_messages) nbe(see_variable(great_enough_row_names), "\n")
+great_enough_row_names <- great_enough_row_names[great_enough_row_names]
+if (print_nb_messages) nbe(see_variable(great_enough_row_names), "\n")
 ```
+
 ```{r echo = FALSE, results = 'asis'}
 # ref: http://www1.maths.leeds.ac.uk/latex/TableHelp1.pdf
 tabular_lines_fmt <- paste(
   "\\begin{table}[hb]", # h(inline); b(bottom); t (top) or p (separate page)
+  " \\leavevmode",
   " \\caption{Imputation Results}",
   " \\centering", # \centering centers the table on the page
   " \\begin{tabular}{l c c c}",
@@ -1637,8 +3755,9 @@
   "  \\  & potential peptides   & missing values & rejected",
   "    peptides \\\\ [0.5ex]",
   "  \\hline",
-  "  before imputation & %d     & %d (%d\\%s)    &    \\\\",
-  "  after imputation  & %d     & %d             & %d \\\\ [1ex]",
+  "  before imputation     & %d & %d (%d\\%s) &    \\\\",
+  "  after imputation      & %d & %d          & %d \\\\",
+  "  after keep comparable & %d &             & %d \\\\ [1ex]",
   "  \\hline",
   " \\end{tabular}",
   #" \\label{table:nonlin}", # may be used to refer this table in the text
@@ -1654,18 +3773,37 @@
     "%",
     imp_smry_pot_peptides_after,
     imp_smry_missing_values_after,
-    imp_smry_rejected_after
+    imp_smry_rejected_after,
+    length(great_enough_row_names),
+    imp_smry_pot_peptides_before -
+      length(great_enough_row_names)
     )
 cat(tabular_lines)
 ```
-```{r echo = FALSE}
-
-
-# Zap rows where imputation was ineffective
+
+```{r filter_good_rows, echo = FALSE}
+
+if (print_nb_messages) nbe("before name extraction, ", see_variable(length(good_rows)), " ", see_variable(good_rows), "\n")
+good_rows <- names(good_rows[names(great_enough_row_names)])
+if (print_nb_messages) nbe("after name extraction, ", see_variable(length(good_rows)), see_variable(good_rows), "\n")
+
+#ACE min_group_obs_count <- min_group_obs_count[names(great_enough_row_names)]
+#ACE nbe("good_rows")
+#ACE nbe(see_variable(good_rows))
+#ACE nbe("names(min_group_obs_count) before filter for good rows")
+#ACE nbe(see_variable(names(min_group_obs_count)))
+min_group_obs_count <- min_group_obs_count[good_rows]
+#ACE nbe("min_group_obs_count after filter for good rows")
+#ACE nbe(see_variable(names(min_group_obs_count)))
+
+# Zap rows where imputation was insufficiently effective
 full_data         <- full_data        [good_rows, ]
 quant_data        <- quant_data       [good_rows, ]
-
+quant_data_log    <- quant_data_log   [good_rows, ]
+
+if (print_nb_messages) nbe("before row filter, ", see_variable(nrow(quant_data_imp)), "\n")
 quant_data_imp <- quant_data_imp[good_rows, ]
+if (print_nb_messages) nbe("after row filter, ", see_variable(nrow(quant_data_imp)), "\n")
 write_debug_file(quant_data_imp)
 quant_data_imp_good_rows <- quant_data_imp
 
@@ -1719,35 +3857,41 @@
 
 ```
 
-```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}
+```{r echo = FALSE, fig.dim = c(9, 6.5), results = 'asis'}
 zero_sd_rownames <-
   rownames(quant_data_imp)[
-    is.na((apply(quant_data_imp, 1, sd, na.rm = TRUE)) == 0)
+    is.na((row_apply(quant_data_imp, sd, na.rm = TRUE)) == 0)
   ]
 
 if (length(zero_sd_rownames) >= nrow(quant_data_imp)) {
-  stop("All peptides have zero standard deviation.  Cannot continue.")
+  cat("All peptides have zero standard deviation.  Cannot continue.")
+  param_df_exit()
+  knitr::knit_exit()
 }
 if (length(zero_sd_rownames) > 0) {
   cat(
-    sprintf("%d peptides with zero variance were removed from statistical consideration",
-      length(zero_sd_rownames)
+    sprintf(
+      "%d %s %s",
+      length(zero_sd_rownames),
+      "peptides with zero variance",
+      "were removed from statistical consideration"
     )
   )
   zap_named_rows <- function(df, nms) {
     return(df[!(row.names(df) %in% nms), ])
   }
-  quant_data_imp <- zap_named_rows(quant_data_imp, zero_sd_rownames)
-  quant_data     <- zap_named_rows(quant_data,     zero_sd_rownames)
-  full_data      <- zap_named_rows(full_data,      zero_sd_rownames)
+  quant_data_imp <-
+    zap_named_rows(quant_data_imp, zero_sd_rownames)
+  quant_data <-
+    zap_named_rows(quant_data, zero_sd_rownames)
+  full_data <-
+    zap_named_rows(full_data, zero_sd_rownames)
+  min_group_obs_count <-
+    min_group_obs_count[names(min_group_obs_count) %notin% zero_sd_rownames]
 }
 
 if (sum(is.na(quant_data)) > 0) {
   cat("\\leavevmode\\newpage\n")
-  # data visualization
-  old_par <- par(
-    mai = par("mai") + c(0.5, 0, 0, 0)
-  )
   # Copy quant data to x
   x <- quant_data
   # x gets to have values of:
@@ -1776,22 +3920,24 @@
   show_stripchart <-
     50 > (count_red + count_blue) / length(sample_name_matches)
   if (show_stripchart) {
-    boxplot_sub <- "Light blue = data before imputation; Red = imputed data"
+    boxplot_sub <- "Light blue = data before imputation; Red = imputed data;"
   } else {
     boxplot_sub <- ""
   }
 
   # Vertical plot
   colnames(blue_dots) <- sample_name_matches
-  boxplot(
-      blue_dots
-    , las = 1 # "always horizontal"
-    , col = const_boxplot_fill
-    , ylim = ylim
+  my_ppep_distrib_bxp(
+      x = blue_dots
+    , sample_name_grow = sample_name_grow
     , main = "Peptide intensities after eliminating unusable peptides"
-    , sub = boxplot_sub
+    , varwidth = boxplot_varwidth
+    , sub = paste(boxplot_sub, "Box widths reflect number of peptides for sample")
     , xlab = "Sample"
     , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
+    , col = const_boxplot_fill
+    , notch = FALSE
+    , ylim = ylim
     )
 
   if (show_stripchart) {
@@ -1803,7 +3949,7 @@
       method = "jitter",          # Random noise
       jitter = const_stripchart_jitter,
       pch = 19,                   # Pch symbols
-      cex = const_stripsmall_cex, # Size of symbols reduced
+      cex = const_stripchart_cex, # Size of symbols reduced
       col = "lightblue",          # Color of the symbol
       vertical = TRUE,            # Vertical mode
       add = TRUE                  # Add it over
@@ -1813,49 +3959,71 @@
       method = "jitter",          # Random noise
       jitter = const_stripchart_jitter,
       pch = 19,                   # Pch symbols
-      cex = const_stripsmall_cex, # Size of symbols reduced
+      cex = const_stripchart_cex, # Size of symbols reduced
       col = "red",                # Color of the symbol
       vertical = TRUE,            # Vertical mode
       add = TRUE                  # Add it over
       )
 
   }
-  if (TRUE) {
-    # show measured values in blue on left half-violin plot
-    cat("\\leavevmode\n\\quad\n\n\\quad\n\n")
-    vioplot::vioplot(
-      x = lapply(blue_dots, function(x) x[!is.na(x)]),
-      col = "lightblue1",
-      side = "left",
-      plotCentre = "line",
-      ylim = ylim_save,
-      main = "Distributions of observed and imputed data",
-      sub = "Light blue = observed data; Pink = imputed data",
-      xlab = "Sample",
-      ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
+}
+```
+
+```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}
+if (sum(is.na(quant_data)) > 0) {
+  # show measured values in blue on left half-violin plot
+  cat("\\leavevmode\n\\quad\n\n\\quad\n\n")
+  old_par <- par(
+    mai = par("mai") + c(g_ppep_distrib_ctl$mai_bottom, 0, 0, 0),
+    cex.axis = g_ppep_distrib_ctl$axis,
+    cex.lab = 1.2
+  )
+  tryCatch(
+    {
+      vioplot::vioplot(
+        x = lapply(blue_dots, function(x) x[!is.na(x)]),
+        col = "lightblue1",
+        side = "left",
+        plotCentre = "line",
+        ylim = ylim_save,
+        main = "Distributions of observed and imputed data",
+        sub = NULL,
+        las = 2,
+        xlab = NULL,
+        ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
+        )
+      title(
+        sub = "Light blue = observed data; Pink = imputed data",
+        cex.sub = 1.0,
+        line = g_ppep_distrib_ctl$xlab_line + 1
       )
-    red_violins <- lapply(red_dots, function(x) x[!is.na(x)])
-    cols_to_delete <- c()
-    for (ix in seq_len(length(red_violins))) {
-      if (length(red_violins[[ix]]) < 1) {
-        cols_to_delete <- c(cols_to_delete, ix)
+      title(
+        xlab = "Sample",
+        line = g_ppep_distrib_ctl$xlab_line
+      )
+      red_violins <- lapply(red_dots, function(x) x[!is.na(x)])
+      cols_to_delete <- c()
+      for (ix in seq_len(length(red_violins))) {
+        if (length(red_violins[[ix]]) < 1) {
+          cols_to_delete <- c(cols_to_delete, ix)
+        }
       }
-    }
-    # destroy any unimputable columns
-    if (!is.null(cols_to_delete)) {
-      red_violins <- red_violins[-cols_to_delete]
-    }
-    # plot imputed values in red on right half-violin plot
-    vioplot::vioplot(
-      x = red_violins,
-      col = "lightpink1",
-      side = "right",
-      plotCentre = "line",
-      add = TRUE
-      )
-  }
-
-  par(old_par)
+      # destroy any unimputable columns
+      if (!is.null(cols_to_delete)) {
+        red_violins <- red_violins[-cols_to_delete]
+      }
+      # plot imputed values in red on right half-violin plot
+      vioplot::vioplot(
+        x = red_violins,
+        col = "lightpink1",
+        side = "right",
+        plotCentre = "line",
+        add = TRUE
+        )
+
+    },
+    finally = par(old_par)
+  )
 
   # density plot
   cat("\\leavevmode\n\n\n\n\n\n\n")
@@ -1893,88 +4061,88 @@
 }
 ```
 
-# Perform Quantile Normalization
+# Quantile Normalization
 
 The excellent `normalize.quantiles` function from
 *[the `preprocessCore` Bioconductor package](http://bioconductor.org/packages/release/bioc/html/preprocessCore.html)*
 performs "quantile normalization" as described Bolstad *et al.* (2003),
 DOI *[10.1093/bioinformatics/19.2.185](https://doi.org/10.1093%2Fbioinformatics%2F19.2.185)*
-and *its supplementary material [http://bmbolstad.com/misc/normalize/normalize.html](http://bmbolstad.com/misc/normalize/normalize.html)*,
+and its supplementary material [http://bmbolstad.com/misc/normalize/normalize.html](http://bmbolstad.com/misc/normalize/normalize.html),
 i.e., it assumes that the goal is to detect
 subtle differences among grossly similar samples (having similar distributions)
-by equailzing intra-quantile quantitations.
-Unfortunately, one software library upon which it depends
+by equalizing intra-quantile quantitations^[Unfortunately,
+one software library upon which `preprocessCore` depends
 *[suffers from a concurrency defect](https://support.bioconductor.org/p/122925/#9135989)*
-that requires that a specific, non-concurrent version of the library be
+that requires that a specific, non-concurrent version of the library (`openblas` version $0.3.3$) be
 installed.  The installation command equivalent to what was used to install the library to produce the results presented here is:
-```
-    conda install bioconductor-preprocesscore openblas=0.3.3
-```
+\linebreak
+`    conda install bioconductor-preprocesscore openblas=0.3.3`].
 
 
 <!--
-# Apply quantile normalization using preprocessCore::normalize.quantiles
-# ---
-# tool repository: http://bioconductor.org/packages/release/bioc/html/preprocessCore.html
-#   except this: https://support.bioconductor.org/p/122925/#9135989
-#   says to install it like this:
-#     ```
-#     BiocManager::install("preprocessCore", configure.args="--disable-threading", force = TRUE, lib=.libPaths()[1])
-#     ```
-# conda installation (necessary because of a bug in recent openblas):
-#   conda install bioconductor-preprocesscore openblas=0.3.3
-# ...
-# ---
-# normalize.quantiles {preprocessCore} --  Quantile Normalization
-#
-# Description:
-#   Using a normalization based upon quantiles, this function normalizes a
-#     matrix of probe level intensities.
-#
-#   THIS FUNCTIONS WILL HANDLE MISSING DATA (ie NA values), based on the
-#     assumption that the data is missing at random.
-#
-# Usage:
-#   normalize.quantiles(x, copy = TRUE, keep.names = FALSE)
-#
-# Arguments:
-#
-#   - x: A matrix of intensities where each column corresponds to a chip and each row is a probe.
-#
-#   - copy: Make a copy of matrix before normalizing. Usually safer to work with a copy,
-#       but in certain situations not making a copy of the matrix, but instead normalizing
-#       it in place will be more memory friendly.
-#
-#   - keep.names: Boolean option to preserve matrix row and column names in output.
-#
-# Details:
-#   This method is based upon the concept of a quantile-quantile plot extended to n dimensions.
-#     No special allowances are made for outliers. If you make use of quantile normalization
-#     please cite Bolstad et al, Bioinformatics (2003).
-#
-#   This functions will handle missing data (ie NA values), based on
-#     the assumption that the data is missing at random.
-#
-#   Note that the current implementation optimizes for better memory usage
-#     at the cost of some additional run-time.
-#
-# Value: A normalized matrix.
-#
-# Author: Ben Bolstad, bmbolstad.com
-#
-# References
-#
-#   - Bolstad, B (2001) Probe Level Quantile Normalization of High Density Oligonucleotide
-#       Array Data. Unpublished manuscript http://bmbolstad.com/stuff/qnorm.pdf
-#
-#   - Bolstad, B. M., Irizarry R. A., Astrand, M, and Speed, T. P. (2003) A Comparison of
-#       Normalization Methods for High Density Oligonucleotide Array Data Based on Bias
-#       and Variance. Bioinformatics 19(2), pp 185-193. DOI 10.1093/bioinformatics/19.2.185
-#       http://bmbolstad.com/misc/normalize/normalize.html
-# ...
+  # Apply quantile normalization using preprocessCore::normalize.quantiles
+  # ---
+  # tool repository: http://bioconductor.org/packages/release/bioc/html/preprocessCore.html
+  #   except this: https://support.bioconductor.org/p/122925/#9135989
+  #   says to install it like this:
+  #     ```
+  #     BiocManager::install("preprocessCore", configure.args="--disable-threading", force = TRUE, lib=.libPaths()[1])
+  #     ```
+  # conda installation (necessary because of a bug in recent openblas):
+  #   conda install bioconductor-preprocesscore openblas=0.3.3
+  # ...
+  # ---
+  # normalize.quantiles {preprocessCore} --  Quantile Normalization
+  #
+  # Description:
+  #   Using a normalization based upon quantiles, this function normalizes a
+  #     matrix of probe level intensities.
+  #
+  #   THIS FUNCTIONS WILL HANDLE MISSING DATA (ie NA values), based on the
+  #     assumption that the data is missing at random.
+  #
+  # Usage:
+  #   normalize.quantiles(x, copy = TRUE, keep.names = FALSE)
+  #
+  # Arguments:
+  #
+  #   - x: A matrix of intensities where each column corresponds to a chip and each row is a probe.
+  #
+  #   - copy: Make a copy of matrix before normalizing. Usually safer to work with a copy,
+  #       but in certain situations not making a copy of the matrix, but instead normalizing
+  #       it in place will be more memory friendly.
+  #
+  #   - keep.names: Boolean option to preserve matrix row and column names in output.
+  #
+  # Details:
+  #   This method is based upon the concept of a quantile-quantile plot extended to n dimensions.
+  #     No special allowances are made for outliers. If you make use of quantile normalization
+  #     please cite Bolstad et al, Bioinformatics (2003).
+  #
+  #   This functions will handle missing data (ie NA values), based on
+  #     the assumption that the data is missing at random.
+  #
+  #   Note that the current implementation optimizes for better memory usage
+  #     at the cost of some additional run-time.
+  #
+  # Value: A normalized matrix.
+  #
+  # Author: Ben Bolstad, bmbolstad.com
+  #
+  # References
+  #
+  #   - Bolstad, B (2001) Probe Level Quantile Normalization of High Density Oligonucleotide
+  #       Array Data. Unpublished manuscript http://bmbolstad.com/stuff/qnorm.pdf
+  #
+  #   - Bolstad, B. M., Irizarry R. A., Astrand, M, and Speed, T. P. (2003) A Comparison of
+  #       Normalization Methods for High Density Oligonucleotide Array Data Based on Bias
+  #       and Variance. Bioinformatics 19(2), pp 185-193. DOI 10.1093/bioinformatics/19.2.185
+  #       http://bmbolstad.com/misc/normalize/normalize.html
+  # ...
 -->
 ```{r echo = FALSE, results = 'asis'}
 
+if (print_nb_messages) nbe(see_variable(nrow(quant_data_imp)), "\n")
 if (nrow(quant_data_imp) > 0) {
   quant_data_imp_qn <- preprocessCore::normalize.quantiles(
      as.matrix(quant_data_imp), keep.names = TRUE
@@ -1983,17 +4151,20 @@
   quant_data_imp_qn <- as.matrix(quant_data_imp)
 }
 
+if (print_nb_messages) nbe(see_variable(nrow(quant_data_imp_qn)), "\n")
+
 quant_data_imp_qn <- as.data.frame(quant_data_imp_qn)
-
 write_debug_file(quant_data_imp_qn)
 
 quant_data_imp_qn_log <- log10(quant_data_imp_qn)
-
 write_debug_file(quant_data_imp_qn_log)
 
+if (print_nb_messages) nbe(see_variable(nrow(quant_data_imp_qn_log)), "\n")
+if (print_nb_messages) nbe(see_variable(ncol(quant_data_imp_qn_log)), "\n")
+
 quant_data_imp_qn_ls <- t(scale(t(log10(quant_data_imp_qn))))
 
-sel <- apply(quant_data_imp_qn_ls, 1, any_nan)
+sel <- row_apply(quant_data_imp_qn_ls, any_nan)
 quant_data_imp_qn_ls2 <- quant_data_imp_qn_ls
 
 quant_data_imp_qn_ls2 <- quant_data_imp_qn_ls2[which(sel), ]
@@ -2008,10 +4179,9 @@
 data_table_imp_qn_lt <- cbind(full_data[1:9], quant_data_imp_qn_log)
 ```
 
-<!-- ACE insertion begin -->
 ## Are normalized, imputed, log-transformed sample distributions similar?
 
-```{r echo = FALSE, fig.dim = c(9, 5.5), results = 'asis'}
+```{r echo = FALSE, fig.dim = c(9, 6.5), results = 'asis'}
 
 # Save unimputed quant_data_log for plotting below
 unimputed_quant_data_log <- quant_data_log
@@ -2034,6 +4204,22 @@
 
 
   # data visualization
+  if (TRUE) {
+
+  my_ppep_distrib_bxp(
+      x = quant_data_log
+    , sample_name_grow = sample_name_grow
+    , main = "Peptide intensities for each sample"
+    , varwidth = boxplot_varwidth
+    , sub = NULL
+    , xlab = "Sample"
+    , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
+    , col = const_boxplot_fill
+    , notch = boxplot_notch
+    )
+
+  } else {
+
   old_par <- par(
     mai = par("mai") + c(0.5, 0, 0, 0)
   , oma = par("oma") + c(0.5, 0, 0, 0)
@@ -2043,12 +4229,16 @@
   colnames(quant_data_log) <- sample_name_matches
   boxplot(
     quant_data_log
-  , las = 1
+  , las = 2
+  , cex.axis = 0.9 * sample_name_grow
   , col = const_boxplot_fill
   , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
   , xlab = "Sample"
+  , notch = boxplot_notch
+  , varwidth = boxplot_varwidth
   )
   par(old_par)
+  }
 } else {
   cat("There are no peptides to plot\n")
 }
@@ -2074,7 +4264,28 @@
 
 # ANOVA Analysis
 
-```{r, echo = FALSE}
+## Assignment of $p$-value and quality score
+
+For each phosphopeptide, ANOVA analysis was performed to compute a $p$-value representing the evidence against the "null hypothesis" ($H_0$) that the intensity does not vary significantly among sample groups.
+
+However, because as more and more phosphopeptides are tested, there is increasd probability that, by random chance, a given peptide will have a $p$-value that appears to indicate significance.  For this reason, the $p$-values were adjusted by applying the False Discovery Rate (FDR) correction from Benjamini and Hochberg (1995) [doi:10.1111/j.2517-6161.1995.tb02031.x](https:/doi.org/10.1111/j.2517-6161.1995.tb02031.x).
+
+Furthermore, to give more weight to phosphopeptides having fewer missing values, an (arbitrarily defined) quality score was assigned to each, defined as:
+
+$$
+\textit{quality}_j
+  = \frac{1 + o_{j}}{v_{j}(1 + o_{j}) + 0.005}
+$$
+
+where:
+
+- $o_j$ is the minimum number of non-missing observations per sample group for substrate $j$ for all sample groups, and
+- $v_j$ is the FDR-adjusted ANOVA $p$-value for substrate $j$.
+
+
+```{r, echo = FALSE, results = 'asis'}
+cat("\\newpage\n")
+
 # Make new data frame containing only Phosphopeptides
 #   to connect preANOVA to ANOVA (connect_df)
 connect_df <- data.frame(
@@ -2084,16 +4295,10 @@
 colnames(connect_df) <- c("Phosphopeptide", "Intensity")
 ```
 
-```{r echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
-count_of_treatment_levels <- length(levels(sample_treatment_levels))
+```{r anova, echo = FALSE, fig.dim = c(10, 12), results = 'asis'}
+g_can_run_ksea <- FALSE
+old_oma <- par("oma")
 if (count_of_treatment_levels < 2) {
-  nuke_control_sequences <-
-    function(s) {
-      s <- gsub("[\\]", "xyzzy_plugh", s)
-      s <- gsub("[$]", "\\\\$", s)
-      s <- gsub("xyzzy_plugh", "$\\\\backslash$", s)
-      return(s)
-    }
   cat(
     "ERROR!!!! Cannot perform ANOVA analysis",
     "(see next page)\\newpage\n"
@@ -2109,7 +4314,7 @@
     paste(names(quant_data_imp_qn_log), collapse = "\n\n\n"),
     "\n\\end{quote}\n\n")
 
-  regex_sample_names <- nuke_control_sequences(regex_sample_names)
+  regex_sample_names <- latex_printable_control_seqs(regex_sample_names)
 
   cat("\\leavevmode\n\n\n")
   cat("Parsing rule for SampleNames is",
@@ -2125,7 +4330,7 @@
     paste(sample_name_matches, collapse = "\n\n\n"),
     "\n\\end{quote}\n\n")
 
-  regex_sample_grouping <- nuke_control_sequences(regex_sample_grouping)
+  regex_sample_grouping <- latex_printable_control_seqs(regex_sample_grouping)
 
   cat("\\leavevmode\n\n\n")
   cat("Parsing rule for SampleGrouping is",
@@ -2144,31 +4349,90 @@
 
 } else {
 
-  p_value_data_anova_ps <-
-    apply(
-      quant_data_imp_qn_log,
-      1,
-      anova_func,
-      grouping_factor = sample_treatment_levels,
-      one_way_f = one_way_all_categories
-      )
+  if (print_nb_messages) nbe("computing p_value_data_anova_ps\n")
+  if (print_nb_messages) nbe(see_variable(nrow(quant_data_imp_qn_log)), "\n")
+  if (print_nb_messages) nbe(see_variable(ncol(quant_data_imp_qn_log)), "\n")
+  if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log[, ".NE.7C"]), "\n")
+  if (print_nb_messages) nbe(see_variable(quant_data_imp_qn_log), "\n")
+  if (print_nb_messages) nbe(see_variable(anova_func), "\n")
+  if (print_nb_messages) nbe(see_variable(smpl_trt), "\n")
+  if (print_nb_messages) nbe(see_variable(one_way_all_categories), "\n")
+  tryCatch(
+    {
+      p_value_data_anova_ps <-
+        row_apply(
+          quant_data_imp_qn_log,
+          anova_func,
+          grouping_factor = smpl_trt,
+          one_way_f = one_way_all_categories
+          )
+    },
+    error = function(e) {
+      mesg <- paste("Could not compute ANOVA because", e$message)
+      cat("\n\n", mesg, "\n\n")
+      param_df_noexit(e)
+      sink(stderr())
+      cat("\n\n", mesg, "\n\n")
+      values <- paste(param_df$parameter, "=", param_df$value, collapse = "\n")
+      cat(values)
+      sink()
+      knitr::knit_exit()
+      exit(code = 1)
+    }
+  )
+  if (print_nb_messages) nbe(see_variable(p_value_data_anova_ps), "\n")
 
   p_value_data_anova_ps_fdr <-
     p.adjust(p_value_data_anova_ps, method = "fdr")
+  my_ppep_v <- full_data[, 1]
+  p_value_data <- list(
+    phosphopeptide = my_ppep_v,
+    raw_anova_p = p_value_data_anova_ps,
+    fdr_adjusted_anova_p = p_value_data_anova_ps_fdr,
+    missing_values = rowSums(is.na(quant_data)),
+    min_group_obs_count = min_group_obs_count
+  )
   p_value_data <- data.frame(
-    phosphopeptide = full_data[, 1]
-    ,
-    raw_anova_p = p_value_data_anova_ps
-    ,
-    fdr_adjusted_anova_p = p_value_data_anova_ps_fdr
+    phosphopeptide = my_ppep_v,
+    raw_anova_p = p_value_data_anova_ps,
+    fdr_adjusted_anova_p = p_value_data_anova_ps_fdr,
+    missing_values = rowSums(is.na(quant_data)),
+    min_group_obs_count = min_group_obs_count
   )
+  p_value_data$quality <- 1.0 / with(
+      p_value_data,
+      fdr_adjusted_anova_p + 0.005 / (1 + min_group_obs_count)
+    )
+
+  p_value_data$ranking <-
+    with(
+      p_value_data,
+      switch(
+        g_intensity_hm_criteria,
+        "quality"  = order(-quality),
+        "na_count" = order(missing_values, fdr_adjusted_anova_p),
+        # the default is "p_value"
+        order(fdr_adjusted_anova_p)
+      )
+    )
+  p_value_data <- p_value_data[p_value_data$ranking, , drop = FALSE]
+
+  write.table(
+    p_value_data,
+    file = "p_value_data.txt",
+    sep = "\t",
+    col.names = TRUE,
+    row.names = FALSE,
+    quote = FALSE
+    )
+
 
   # output ANOVA file to constructed filename,
   #   e.g.    "Outputfile_pST_ANOVA_STEP5.txt"
-  #   becomes "Outpufile_pST_ANOVA_STEP5_FDR0.05.txt"
+  #   becomes "Outputfile_pST_ANOVA_STEP5_FDR0.05.txt"
 
   # Re-output datasets to include p-values
-  metadata_plus_p <- cbind(full_data[1:9], p_value_data[, 2:3])
+  metadata_plus_p <- cbind(full_data[1:9], p_value_data[, 2:ncol(p_value_data)])
   write.table(
     cbind(metadata_plus_p, quant_data_imp),
     file = imputed_data_filename,
@@ -2188,61 +4452,97 @@
     )
 
 
-  p_value_data <-
-    p_value_data[order(p_value_data$fdr_adjusted_anova_p), ]
-
   first_page_suppress <- 1
   number_of_peptides_found <- 0
   cutoff <- val_fdr[1]
   for (cutoff in val_fdr) {
-    if (number_of_peptides_found > 49) {
+    #loop through FDR cutoffs
+    if (number_of_peptides_found > g_intensity_hm_rows - 1) {
       cat("\\leavevmode\n\n\n")
       break
       }
 
-    #loop through FDR cutoffs
+    bool_1 <- (p_value_data$fdr_adjusted_anova_p < cutoff)
+    bool_2 <- (p_value_data$min_group_obs_count >= g_intensity_min_per_class)
+    g_can_run_ksea <- g_can_run_ksea || (sum(bool_2) > 0)
+    bool_4 <- (p_value_data$quality >= params$minQuality)
+    bool_3 <- as.logical(
+        as.integer(bool_1) *
+        as.integer(bool_2) *
+        as.integer(bool_4)
+      )
+    if (print_trace_messages) {
+      if (length(bool_1) > 30) {
+        cat_variable(bool_1, force_str = TRUE)
+        cat_variable(bool_2, force_str = TRUE)
+        cat_variable(bool_3, force_str = TRUE)
+      } else {
+        cat_variable(bool_1, suffix = "\n\n")
+        cat_variable(bool_2, suffix = "\n\n")
+        cat_variable(bool_3, suffix = "\n\n")
+      }
+      cat_variable(length(bool_3), digits = 0, suffix = "; ")
+      cat_variable(sum(bool_3), digits = 0, suffix = "\n\n")
+    }
 
     filtered_p <-
-      p_value_data[
-        which(p_value_data$fdr_adjusted_anova_p < cutoff),
-        , drop = FALSE
-        ]
+      p_value_data[bool_3, , drop = FALSE]
+    filtered_p <-
+      filtered_p[!is.na(filtered_p$phosphopeptide), , drop = FALSE]
+
+    if (print_trace_messages)
+      cat_variable(filtered_p, force_str = TRUE)
+
+    have_remaining_peptides <- sum(bool_3, na.rm = TRUE) > 0
+    filter_result_string <-
+      sprintf(
+        "%s, %s of %0.0f peptides remained having both %s and %s.\n\n",
+        "After filtering for ANOVA results",
+        if (have_remaining_peptides)
+          as.character(sum(bool_3, na.rm = TRUE))
+        else
+          "none",
+        length(bool_3),
+        sprintf("adjusted p-value < %s", as.character(signif(cutoff, 2))),
+        sprintf(
+          "more than %0.0f observations in some groups",
+          max(0, g_intensity_min_per_class - 1)
+        )
+      )
+
     filtered_data_filtered <-
       quant_data_imp_qn_log[
         rownames(filtered_p),
         , drop = FALSE
         ]
+    # order by p-value
     filtered_data_filtered <-
       filtered_data_filtered[
         order(filtered_p$fdr_adjusted_anova_p),
         , drop = FALSE
         ]
 
-    # <!-- ACE insertion start -->
-
-    if (nrow(filtered_p) && nrow(filtered_data_filtered) > 0) {
+    if (have_remaining_peptides && nrow(filtered_p) > 0 && nrow(filtered_data_filtered) > 0) {
       if (first_page_suppress == 1) {
         first_page_suppress <- 0
         } else {
           cat("\\newpage\n")
         }
-      if (nrow(filtered_data_filtered) > 1) {
-        subsection_header(sprintf(
-          "Intensity distributions for %d phosphopeptides whose adjusted p-value < %0.2f\n",
-          nrow(filtered_data_filtered),
-          cutoff
-        ))
-      } else {
-        subsection_header(sprintf(
-          "Intensity distribution for one phosphopeptide (%s) whose adjusted p-value < %0.2f\n",
-          rownames(filtered_data_filtered)[1],
-          cutoff
-        ))
-      }
-      cat("\n\n\n")
-      cat("\n\n\n")
-
-      old_oma <- par("oma")
+      latex_samepage({
+        cat(filter_result_string)
+        if (nrow(filtered_data_filtered) > 1) {
+          cat(subsection_header(sprintf(
+            "Intensity distributions for %d phosphopeptides\n",
+            nrow(filtered_data_filtered)
+          )))
+        } else {
+          cat(subsection_header(sprintf(
+            "Intensity distribution for one phosphopeptide (%s)\n",
+            rownames(filtered_data_filtered)[1]
+          )))
+        }
+      }) # end latex_samepage
+
       old_par <- par(
         mai = (par("mai") + c(0.7, 0, 0, 0)) * c(1, 1, 0.3, 1),
         oma = old_oma * c(1, 1, 0.3, 1),
@@ -2250,18 +4550,24 @@
         cex.axis = 0.7,
         fin = c(9, 7.25)
         )
-      # ref: https://r-charts.com/distribution/add-points-boxplot/
       # Vertical plot
       colnames(filtered_data_filtered) <- sample_name_matches
       tryCatch(
         boxplot(
           filtered_data_filtered,
           main = "Imputed, normalized intensities", # no line plot
-          las = 1,
+          las = 2,
+          cex.axis = 0.9 * sample_name_grow,
           col = const_boxplot_fill,
-          ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
+          ylab = latex2exp::TeX("$log_{10}$(peptide intensity)"),
+          notch = FALSE,
+          varwidth = boxplot_varwidth
         ),
-        error = function(e) print(e)
+        error = function(e) {
+          print(e)
+          cat_margins()
+        }
+
       )
       par(old_par)
     } else {
@@ -2272,7 +4578,7 @@
       ))
     }
 
-    if (nrow(filtered_data_filtered) > 0) {
+    if (have_remaining_peptides && nrow(filtered_data_filtered) > 0) {
       # Add Phosphopeptide column to anova_filtered table
       # The assumption here is that the first intensity is unique;
       #   this is a hokey assumption but almost definitely will
@@ -2305,37 +4611,26 @@
 
       # Produce heatmap to visualize significance and the effect of imputation
 
-      anova_filtered_merge_format <- sapply(
-        X = filtered_p$fdr_adjusted_anova_p
-        ,
-        FUN = function(x) {
-          if (x > 0.0001)
-            paste0("(%0.", 1 + ceiling(-log10(x)), "f) %s")
-          else
-            paste0("(%0.4e) %s")
-        }
-      )
-
       cat_hm_heading <- function(m, cutoff) {
-        cat("\\newpage\n")
-        if (nrow(m) > intensity_hm_rows) {
-          subsection_header(
+        if (nrow(m) > g_intensity_hm_rows) {
+          cat("\\clearpage\n")
+          cat(subsection_header(
             paste(
               sprintf("Heatmap for the %d most-significant peptides",
-                intensity_hm_rows),
+                g_intensity_hm_rows),
               sprintf("whose adjusted p-value < %0.2f\n", cutoff)
             )
-          )
+          ))
         } else {
-          if (nrow(m) == 1) {
+          if (nrow(m) == 0) {
             return(FALSE)
           } else {
-            subsection_header(
+            cat(subsection_header(
               paste(
-                sprintf("Heatmap for %d usable peptides whose", nrow(m)),
+                sprintf("Heatmap for %d usable peptide genes whose", nrow(m)),
                 sprintf("adjusted p-value < %0.2f\n", cutoff)
               )
-            )
+            ))
           }
         }
         cat("\n\n\n")
@@ -2346,40 +4641,125 @@
       # construct matrix with appropriate rownames
       m <-
         as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ])
-      if (nrow(m) > 0) {
+      nrow_m <- nrow(m)
+      ncol_m <- ncol(m)
+      if (nrow_m > 0) {
         rownames_m <- rownames(m)
-        rownames(m) <- sapply(
-          X = seq_len(nrow(m))
-          ,
+        q <- data.frame(pepname = rownames_m)
+        g <- sqldf("
+          SELECT q.pepname, substr(met.Gene_Name, 1, 30) AS gene_name
+            FROM q, metadata_plus_p AS met
+            WHERE q.pepname = met.Phosphopeptide
+            ORDER BY q.rowid
+        ")
+        tmp <- sapply(
+          X = seq_len(nrow(g)),
+          FUN = function(i) {
+            pre <- strsplit(g$gene_name[i], "; ")[[1]]
+            rslt <- paste(unique(pre), sep = "; ")
+            return(rslt)
+          }
+        )
+        tmp <- unlist(tmp)
+        tmp <-
+          make.names(tmp, unique = TRUE)
+        tmp <- sub(
+            "No_Gene_Name",
+            "not_found",
+            tmp,
+            fixed = TRUE
+          )
+        ten_trunc_names <- trunc_ppep(rownames_m)
+        tmp <- sapply(
+          X = seq_len(nrow_m),
           FUN = function(i) {
             sprintf(
-              anova_filtered_merge_format[i],
-              filtered_p$fdr_adjusted_anova_p[i],
-              rownames_m[i]
+              "(%s) %s",
+              tmp[i],
+              ten_trunc_names[i]
             )
           }
         )
+        rownames(m) <- tmp
       }
       # draw the heading and heatmap
-      if (nrow(m) > 0) {
+      if (nrow_m > 0) {
           number_of_peptides_found <-
-            draw_intensity_heatmap(
+            ppep_heatmap(
               m                       = m,
               cutoff                  = cutoff,
               hm_heading_function     = cat_hm_heading,
-              hm_main_title           = "Unimputed, unnormalized log(intensities)",
-              suppress_row_dendrogram = FALSE
+              hm_main_title           =
+                "log(intensities), row-scaled, unimputed, unnormalized",
+              suppress_row_dendrogram = FALSE,
+              master_cex              = 0.35,
+              sepcolor                = "black",
+              colsep                  = sample_colsep
             )
+          if (number_of_peptides_found > 1) {
+            cat("\\leavevmode\n")
+          }
       }
     }
   }
 }
-cat("\\leavevmode\n\n\n")
+cat(filter_result_string)
+cat("\\leavevmode\n")
+
+if (!g_can_run_ksea) {
+  errmsg <- paste("Cannot proceed with KSEA analysis",
+    "because too many values are missing.")
+  if (FALSE) cat0(
+        errmsg,
+        "\\stepcounter{offset}\n",
+        "\\stepcounter{offset}\n",
+        "\\stepcounter{offset}\n",
+        " in ",
+        table_href(),
+        ".\n\n"
+      )
+  if (FALSE) {
+    if (print_nb_messages) nbe(see_variable(p_value_data))
+  } else {
+    if (print_nb_messages) nbe(see_variable(p_value_data))
+
+    display_p_value_data <- p_value_data
+    display_p_value_data$raw_anova_p <-
+      sprintf("%0.3g", display_p_value_data$raw_anova_p)
+    display_p_value_data$fdr_adjusted_anova_p <-
+      sprintf("%0.3g", display_p_value_data$fdr_adjusted_anova_p)
+    display_p_value_data$quality <-
+      sprintf("%0.3g", display_p_value_data$quality)
+
+    headers_1st_line <-
+      c("",               "Raw ANOVA", "FDR-adj.", "Missing", "Min. #", "", "")
+    headers_2nd_line <-
+      c("Phosphopeptide", "p-value",   "p-value",  "values",  "group-obs", "Quality", "Ranking")
+    data_frame_tabbing_latex(
+      x = display_p_value_data,
+      tabstops = c(2.75, 0.80, 0.80, 0.5, 0.6, 0.60),
+      use_subsubsection_header = FALSE,
+      headings = c(headers_1st_line, headers_2nd_line),
+      caption = "ANOVA results"
+
+      )
+  }
+  data_frame_tabbing_latex(
+    x = save_sample_treatment_df,
+    tabstops = c(1.25, 1.25),
+    caption = "Sample classes",
+    use_subsubsection_header = FALSE
+    )
+  param_df_exit()
+  knitr::knit_exit()
+  return(invisible(-1))
+}
+
 ```
 
 ```{r sqlite, echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
 
-if (count_of_treatment_levels > 1) {
+if (g_can_run_ksea && count_of_treatment_levels > 1) {
   # Prepare two-way contrasts with adjusted p-values
   # Strategy:
   # - use imputed, log-transformed data:
@@ -2395,7 +4775,7 @@
 
   # Each contrast is between a combination of trt levels
   m2 <- combn(
-    x = seq_len(length(levels(sample_treatment_levels))),
+    x = seq_len(length(levels(smpl_trt))),
     m = 2,
     simplify = TRUE
   )
@@ -2409,13 +4789,13 @@
       return(
         data.frame(
           contrast = cntrst,
-          level = sample_treatment_levels[
-              sample_treatment_levels %in%
-                levels(sample_treatment_levels)[c(lvl1, lvl2)]
+          level = smpl_trt[
+              smpl_trt %in%
+                levels(smpl_trt)[c(lvl1, lvl2)]
             ],
           label = sample_name_matches[
-              sample_treatment_levels %in%
-                levels(sample_treatment_levels)[c(lvl1, lvl2)]
+              smpl_trt %in%
+                levels(smpl_trt)[c(lvl1, lvl2)]
             ]
         )
       )
@@ -2595,6 +4975,7 @@
   )
 
   # - create contrast-metadata table
+  if (print_nb_messages) nbe("CREATE TABLE contrast_lvl_lvl_metadata")
   dml_no_rows_exec(db, "
       CREATE TABLE contrast_lvl_lvl_metadata
         AS
@@ -2705,12 +5086,11 @@
 
     # - run the two-level (one-way) test
     p_value_data_contrast_ps <-
-      apply(
-        X = contrast_cast_data,
-        MARGIN = 1, # apply to rows
-        FUN = anova_func,
+      row_apply(
+        x = contrast_cast_data,
+        fun = anova_func,
         grouping_factor =
-          as.factor(as.numeric(grouping_factor$level)), # anova_func arg2
+          as.factor(grouping_factor$level), # anova_func arg2
         one_way_f = one_way_two_categories, # anova_func arg3
         simplify = TRUE # TRUE is the default for simplify
         )
@@ -2922,6 +5302,8 @@
       ;
       "
   )
+  # We are done with DDL and insertion
+  RSQLite::dbDisconnect(db)
 }
 ```
 
@@ -2929,7 +5311,7 @@
 cat("\\newpage\n")
 ```
 
-# KSEA Analysis
+# KSEA Analysis Summaries
 
 Results of Kinase-Substrate Enrichment Analysis are presented here, if the substrates for any kinases are relatively enriched.   Enrichments are found by the CRAN `KSEAapp` package:
 
@@ -2940,24 +5322,24 @@
 For each kinase, $i$, and each two-way contrast of treatments, $j$, an enrichment $z$-score is computed as:
 
 $$
-\text{kinase enrichment score}_{j,i} = \frac{(\overline{s}_{j,i} - \overline{p}_j)\sqrt{m_{j,i}}}{\delta_j}
+\text{kinase enrichment }z\text{-score}_{j,i} = \frac{(\overline{`r sfc`}_{j,i} - \overline{`r pfc`}_j)\sqrt{m_{j,i}}}{\delta_j}
 $$
 
 and fold-enrichment is computed as:
 
 $$
-\text{Enrichment}_{j,i} = \frac{\overline{s}_{j,i}}{\overline{p}_j}
+\text{Enrichment}_{j,i} = \frac{\overline{`r sfc`}_{j,i}}{\overline{`r pfc`}_j}
 $$
 
 where:
 
-- $\overline{s}_{j,i}$ is the mean $\log_2 (|\text{fold-change|})$ in intensities (for contrast $j$) of known substrates of the kinase $i$,
-- $\overline{p}_j$ is the mean $\log_2 (|\text{fold-change}|)$ of all phosphosites identified in contrast $j$, and
+- $\overline{`r sfc`}_{j,i}$ is the mean `r pfc_txt` in intensities of known substrates of the kinase $i$ in contrast $j$,
+- $\overline{`r pfc`}_j$ is the mean `r pfc_txt` of all phosphosites identified in contrast $j$, and
 - $m_{j,i}$ is the total number of phosphosite substrates of kinase $i$ identified in contrast $j$,
-- $\delta_j$ is the standard deviation of the $\log_2 (|\text{fold-change}|)$ for contrast $j$ across all phosphosites in the dataset.
+- $\delta_j$ is the standard deviation of the $\log_2 (\text{fold-change})$ for contrast $j$ across all phosphosites in the dataset.
 - Note that the absolute value of fold-change is used so that both increased and decreased substrates of a kinase will contribute to its enrichment score.
 
-$\text{FDR}_{j,i}$ is computed from the $p$-value for the z-score using the R `stats::p.adjust` function, applying the False Discovery Rate correction from Benjamini and Hochberg (1995) [doi:10.1111/j.2517-6161.1995.tb02031.x](https:/doi.org/10.1111/j.2517-6161.1995.tb02031.x)
+$\text{FDR}_{j,i}$ is the False Discovery Rate corrected kinase enrichment score.
 
 Color intensity in heatmaps reflects magnitude of $z$-score for enrichment of respective kinase in respective contrast; hue reflects the sign of the $z$-score (blue, negative; red, positive).
 
@@ -2965,9 +5347,14 @@
 
 - Kinase names are generally as presented at Phospho.ELM [http://phospho.elm.eu.org/kinases.html](http://phospho.elm.eu.org/kinases.html) (when available), although Phospho.ELM data are not yet incorporated into this analysis.
 - Kinase names having the suffix '(HPRD)' are as presented at [http://hprd.org/serine_motifs](http://hprd.org/serine_motifs) and [http://hprd.org/tyrosine_motifs](http://hprd.org/tyrosine_motifs) and are as originally reported in the Amanchy et al., 2007 (doi: [10.1038/nbt0307-285](https://doi.org/10.1038/nbt0307-285)).
-- Kinase-strate deata were also taken from [http://networkin.science/download.shtml](http://networkin.science/download.shtml) and from PhosphoSitePlus [https://www.phosphosite.org/staticDownloads](https://www.phosphosite.org/staticDownloads).
-
-```{r ksea, echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
+- Kinase-substrate data were also taken from [http://networkin.science/download.shtml](http://networkin.science/download.shtml) and from PhosphoSitePlus [https://www.phosphosite.org/staticDownloads](https://www.phosphosite.org/staticDownloads).
+
+For each enriched kinase, a heatmap showing the intensities is presented for up to `r g_intensity_hm_rows` substrates, i.e., those substrates having the highest"quality".
+
+Where possible, a heatmap of the correlations among these the selected substrates is also presented; if correlations cannot be computed (because of too many missing values), then the covariances are heatmapped for substrates having a variance greater than 1.
+
+```{r ksea, echo = FALSE, fig.dim = c(12, 14.5), results = 'asis'}
+cat("\\clearpage\n")
 
 db <- RSQLite::dbConnect(RSQLite::SQLite(), ksea_app_prep_db)
 
@@ -3046,21 +5433,9 @@
   contrast_label <- sprintf("%s -> %s", cntrst_b_level, cntrst_a_level)
   contrast_longlabel <- (
     sprintf(
-      "Trt %s {%s} -> Trt %s {%s}",
+      "Class %s -> Class %s",
       contrast_metadata_df[i_cntrst, "b_level"],
-      gsub(
-        pattern = ";",
-        replacement = ", ",
-        x = contrast_metadata_df[i_cntrst, "b_samples"],
-        fixed = TRUE
-      ),
-      contrast_metadata_df[i_cntrst, "a_level"],
-      gsub(
-        pattern = ";",
-        replacement = ", ",
-        x = contrast_metadata_df[i_cntrst, "a_samples"],
-        fixed = TRUE
-      )
+      contrast_metadata_df[i_cntrst, "a_level"]
     )
   )
   kseaapp_input <-
@@ -3091,18 +5466,44 @@
     expr = {
         ksea_scores_rslt <-
           ksea_scores(
-            ksdata           = pseudo_ksdata, # KSEAapp::KSData,
-            px               = kseaapp_input,
-            networkin        = TRUE,
-            networkin_cutoff = 2
+            ksdata                  = pseudo_ksdata,
+            px                      = kseaapp_input,
+            networkin               = TRUE,
+            networkin_cutoff        = 2,
+            minimum_substrate_count = ksea_min_substrate_count
             )
 
+        if (FALSE) {
+          ksea_scores_rslt <-
+            ksea_scores_rslt[
+              ksea_scores_rslt$m >= ksea_min_substrate_count,
+              ,
+              drop = FALSE
+            ]
+        }
+
+        if (FALSE) {
+          data_frame_tabbing_latex(
+            x = ksea_scores_rslt,
+            tabstops = c(0.8, 0.8, 0.8, 0.8, 0.8, 0.8),
+            caption = paste("KSEA scores for contrast ",
+                        cntrst_b_level, "to",  cntrst_a_level),
+            use_subsubsection_header = FALSE
+          )
+        }
+
+        if (FALSE) {
+          if (print_nb_messages) nbe("Output contents of `ksea_scores_rslt` table\n")
+          cat_variable(ksea_scores_rslt)
+          cat("\n\\clearpage\n")
+        }
+
         if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) {
           next_index <- 1 + next_index
           rslt$score_list[[next_index]] <- ksea_scores_rslt
           rslt$name_list[[next_index]] <- contrast_label
           rslt$longname_list[[next_index]] <- contrast_longlabel
-          low_fdr_print(
+          ksea_low_fdr_print(
             rslt = rslt,
             i_cntrst = i_cntrst,
             i = next_index,
@@ -3113,21 +5514,24 @@
             )
         }
       },
-    error = function(e) str(e)
+    error = function(e) {
+        str(e)
+        cat_margins()
+      }
   )
 }
 
 plotted_kinases <- NULL
-if (length(rslt$score_list) > 1) {
+if (g_can_run_ksea && length(rslt$score_list) > 1) {
   for (i in seq_len(length(ksea_heatmap_titles))) {
     hdr <- ksea_heatmap_titles[[i]]
     which_kinases <- i
 
     cat("\\clearpage\n\\begin{center}\n")
     if (i == const_ksea_astrsk_kinases) {
-      subsection_header(hdr)
+      cat(subsection_header(hdr))
     } else {
-      subsection_header(hdr)
+      cat(subsection_header(hdr))
     }
     cat("\\end{center}\n")
 
@@ -3146,7 +5550,7 @@
       m_cutoff = 1,
       # a numeric value between 0 and 1 indicating the p-value/FDR cutoff
       #   for indicating significant kinases in the heatmap
-      p_cutoff = 0.05,
+      p_cutoff = params$kseaCutoffThreshold,
       # a binary input of TRUE or FALSE, indicating whether or not to perform
       #   hierarchical clustering of the sample columns
       sample_cluster = TRUE,
@@ -3163,131 +5567,446 @@
       # - 1 : all kinases
       # - 2 : significant kinases
       # - 3 : non-significant kinases
-      which_kinases = which_kinases
+      which_kinases = which_kinases,
+      margins = c(7, 15)
     )
-    cat("\\begin{center}\n")
-    cat("Color intensities reflects $z$-score magnitudes; hue reflects $z$-score sign.  Asterisks reflect significance.\n")
-    cat("\\end{center}\n")
+    if (!is.null(plotted_kinases)) {
+      cat("\\begin{center}\n")
+      if (which_kinases != const_ksea_nonastrsk_kinases)
+        cat("Asterisks reflect significance.\n")
+      cat("\\end{center}\n")
+    }
   } # end for (i in ...
 } # end if (length ...
-
-for (i_cntrst in seq_len(length(rslt$score_list))) {
-  next_index <- i_cntrst
-  cntrst_a_level <- contrast_metadata_df[i_cntrst, "a_level"]
-  cntrst_b_level <- contrast_metadata_df[i_cntrst, "b_level"]
-  cntrst_fold_change <- contrast_metadata_df[i_cntrst, 6]
-  contrast_label <- sprintf("%s -> %s", cntrst_b_level, cntrst_a_level)
-  contrast_longlabel <- (
-    sprintf(
-      "Trt %s {%s} -> Trt %s {%s}",
-      contrast_metadata_df[i_cntrst, "b_level"],
-      gsub(
-        pattern = ";",
-        replacement = ", ",
-        x = contrast_metadata_df[i_cntrst, "b_samples"],
-        fixed = TRUE
-      ),
-      contrast_metadata_df[i_cntrst, "a_level"],
-      gsub(
-        pattern = ";",
-        replacement = ", ",
-        x = contrast_metadata_df[i_cntrst, "a_samples"],
-        fixed = TRUE
+```
+
+```{r kseabar_calc, echo = FALSE, fig.dim = c(9.5, 6), results = 'asis'}
+ksea_prints <- list()
+ksea_barplots <- list()
+    for (i_cntrst in seq_len(length(rslt$score_list))) {
+      next_index <- i_cntrst
+      cntrst_a_level <- contrast_metadata_df[i_cntrst, "a_level"]
+      cntrst_b_level <- contrast_metadata_df[i_cntrst, "b_level"]
+      cntrst_fold_change <- contrast_metadata_df[i_cntrst, 6]
+      contrast_label <- sprintf("%s -> %s", cntrst_b_level, cntrst_a_level)
+      contrast_longlabel <- (
+        sprintf(
+          "Class %s -> Class %s",
+          contrast_metadata_df[i_cntrst, "b_level"],
+          contrast_metadata_df[i_cntrst, "a_level"]
+        )
+      )
+      main_title <- (
+          sprintf(
+          "Change from treatment %s to treatment %s",
+          contrast_metadata_df[i_cntrst, "b_level"],
+          contrast_metadata_df[i_cntrst, "a_level"]
+        )
       )
+      sub_title <- contrast_longlabel
+      tryCatch(
+        expr = {
+            ksea_scores_rslt <- rslt$score_list[[next_index]]
+            if (print_nb_messages) nbe(see_variable(ksea_scores_rslt)) #ACE
+
+            if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) {
+              sink(deferred <- file())
+              ksea_low_fdr_print(
+                rslt = rslt,
+                i_cntrst = i_cntrst,
+                i = next_index,
+                a_level = cntrst_a_level,
+                b_level = cntrst_b_level,
+                fold_change = cntrst_fold_change,
+                caption = contrast_longlabel,
+                write_db = FALSE,
+                anchor = const_table_anchor_t
+                )
+              cat("\n")
+              sink()
+              lines <-
+                paste(
+                 readLines(deferred, warn = FALSE),
+                 collapse = "\n"
+                 )
+              close(deferred)
+              sq_put(ksea_prints, lines)
+              sink(stderr())
+              cat("\n---\n")
+              cat_variable(ksea_prints)
+              barplot_closure <-
+                ksea_low_fdr_barplot_factory(
+                  rslt = rslt,
+                  i_cntrst = i_cntrst,
+                  i = next_index,
+                  a_level = cntrst_a_level,
+                  b_level = cntrst_b_level,
+                  fold_change = cntrst_fold_change,
+                  caption = contrast_longlabel
+                )
+              if (rlang::is_closure(barplot_closure))
+                sq_put(ksea_barplots, barplot_closure)
+              else
+                sq_put(ksea_barplots, no_op)
+              str(ksea_barplots)
+              cat("\n...\n")
+              sink()
+            }
+          },
+        error = function(e) {
+            str(e)
+            cat_margins()
+          }
+      )
+    }
+```
+
+```{r phosphoelm_kinase_upid_desc, echo = FALSE, fig.dim = c(12, 13.7), results = 'asis'}
+
+have_kinase_descriptions <-
+  if (!is.null(bzip2df(kinase_uprt_desc_lut, kinase_uprt_desc_lut_bz2)) &&
+      !is.null(bzip2df(kinase_name_uprt_lut, kinase_name_uprt_lut_bz2))
+  ) {
+    rownames(kinase_uprt_desc_lut) <- kinase_uprt_desc_lut$UniProtID
+    kinase_name_to_desc_uprt <- function(s) {
+      rslt <- NULL
+      tryCatch(
+        {
+          which_rows <- eval(s == kinase_name_uprt_lut$kinase)
+          kinase_uprtid <-
+            kinase_name_uprt_lut[which_rows, 2]
+          # filter for first _HUMAN match if any
+          grepl_human <- grepl("_HUMAN$", kinase_uprtid)
+          if (0 < sum(grepl_human))
+            kinase_uprtid <- kinase_uprtid[grepl_human]
+          # filter for first match if any
+          if (0 < length(kinase_uprtid)) {
+            kinase_uprtid <- kinase_uprtid[1]
+            kinase_desc <- kinase_uprt_desc_lut[kinase_uprtid, 2]
+            if (!is.na(kinase_desc))
+              rslt <- c(kinase_desc, kinase_uprtid)
+            else
+              rslt <- c(kinase_desc, "")
+          }
+        },
+        warning = str
+      )
+      rslt
+    }
+    TRUE
+  } else {
+    kinase_name_to_desc_uprt <- function(s) NULL
+    FALSE
+  }
+```
+
+```{r write_params, echo = FALSE, results = 'asis'}
+# perhaps this should be moved into the functions section, eventually ...
+write_params <- function(db) {
+  # write parameters to report
+
+  # write parameters to SQLite output
+
+  mqppep_anova_script_param_df <- data.frame(
+    script    = "mqppep_anova_script.Rmd",
+    parameter = names(param_unlist),
+    value     = param_unlist
     )
+  ddl_exec(db, "
+    DROP TABLE IF EXISTS script_parameter;
+    "
   )
-  main_title <- (
-      sprintf(
-      "Change from treatment %s to treatment %s",
-      contrast_metadata_df[i_cntrst, "b_level"],
-      contrast_metadata_df[i_cntrst, "a_level"]
-    )
+  ddl_exec(db, "
+    CREATE TABLE IF NOT EXISTS script_parameter(
+      script    TEXT,
+      parameter TEXT,
+      value     ANY,
+      UNIQUE (script, parameter) ON CONFLICT REPLACE
+      )
+      ;
+    "
+  )
+  RSQLite::dbWriteTable(
+    conn = db,
+    name = "script_parameter",
+    value = mqppep_anova_script_param_df,
+    append = TRUE
   )
-  sub_title <- contrast_longlabel
-  tryCatch(
-    expr = {
-        ksea_scores_rslt <- rslt$score_list[[next_index]]
-
-        if (0 < sum(!is.nan(ksea_scores_rslt$FDR))) {
-          low_fdr_barplot(
-            rslt = rslt,
-            i_cntrst = i_cntrst,
-            i = next_index,
-            a_level = cntrst_a_level,
-            b_level = cntrst_b_level,
-            fold_change = cntrst_fold_change,
-            caption = contrast_longlabel
-            )
-        }
-      },
-    error = function(e) str(e)
-  )
+
+  loaded_packages_df <-  sessioninfo::package_info("loaded")
+  loaded_packages_df[, "library"] <- as.character(loaded_packages_df$library)
+  loaded_packages_df <- data.frame(
+    package = loaded_packages_df$package,
+    version = loaded_packages_df$loadedversion,
+    date    = loaded_packages_df$date
+    )
+  #ACE cat("\\clearpage\n\\section{R package versions}\n")
+  #ACE data_frame_tabbing_latex(
+  #ACE   x = loaded_packages_df,
+  #ACE   tabstops = c(2.5, 1.25),
+  #ACE   caption = "R package versions"
+  #ACE   )
+  cat("\\clearpage\n\\section{Input parameter settings}\n")
+  data_frame_tabbing_latex(
+    x = param_df,
+    tabstops = c(1.75),
+    underscore_whack = TRUE,
+    caption = "Input parameters",
+    verbatim = FALSE
+    )
+}
+
+if (!have_kinase_descriptions) {
+  write_params(db)
+  # We are done with output
+  RSQLite::dbDisconnect(db)
+  param_df_exit()
+  knitr::knit_exit()
 }
 ```
 
-```{r enriched, echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
-
-# Use enriched kinases to find enriched kinase-substrate pairs
-enriched_kinases <- data.frame(kinase = ls(ksea_asterisk_hash))
-all_enriched_substrates <- sqldf("
-  SELECT
-    gene AS kinase,
-    ppep,
-    '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label
-  FROM (
-    SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep
-      FROM pseudo_ksdata
-      WHERE GENE IN (SELECT kinase FROM enriched_kinases)
+```{r kseabar, echo = FALSE, fig.dim = c(9.5, 12.3), results = 'asis'}
+if (have_kinase_descriptions) {
+  my_section_header <-
+    sprintf(
+      "inases whose KSEA %s < %s\n",
+      ksea_cutoff_statistic,
+      signif(ksea_cutoff_threshold, 2)
+    )
+
+  # Use enriched kinases to find enriched kinase-substrate pairs
+  enriched_kinases <- data.frame(kinase = ls(ksea_asterisk_hash))
+
+  enriched_kinase_descs <-
+    Reduce(
+      f = function(l, r) {
+        lkup <- kinase_name_to_desc_uprt(r)
+        if (is.null(lkup)) l
+          else r2 <- rbind(
+            l,
+            data.frame(
+              kinase = r,
+              uniprot_id = lkup[2],
+              description = lkup[1]
+            )
+          )
+        },
+      x = enriched_kinases$kinase,
+      init = NULL
+    )
+
+  if (length(enriched_kinase_descs) > 0 && nrow(enriched_kinase_descs) > 0) {
+    cat("\n\\clearpage\n")
+    data_frame_tabbing_latex(
+      x = enriched_kinase_descs,
+      tabstops = c(0.9, 1.3),
+      headings = c("Kinase", "UniProt ID", "Description"),
+      caption = paste0("Descriptions of k", my_section_header)
+    )
+  }
+
+  if (FALSE) {
+    cat_variable(sqldf("SELECT kinase FROM enriched_kinases"))
+    cat_variable(sqldf("
+       SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep
+         FROM pseudo_ksdata
+         WHERE gene IN (SELECT kinase FROM enriched_kinases)
+    "))
+    data_frame_table_latex(
+      x = sqldf("
+       SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep
+         FROM pseudo_ksdata
+         WHERE gene IN (SELECT kinase FROM enriched_kinases)
+      "),
+      justification = "l l l",
+      centered = TRUE,
+      caption = "substrates of enriched kinases",
+      anchor = c(const_table_anchor_p, const_table_anchor_t),
+      underscore_whack = TRUE
     )
-  GROUP BY ppep
-  ")
-
-# helper used to label per-kinase substrate enrichment figure
-cat_enriched_heading <- function(m, cut_args) {
-  cutoff <- cut_args$cutoff
-  kinase <- cut_args$kinase
-  statistic <- cut_args$statistic
-  threshold <- cut_args$threshold
-  cat("\\newpage\n")
-  if (nrow(m) > intensity_hm_rows) {
-    subsection_header(
-      paste(
+    data_frame_table_latex(
+      x = sqldf("
+         SELECT
+           gene AS kinase,
+           ppep,
+           sub_gene,
+           '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label,
+           fdr_adjusted_anova_p,
+           quality,
+           min_group_obs_count
+         FROM (
+           SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep
+             FROM pseudo_ksdata
+             WHERE gene IN (SELECT kinase FROM enriched_kinases)
+           ),
+           p_value_data
+         WHERE ppep = phosphopeptide
+         GROUP BY kinase, ppep
+         ORDER BY kinase, ppep, p_value_data.quality DESC
+      "),
+      justification = "l l l l l l l",
+      centered = TRUE,
+      caption = "labeled substrates of enriched kinases",
+      anchor = c(const_table_anchor_p, const_table_anchor_t),
+      underscore_whack = TRUE
+    )
+  }
+  all_enriched_substrates <- sqldf("
+   SELECT
+     gene AS kinase,
+     ppep,
+     sub_gene,
+     '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label,
+     fdr_adjusted_anova_p,
+     quality,
+     min_group_obs_count
+   FROM (
+     SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep
+       FROM pseudo_ksdata
+       WHERE gene IN (SELECT kinase FROM enriched_kinases)
+     ),
+     p_value_data
+   WHERE ppep = phosphopeptide
+   GROUP BY kinase, ppep
+   ORDER BY kinase, ppep, p_value_data.quality DESC
+   ")
+
+  all_enriched_substrates <-
+    all_enriched_substrates[
+      all_enriched_substrates$quality >= params$minQuality,
+      ,
+      drop = FALSE
+    ]
+
+  all_enriched_substrates$sub_gene <-
+    sub(
+      " ///.*",
+      " ...",
+      all_enriched_substrates$sub_gene
+    )
+
+  all_enriched_substrates$label <-
+    with(
+      all_enriched_substrates,
+      sprintf(
+        "(%s-%s) %s",
+        kinase,
+        trunc_subgene(sub_gene),
+        ppep
+      )
+    )
+
+  # this global is set to TRUE by cat_enriched_heading immediately below
+  g_neednewpage <- FALSE
+
+  # helper used to label per-kinase substrate enrichment figure
+  cat_enriched_heading <- function(m, cut_args) {
+    cutoff <- cut_args$cutoff
+    kinase <- cut_args$kinase
+    if (g_neednewpage) cat("\\newpage\n")
+    g_neednewpage <- TRUE
+    if (nrow(m) > g_intensity_hm_rows) {
+      cat(subsection_header(
         sprintf(
-          "Lowest p-valued %d (of %d) enriched %s-substrates,",
-          intensity_hm_rows,
+          "Highest-quality %d (of %d) enriched %s-substrates",
+          g_intensity_hm_rows,
           nrow(m),
           kinase
-        ),
-        sprintf(" KSEA %s < %0.2f\n", statistic, threshold)
-      )
-    )
-  } else {
-    if (nrow(m) == 1) {
-      return(FALSE)
+        )
+      ))
     } else {
-      subsection_header(
-        paste(
+      if (nrow(m) == 0) {
+        return(FALSE)
+      } else {
+        nrow_m <- nrow(m)
+        cat(subsection_header(
           sprintf(
-            "%d enriched %s-substrates,",
-            nrow(m),
-            kinase
-            ),
-          sprintf(
-            " KSEA %s < %0.2f\n",
-            statistic,
-            threshold
+            "%d enriched %s-substrate%s",
+            nrow_m,
+            kinase,
+            if (nrow_m > 1) "s" else ""
             )
+        ))
+      }
+    }
+    cat("\n\n\n")
+    cat("\n\n\n")
+    return(TRUE)
+  }
+
+  # --------------------------------
+  # hack begin - show all substrates
+  enriched_substrates <- all_enriched_substrates
+  # add "FALSE &&" to prevent listing of substrates
+  if (show_enriched_substrates && nrow(enriched_substrates) > 0) {
+    short_row_names <- sub(
+        "$FAILED_MATCH_GENE_NAME",
+        "not_found",
+        enriched_substrates$sub_gene,
+        fixed = TRUE
+      )
+
+    if (print_nb_messages) nbe(see_variable(enriched_substrates))
+    substrates_df <- with(
+        enriched_substrates,
+        data.frame(
+          kinase = kinase,
+          substrate = sub(" ///*", "...", short_row_names),
+          anova_p_value = signif(fdr_adjusted_anova_p, 2),
+          min_group_obs_count = signif(min_group_obs_count, 0),
+          quality = signif(quality, 3),
+          sequence = trunc_n(30)(ppep)
         )
       )
-    }
+
+    substrates_df <- substrates_df[
+      with(substrates_df, order(kinase, -quality)),
+      ,
+      drop = FALSE
+      ]
+
+    if (print_nb_messages) nbe(see_variable(substrates_df))
+    if (nrow(substrates_df) < 1)
+      substrates_df$sequence <- c()
+    if (print_nb_messages) nbe(see_variable(substrates_df))
+    names(substrates_df) <- headers_2nd_line <-
+      c("Kinase", "Substrate", "p-value", "per group)", "quality", "Sequence")
+    headers_1st_line <- c("", "", "ANOVA", "min(values", "", "")
+    data_frame_tabbing_latex(
+      x = substrates_df,
+      tabstops = c(1.2, 0.8, 0.5, 0.65, 0.5),
+      headings = c(headers_1st_line, headers_2nd_line),
+      caption = "Details for all enriched substrates of enriched kinases"
+    )
+    rm(
+      enriched_substrates,
+      substrates_df,
+      short_row_names,
+      headers_1st_line,
+      headers_2nd_line
+      )
   }
-  cat("\n\n\n")
-  cat("\n\n\n")
-  return(TRUE)
+  cat("\\clearpage\n")
+  # hack end - show all substrates
+  # --------------------------------
+
+  # print deferred tables and graphs for kinases from contrasts
+  for (i_cntrst in seq_len(length(ksea_prints))) {
+    #latex_samepage({
+      cat(ksea_prints[[i_cntrst]])
+      cat("\n")
+      ksea_barplots[[i_cntrst]]()
+      cat("\n")
+      cat("\\clearpage\n")
+    #})
+  }
+
 }
-
-# Disabling heatmaps for substrates pending decision whether to eliminate them altogether
-if (FALSE)
+```
+
+```{r enriched, echo = FALSE, fig.dim = c(12, 13.7), results = 'asis'}
+if (g_can_run_ksea) {
+  g_did_enriched_header <- FALSE
   for (kinase_name in sort(enriched_kinases$kinase)) {
     enriched_substrates <-
       all_enriched_substrates[
@@ -3295,135 +6014,256 @@
         ,
         drop = FALSE
         ]
+    ten_trunc_ppep <- trunc_enriched_substrate(enriched_substrates$ppep)
+    enriched_substrates$label <- with(
+      enriched_substrates,
+      sprintf(
+        "(%s) %s",
+        make.names(
+          sub("$FAILED_MATCH_GENE_NAME", "not_found", sub_gene, fixed = TRUE),
+          unique = TRUE
+        ),
+        ten_trunc_ppep
+        )
+      )
     # Get the intensity values for the heatmap
     enriched_intensities <-
       as.matrix(unimputed_quant_data_log[enriched_substrates$ppep, , drop = FALSE])
+
     # Remove rows having too many NA values to be relevant
-    na_counter <- is.na(enriched_intensities)
-    na_counts <- apply(na_counter, 1, sum)
-    enriched_intensities <-
-      enriched_intensities[na_counts < ncol(enriched_intensities) / 2, , drop = FALSE]
+    good_rows <- (rowSums(enriched_intensities, na.rm = TRUE) != 0)
+    #ACE nbe(see_variable(good_rows), "\n")
+    enriched_substrates <- enriched_substrates[good_rows, , drop = FALSE]
+    enriched_intensities <- enriched_intensities[good_rows, , drop = FALSE]
+
     # Rename the rows with the display-name for the heatmap
-    rownames(enriched_intensities) <-
+    short_row_names <- sub(
+        "$FAILED_MATCH_GENE_NAME",
+        "not_found",
+        enriched_substrates$sub_gene,
+        fixed = TRUE
+      )
+    short_row_names <-
+      make.names(short_row_names, unique = TRUE)
+    long_row_names <-
       sapply(
         X = rownames(enriched_intensities),
         FUN = function(rn) {
           enriched_substrates[enriched_substrates$ppep == rn, "label"]
         }
       )
+    rownames(enriched_intensities) <- long_row_names
     # Format as matrix for heatmap
     m <- as.matrix(enriched_intensities)
+    rownames(m) <- trunc_enriched_substrate(rownames(m))
+
+    #ACE nb("m with bad rows: ", see_variable(m), "\n")
+    #ACE good_rows <- (rowSums(m, na.rm = TRUE) != 0)
+    #ACE nb(see_variable(good_rows), "\n")
+    #ACE m <- m[good_rows, , drop = FALSE]
+    #ACE nb("m without(?) bad rows: ", see_variable(m), "\n")
+    #ACE nb(see_variable(short_row_names), "\n")
+    #ACE local_short_row_names <- short_row_names[good_rows]
+    #ACE local_long_row_names  <- long_row_names[good_rows]
+    #ACE local_enriched_intensities <- enriched_intensities[local_long_row_names, ]
+
     # Draw the heading and heatmap
-    if (nrow(m) > 0) {
+    nrow_m <- nrow(m)
+    if (nrow_m > 0) {
+      if (!g_did_enriched_header) {
+        cat("\n\\clearpage\n")
+        cat(section_header(paste0("K", my_section_header)))
+        g_did_enriched_header <- TRUE
+      }
+      is_na_m <- is.na(m)
+      cellnote_m <- is_na_m
+      cellnote_m[!is_na_m] <- ""
+      cellnote_m[is_na_m] <- "NA"
       cut_args <- new_env()
       cut_args$cutoff <- cutoff
       cut_args$kinase <- kinase_name
       cut_args$statistic <- ksea_cutoff_statistic
       cut_args$threshold <- ksea_cutoff_threshold
       number_of_peptides_found <-
-        draw_intensity_heatmap(
+        ppep_heatmap(
           m                       = m,
+          cellnote                = cellnote_m,
           cutoff                  = cut_args,
           hm_heading_function     = cat_enriched_heading,
           hm_main_title
             = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates",
-          suppress_row_dendrogram = FALSE
+          suppress_row_dendrogram = FALSE,
+          master_cex              = 0.35,
+          sepcolor                = "black",
+          colsep                  = sample_colsep
         )
+        if (number_of_peptides_found > 1) {
+
+          tryCatch(
+            {
+              rownames(m) <- short_row_names
+              cov_heatmap(m, nrow_m > g_intensity_hm_rows)
+            },
+            error = function(e) {
+                cat(
+                  sprintf(
+                    "ERROR: %s\n\\newline\n",
+                    mget("e")
+                  )
+                )
+                cat(
+                  sprintf(
+                    "message: %s\n\\newline\n",
+                    e$message
+                  )
+                )
+                cat_margins()
+              }
+          )
+        }
+      substrates_df <- with(
+          enriched_substrates,
+          data.frame(
+            substrate = sub(" ///*", "...", short_row_names),
+            sequence = trunc_long_ppep(ppep),
+            anova_p_value = signif(fdr_adjusted_anova_p, 2),
+            min_group_obs_count = signif(min_group_obs_count, 0),
+            quality = signif(quality, 3)
+          )
+        )
+      excess_substrates <- nrow(substrates_df) > g_intensity_hm_rows
+      if (excess_substrates)
+        substrates_df <- substrates_df[1:g_intensity_hm_rows, ]
+      names(substrates_df) <- headers_2nd_line <-
+        c("Substrate", "Sequence", "p-value", "per group)", "quality")
+      headers_1st_line <- c("", "", "ANOVA", "min(values", "")
+      if (1 < nrow(enriched_substrates))
+        cat("\n\\newpage\n")
+      cat(subsubsection_header(
+        sprintf(
+          "Details for %s%s-substrates",
+          if (excess_substrates)
+            sprintf(
+              "%s \"highest quality\" ",
+              g_intensity_hm_rows
+            )
+          else "",
+          kinase_name
+        )
+      ))
+      substrates_df <- substrates_df[order(-substrates_df$quality), ]
+      data_frame_tabbing_latex(
+        x = substrates_df,
+        tabstops = c(0.8, 3.8, 0.6, 0.8),
+        headings = c(headers_1st_line, headers_2nd_line)
+      )
+    } else {
+      if (print_nb_messages) nbe(see_variable(nrow_m > 0), "\n")
     }
+    if (print_nb_messages) nb("end kinase ", kinase_name, "\n")
   }
 
-# Write output tabular files
-
-# get kinase, ppep, concat(kinase) tuples for enriched kinases
-
-kinase_ppep_label <- sqldf("
-  WITH
-    t(ppep, label) AS
-      (
-        SELECT DISTINCT
-            SUB_MOD_RSD AS ppep,
-            group_concat(gene, '; ') AS label
+  # Write output tabular files
+
+  # get kinase, ppep, concat(kinase) tuples for enriched kinases
+
+  if (print_nb_messages) nb("kinase_ppep_label <- ...\n")
+  if (print_nb_messages) nbe("kinase_ppep_label <- ...\n")
+  kinase_ppep_label <- sqldf("
+    WITH
+      t(ppep, label) AS
+        (
+          SELECT DISTINCT
+              SUB_MOD_RSD AS ppep,
+              group_concat(gene, '; ') AS label
+            FROM pseudo_ksdata
+            WHERE GENE IN (SELECT kinase FROM enriched_kinases)
+            GROUP BY ppep
+        ),
+      k(kinase, ppep_join) AS
+        (
+        SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep_join
           FROM pseudo_ksdata
           WHERE GENE IN (SELECT kinase FROM enriched_kinases)
-          GROUP BY ppep
-      ),
-    k(kinase, ppep_join) AS
-      (
-      SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep_join
-        FROM pseudo_ksdata
-        WHERE GENE IN (SELECT kinase FROM enriched_kinases)
-      )
-  SELECT k.kinase, t.ppep, t.label
-    FROM  t,  k
-    WHERE t.ppep = k.ppep_join
-    ORDER BY k.kinase, t.ppep
-  ")
-
-# extract what we need from full_data
-impish <- cbind(rownames(quant_data_imp), quant_data_imp)
-colnames(impish)[1] <- "Phosphopeptide"
-data_table_imputed_sql <- "
-  SELECT
-    f.*,
-    k.label AS KSEA_enrichments,
-    q.*
-  FROM
-    metadata_plus_p f
-      LEFT JOIN kinase_ppep_label k
-        ON f.Phosphopeptide = k.ppep,
-    impish q
-  WHERE
-    f.Phosphopeptide = q.Phosphopeptide
-  "
-data_table_imputed <- sqldf(data_table_imputed_sql)
-# Zap the duplicated 'Phosphopeptide' column named 'ppep'
-data_table_imputed <-
-    data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))]
-
-# Output with imputed, un-normalized data
-
-write.table(
-    data_table_imputed
-  , file = imputed_data_filename
-  , sep = "\t"
-  , col.names = TRUE
-  , row.names = FALSE
-  , quote = FALSE
+        )
+    SELECT k.kinase, t.ppep, t.label
+      FROM  t,  k
+      WHERE t.ppep = k.ppep_join
+      ORDER BY k.kinase, t.ppep
+    ")
+
+
+  # extract what we need from full_data
+  impish <- cbind(rownames(quant_data_imp), quant_data_imp)
+  colnames(impish)[1] <- "Phosphopeptide"
+  data_table_imputed_sql <- "
+    SELECT
+      f.*,
+      k.label AS KSEA_enrichments,
+      q.*
+    FROM
+      metadata_plus_p f
+        LEFT JOIN kinase_ppep_label k
+          ON f.Phosphopeptide = k.ppep,
+      impish q
+    WHERE
+      f.Phosphopeptide = q.Phosphopeptide
+    "
+  data_table_imputed <- sqldf(data_table_imputed_sql)
+  # Zap the duplicated 'Phosphopeptide' column named 'ppep'
+  data_table_imputed <-
+      data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))]
+
+  # Output imputed, un-normalized data
+  if (print_nb_messages) nb("Output imputed, un-normalized data tabular file\n")
+  if (print_nb_messages) nbe("Output imputed, un-normalized data tabular file\n")
+  write.table(
+      data_table_imputed
+    , file = imputed_data_filename
+    , sep = "\t"
+    , col.names = TRUE
+    , row.names = FALSE
+    , quote = FALSE
+    )
+
+
+  #output quantile normalized data
+  impish <- cbind(rownames(quant_data_imp_qn_log), quant_data_imp_qn_log)
+  colnames(impish)[1] <- "Phosphopeptide"
+  data_table_imputed <- sqldf(data_table_imputed_sql)
+  # Zap the duplicated 'Phosphopeptide' column named 'ppep'
+  data_table_imputed <-
+      data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))]
+  if (print_nb_messages) nb("Output quantile normalized data tabular file\n")
+  if (print_nb_messages) nbe("Output quantile normalized data tabular file\n")
+  write.table(
+    data_table_imputed,
+    file = imp_qn_lt_data_filenm,
+    sep = "\t",
+    col.names = TRUE,
+    row.names = FALSE,
+    quote = FALSE
   )
 
-
-#output quantile normalized data
-impish <- cbind(rownames(quant_data_imp_qn_log), quant_data_imp_qn_log)
-colnames(impish)[1] <- "Phosphopeptide"
-data_table_imputed <- sqldf(data_table_imputed_sql)
-# Zap the duplicated 'Phosphopeptide' column named 'ppep'
-data_table_imputed <-
-    data_table_imputed[, c(1:12, 14:ncol(data_table_imputed))]
-write.table(
-  data_table_imputed,
-  file = imp_qn_lt_data_filenm,
-  sep = "\t",
-  col.names = TRUE,
-  row.names = FALSE,
-  quote = FALSE
-)
-
-ppep_kinase <- sqldf("
-  SELECT DISTINCT k.ppep, k.kinase
-    FROM (
-      SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep
-        FROM pseudo_ksdata
-        WHERE GENE IN (SELECT kinase FROM enriched_kinases)
-      ) k
-    ORDER BY k.ppep, k.kinase
-  ")
-
-RSQLite::dbWriteTable(
-  conn = db,
-  name = "ksea_enriched_ks",
-  value = ppep_kinase,
-  append = FALSE
-  )
+  ppep_kinase <- sqldf("
+    SELECT DISTINCT k.ppep, k.kinase
+      FROM (
+        SELECT DISTINCT gene AS kinase, SUB_MOD_RSD AS ppep
+          FROM pseudo_ksdata
+          WHERE GENE IN (SELECT kinase FROM enriched_kinases)
+        ) k
+      ORDER BY k.ppep, k.kinase
+    ")
+
+  RSQLite::dbWriteTable(
+    conn = db,
+    name = "ksea_enriched_ks",
+    value = ppep_kinase,
+    append = FALSE
+    )
+}
+
+if (print_nb_messages) nb("RSQLite::dbWriteTable anova_signif\n")
 
 RSQLite::dbWriteTable(
   conn = db,
@@ -3453,6 +6293,8 @@
     "
   )
 
+if (print_nb_messages) nb("Output contents of `stats_metadata_v` table to tabular file\n")
+if (print_nb_messages) nbe("Output contents of `stats_metadata_v` table to tabular file\n")
 write.table(
   dbReadTable(db, "stats_metadata_v"),
   file = anova_ksea_mtdt_file,
@@ -3462,75 +6304,21 @@
   quote = FALSE
   )
 
+cat("\n\\clearpage\n")
 
 ```
 
+# Data-processing summary flowchart
+
+![Flowchart showing ANOVA and KSEA data-processing steps](KSEA_impl_flowchart.pdf)
+
 ```{r parmlist, echo = FALSE, fig.dim = c(9, 10), results = 'asis'}
 cat("\\leavevmode\n\n\n")
 
-# write parameters to report
-
-param_unlist <- unlist(as.list(params))
-param_df <- data.frame(
-  parameter = paste0("\\verb@", names(param_unlist), "@"),
-  value = paste0("\\verb@", gsub("$", "\\$", param_unlist, fixed = TRUE), "@")
-  )
-
-data_frame_latex(
-  x = param_df,
-  justification = "p{0.35\\linewidth} p{0.6\\linewidth}",
-  centered = TRUE,
-  caption = "Input parameters",
-  anchor = const_table_anchor_bp,
-  underscore_whack = FALSE
-  )
-
-# write parameters to SQLite output
-
-mqppep_anova_script_param_df <- data.frame(
-  script    = "mqppep_anova_script.Rmd",
-  parameter = names(param_unlist),
-  value     = param_unlist
-  )
-ddl_exec(db, "
-  DROP TABLE IF EXISTS script_parameter;
-  "
-)
-ddl_exec(db, "
-  CREATE TABLE IF NOT EXISTS script_parameter(
-    script    TEXT,
-    parameter TEXT,
-    value     ANY,
-    UNIQUE (script, parameter) ON CONFLICT REPLACE
-    )
-    ;
-  "
-)
-RSQLite::dbWriteTable(
-  conn = db,
-  name = "script_parameter",
-  value = mqppep_anova_script_param_df,
-  append = TRUE
-)
-
+write_params(db)
 # We are done with output
 RSQLite::dbDisconnect(db)
+
+cat("\\clearpage\n\\section{R package versions}\n")
+utils::toLatex(utils::sessionInfo())
 ```
-<!--
-There's gotta be a better way...
-
-loaded_packages_df <-  sessioninfo::package_info("loaded")
-loaded_packages_df[, "library"] <- as.character(loaded_packages_df$library)
-loaded_packages_df <- data.frame(
-  package = loaded_packages_df$package,
-  version = loaded_packages_df$loadedversion,
-  date    = loaded_packages_df$date
-  )
-data_frame_latex(
-  x = loaded_packages_df,
-  justification = "l | l l",
-  centered = FALSE,
-  caption = "Loaded R packages",
-  anchor = const_table_anchor_bp
-  )
--->
author	galaxyp
date	Fri, 28 Oct 2022 18:26:42 +0000
parents	8dfd5d2b5903
children	a5e7469dfdfa