# view PMD_FDR_package_for_Galaxy.R @ 0:5cc0c32d05a2 draft
#
# "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pmd_fdr commit 00f85eca73cd8afedfefbeec94a4462455ac1a9a"
# author galaxyp
# date Mon, 07 Oct 2019 11:59:37 -0400
# parents
# children 460edeedeb7d
# line wrap: on
# line source

###############################################################################
# PMD_FDR_package_for_Galaxy.R                                                #
#                                                                             #
# Project 021 - PMD-FDR for Galaxy-P                                          #
#                                                                             #
# Description: Computes iFDR and gFDR on PSMs as a script designed for Galaxy #
#              Note that plotting code has been left in that is not used      #
#              in this file; this is the code I used to create figures for    #
#              publication. I left it in for potential development of views.  #
#                                                                             #
#              This file was created by concatenating the following files:    #
#                                                                             #
#                   A - 005 - Parser - ArgParser.R                            #
#                   B - 019 - PMD-FDR - functions.R                           #
#                   C - 021 - PMD-FDR Wrapper - functions.R                   #
#                   D - 021 - PMD-FDR Main.R                                  #
#                                                                             #
# Required packages: argparser                                                #
#                    stringr                                                  #
#                    RUnit                                                    #
#                                                                             #
# Release date: 2019-10-05                                                    #
#      Version: 1.4                                                           #
#                                                                             #
###############################################################################
# Package currently supports the following parameters:
#
# --psm_report            full name and path to the PSM report
# --psm_report_1_percent  full name and path to the PSM report for 1% FDR
# --output_i_fdr          full name and path to the i-FDR output file 
# --output_g_fdr          full name and path to the g-FDR output file 
# --output_densities      full name and path to the densities output file 
#
###############################################################################
# A - 005 - Parser - ArgParser.R                                              #
#                                                                             #
# Description: Wrapper for argparser package, using RefClass                  #
#                                                                             #
###############################################################################

#install.packages("argparser")
library(argparser)

# Class definition

# ArgParser: reference-class wrapper around the argparser package.
# The single field `parser` holds the object returned by arg_parser() and is
# replaced each time an argument is added.
ArgParser <- setRefClass(
  "ArgParser",
  fields = c("parser")
)
ArgParser$methods(
  # Creates the wrapped parser; every argument is forwarded to arg_parser().
  initialize = function(...){
    parser <<- arg_parser(...)
  },
  # Registers an argument; everything is forwarded to add_argument() and the
  # updated parser is stored back in the field.
  local_add_argument = function(...){
    parser <<- add_argument(parser, ...)
  },
  # Parses with the stored parser; extra arguments are forwarded to
  # parse_args(). Returns whatever parse_args() returns.
  parse_arguments = function(...){
    parsed <- parse_args(parser, ...)
    return(parsed)
  }
)

###############################################################################
# B - 019 - PMD-FDR - functions.R                                             #
#                                                                             #
# Primary work-horse for PMD-FDR                                              #
#                                                                             #
###############################################################################
###############################################################################
####### Load libraries etc.
###############################################################################
library(stringr)
library(RUnit)

#############################################################
####### Global values (should be parameters to module but aren't yet)
#############################################################

# Tuning constants kept as file-level globals because they are not yet exposed
# as module parameters.
# NOTE(review): the code that reads these values is not visible in this part of
# the file; the meanings below are inferred from the names - confirm.
MIN_GOOD_PEPTIDE_LENGTH          <- 11  # presumably the shortest peptide length treated as "good"
MIN_ACCEPTABLE_POINTS_IN_DENSITY <- 10  # presumably the fewest points accepted for a density estimate

#############################################################
####### General purpose functions
#############################################################
# Tests whether file_path names an existing regular file, with a clearer error
# when the value cannot be tested at all.
#
# Args:
#   file_path: path to test.
# Returns:
#   The result of file_test(op = "-f"): TRUE for an existing file, FALSE
#   otherwise (so a valid directory also yields FALSE).
# Raises:
#   An error "file path is not valid: '<value>'" when file_test() itself fails
#   (for example when file_path is NULL or not a character value). Previously
#   the handler only *returned* a condition object, which then broke callers
#   that used the result inside if().
safe_file_exists <- function(file_path){
  tryCatch(
    file_test(op = "-f", x = file_path),
    error = function(e) {
      # sprintf() yields character(0) for NULL input, which would produce an
      # empty message; flatten the offending value into a single string first.
      shown_path <- paste(format(file_path), collapse = ", ")
      stop(sprintf("file path is not valid: '%s'", shown_path), call. = FALSE)
    }
  )
}
# Standard loader for tab-delimited files with a header row.
#
# Args:
#   file_path: path of the file to read.
# Returns:
#   A data.frame (strings kept as character) whose column names are the raw
#   header fields, with blank header cells replaced by "<Field N>" where N is
#   the column position. A file with no header line gives an empty data.frame.
# Raises:
#   An error when file_path does not refer to an existing file.
load_standard_df <- function(file_path=NULL){
  # Give blank header cells a positional placeholder name
  label_blank_fields <- function(header){
    labelled <- header
    blank_at <- which(labelled == "")
    labelled[blank_at] <- sprintf("<Field %d>", blank_at)
    return(labelled)
  }

  if (!safe_file_exists(file_path)){
    stop(sprintf("File path does not exist: '%s'", file_path))
  }

  header <- label_blank_fields(read_field_names(file_path, sep = "\t"))
  if (length(header) == 0){
    return(data.frame())
  }

  loaded <- read.table(file = file_path, header = TRUE, sep = "\t",
                       stringsAsFactors = FALSE, blank.lines.skip = TRUE)
  # read.table() sanitises names; restore the header exactly as written
  colnames(loaded) <- header
  return(loaded)
}
# Writes a data.frame as a tab-delimited file with a header row, no row names
# and no quoting.
#
# Args:
#   x:         object handed to write.table().
#   file_path: destination path; an empty string means "do not write".
# Returns:
#   NULL, invisibly.
save_standard_df <- function(x=NULL, file_path=NULL){
  if (file_path == ""){
    return(invisible(NULL))
  }
  write.table(x = x, file = file_path, quote = FALSE, sep = "\t",
              row.names = FALSE, col.names = TRUE)
}
# Renames one column of a data.frame.
#
# Args:
#   df:               data.frame to modify (a modified copy is returned).
#   name_before:      current column name.
#   name_after:       new column name.
#   suppressWarnings: when TRUE, stay silent if name_before is not a column.
# Returns:
#   The data.frame with the column renamed. The renamed column is moved to the
#   end (it is copied under the new name and the old one is removed). If
#   name_before is absent, df is returned unchanged.
# Raises:
#   An error when df is NULL; a warning when name_before is absent and
#   suppressWarnings is FALSE.
rename_column <- function(df=NULL, name_before=NULL, name_after=NULL, suppressWarnings=FALSE){
  if (is.null(df)){
    stop("Dataframe (df) does not exist - unable to rename column")
  }
  if (!(name_before %in% colnames(df))){
    if (!suppressWarnings){
      warning(sprintf("'%s' is not a field in the data frame and so has not been renamed", name_before))
    }
    return(df)
  }
  # Renaming a column to itself must be a no-op: the copy-then-delete sequence
  # below would otherwise remove the column altogether.
  if (isTRUE(name_before == name_after)){
    return(df)
  }
  df[,name_after]  <- df[,name_before]
  df[,name_before] <- NULL
  return(df)
}
# Renames several columns, pairing names_before[i] with names_after[i] and
# applying rename_column() once per pair, in order.
#
# Returns:
#   The data.frame after all renames (unchanged when names_before is empty).
rename_columns <- function(df=NULL, names_before=NULL, names_after=NULL){
  renamed <- df
  for (idx in seq_along(names_before)){
    renamed <- rename_column(renamed, names_before[idx], names_after[idx])
  }
  return(renamed)
}
# Rounds x to the nearest multiple of tolerance by delegating to
# function_to_tolerance() with FUN = round. Extra arguments are forwarded.
round_to_tolerance    <- function(x=NULL, tolerance=NULL, ...){
  rounded <- function_to_tolerance(x = x, tolerance = tolerance, FUN = round, ...)
  return(rounded)
}
# Applies FUN on a grid whose step is `tolerance`: x is expressed in units of
# tolerance, FUN is applied (extra arguments are forwarded to it), and the
# result is scaled back.
function_to_tolerance <- function(x=NULL, tolerance=NULL, FUN=NULL, ...){
  in_tolerance_units <- x / tolerance
  snapped            <- FUN(in_tolerance_units, ...)
  return(snapped * tolerance)
}
# Median of x with missing values removed before the computation.
safe_median <- function(x){
  return(median(x, na.rm = TRUE))
}
# Rescales the y-values of a density-like object so that the area under the
# curve is 1.
#
# The area is approximated with rectangles of equal width:
#   delta_x = (max(x) - min(x)) / length(x),  area = delta_x * sum(y).
#
# Args:
#   d: list-like object with numeric elements `x` and `y` of equal length.
# Returns:
#   A copy of d whose y has been divided by the approximated area; all other
#   elements are untouched.
# Note:
#   If the approximated area is 0 the division yields NaN/Inf values.
normalize_density <- function(d){
  delta_x               <- diff(range(d$x)) / length(d$x)
  unnormalized_integral <- delta_x * sum(d$y)

  new_d   <- d
  # The division is the actual normalisation step; without it the function
  # returned its input unchanged.
  new_d$y <- d$y / unnormalized_integral

  return(new_d)
}
# Chooses between two values depending on whether cond is NULL.
#
# Returns null_result when cond is NULL, otherwise not_null_result. Only the
# chosen argument is evaluated.
if_null <- function(cond=NULL, null_result=NULL, not_null_result=NULL){
  if (is.null(cond)){
    return(null_result)
  }
  return(not_null_result)
}
# Builds an n-colour rainbow palette in which every colour has the same
# weighted channel intensity.
#
# Args:
#   n:                  number of colours.
#   goal_intensity_0_1: target intensity on a 0-1 scale.
#   alpha:              alpha value(s) passed on to rgb().
# Returns:
#   Character vector of colours as produced by rgb().
rainbow_with_fixed_intensity <- function(n=NULL, goal_intensity_0_1=NULL, alpha=NULL){
  target <- 255*goal_intensity_0_1

  # One row per rainbow colour; columns red / green / blue on the 0-255 scale
  palette <- data.frame(t(col2rgb(rainbow(n))))
  red   <- palette$red
  green <- palette$green
  blue  <- palette$blue

  # Weighted sum of the three channels, used as the colour's intensity
  intensity <- 0.2989*red + 0.5870*green + 0.1140*blue

  # Colours below the target are blended with white (255), the others with
  # black (0); mix_level is the share of the original colour that is kept
  white_black <- ifelse(intensity < target, 255, 0)
  mix_level   <- (white_black - target) / (white_black - intensity  )

  names_pref_new <- c("new_red", "new_green", "new_blue")
  names_no_pref  <- c("red", "green", "blue")
  blended <- data.frame(
    new_red   = mix_level*red   + (1-mix_level)*white_black,
    new_green = mix_level*green + (1-mix_level)*white_black,
    new_blue  = mix_level*blue  + (1-mix_level)*white_black
  )
  blended <- rename_columns(blended, names_before = names_pref_new, names_after = names_no_pref)

  # rgb() expects channel values on a 0-1 scale here
  return(rgb(as.matrix(blended/255 ), alpha=alpha))
}
# Index sequence that is safe to use in for() loops: 1:n_steps when there is
# at least one step, otherwise an empty numeric vector (plain 1:0 would count
# down from 1 to 0).
safe_iterator <- function(n_steps = NULL){
  if (n_steps >= 1){
    return(1:n_steps)
  }
  return(numeric(0))
}
# Converts colour specifications to hex strings that include an alpha channel.
#
# Args:
#   cols:      colours accepted by col2rgb().
#   col_alpha: alpha value(s). If every value is <= 1 they are treated as
#              fractions and scaled to 0-255 (rounded); otherwise they are
#              used as given on the 0-255 scale.
# Returns:
#   Character vector produced by rgb() with maxColorValue = 255.
col2hex <- function(cols=NULL, col_alpha=255){
  alpha_255 <- if (all(col_alpha<=1)) round(col_alpha*255) else col_alpha
  channels  <- t(col2rgb(cols))
  return(rgb(channels, alpha=alpha_255, maxColorValue = 255))
}
# Approximates a "highest posterior density interval" for a binomial
# proportion, given x successes out of N trials.
#
# The exact binomial likelihood is evaluated on a finite grid of candidate
# proportions (step `precision`) and normalised to sum to 1. Grid points are
# then taken in order of decreasing density until their cumulative mass
# reaches 1 - alpha.
#
# Returns:
#   c(lower, upper): smallest and largest proportion among the selected points.
credible_interval <- function(x=NULL, N=NULL, precision=0.001, alpha=0.05){
  grid    <- seq(from=0, to=1, by=precision)
  density <- dbinom(x = x, size = N, prob = grid)
  density <- density / sum(density)

  # Visit the grid from the most to the least likely proportion
  by_density  <- order(-density)
  grid_sorted <- grid[by_density]
  mass_so_far <- cumsum(density[by_density])

  # First position at which the accumulated mass is no longer below 1 - alpha,
  # capped at the grid size
  n_keep <- sum(mass_so_far < (1-alpha)) + 1
  n_keep <- min(n_keep, length(grid))

  kept <- grid_sorted[1:n_keep]
  return(c(min(kept), max(kept)))
}
# Fetches a named element from a list and fails loudly when it is missing.
#
# Args:
#   parent_list:  list to read from.
#   element_name: name of the wanted element.
#   object_name:  label for the list used in the error message
#                 (defaults to "the list").
# Returns:
#   parent_list[[element_name]].
# Raises:
#   An error when that element is NULL.
verified_element_of_list <- function(parent_list=NULL, element_name=NULL, object_name=NULL){
  element <- parent_list[[element_name]]
  if (!is.null(element)){
    return(element)
  }
  container <- if (is.null(object_name)) "the list" else object_name
  stop(sprintf("Element '%s' does not yet exist in %s", element_name, container))
}
# Reads the header (first line) of a delimited text file and splits it into
# field names.
#
# Args:
#   file_path: path of the file to read.
#   sep:       field separator, taken literally (not as a regular expression),
#              so separators such as "|" or "." work as expected.
# Returns:
#   Character vector of field names, or NULL when the file has no lines.
read_field_names = function(file_path=NULL, sep = "\t"){
  con <- file(file_path, "r")
  # Close the connection even if reading fails
  on.exit(close(con), add = TRUE)

  first_line <- readLines(con, n = 1)
  if (length(first_line) == 0){
    return(c())
  }
  fields <- strsplit(x = first_line, split = sep, fixed = TRUE)[[1]]
  return(fields)
}
# Asserts (via RUnit's checkTrue) that a data.frame has a given column.
#
# Args:
#   input_df:         data.frame to inspect.
#   name_of_input_df: label for the data.frame used in the failure message.
#   field_name:       column name that must be present.
# The failure message lists the columns that were actually found.
check_field_name = function(input_df = NULL, name_of_input_df=NULL, field_name=NULL){
  available   <- colnames(input_df)
  failure_msg <- sprintf("Expected fieldname '%s' in %s (but did not find it among %s)",
                         field_name, name_of_input_df, paste0(available, collapse=", "))
  checkTrue(field_name %in% available, msg = failure_msg)
}

#############################################################
####### Classes for Data
#############################################################

###############################################################################
#            Class: Data_Object
###############################################################################
# Data_Object: abstract base class for lazily loaded data nodes.
#
# Each object keeps a dirty flag plus lists of parent and child objects.
# ensure() loads the object only when it is dirty; load_data() first ensures
# the parents, then calls the subclass hooks verify() and m_load_data(), and
# finally clears the flag. Changing the flag marks every child as dirty.
Data_Object <- setRefClass(
  "Data_Object",
  fields = list(
    m_is_dirty = "logical",
    parents    = "list",
    children   = "list",
    class_name = "character"
  )
)
Data_Object$methods(
  # New objects start dirty and carry a placeholder class_name
  initialize = function(){
    class_name <<- "Data_Object <abstract class - class_name needs to be set in subclass>"
    m_is_dirty <<- TRUE
  },
  # Load only if something has changed since the last load
  ensure = function(){
    if (m_is_dirty){
      load_data()
    }
  },
  # Unconditional (re)load: parents first, then this object's own hooks
  load_data = function(){
    #print(sprintf("Calling %s$load_data()", class_name)) # Useful for debugging
    ensure_parents()
    verify()
    m_load_data()
    set_dirty(new_value = FALSE)
  },
  # Update the flag; any actual change is propagated to the children
  set_dirty = function(new_value){
    if (m_is_dirty != new_value){
      m_is_dirty <<- new_value
      set_children_dirty()
    }
  },
  # Abstract hook: subclasses validate their inputs here
  verify = function(){
    stop(sprintf("verify() is an abstract method - define it in %s before calling load_data()", class_name))
  },
  # Abstract hook: subclasses do the actual loading here
  m_load_data = function(){
    stop(sprintf("m_load_data() is an abstract method - define it in %s before calling load_data()", class_name))
  },
  append_parent = function(parent=NULL){
    parents <<- append(parents, parent)
  },
  append_child = function(child=NULL){
    children <<- append(children, child)
  },
  ensure_parents = function(){
    for (one_parent in parents){
      # print(sprintf("Calling %s$ensure()", one_parent$class_name)) # Useful for debugging
      one_parent$ensure()
    }
  },
  set_children_dirty = function(){
    for (one_child in children){
      one_child$set_dirty(TRUE)
    }
  }
)
###############################################################################
#            Class: Data_Object_Info
###############################################################################
# Data_Object_Info: abstract description of one dataset (where its files live
# and how it is labelled). Subclasses fill in the fields in initialize().
Data_Object_Info <- setRefClass(
  "Data_Object_Info",
  contains = "Data_Object",
  fields = list(
    data_file_name_1_percent_FDR = "character",
    data_file_name               = "character",
    data_path_name               = "character",
    experiment_name              = "character",
    designation                  = "character",
    input_file_type              = "character"
  )
)
Data_Object_Info$methods(
  initialize = function(){
    callSuper()
    class_name <<- "Data_Object_Info - <Abstract class - class_name needs to be set in subclass>"
  },
  # Every mandatory field must hold exactly one non-empty string.
  # data_file_name_1_percent_FDR is not checked here.
  verify = function(){
    checkFieldExists = function(field_name=NULL){
      field_value <- .self[[field_name]]
      checkTrue(length(field_value) > 0,
                sprintf("Field %s$%s has not been set (and should have been)", class_name, field_name))
      checkTrue(length(field_value) == 1,
                sprintf("Field %s$%s has been set to multiple values (and should be a single value)", class_name, field_name))
      checkTrue(field_value != "",
                sprintf("Field %s$%s has been set to an empty string (and should not have been)", class_name, field_name))
    }
    mandatory_fields <- c("data_file_name", "data_path_name", "experiment_name",
                          "designation", "input_file_type")
    outcome <- NULL
    for (mandatory in mandatory_fields){
      outcome <- checkFieldExists(mandatory)
    }
    invisible(outcome)
  },
  m_load_data = function(){
    # Nothing to do - this is really a data class
  },
  # Full path of the main data file
  file_path = function(){
    full_path <- file.path(data_path_name, data_file_name)
    if (length(full_path) == 0){
      stop("Unable to validate file path - one or both of path name and file name are missing")
    }
    return(full_path)
  },
  # Full path of the 1 percent FDR file, or "" when no file name has been set.
  # A missing name is deliberately not an error - not all analyses have a
  # 1 percent FDR file; this is managed downstream.
  file_path_1_percent_FDR = function(){
    name_1_percent <- get_data_file_name_1_percent_FDR()
    if (length(name_1_percent) == 0){
      return("")
    }
    return(file.path(data_path_name, name_1_percent))
  },
  get_data_file_name_1_percent_FDR = function(){
    return(data_file_name_1_percent_FDR)
  },
  # Label built as "<experiment_name>_<designation>"
  collection_name = function(){
    return(sprintf("%s_%s", experiment_name, designation))
  }
)
###############################################################################
#            Class: Data_Object_Info_737_two_step
###############################################################################
# Dataset description: Oral 737 NS, two-step search (PSM report input).
Data_Object_Info_737_two_step <- setRefClass(
  "Data_Object_Info_737_two_step",
  contains = "Data_Object_Info",
  fields   = list()
)
Data_Object_Info_737_two_step$methods(
  initialize = function(){
    callSuper()
    class_name                   <<- "Data_Object_Info_737_two_step"

    # Labels
    experiment_name              <<- "Oral_737_NS"
    designation                  <<- "two_step"

    # File locations
    # NOTE(review): the doubled ".tabular.tabular" extension is reproduced as
    # found - confirm it matches the file on disk.
    data_path_name               <<- file.path(".", "Data")
    data_file_name               <<- "737_NS_Peptide_Shaker_Extended_PSM_Report_Multi_Stage_Two_Step.tabular.tabular"
    data_file_name_1_percent_FDR <<- "737_NS_Peptide_Shaker_PSM_Report_Multi_Stage_Two_Step.tabular"

    input_file_type              <<- "PSM_Report"
  }
)

###############################################################################
#            Class: Data_Object_Info_737_combined
###############################################################################
# Dataset description: Oral 737 NS, combined-database search (PSM report input).
Data_Object_Info_737_combined <- setRefClass("Data_Object_Info_737_combined", 
                                             contains = "Data_Object_Info",
                                             fields =list())
Data_Object_Info_737_combined$methods(
  initialize = function(){
    callSuper()
    class_name                   <<- "Data_Object_Info_737_combined"
    #score_field_name             <<- "Confidence [%]"
    data_file_name_1_percent_FDR <<- "737_NS_Peptide_Shaker_PSM_Report_CombinedDB.tabular"
    data_file_name               <<- "737_NS_Peptide_Shaker_Extended_PSM_Report_CombinedDB.tabular"
    data_path_name               <<- file.path(".", "Data")
    experiment_name              <<- "Oral_737_NS"
    # Was "two_step" (copied from the two-step class), which made
    # collection_name() identical for the two datasets.
    designation                  <<- "combined"
    
    input_file_type              <<- "PSM_Report"
    
    #data_collection_oral_737_NS_combined$file_name_dataset_1_percent = "737_NS_Peptide_Shaker_PSM_Report_CombinedDB.tabular"
    #data_collection_oral_737_NS_two_step$file_name_dataset_1_percent = "737_NS_Peptide_Shaker_PSM_Report_Multi_Stage_Two_Step.tabular"
    
  }
)

###############################################################################
#            Class: Data_Object_Pyrococcus_tr
###############################################################################
# Dataset description: Pyrococcus, designation "tr" (PSM report input).
# No 1 percent FDR file is configured for this dataset.
Data_Object_Pyrococcus_tr <- setRefClass(
  "Data_Object_Pyrococcus_tr",
  contains = "Data_Object_Info",
  fields   = list()
)
Data_Object_Pyrococcus_tr$methods(
  initialize = function(){
    callSuper()
    class_name                   <<- "Data_Object_Pyrococcus_tr"

    # Labels
    experiment_name              <<- "Pyrococcus"
    designation                  <<- "tr"

    # File locations (empty string = no 1 percent FDR file)
    data_path_name               <<- file.path(".", "Data")
    data_file_name               <<- "Pfu_traditional_Extended_PSM_Report.tabular"
    data_file_name_1_percent_FDR <<- ""

    input_file_type              <<- "PSM_Report"
  }
)
###############################################################################
#            Class: Data_Object_Mouse_Mutations
###############################################################################
# Dataset description: Mouse Mutations, designation "combined_05"
# (PSM report input). No 1 percent FDR file is configured for this dataset.
Data_Object_Mouse_Mutations <- setRefClass(
  "Data_Object_Mouse_Mutations",
  contains = "Data_Object_Info",
  fields   = list()
)
Data_Object_Mouse_Mutations$methods(
  initialize = function(){
    callSuper()
    class_name                   <<- "Data_Object_Mouse_Mutations"

    # Labels
    experiment_name              <<- "Mouse Mutations"
    designation                  <<- "combined_05"

    # File locations (empty string = no 1 percent FDR file)
    data_path_name               <<- file.path(".", "Data")
    data_file_name               <<- "Combined_DB_Mouse_5PTM.tabular"
    data_file_name_1_percent_FDR <<- ""

    input_file_type              <<- "PSM_Report"
  }
)
###############################################################################
#            Class: Data_Object_Raw_Data
###############################################################################
# Data_Object_Raw_Data: holds the main input file, loaded unmodified into the
# field `df`. Its only parent is the "info" object describing the dataset.
Data_Object_Raw_Data <- setRefClass(
  "Data_Object_Raw_Data",
  contains = "Data_Object",
  fields   = list(df = "data.frame")
)
Data_Object_Raw_Data$methods(
  initialize = function(){
    callSuper()
    class_name <<- "Data_Object_Raw_Data"
  },
  set_info = function(info){
    parents[["info"]] <<- info
  },
  # Fails when no info object has been registered yet
  get_info = function(){
    return(verified_element_of_list(parents, "info", "Data_Object_Raw_Data$parents"))
  },
  # The data file must exist before any attempt to load it
  verify = function(){
    raw_path <- get_info()$file_path()
    if (! safe_file_exists(raw_path)){
      stop(sprintf("Raw data file does not exist (%s)", raw_path))
    }
    # BUGBUG: Needs to also check the following:
    #         - file is tab-delimited
    #         - first row is a list of column names
  },
  m_load_data = function(){
    df <<- load_standard_df(get_info()$file_path())
  }
)
###############################################################################
#            Class: Data_Object_Raw_1_Percent
###############################################################################
# Data_Object_Raw_1_Percent: holds the optional 1 percent FDR input file in the
# field `df`. A missing file is a valid state: `df` is then simply not loaded.
Data_Object_Raw_1_Percent <- setRefClass(
  "Data_Object_Raw_1_Percent",
  contains = "Data_Object",
  fields   = list(df = "data.frame")
)
Data_Object_Raw_1_Percent$methods(
  initialize = function(){
    callSuper()
    class_name <<- "Data_Object_Raw_1_Percent"
  },
  set_info = function(info){
    parents[["info"]] <<- info
  },
  # Fails when no info object has been registered yet
  get_info = function(){
    return(verified_element_of_list(parents, "info", "Data_Object_Raw_1_Percent$parents"))
  },
  verify = function(){
    # Do nothing - a missing file name is acceptable for this module and is dealt with in load()
  },
  m_load_data = function(){
    info           <- get_info()
    path_1_percent <- info$file_path_1_percent_FDR()
    if (exists()){
      df <<- load_standard_df(path_1_percent)
    } # Note that failing to load is a valid state for this file, leading to not is_dirty. BUGBUG: this could lead to problems if a good file appears later
  },
  # TRUE only when a 1 percent FDR file name has been set, is not the empty
  # string, and the resulting path is an existing file.
  # (Inside this class, exists() refers to this method, not base::exists.)
  exists = function(){
    info           <- get_info()
    name_1_percent <- info$get_data_file_name_1_percent_FDR() # Check file name not file path

    if (length(name_1_percent) == 0){ # variable not set
      return(FALSE)
    }
    if (name_1_percent == ""){        # variable set to empty string
      return(FALSE)
    }
    return(safe_file_exists(info$file_path_1_percent_FDR()))
  }
)
###############################################################################
#            Class: Data_Converter
###############################################################################
# Data_Converter: abstract base class for objects that turn a raw input table
# into the standard PMD_FDR_* column layout.
#
# Subclasses set class_name and file_type in initialize() and override:
#   check_raw_fields(info, raw_data) - assert the raw table has the needed columns
#   convert_data(info, raw_data)     - return the converted data.frame
# Both stubs here only raise an "abstract method" error.
Data_Converter <- setRefClass("Data_Converter", 
                              fields =list(class_name = "character",
                                           file_type  = "character"
                              ) )
Data_Converter$methods(
  initialize = function(){
    class_name <<- "Data_Converter <abstract class - class_name needs to be set in subclass>"
    file_type  <<- "file_type has not been set before being used <needs to be set in initialize() of subclass>"
  },
  check_raw_fields = function(info=NULL, raw_data=NULL){
    stop(sprintf("check_raw_fields() is an abstract method - define it in %s before calling Data_Object_Data_Converter$load_data()", class_name))
  },
  # Takes the same arguments as the subclass implementations so that calling
  # the stub with info/raw_data reports the abstract-method error rather than
  # an "unused arguments" error.
  convert_data = function(info=NULL, raw_data=NULL){
    stop(sprintf("convert_data() is an abstract method - define it in %s before calling Data_Object_Data_Converter$load_data()", class_name))
  }
)
###############################################################################
#            Class: Data_Converter_PMD_FDR_input_file
###############################################################################
# Converter for input that is already in the standard PMD_FDR_* layout: the
# required columns are checked and the table is passed through unchanged.
Data_Converter_PMD_FDR_input_file <- setRefClass(
  "Data_Converter_PMD_FDR_input_file",
  contains = "Data_Converter",
  fields   = list()
)
Data_Converter_PMD_FDR_input_file$methods(
  initialize = function(){
    callSuper()

    class_name <<- "Data_Converter_PMD_FDR_input_file"
    # NOTE(review): Data_Object_Data_Converter$set_file_type() selects this
    # class with the key "PMD_FDR_input_file", which differs from the value
    # stored here - confirm which spelling is intended.
    file_type  <<- "PMD_FDR_file_type"
  },
  # Asserts that every standard column is present in raw_data$df
  check_raw_fields = function(info=NULL, raw_data=NULL){
    data_original   <- raw_data$df
    required_fields <- c("PMD_FDR_input_score", "PMD_FDR_pmd", "PMD_FDR_spectrum_file",
                         "PMD_FDR_proteins", "PMD_FDR_spectrum_title", "PMD_FDR_sequence",
                         "PMD_FDR_decoy")
    outcome <- NULL
    for (required in required_fields){
      outcome <- check_field_name(data_original, "raw_data", required)
    }
    invisible(outcome)
  },
  convert_data = function(info=NULL, raw_data=NULL){
    # Pass through - everything should be in order
    return(raw_data$df)
  }
)
###############################################################################
#            Class: Data_Converter_PSM_Report
###############################################################################
# Converter for PSM reports: copies the report's columns into the standard
# PMD_FDR_* columns (the original columns are kept as well).
Data_Converter_PSM_Report <- setRefClass(
  "Data_Converter_PSM_Report",
  contains = "Data_Converter",
  fields   = list()
)
Data_Converter_PSM_Report$methods(
  initialize = function(){
    callSuper()

    class_name <<- "Data_Converter_PSM_Report"
    file_type  <<- "PSM_Report"
  },
  # Asserts that every report column used by convert_data() is present
  check_raw_fields = function(info=NULL, raw_data=NULL){
    data_original   <- raw_data$df
    required_fields <- c("Confidence [%]", "Precursor m/z Error [ppm]", "Spectrum File",
                         "Protein(s)", "Spectrum Title", "Decoy", "Sequence")
    outcome <- NULL
    for (required in required_fields){
      outcome <- check_field_name(data_original, "raw_data", required)
    }
    invisible(outcome)
  },
  convert_data = function(info=NULL, raw_data=NULL){
    data_new <- raw_data$df

    # standard column name = report column it is copied from
    column_map <- c(
      PMD_FDR_input_score    = "Confidence [%]",
      PMD_FDR_pmd            = "Precursor m/z Error [ppm]",
      PMD_FDR_spectrum_file  = "Spectrum File",
      PMD_FDR_proteins       = "Protein(s)",
      PMD_FDR_spectrum_title = "Spectrum Title",
      PMD_FDR_sequence       = "Sequence",
      PMD_FDR_decoy          = "Decoy"
    )
    for (standard_name in names(column_map)){
      data_new[[standard_name]] <- data_new[, column_map[[standard_name]]]
    }

    return(data_new)
  }
)
###############################################################################
#            Class: Data_Converter_MaxQuant_Evidence
###############################################################################
# Converter for MaxQuant evidence tables: derives the standard PMD_FDR_*
# columns from the MaxQuant columns (the original columns are kept as well).
Data_Converter_MaxQuant_Evidence <- setRefClass(
  "Data_Converter_MaxQuant_Evidence",
  contains = "Data_Converter",
  fields   = list()
)
Data_Converter_MaxQuant_Evidence$methods(
  initialize = function(){
    callSuper()

    class_name <<- "Data_Converter_MaxQuant_Evidence"
    file_type  <<- "MaxQuant_Evidence"
  },
  # Asserts that every MaxQuant column used by convert_data() is present
  check_raw_fields = function(info=NULL, raw_data=NULL){
    data_original   <- raw_data$df
    required_fields <- c("PEP", "Mass error [ppm]", "Proteins",
                         "Retention time", "Sequence", "Reverse")
    outcome <- NULL
    for (required in required_fields){
      outcome <- check_field_name(data_original, "raw_data", required)
    }
    invisible(outcome)
  },
  convert_data = function(info=NULL, raw_data=NULL){
    evidence <- raw_data$df
    data_new <- evidence

    # Score is 100 * (1 - PEP)
    data_new$PMD_FDR_input_score    <- 100 * (1 - evidence[, "PEP"])
    data_new$PMD_FDR_pmd            <- evidence[, "Mass error [ppm]"]
    # Constant placeholder: a single spectra file is assumed
    data_new$PMD_FDR_spectrum_file  <- "<place_holder - assumes a single spectra file>"
    data_new$PMD_FDR_proteins       <- evidence[, "Proteins"]
    # Used for ordering peptides - not important in MaxQuant since PMD has
    # already been normalized effectively
    data_new$PMD_FDR_spectrum_title <- evidence[, "Retention time"]
    data_new$PMD_FDR_sequence       <- evidence[, "Sequence"]
    # Rows flagged "+" in the Reverse column become decoys (1), all others 0
    data_new$PMD_FDR_decoy          <- ifelse(evidence[, "Reverse"] == "+", 1, 0)

    return(data_new)
  }
)

###############################################################################
#            Class: Data_Object_Data_Converter
###############################################################################
# Data_Object_Data_Converter: data node that converts the raw input table into
# the standard PMD_FDR_* layout and stores the result in `df`.
#
# Parents: "info" (dataset description) and "raw_data" (loaded input table).
# The converter is chosen from info$input_file_type during verify().
Data_Object_Data_Converter <- setRefClass(
  "Data_Object_Data_Converter",
  contains = "Data_Object",
  fields   = list(
    df             = "data.frame",
    data_converter = "Data_Converter"
  )
)
Data_Object_Data_Converter$methods(
  initialize = function(){
    callSuper()
    class_name <<- "Data_Object_Data_Converter"
  },
  # NOTE(review): set_file_type() also accepts "MaxQuant_Evidence", which is
  # not listed here - confirm whether the omission is intentional.
  currently_supported_file_types = function(){
    return(c("PSM_Report", "PMD_FDR_input_file"))
  },
  # --- parent accessors -----------------------------------------------------
  set_info = function(info){
    parents[["info"]] <<- info
  },
  get_info = function(){
    return(verified_element_of_list(parents, "info", "Data_Object_Data_Converter$parents"))
  },
  set_raw_data = function(raw_data){
    parents[["raw_data"]] <<- raw_data
  },
  get_raw_data = function(){
    return(verified_element_of_list(parents, "raw_data", "Data_Object_Data_Converter$parents"))
  },
  # --- converter selection --------------------------------------------------
  # Instantiates the converter that matches file_type; unknown types stop
  set_file_type = function(file_type = NULL){
    if (file_type == "PSM_Report"){
      data_converter <<- Data_Converter_PSM_Report$new()
    } else if (file_type == "PMD_FDR_input_file"){
      data_converter <<- Data_Converter_PMD_FDR_input_file$new()
    } else if (file_type == "MaxQuant_Evidence"){
      data_converter <<- Data_Converter_MaxQuant_Evidence$new()
    } else {
      stop(sprintf("File type '%s' is not currently supported by PMD-FDR module", file_type))
    }
  },
  # --- Data_Object hooks ----------------------------------------------------
  # Picks the converter, then lets it check the raw table's columns
  verify = function(){
    info     <- get_info()
    raw_data <- get_raw_data()

    set_file_type(info$input_file_type)
    data_converter$check_raw_fields(info=info, raw_data=raw_data)
  },
  m_load_data = function(){
    info     <- get_info()
    raw_data <- get_raw_data()

    df <<- data_converter$convert_data(info=info, raw_data=raw_data)
  }
)
###############################################################################
#            Class: Data_Object_Groupings
###############################################################################
# Reference-class generator: extends Data_Object with a single data.frame
# field, `df`, which m_load_data() fills.
Data_Object_Groupings <- setRefClass(
  Class    = "Data_Object_Groupings",
  contains = "Data_Object",
  fields   = list(df = "data.frame")
)
Data_Object_Groupings$methods(
  # Constructor: run the parent initializer, then record this class's name.
  initialize = function(){
    callSuper()
    class_name <<- "Data_Object_Groupings"
  },
  # Removes every occurrence of the text "PMD_FDR_" from the supplied name(s).
  simplify_field_name = function(x=NULL){
    return(gsub("PMD_FDR_", "", x))
  },
  # Checks that the converted data frame carries every column used by
  # m_load_data().
  verify = function(){
    data_original   <- get_data_converter()$df
    required_fields <- c("PMD_FDR_input_score",
                         "PMD_FDR_pmd",
                         "PMD_FDR_spectrum_file",
                         "PMD_FDR_proteins",
                         "PMD_FDR_spectrum_title",
                         "PMD_FDR_sequence",
                         "PMD_FDR_decoy")
    
    for (field_name in required_fields){
      check_field_name(data_original, "data_converter", field_name)
    }
    
  },
  m_load_data = function(){
    make_data_groups <- function(data_original=NULL){
      
      # Functions supporting make_data_groups()
      
      # Adds the derived columns that the grouping helpers rely on:
      #   value                  - copy of PMD_FDR_pmd
      #   PMD_FDR_peptide_length - number of characters in PMD_FDR_sequence
      #   PMD_FDR_spectrum_index - rank of each row when rows are ordered by
      #                            PMD_FDR_spectrum_title (missing titles last)
      # The PMD_FDR_* input columns are expected to exist already; renaming of
      # the raw report columns is now managed in Data_Converter.
      # Returns a copy of `data` with the three columns added.
      standardize_fields <- function(data=NULL){
        data_new <- data
        
        # Kept for its side effects; presumably makes sure the info object is
        # loaded before the data are processed -- TODO confirm.
        info <- get_info()
        info$ensure()
        
        data_new$value                  <- data_new$PMD_FDR_pmd
        data_new$PMD_FDR_peptide_length <- str_length(data_new$PMD_FDR_sequence)
        
        # Build the index as a full-length vector first: assigning a scalar NA
        # column and filling it with 1:nrow() fails on a zero-row data frame.
        n_rows         <- nrow(data_new)
        spectrum_index <- rep(NA_integer_, n_rows)
        spectrum_index[order(data_new$PMD_FDR_spectrum_title, na.last = TRUE)] <- seq_len(n_rows)
        data_new$PMD_FDR_spectrum_index <- spectrum_index
        
        return(data_new)
      }
      # Bins a numeric column into quantile-based groups and stores the group
      # label in a new "group_<name>" column (plus "group_decoy_<name>" via
      # add_group_decoy()), where <name> is the field name without "PMD_FDR_".
      #   data_groups         - data frame to extend
      #   field_name_to_group - name of the numeric column to bin
      #   vec.length.out      - number of quantile break points
      #   vec.tolerance       - passed to round_to_tolerance() for the break points
      #   value_format        - sprintf number format (without "%") for the labels
      # The default for data_groups used to be `data_groups` itself, which is a
      # recursive default-argument reference and fails if the argument is omitted.
      add_grouped_variable <- function(data_groups = NULL, field_name_to_group = NULL, vec.length.out = NULL, vec.tolerance = NULL, value_format = NULL){
        
        # Support functions for add_grouped_variable()
        
        # Break points: evenly spaced quantiles of x, rounded by round_to_tolerance()
        find_interval_vec <- function(x=NULL, length.out = NULL, tolerance = NULL){
          q <- quantile(x = x, probs = seq(from=0, to=1, length.out = length.out), na.rm=TRUE)
          q <- round_to_tolerance(q, tolerance = tolerance)
          return(q)
        }
        # One row per interval [a, b] between consecutive break points, with a
        # label of the form "<a>_<b>_<lower>_<upper>":
        #   lower is "eq" when a == b, otherwise "ge"
        #   upper is "eq" when a == b, otherwise "lt" ("le" for the last interval)
        get_group_data_frame <- function(vec=NULL, value_format = NULL){
          n <- length(vec)
          a <- vec[-n]
          b <- vec[-1]
          
          lower      <- ifelse(a == b           , "eq", NA)
          lower      <- ifelse(is.na(lower     ), "ge", lower)
          upper      <- ifelse(a == b           , "eq", NA)
          upper[n-1] <- ifelse(is.na(upper[n-1]), "le", "eq")
          upper      <- ifelse(is.na(upper     ), "lt", upper)
          group <- data.frame(list(idx=1:(n-1), a=a, b=b, lower=lower, upper=upper))
          
          # e.g. value_format "03d" gives the format "%03d_%03d_%s_%s"
          name_format <- sprintf("%%%s_%%%s_%%s_%%s", value_format, value_format)
          group$new_var <- with(group, sprintf(name_format, a, b, lower, upper))
          
          return(group)
        }
        # Looks up each row's interval and attaches the interval label as
        # column "group_<name>".
        merge_group_with_data <- function(data_groups = NULL, group = NULL, vec = NULL, field_name_to_group = NULL){
          field_name_new <- sprintf("group_%s", simplify_field_name(field_name_to_group))
          group_idx      <- findInterval(x = data_groups[,field_name_to_group], 
                                         vec = vec, 
                                         all.inside=TRUE)
          data_groups$new_var <- group$new_var[group_idx]
          data_groups         <- rename_column(data_groups, "new_var", field_name_new)
          
          # Explicit return; this previously relied on the invisible value of
          # the assignment above.
          return(data_groups)
        }
        # Body of add_grouped_variable()
        
        vec    <- find_interval_vec(x          = data_groups[[field_name_to_group]], 
                                    length.out = vec.length.out, 
                                    tolerance  = vec.tolerance )
        group  <- get_group_data_frame(vec          = vec, 
                                       value_format = value_format)
        df_new <- merge_group_with_data(data_groups         = data_groups, 
                                        group               = group, 
                                        vec                 = vec,
                                        field_name_to_group = field_name_to_group)
        df_new <- add_group_decoy(df_new, field_name_to_group)
        
        return(df_new)
      }
      # For a column that is already categorical: copies it to "group_<name>"
      # (<name> = field name without "PMD_FDR_") and then adds the matching
      # "group_decoy_<name>" column through add_group_decoy().
      add_already_grouped_variable <- function(field_name_to_group = NULL, data_groups = NULL ){
        group_column <- sprintf("group_%s", simplify_field_name(field_name_to_group))
        
        grouped <- data_groups
        grouped[[group_column]] <- data_groups[[field_name_to_group]]
        
        return(add_group_decoy(data_groups = grouped, field_name_to_group = field_name_to_group))
      }
      # Adds column `value_norm`: each row's `value` minus the
      # `median_of_group_index` stored on the same row.
      add_value_norm <- function(data_groups = NULL){
        centred <- data_groups[["value"]] - data_groups[["median_of_group_index"]]
        
        result            <- data_groups
        result$value_norm <- centred
        return(result)
      }
      # Adds column `group_proteins` by matching PMD_FDR_proteins against a fixed
      # list of regular expressions, applied in order so that later matches
      # overwrite earlier ones:
      #   ""     -> "human"        (matches every non-missing value: the default)
      #   "pfu_" -> "pyrococcus"
      #   "cRAP" -> "contaminant"
      # Rows whose PMD_FDR_proteins is NA match nothing and stay NA.
      # Also adds `group_decoy_proteins` through add_group_decoy().
      add_protein_group <-function(data_groups = NULL){
        data_new <- data_groups
        df_group_def <- data.frame(stringsAsFactors = FALSE,
                                   list(pattern    = c(""     , "pfu_"      , "cRAP"),
                                        group_name = c("human", "pyrococcus", "contaminant")))
        
        # Create the column at full length up front.  Letting the first
        # sub-assignment create it produces a vector that is too short whenever
        # the trailing rows do not match (e.g. NA proteins), which is an error.
        if (!("group_proteins" %in% names(data_new))){
          data_new$group_proteins <- rep(NA_character_, nrow(data_new))
        }
        
        for (i in seq_len(nrow(df_group_def))){
          idx <- grepl(pattern = df_group_def$pattern[i],
                       x       = data_new$PMD_FDR_proteins)
          data_new$group_proteins[idx] <- df_group_def$group_name[i]
        }
        
        data_new <- add_group_decoy(data_groups = data_new, field_name_to_group = "PMD_FDR_proteins")
        return(data_new)
      }
      # Adds column "group_decoy_<name>" (<name> = field name without
      # "PMD_FDR_"): the literal "decoy" where PMD_FDR_decoy is true, otherwise
      # the value of "group_<name>" on that row.
      # NOTE(review): when "group_<name>" is a factor (e.g. produced by cut()),
      # ifelse() appears to copy the integer codes rather than the labels --
      # confirm that downstream code is happy with that.
      add_group_decoy <- function(data_groups=NULL, field_name_to_group=NULL){
        short_name   <- simplify_field_name(field_name_to_group)
        plain_column <- sprintf("group_%s",       short_name)
        decoy_column <- sprintf("group_decoy_%s", short_name)
        
        data_groups[[decoy_column]] <- ifelse(data_groups[["PMD_FDR_decoy"]],
                                              "decoy",
                                              data_groups[[plain_column]])
        
        return(data_groups)
      }
      # Adds column `group_training_class` (and, through
      # add_used_to_find_middle(), `used_to_find_middle`).  Classes, assigned in
      # this order so that later ones win:
      #   "other_short"   - default
      #   "other_long"    - PMD_FDR_peptide_length >= MIN_GOOD_PEPTIDE_LENGTH
      #   "good_training" - every 2nd "good" row (2nd, 4th, ...)
      #   "good_testing"  - every other "good" row (1st, 3rd, ...)
      #   "bad_long"      - decoy and long enough
      #   "bad_short"     - decoy and not long enough
      # A row is "good" when PMD_FDR_decoy == 0 and PMD_FDR_input_score == 100;
      # peptide length no longer matters for the "good" classes.
      add_group_training_class <- function(data_groups = NULL){
        df_new <- data_groups
        
        is_long_enough   <- with(df_new, (PMD_FDR_peptide_length >= MIN_GOOD_PEPTIDE_LENGTH)    )
        is_good          <- with(df_new, (PMD_FDR_decoy == 0) & (PMD_FDR_input_score == 100) )
        is_bad           <- with(df_new, (PMD_FDR_decoy == 1) )
        
        # Alternate the good rows between testing (odd positions) and training
        # (even positions).  Indexing by position parity also behaves sensibly
        # when there are no good rows at all (a recycled c(TRUE, FALSE) mask
        # yields NA in that case).
        idx_good         <- which(is_good)
        idx_testing      <- idx_good[seq_along(idx_good) %% 2 == 1]
        idx_training     <- setdiff(idx_good, idx_testing)
        
        is_bad_short     <- is_bad & !is_long_enough
        is_bad_long      <- is_bad &  is_long_enough
        
        df_new$group_training_class                   <- "other_short"   # Default
        df_new$group_training_class[is_long_enough  ] <- "other_long"    # Default (if long enough)
        df_new$group_training_class[idx_training    ] <- "good_training" # Length does not matter (anymore)
        df_new$group_training_class[idx_testing     ] <- "good_testing"  # Ditto
        df_new$group_training_class[is_bad_long     ] <- "bad_long"      # ...except for "bad"
        df_new$group_training_class[is_bad_short    ] <- "bad_short"
        
        df_new <- add_used_to_find_middle( data_groups = df_new ) # Guarantees consistency between duplicated definitions
        
        return(df_new)
      }
      # Adds logical column `used_to_find_middle`: TRUE exactly on the rows whose
      # group_training_class is "good_training", FALSE everywhere else
      # (including rows where the class is missing).
      add_used_to_find_middle <- function(data_groups = NULL){
        is_training <- data_groups$group_training_class == "good_training"
        
        flagged <- data_groups
        flagged$used_to_find_middle                     <- FALSE
        flagged$used_to_find_middle[which(is_training)] <- TRUE
        
        return(flagged)
      }
      # Orders the rows by PMD_FDR_spectrum_index and splits that index into
      # consecutive, left-closed intervals [a, b); the interval label is stored
      # in `group_spectrum_index` (a factor from cut()), and
      # `group_decoy_spectrum_index` is added through add_group_decoy().
      #
      # Break points are taken per spectrum file:
      #   * within a file, at regularly spaced "good_training" rows, so that each
      #     segment holds at least MINIMUM_SEGMENT_LENGTH training rows (no
      #     internal break when the file has no more than that many);
      #   * at the first index after the file's largest index.
      # The second rule uses max + 1 because the intervals are right-open: a
      # break placed at the file's own maximum would move that file's last
      # spectrum into the interval that starts the next file.
      add_group_spectrum_index <- function(data_groups = NULL){
        
        # Supporting functions for add_group_spectrum_index()
        
        MINIMUM_SEGMENT_LENGTH <- 50
        
        drop_last <- function(x){
          return(x[-length(x)])
        }
        # Positions (within 1..n_points) that end regularly spaced segments, each
        # slightly larger than minimum_segment_length; the last one is n_points.
        # (cpt.mean was tried here originally but appeared to leak memory.)
        regular_cut_positions <- function(n_points = NULL, minimum_segment_length = NULL){
          n_regions <- max(1, floor(n_points / minimum_segment_length))
          positions <- round(seq(1, n_points, length.out = n_regions + 1))
          return(positions[-1])
        }
        # Break points contributed by the rows of a single spectrum file
        get_cut_points <- function(data_subset){
          first_index_after_file <- max(data_subset$PMD_FDR_spectrum_index) + 1
          
          training       <- subset(data_subset, group_training_class == "good_training")
          training_index <- training$PMD_FDR_spectrum_index
          n_training     <- length(training_index)
          
          if (n_training <= MINIMUM_SEGMENT_LENGTH){
            result <- c()
          } else {
            local_idx <- regular_cut_positions(n_points               = n_training,
                                               minimum_segment_length = MINIMUM_SEGMENT_LENGTH)
            # The final position is the last training row; the end of the file
            # is used as the closing break instead.
            result <- drop_last(training_index[local_idx])
          }
          return(c(result, first_index_after_file))
        }
        # All break points: 1, every file's cut points, and one past the overall
        # maximum so that the last row falls inside the final interval.
        get_breaks_all <- function(df_new){
          breaks <- 1
          
          for (local_file in unique(df_new$PMD_FDR_spectrum_file)){
            data_subset <- subset(df_new, (PMD_FDR_spectrum_file==local_file))
            if (nrow(data_subset) > 0){
              breaks <- c(breaks, get_cut_points(data_subset))
            }
          }
          breaks <- c(breaks, max(df_new$PMD_FDR_spectrum_index) + 1)
          
          return(sort(unique(breaks)))
        }
        
        # Main code of add_group_spectrum_index()
        
        field_name_to_group <- "PMD_FDR_spectrum_index"
        
        df_new <- data_groups[order(data_groups[[field_name_to_group]]),]
        breaks <- get_breaks_all(df_new)
        
        df_new$group_spectrum_index <- cut(x = df_new[[field_name_to_group]], breaks = breaks, right = FALSE, dig.lab = 6)
        df_new <- add_group_decoy(data_groups = df_new, field_name_to_group = field_name_to_group)
        
        return(df_new)
      }
      # Adds column `median_of_group_index`: for each group_spectrum_index, the
      # safe_median() of `value` over the rows flagged used_to_find_middle.
      # Any existing column of that name is replaced.
      # NOTE(review): merge() is an inner join here, so rows belonging to a
      # group without any used_to_find_middle rows appear to be dropped from the
      # result -- confirm that this is intended.
      add_median_of_group_index <-function(data_groups = NULL){
        median_column <- "median_of_group_index"
        
        training_rows <- subset(data_groups, used_to_find_middle )
        group_medians <- rename_column(aggregate(value~group_spectrum_index, data=training_rows, FUN=safe_median),
                                       "value",
                                       median_column)
        
        without_old_median <- data_groups
        without_old_median[[median_column]] <- NULL
        
        return(merge(without_old_median, group_medians))
      }
      # When the optional "1 percent" data object exists, adds logical column
      # `is_in_1percent`: TRUE for rows whose PMD_FDR_spectrum_title also appears
      # in the "Spectrum Title" column of that object's data frame, FALSE
      # otherwise.  When it does not exist, data_groups is returned unchanged.
      add_1_percent_to_data_groups <- function(data_groups=NULL){
        
        data_new <- data_groups
        
        if (get_raw_1_percent()$exists()){
          # Load 1 percent file
          df_1_percent <- get_raw_1_percent()$df
          
          # Get relevant fields
          df_1_percent$is_in_1percent <- TRUE
          df_1_percent                <- rename_column(df_1_percent, "Spectrum Title", "PMD_FDR_spectrum_title")
          df_1_percent                <- df_1_percent[,c("PMD_FDR_spectrum_title", "is_in_1percent")]
          
          # One row per title: a title listed several times in the 1 percent
          # file would otherwise duplicate the matching rows of data_groups.
          df_1_percent                <- unique(df_1_percent)
          
          # Merge with data_groups
          data_new <- merge(data_new, df_1_percent, all.x=TRUE)
          data_new$is_in_1percent[is.na(data_new$is_in_1percent)] <- FALSE
        }
        
        # Save results
        return(data_new)
        
      }
      
      
      # Main code of make_data_groups()
      data_groups <- standardize_fields(data_original)
      
      # Quantile-based groupings, applied in the order listed.
      # Groupings for "m_z" (11 breaks, tolerance 10, format "04.0f") and
      # "measured_mass" (11 breaks, tolerance 1, format "04.0f") are currently
      # disabled, as are the already-grouped fields "isotope_number" and "charge".
      quantile_groupings <- list(
        list(field = "PMD_FDR_input_score",    n_breaks = 14, tolerance = 1,   format = "03d"   ),
        list(field = "PMD_FDR_pmd",            n_breaks = 21, tolerance = 0.1, format = "+05.1f"),
        list(field = "PMD_FDR_peptide_length", n_breaks = 11, tolerance = 1,   format = "02d"   )
      )
      for (grouping in quantile_groupings){
        data_groups <- add_grouped_variable(field_name_to_group = grouping[["field"]],
                                            data_groups         = data_groups,
                                            vec.length.out      = grouping[["n_breaks"]],
                                            vec.tolerance       = grouping[["tolerance"]],
                                            value_format        = grouping[["format"]])
      }
      
      # Remaining groupings; each step depends on columns added by earlier ones
      data_groups <- add_already_grouped_variable(field_name_to_group = "PMD_FDR_spectrum_file",
                                                  data_groups         = data_groups )
      data_groups <- add_protein_group(         data_groups = data_groups)
      data_groups <- add_group_training_class(  data_groups = data_groups)
      data_groups <- add_group_spectrum_index(  data_groups = data_groups)
      data_groups <- add_median_of_group_index( data_groups = data_groups)
      data_groups <- add_value_norm(            data_groups = data_groups)
      
      # Columns to keep: a fixed set, the grouped fields themselves, and each
      # grouped field's "group_" and "group_decoy_" companions.
      grouped_fields <- c("PMD_FDR_input_score", "PMD_FDR_pmd", "PMD_FDR_peptide_length", "PMD_FDR_spectrum_file", "PMD_FDR_spectrum_index", "PMD_FDR_proteins")
      simple_names   <- simplify_field_name(grouped_fields)
      fields_of_interest <- c("value",
                              "PMD_FDR_decoy",
                              "PMD_FDR_spectrum_title",
                              "median_of_group_index",
                              "value_norm",
                              "used_to_find_middle",
                              "group_training_class",
                              grouped_fields,
                              sprintf("group_%s"      , simple_names),
                              sprintf("group_decoy_%s", simple_names))
      
      data_groups <- data_groups[,fields_of_interest]
      data_groups <- add_1_percent_to_data_groups(data_groups)
      
      return(data_groups)
    }
    
    # Build the grouped table from the converter's data frame and store it in
    # this object's `df` field.
    df <<- make_data_groups(get_data_converter()$df)
  },
  # Accessors for the parent objects this object depends on.  Setters store
  # the object in `parents` under a fixed key; getters fetch it through
  # verified_element_of_list().
  set_info = function(info){
    parents[["info"]] <<- info
  },
  get_info = function(){
    verified_element_of_list(parent_list  = parents,
                             element_name = "info",
                             object_name  = "Data_Object_Groupings$parents")
  },
  set_data_converter = function(data_converter){
    parents[["data_converter"]] <<- data_converter
  },
  get_data_converter = function(){
    verified_element_of_list(parent_list  = parents,
                             element_name = "data_converter",
                             object_name  = "Data_Object_Groupings$parents")
  },
  # BUGBUG (original author): the 1% file should be using the same file type
  # format as the standard data (but isn't)
  set_raw_1_percent = function(raw_1_percent){
    parents[["raw_1_percent"]] <<- raw_1_percent
  },
  get_raw_1_percent = function(){
    verified_element_of_list(parent_list  = parents,
                             element_name = "raw_1_percent",
                             object_name  = "Data_Object_Groupings$parents")
  }
)
###############################################################################
#            Class: Data_Object_Individual_FDR
###############################################################################
# Reference-class generator: extends Data_Object with a single data.frame
# field, `df`, which m_load_data() fills.
Data_Object_Individual_FDR <- setRefClass(
  Class    = "Data_Object_Individual_FDR",
  contains = "Data_Object",
  fields   = list(df = "data.frame")
)
Data_Object_Individual_FDR$methods(
  # Constructor: run the parent initializer, then record this class's name.
  initialize = function(){
    callSuper()
    class_name <<- "Data_Object_Individual_FDR"
  },
  # Checks that the three parent data frames carry the columns used by
  # m_load_data().
  verify = function(){
    data_groups <- get_data_groups()$df
    densities   <- get_densities()$df
    alpha       <- get_alpha()$df
    
    for (field_name in c("value_norm", "group_decoy_input_score", "PMD_FDR_peptide_length", "PMD_FDR_input_score")){
      check_field_name(data_groups, "data_groups", field_name)
    }
    check_field_name(alpha, "alpha", "alpha") # BUGBUG (original author): I'm missing a field here...
    for (field_name in c("x", "t", "f")){
      check_field_name(densities, "densities", field_name)
    }
    
  },
  # Accessors for the parent objects.  Setters store the object in `parents`
  # under a fixed key; getters fetch it through verified_element_of_list().
  set_data_groups = function(parent){
    parents[["data_groups"]] <<- parent
  },
  get_data_groups = function(){
    verified_element_of_list(parent_list  = parents,
                             element_name = "data_groups",
                             object_name  = "Data_Object_Individual_FDR$parents")
  },
  set_densities = function(parent){
    parents[["densities"]] <<- parent
  },
  get_densities = function(){
    verified_element_of_list(parent_list  = parents,
                             element_name = "densities",
                             object_name  = "Data_Object_Individual_FDR$parents")
  },
  set_alpha = function(parent){
    parents[["alpha"]] <<- parent
  },
  get_alpha = function(){
    verified_element_of_list(parent_list  = parents,
                             element_name = "alpha",
                             object_name  = "Data_Object_Individual_FDR$parents")
  },
  # Computes the individual FDR (column `i_fdr`) for every row of the grouped
  # data and stores the result in `df`.
  #   1. alpha (per-group proportion from the alpha data object) is interpolated
  #      as a function of PMD_FDR_input_score.
  #   2. The true-hit density `t` and false-hit density `f` are evaluated at
  #      each row's value_norm by spline interpolation of the density table.
  #   3. i_fdr = alpha*f / (alpha*f + (1-alpha)*t)
  m_load_data = function(){
    # Adds `value_of_interest`, `group_of_interest` and
    # `interpolated_groupwise_FDR` to data_groups.
    #   alpha             - data frame with columns group_of_interest and alpha
    #   field_value       - column copied to value_of_interest
    #   field_decoy_group - column copied to group_of_interest
    #   set_decoy_to_1    - when TRUE, rows with PMD_FDR_decoy == 1 get FDR 1
    #   densities         - accepted for interface compatibility; not used here
    add_FDR_to_data_groups <- function(data_groups=NULL, densities=NULL, alpha=NULL, field_value=NULL, field_decoy_group=NULL, set_decoy_to_1=FALSE){
      
      # Set up analysis
      data_new <- data_groups
      data_new$value_of_interest <- data_new[,field_value]
      data_new$group_of_interest <- data_new[,field_decoy_group]
      
      # NOTE(review): other code in this file uses MIN_GOOD_PEPTIDE_LENGTH for a
      # similar length cutoff -- confirm whether 11 should be that constant.
      data_subset <- subset(data_new, PMD_FDR_peptide_length >= 11)
      
      # Identify mean PMD_FDR_input_score per group
      
      group_input_score <- aggregate(PMD_FDR_input_score~group_of_interest, data=data_subset, FUN=mean)
      group_input_score <- rename_column(group_input_score, "PMD_FDR_input_score", "group_input_score")
      
      group_stats <- merge(alpha, group_input_score)
      
      # Leave the decoy group out of the interpolation.  The grouping step
      # labels that group "decoy"; the previous comparison against
      # "PMD_FDR_decoy" never matched, so decoys were silently included.
      group_stats <- subset(group_stats, !(group_of_interest %in% c("decoy", "PMD_FDR_decoy")))
      
      # Piecewise-linear alpha as a function of score, anchored at (0, 1)
      FUN_interp <- approxfun(x = c(0, group_stats$group_input_score),
                              y = c(1, group_stats$alpha))
      
      data_new$interpolated_groupwise_FDR <- FUN_interp(data_new$PMD_FDR_input_score)
      if (set_decoy_to_1){
        data_new$interpolated_groupwise_FDR[data_new$PMD_FDR_decoy == 1] <- 1
      }
      
      return(data_new)
    }
    
    data_groups <- get_data_groups()$df
    densities   <- get_densities()$df
    alpha       <- get_alpha()$df
    
    i_fdr <- add_FDR_to_data_groups(data_groups       = data_groups, 
                                    densities         = densities,
                                    alpha             = alpha,
                                    field_value       ="value_norm", 
                                    field_decoy_group = "group_decoy_input_score")
    # Derive local t
    interp_t <- splinefun(x=densities[["x"]], y=densities[["t"]])
    
    # Derive local f
    interp_f <- splinefun(x=densities[["x"]], y=densities[["f"]])
    
    # Derive local FDR
    i_fdr$t     <- interp_t(i_fdr$value_of_interest)
    i_fdr$f     <- interp_f(i_fdr$value_of_interest)
    i_fdr$alpha <- i_fdr$interpolated_groupwise_FDR
    i_fdr$i_fdr <- with(i_fdr, (alpha*f) / (alpha*f + (1-alpha)*t)) 
    
    df <<- i_fdr
    
  }
)
###############################################################################
#            Class: Data_Object_Densities
###############################################################################
# Reference-class generator: extends Data_Object with a single data.frame
# field, `df`, which m_load_data() fills.
Data_Object_Densities <- setRefClass(
  Class    = "Data_Object_Densities",
  contains = "Data_Object",
  fields   = list(df = "data.frame")
)
Data_Object_Densities$methods(
  # Constructor: run the parent initializer, then record this class's name.
  initialize = function(){
    callSuper()
    class_name <<- "Data_Object_Densities"
  },
  # Checks that the grouped data are non-empty and carry the columns used by
  # m_load_data().
  verify = function(){
    df_data_groups <- get_data_groups()$df
    
    checkTrue(nrow(df_data_groups) > 0,
              msg = "data_groups data frame was empty (and should not have been)")
    
    for (field_name in c("value_norm", "group_decoy_input_score", "group_training_class")){
      check_field_name(df_data_groups, "data_groups", field_name)
    }
  },
  # Accessors for the grouped-data parent object.
  set_data_groups = function(parent=NULL){
    parents[["data_groups"]] <<- parent
  },
  get_data_groups = function(){
    verified_element_of_list(parent_list  = parents,
                             element_name = "data_groups",
                             object_name  = "Data_Object_Densities$parents")
  },
  m_load_data = function(){
    
    # Support functions for make_densities()
    # Returns a copy of the grouped data with two extra columns:
    #   value_of_interest - copy of `value_norm`
    #   group_of_interest - copy of the column named by `field_group`
    # The data frame passed in is now actually used (it was previously ignored
    # in favour of a second get_data_groups() lookup); that lookup remains the
    # fallback when no data frame is supplied.
    set_values_of_interest <- function(df_data_groups=NULL, field_group = NULL){
      field_value       <- "value_norm"
      
      if (is.null(df_data_groups)){
        df_data_groups <- get_data_groups()$df
      }
      
      new_data_groups <- df_data_groups
      new_data_groups$value_of_interest <- new_data_groups[,field_value]
      new_data_groups$group_of_interest <- new_data_groups[,field_group]
      
      return(new_data_groups)
    }
    # Smallest and largest non-missing value_of_interest, as c(min, max).
    get_ylim <- function(data_groups=NULL){
      return(range(data_groups$value_of_interest, na.rm = TRUE))
    }
    make_hit_density <- function(data_subset=NULL, descr_of_df=NULL, ylim=NULL){
      #stop("Data_Object_Densities$make_hit_density() is untested beyond here")
      # Kernel density estimate of the non-missing values in column
      # `value_field`, evaluated from ylim[1] to ylim[2].  Stops with an error
      # naming `descr_of_df` when fewer than MIN_ACCEPTABLE_POINTS_IN_DENSITY
      # values are available.
      verify_density <- function(data_subset=NULL, value_field=NULL, descr_of_df=NULL, ylim=NULL){
        column_values <- data_subset[value_field]
        usable_values <- column_values[! is.na(column_values)]
        n_usable      <- length(usable_values)
        
        if (n_usable < MIN_ACCEPTABLE_POINTS_IN_DENSITY){
          stop (sprintf("There are too few valid %s (%d < %d) in %s to be used for calculating a density function",
                        value_field, 
                        n_usable,
                        MIN_ACCEPTABLE_POINTS_IN_DENSITY,
                        descr_of_df))
        }
        
        return(density(usable_values, from = ylim[1], to = ylim[2]))
      }
      # Reorganizes the y-values of a density so that the function is monotone
      # increasing up to its mode and monotone decreasing afterwards: the
      # values before the mode are sorted ascending, the values from the mode
      # onwards are sorted descending.  x is left untouched.
      uniformalize_density <- function(d){
        idx_mode  <- which.max(d$y)
        # seq_len() gives an empty index when the mode is the very first point;
        # 1:(idx_mode-1) would give c(1, 0) there and duplicate y[1], making y
        # one element longer than x.
        idx_lower <- seq_len(idx_mode - 1)
        idx_upper <- idx_mode:length(d$y)
        
        values_lower <- d$y[idx_lower]
        values_upper <- d$y[idx_upper]
        
        new_d   <- d
        new_d$y <- c(sort(values_lower, decreasing = FALSE), 
                     sort(values_upper, decreasing = TRUE))
        
        return(new_d)
      }
      
      # Use only rows with a long-enough peptide that were not used to find the
      # group medians; then estimate, normalize and reshape the density.
      eligible_rows <- subset(data_subset,
                              (PMD_FDR_peptide_length >= MIN_GOOD_PEPTIDE_LENGTH) &
                                (used_to_find_middle == FALSE))
      raw_density <- verify_density(data_subset = eligible_rows,
                                    value_field = "value_of_interest",
                                    descr_of_df = descr_of_df,
                                    ylim        = ylim)
      
      return(uniformalize_density(normalize_density(raw_density)))
    }
    # Density of the rows classed "good_testing", evaluated over the value
    # range of the whole data set.
    make_true_hit_density  <- function(data_groups=NULL){
      testing_rows <- subset(data_groups, (group_training_class == "good_testing") )
      
      return(make_hit_density(data_subset = testing_rows,
                              descr_of_df = "Good-testing dataset",
                              ylim        = get_ylim(data_groups)))
    }
    # Density of the rows classed "bad_long", evaluated over the value range
    # of the whole data set.
    make_false_hit_density <- function(data_groups=NULL){
      bad_long_rows <- subset(data_groups, (group_training_class == "bad_long") )
      
      return(make_hit_density(data_subset = bad_long_rows,
                              descr_of_df = "Bad-long dataset",
                              ylim        = get_ylim(data_groups)))
    }
    # Appends one column per distinct group_of_interest (in sorted order) to
    # `densities`; each column holds the y-values of that group's density and
    # is named after the group.  `field_group` is only used in the description
    # handed to make_hit_density() for its error message.
    add_v_densities <- function(data_groups=NULL, densities=NULL, field_group = NULL){
      new_densities <- densities
      
      for (local_group in sort(unique(data_groups$group_of_interest))){
        rows_in_group <- subset(data_groups, (group_of_interest == local_group))
        description   <- sprintf("subset of data (where %s is '%s')", 
                                 field_group,
                                 local_group)
        group_density <- make_hit_density(data_subset = rows_in_group,
                                          descr_of_df = description,
                                          ylim        = get_ylim(data_groups))
        new_densities[local_group] <- group_density$y
      }
      
      return(new_densities)
    }
    
    # Main section of make_densities()
    # Resulting table: x (evaluation points), t (true-hit density), f
    # (false-hit density), then one density column per group.
    field_group     <- "group_decoy_input_score"
    new_data_groups <- set_values_of_interest(get_data_groups()$df, field_group = field_group)
    
    d_true  <- make_true_hit_density( new_data_groups)
    d_false <- make_false_hit_density(new_data_groups)
    
    base_densities <- data.frame(x=d_true$x, 
                                 t=d_true$y, 
                                 f=d_false$y)
    df <<- add_v_densities(data_groups = new_data_groups,
                           densities   = base_densities,
                           field_group = field_group)
  }
)
###############################################################################
#            Class: Data_Object_Alpha
###############################################################################
# Data object holding one `alpha` value per group, derived from the peak
# heights of the columns of the densities table.
#   df - data frame with columns group_of_interest and alpha
Data_Object_Alpha <- setRefClass(
  Class    = "Data_Object_Alpha",
  contains = "Data_Object",
  fields   = list(df = "data.frame")
)
Data_Object_Alpha$methods(
  # Constructor: run the parent initializer, then record this class's name.
  initialize = function(){
    callSuper()
    class_name <<- "Data_Object_Alpha"
  },
  # The densities table must not be empty.
  verify = function(){
    checkTrue(nrow(get_densities()$df) > 0,
              msg = "Densities data.frame was empty (and should not have been)")
  },
  # Accessors for the densities parent object.
  set_densities = function(parent=NULL){
    parents[["densities"]] <<- parent
  },
  get_densities = function(){
    verified_element_of_list(parent_list  = parents,
                             element_name = "densities",
                             object_name  = "Data_Object_Alpha")
  },
  # For every density column v other than x, t and f:
  #   alpha = (max(t) - max(v)) / (max(t) - max(f))
  # so a group whose peak equals the true-hit peak gets 0 and one whose peak
  # equals the false-hit peak gets 1.
  m_load_data = function(){
    
    peak_height <- apply(X = get_densities()$df, MARGIN = 2, FUN = max)
    df_alpha    <- data.frame(stringsAsFactors = FALSE,
                              list(v = peak_height,
                                   group_of_interest = names(peak_height)))
    df_alpha    <- df_alpha[df_alpha$group_of_interest != "x", ]
    
    peak_true  <- df_alpha$v[df_alpha$group_of_interest == "t"]
    peak_false <- df_alpha$v[df_alpha$group_of_interest == "f"]
    df_alpha$alpha <- (peak_true - df_alpha$v) / (peak_true - peak_false)
    
    # The reference columns themselves are not groups
    is_reference <- df_alpha$group_of_interest %in% c("t", "f")
    
    df <<- df_alpha[!is_reference, c("group_of_interest", "alpha")]
  }
)
###############################################################################
#            Class: Data_Processor
###############################################################################
# Data_Processor: owns one instance of every data object in the pipeline and
# wires them together (each object is told about its inputs, and each input is
# told about the objects that consume it).
Data_Processor <- setRefClass(
  "Data_Processor",
  fields = list(
    info           = "Data_Object_Info",
    raw_data       = "Data_Object_Raw_Data",
    raw_1_percent  = "Data_Object_Raw_1_Percent",
    data_converter = "Data_Object_Data_Converter",
    data_groups    = "Data_Object_Groupings",
    densities      = "Data_Object_Densities",
    alpha          = "Data_Object_Alpha",
    i_fdr          = "Data_Object_Individual_FDR"
  )
)
Data_Processor$methods(
  # Wire the pipeline immediately when an info object is supplied.
  initialize = function(p_info=NULL){
    if (is.null(p_info)){
      return(invisible(NULL))
    }
    set_info(p_info)
  },
  # Store the info object and declare every parent/child dependency.
  # The statements are kept in pipeline order: each stage first receives its
  # inputs through set_*() and is then appended as a child of those inputs.
  set_info = function(p_info=NULL){
    info <<- p_info
    
    # Stage: raw_data (input: info)
    raw_data$set_info(info)
    info$append_child(raw_data)
    
    # Stage: raw_1_percent (input: info)
    raw_1_percent$set_info(info)
    info$append_child(raw_1_percent)
    
    # Stage: data_converter (inputs: info, raw_data)
    data_converter$set_info(info)
    data_converter$set_raw_data(raw_data)
    info$append_child(data_converter)
    raw_data$append_child(data_converter)
    
    # Stage: data_groups (inputs: info, data_converter, raw_1_percent)
    data_groups$set_info(info)
    data_groups$set_data_converter(data_converter)
    data_groups$set_raw_1_percent(raw_1_percent)
    info$append_child(data_groups)
    data_converter$append_child(data_groups)
    raw_1_percent$append_child(data_groups)
    
    # Stage: densities (input: data_groups)
    densities$set_data_groups(data_groups)
    data_groups$append_child(densities)
    
    # Stage: alpha (input: densities)
    alpha$set_densities(densities)
    densities$append_child(alpha)
    
    # Stage: i_fdr (inputs: data_groups, densities, alpha)
    i_fdr$set_data_groups(data_groups)
    i_fdr$set_densities(densities)
    i_fdr$set_alpha(alpha)
    data_groups$append_child(i_fdr)
    densities$append_child(i_fdr)
    alpha$append_child(i_fdr)
  }
)


#############################################################
####### Classes for Plotting
#############################################################

###############################################################################
#            Class: Plot_Image
###############################################################################
# Plot_Image: base class for the plotting classes in this file.
# Fields hold the data processors to draw from, the title text, flags that
# switch text/title on or off, legend spacing and a line/text scale factor.
# Subclasses must override plot_image() and get_n().
Plot_Image <- setRefClass("Plot_Image",
                          fields = list(data_processors    = "list",
                                        plot_title         = "character",
                                        include_text       = "logical",
                                        include_main       = "logical", 
                                        x.intersp          = "numeric",
                                        y.intersp          = "numeric",
                                        scale              = "numeric",
                                        main               = "character",
                                        is_image_container = "logical"))
Plot_Image$methods(
  # Store the construction options. All arguments have defaults so the
  # generator can be called with no arguments.
  initialize = function(p_data_processors = list(), 
                        p_include_main = TRUE, 
                        p_include_text = TRUE,
                        p_is_image_container = FALSE){
    include_main    <<- p_include_main
    include_text    <<- p_include_text
    data_processors <<- p_data_processors
    is_image_container <<- p_is_image_container
  },
  # Abstract: draw the figure on the current device.
  # Fails explicitly (like get_n) rather than through an unrelated plot() error.
  plot_image = function(){
    stop("Need to define function plot_image() for subclass") # Abstract function
  },
  # Abstract: number of observations reported in the standard title.
  get_n = function(){
    stop("Need to define function get_n() for subclass") #Abstract function
  },
  # Build `main` from the plot title, the collection name of the first data
  # processor and get_n(). Nothing is set when text or title is switched off,
  # or when this object is only a container for other images.
  create_standard_main = function(){
    needs_main <- include_text && include_main && !is_image_container
    if (needs_main){
      collection_name <- data_processors[[1]]$info$collection_name()
      main <<- sprintf("%s\n(Dataset: %s; n=%s)", plot_title, collection_name,  format(get_n(), big.mark = ","))
    }
  },
  # Open a new window and draw the image in it.
  #   p_scale       - multiplier applied to margins, font size, line widths
  #                   and the window dimensions
  #   window_height - unscaled window height passed (scaled) to windows()
  #   window_width  - unscaled window width passed (scaled) to windows()
  # Note: windows() is the graphics device of Windows builds of R.
  plot_image_in_window = function(p_scale=NULL, window_height=NULL, window_width=NULL){
    scale <<- p_scale
    SIZE_AXIS      <- 2.5 * scale # in the units used by mar
    SIZE_MAIN      <- 2.5 * scale
    SIZE_NO_MARGIN <- 0.1 * scale
    FONT_SIZE      <- 8   * scale
    WINDOW_WIDTH   <- window_width  * scale
    WINDOW_HEIGHT  <- window_height * scale
    X_INTERSP      <- 0.5 * scale + 0.4 # manages legend text spacing (horizontal)
    Y_INTERSP      <- 0.5 * scale + 0.4 # manages legend text spacing (vertical)
    
    # Leave room for a title only when one will be drawn
    if (include_main){
      mar <- c(SIZE_AXIS, SIZE_AXIS, SIZE_MAIN     , SIZE_NO_MARGIN)
    } else {
      mar <- c(SIZE_AXIS, SIZE_AXIS, SIZE_NO_MARGIN, SIZE_NO_MARGIN)
    }
    mgp <- c(SIZE_AXIS/2, SIZE_AXIS/4, 0) # Margin line (mex units) for axis title, axis labels, axis lines
    ps  <- FONT_SIZE
    x.intersp <<- X_INTERSP
    y.intersp <<- Y_INTERSP
    
    windows(width = WINDOW_WIDTH, height=WINDOW_HEIGHT)
    
    # Restore the graphics parameters even if drawing fails part-way
    old_par  <- par(mar=mar, ps=ps, mgp=mgp)
    on.exit(par(old_par), add = TRUE)
    create_standard_main()
    
    plot_image()
    if (!is_image_container){
      axis(side=1, labels=include_text, tcl=-0.5, lwd=scale)
      axis(side=2, labels=include_text, tcl=-0.5, lwd=scale)
      box(lwd=scale)
    }
  },
  # Draw in a 3.25 x 2 window (before scaling).
  plot_image_in_small_window = function(p_scale=1){
    plot_image_in_window(p_scale=p_scale, window_height=2, window_width=3.25)
  },
  # Draw in a window of width 7 (before scaling); the caller chooses the height.
  plot_image_in_large_window = function(p_scale=1, window_height=NULL){
    plot_image_in_window(p_scale=p_scale, window_height=window_height, window_width=7)
  }
)
###############################################################################
#            Class: Legend_Object
###############################################################################
# Legend_Object: draws a legend from a list of user parameters, scaling line
# widths and spacing by `scale`, and places the legend title in a second,
# boxless legend() call positioned relative to the first.
Legend_Object <- setRefClass("Legend_Object",
                             contains = "Plot_Image",
                             fields = list(user_params = "list",
                                           scale       = "numeric"))
Legend_Object$methods(
  # p_user_params - list of legend settings; entries read by this class are
  #                 x, y, bty, lwd, seg.len, box.lwd, x.intersp, y.intersp,
  #                 leg, col, lty and title
  # p_scale       - required scale factor; an error is raised when NULL
  initialize = function(p_user_params = NULL, p_scale = NULL){
    if (is.null(p_user_params)){
      user_params <<- list()
    } else {
      user_params <<- p_user_params
    }
    if (is.null(p_scale)){
      stop("Legend_Object must have a valid scale")
    } else {
      scale <<- p_scale
    }
    # x and y are read with [[ ]] (exact match): `$` on a list partially
    # matches, so a missing x or y would otherwise pick up the value of
    # x.intersp or y.intersp.
    user_params$x         <<- if_null(user_params[["x"]]   , "topleft", user_params[["x"]])
    user_params$y         <<- if_null(user_params[["y"]]   ,      NULL, user_params[["y"]])
    user_params$bty       <<- if_null(user_params$bty      ,       "o", user_params$bty)
    user_params$lwd       <<- if_null(user_params$lwd      ,      NULL, user_params$lwd        * scale) # Because we allow NULL, scale must be inside parens
    user_params$seg.len   <<- if_null(user_params$seg.len  ,         3, user_params$seg.len  ) * scale
    user_params$box.lwd   <<- if_null(user_params$box.lwd  ,         1, user_params$box.lwd  ) * scale
    user_params$x.intersp <<- if_null(user_params$x.intersp,       0.6, user_params$x.intersp) * scale
    user_params$y.intersp <<- if_null(user_params$y.intersp,       0.4, user_params$y.intersp) * scale + 0.2
  },
  # Draw the legend body with an empty title line, then overlay the title in a
  # second legend() call anchored at the first legend's left edge and shifted
  # vertically by a scale-dependent fraction of its height.
  show = function(){
    first_legend <- legend(x         = user_params[["x"]],   # exact match, see initialize
                           y         = user_params[["y"]],   # NULL unless the caller supplied y
                           title     = "", 
                           legend    = user_params$leg, 
                           col       = user_params$col, 
                           bty       = user_params$bty,
                           lty       = user_params$lty, 
                           lwd       = user_params$lwd, 
                           seg.len   = user_params$seg.len, 
                           box.lwd   = user_params$box.lwd, 
                           x.intersp = user_params$x.intersp, 
                           y.intersp = user_params$y.intersp)
    new_x <- first_legend$rect$left 
    # Empirical vertical offset for the title
    new_y <- first_legend$rect$top + first_legend$rect$h * ifelse(scale==1, 0.07, 0.03 - (scale * 0.02))
    legend(x=new_x, y=new_y, title = user_params$title, legend = "", cex=1.15, bty="n")
    
  }
)
###############################################################################
#            Class: Plot_Multiple_Images
###############################################################################
# Plot_Multiple_Images: container that lays several Plot_Image objects out on
# an n_images_tall x n_images_wide grid and labels each panel (a), (b), ...
Plot_Multiple_Images <- setRefClass("Plot_Multiple_Images",
                                    contains = "Plot_Image",
                                    fields = list(n_images_wide = "numeric",
                                                  n_images_tall = "numeric",
                                                  image_list    = "list"))
Plot_Multiple_Images$methods(
  # p_n_images_wide / p_n_images_tall - grid dimensions passed to par(mfrow)
  # p_image_list                      - list of image objects; NULL means none
  # ...                               - forwarded to the parent initializer
  initialize = function(p_n_images_wide=1, p_n_images_tall=2, p_image_list=NULL, ...){
    n_images_wide  <<- p_n_images_wide
    n_images_tall  <<- p_n_images_tall
    # The field is typed "list", so NULL (the default) must become an empty list
    image_list     <<- if (is.null(p_image_list)) list() else p_image_list
    #plot_title      <<- "True Hit and False Hit Distributions"
    
    callSuper(p_is_image_container=TRUE, ...)
  },
  # Draw every image in its own panel, with axes, a box and a panel letter.
  plot_image = function(){
    # Support functions
    apply_mtext <- function(letter=NULL){
      line <- 1.3*scale
      mtext(letter, side=1, line=line, adj=0)
    }
    # main code
    old_par <- par(mfrow=c(n_images_tall, n_images_wide))
    on.exit(par(old_par), add = TRUE) # restore the layout even if a panel fails
    
    # seq_along() so that an empty image_list draws nothing
    for (i in seq_along(image_list)){
      sub_image <- image_list[[i]]
      sub_image$create_standard_main()
      sub_image$scale <- scale # panels are drawn at the container's scale
      sub_image$plot_image()
      axis(side=1, labels=include_text, tcl=-0.5, lwd=scale)
      axis(side=2, labels=include_text, tcl=-0.5, lwd=scale)
      box(lwd=scale)
      apply_mtext(letter=sprintf("(%s)", letters[i]))
    }
  }
)
###############################################################################
#            Class: Plot_Compare_PMD_and_Norm_Density
###############################################################################
# Plot_Compare_PMD_and_Norm_Density: density curves of "value" (and, when
# show_norm is TRUE, of "value_norm") for rows selected as true hits
# (PMD_FDR_decoy == 0 and PMD_FDR_input_score == 100) versus false hits
# (PMD_FDR_decoy == 1).
Plot_Compare_PMD_and_Norm_Density <- setRefClass("Plot_Compare_PMD_and_Norm_Density",
                                                 contains = "Plot_Image",
                                                 fields = list(show_norm      = "logical",
                                                               display_n_psms = "logical"))
Plot_Compare_PMD_and_Norm_Density$methods(
  # p_show_norm      - also draw the normalized ("value_norm") panel
  # p_display_n_psms - append the PSM counts to the legend labels
  # ...              - forwarded to the parent initializer
  initialize = function(p_show_norm=TRUE, p_display_n_psms=TRUE, ...){
    show_norm       <<- p_show_norm
    display_n_psms  <<- p_display_n_psms
    plot_title      <<- "True Hit and False Hit Distributions"
    
    callSuper(...)
  },
  plot_image = function(){
    # Support functions for plot_compare_PMD_and_norm_density()
    
    # Normalized true/false densities of column `var_value`, evaluated over the
    # common range of that column, plus the row counts of both subsets.
    get_densities <- function(data_subset = NULL, var_value = NULL){
      data_subset$value_of_interest <- data_subset[,var_value]
      from <- min(data_subset$value_of_interest, na.rm = TRUE)
      to   <- max(data_subset$value_of_interest, na.rm = TRUE)
      data_true  <- subset(data_subset, (PMD_FDR_decoy==0) & (PMD_FDR_input_score==100))
      data_false <- subset(data_subset, (PMD_FDR_decoy==1))    
      
      d_true  <- with(data_true , density(value_of_interest, from = from, to = to, na.rm = TRUE))
      d_false <- with(data_false, density(value_of_interest, from = from, to = to, na.rm = TRUE))
      d_true  <- normalize_density(d_true)
      d_false <- normalize_density(d_false)
      
      densities <- list(d_true=d_true, d_false=d_false, var_value = var_value, n_true = nrow(data_true), n_false = nrow(data_false))
      
      return(densities)
    }
    # Common x-range of the curves that will be drawn.
    # Uses the x grid of both curves (the y values do not belong in an x-range).
    get_xlim <- function(densities_a = NULL, densities_b = NULL, show_norm=NULL){
      xlim   <- range(c(      densities_a$d_true$x, densities_a$d_false$x))
      if (show_norm){
        xlim <- range(c(xlim, densities_b$d_true$x, densities_b$d_false$x))
      }
      return(xlim)
    }
    # Common y-range of the curves that will be drawn.
    get_ylim <- function(densities_a = NULL, densities_b = NULL, show_norm=NULL){
      ylim   <- range(c(      densities_a$d_true$y, densities_a$d_false$y))
      if (show_norm){
        ylim <- range(c(ylim, densities_b$d_true$y, densities_b$d_false$y))
      }
      return(ylim)
    }
    # Draw one panel: true-hit curve, false-hit curve, zero lines and legend.
    # dataset_name is accepted but not used by the drawing code.
    plot_distributions <- function(densities = NULL, var_value= NULL, dataset_name = NULL, ...){
      leg <- list()
      leg$leg <- c("Good", "Bad")
      if (display_n_psms){
        leg$leg <- sprintf("%s (%d PSMs)", 
                           leg$leg,
                           c(densities$n_true, densities$n_false))
        
      }
      leg$col <- c("black", "red")
      leg$lwd <- c(3      ,     3)
      leg$lty <- c(1      ,     2)
      leg$title <- "Hit Category"
      if (var_value == "value"){
        xlab <- "PMD (ppm)"
      } else {
        xlab <- "PMD - normalized (ppm)"
      }
      ylab <- "Density"
      if (!include_text){
        xlab <- ""
        ylab <- ""
      }
      # Axes are suppressed here; the caller draws them afterwards
      plot( densities$d_true , col=leg$col[1], lwd=leg$lwd[1] * scale, lty=leg$lty[1], xaxt = "n", yaxt = "n", main=main, xlab = xlab, ylab=ylab, ...)
      lines(densities$d_false, col=leg$col[2], lwd=leg$lwd[2] * scale, lty=leg$lty[2])
      abline(v=0, h=0, col="gray", lwd=1*scale)
      if (include_text){
        legend_object <- Legend_Object$new(leg, scale)
        legend_object$show()
      }
    }
    
    # Main code block for plot_compare_PMD_and_norm_density
    data_processor <- data_processors[[1]]
    data_processor$data_groups$ensure()
    data_groups <- data_processor$data_groups$df
    
    # Panel a: rows not used to find the middle; panel b: additionally
    # restricted to peptide length > 11
    data_subset_a <- subset(data_groups  , used_to_find_middle == FALSE)
    data_subset_b <- subset(data_subset_a, PMD_FDR_peptide_length > 11)
    
    densities_a <- get_densities(data_subset = data_subset_a, var_value = "value")
    densities_b <- get_densities(data_subset = data_subset_b, var_value = "value_norm")
    
    xlim <- get_xlim(densities_a, densities_b, show_norm = show_norm)
    ylim <- get_ylim(densities_a, densities_b, show_norm = show_norm)
    
    # collection_name is a method (it is called with () elsewhere in this file)
    dataset_name <- data_processor$info$collection_name()
    plot_distributions(  densities=densities_a, var_value = "value"     , dataset_name = dataset_name, xlim=xlim, ylim=ylim)
    if (show_norm){
      plot_distributions(densities=densities_b, var_value = "value_norm", dataset_name = dataset_name, xlim=xlim, ylim=ylim)
    }
  },
  # Number of true-hit plus false-hit rows reported in the standard title.
  get_n = function(){
    data_processor <- data_processors[[1]]
    data_processor$data_groups$ensure()
    data_subset_a <- subset(data_processor$data_groups$df  , used_to_find_middle == FALSE)
    data_subset_b <- subset(data_subset_a, PMD_FDR_peptide_length > 11)
    
    # NOTE(review): plot_image() always draws subset a and draws subset b only
    # when show_norm is TRUE, so counting subset b when show_norm is FALSE
    # looks inverted - confirm the intended n before changing.
    if (show_norm){
      data_subset <- data_subset_a
    } else {
      data_subset <- data_subset_b
    }
    
    data_true  <- subset(data_subset, (PMD_FDR_decoy==0) & (PMD_FDR_input_score==100))
    data_false <- subset(data_subset, (PMD_FDR_decoy==1))       
    
    return(nrow(data_true) + nrow(data_false))
  }
)

###############################################################################
#            Class: Plot_Time_Invariance_Alt
###############################################################################
# Plot_Time_Invariance_Alt: for one training class, splits the rows (ordered by
# spectrum index) into 100 consecutive groups and draws, for each group, a box
# spanning its index range and the middle 25% of the chosen value column.
Plot_Time_Invariance_Alt <- setRefClass("Plot_Time_Invariance_Alt",
                                        contains = "Plot_Image",
                                        fields = list(show_norm      = "logical",
                                                      display_n_psms = "logical",
                                                      training_class = "character",
                                                      ylim           = "numeric",
                                                      field_of_interest = "character"))
Plot_Time_Invariance_Alt$methods(
  # p_ylim              - y-axis limits; NULL means "use the data range"
  # p_training_class    - one of "bad_long", "good_testing", "good_training",
  #                       "other"; any other string is an error. NULL leaves
  #                       the field empty and omits the title suffix.
  # p_field_of_interest - name of the column to plot
  # ...                 - forwarded to the parent initializer
  initialize = function(p_ylim=NULL, p_training_class=NULL, p_field_of_interest="value_norm", ...){
    get_subset_title <- function(training_class=NULL){
      if (is.null(training_class)){
        return(NULL)
      }
      subset_title <- switch(training_class,
                             bad_long      = "bad only",
                             good_testing  = "good-testing only",
                             good_training = "good-training only",
                             other         = "other only",
                             stop("Unexpected training_class in plot_time_invariance"))
      return(subset_title)
    }
    
    # The fields are typed, so a NULL argument is stored as an empty vector
    ylim              <<- if (is.null(p_ylim)) numeric(0) else p_ylim
    training_class    <<- if (is.null(p_training_class)) character(0) else p_training_class
    field_of_interest <<- p_field_of_interest
    subset_title <- get_subset_title(training_class=p_training_class)
    title_suffix <- if (is.null(subset_title)) "" else sprintf(" - %s", subset_title)
    backup_title <- sprintf("Middle 25%% PMD for spectra sorted by index%s", title_suffix)
    plot_title <<- backup_title
    
    callSuper(...)
  },
  # Draw the boxes for the selected training class plus a zero reference line.
  plot_image = function(){
    data_subset <- get_data_subset()
    plot_group_spectrum_index_from_subset_boxes(data_subset = data_subset)
    abline(h=0, col="blue", lwd=scale)
  },
  # Rows of the first data processor's groupings that belong to training_class.
  get_data_subset = function(){
    data_processor <- data_processors[[1]]
    data_processor$data_groups$ensure()
    return(subset(data_processor$data_groups$df, (group_training_class==training_class)))
  },
  # Number of rows in the plotted subset.
  get_n = function(){
    return(nrow(get_data_subset()))
  },
  # Draw one rectangle per index group: x spans the group's spectrum-index
  # range, y spans the 37.5% to 62.5% quantiles of the value column.
  plot_group_spectrum_index_from_subset_boxes = function(data_subset = NULL){
    n_plot_groups <- 100
    
    field_name_text <- ifelse(field_of_interest=="value", "PMD", "Translated PMD")
    new_subset                   <- data_subset
    new_subset$value_of_interest <- new_subset[,field_of_interest]
    new_subset                   <- new_subset[order(new_subset$PMD_FDR_spectrum_index),]
    
    # Row positions that delimit the groups; the last group keeps the final row
    idxs <- round_to_tolerance(seq(from=1, to=nrow(new_subset), length.out = n_plot_groups+1), 1)
    idxs_left  <- idxs[-(n_plot_groups+1)]
    idxs_right <- idxs[-1] - 1
    idxs_right[n_plot_groups] <- idxs_right[n_plot_groups] + 1
    
    new_subset$plot_group <- NA
    for (i in seq_len(n_plot_groups)){
      new_subset$plot_group[idxs_left[i]:idxs_right[i]] <- i 
    }
    xleft   <- aggregate(PMD_FDR_spectrum_index   ~plot_group, data=new_subset, FUN=min)
    xright  <- aggregate(PMD_FDR_spectrum_index   ~plot_group, data=new_subset, FUN=max)
    ybottom <- aggregate(value_of_interest~plot_group, data=new_subset, FUN=function(x){quantile(x, probs = 0.5 - (0.25/2))})
    ytop    <- aggregate(value_of_interest~plot_group, data=new_subset, FUN=function(x){quantile(x, probs = 0.5 + (0.25/2))})
    
    # Combine the four per-group summaries into one table keyed by plot_group
    y_bounds <- merge(rename_column(ybottom, "value_of_interest", "ybottom"),
                      rename_column(ytop   , "value_of_interest", "ytop"))
    right_and_y <- merge(rename_column(xright, "PMD_FDR_spectrum_index", "xright"),
                         y_bounds)
    boxes <- merge(rename_column(xleft, "PMD_FDR_spectrum_index", "xleft"),
                   right_and_y)
    
    xlab <- "Spectrum Index"
    ylab <- sprintf("%s (ppm)", field_name_text )
    # ylim is a numeric field, so "not supplied" shows up as an empty vector
    if (length(ylim) == 0){
      ylim <<- range(new_subset$value_of_interest, na.rm = TRUE)
    }
    if (!include_text){
      xlab <- ""
      ylab <- ""
    }
    plot(value_of_interest~PMD_FDR_spectrum_index, data=new_subset, type="n", ylim=ylim, xlab = xlab, ylab=ylab, main=main, xaxt="n", yaxt="n")
    with(boxes, rect(xleft = xleft, ybottom = ybottom, xright = xright, ytop = ytop, lwd=scale))
    axis(1, labels=include_text, lwd=scale)
    axis(2, labels=include_text, lwd=scale)
    box(lwd=scale) #box around plot area
  }
  
)
###############################################################################
#            Class: Plot_Time_Invariance_Alt_Before_and_After
###############################################################################
# Plot_Time_Invariance_Alt_Before_and_After: two stacked
# Plot_Time_Invariance_Alt panels for the "good_testing" class, the first on
# the "value" column and the second on the "value_norm" column.
Plot_Time_Invariance_Alt_Before_and_After <- setRefClass("Plot_Time_Invariance_Alt_Before_and_After",
                                                         contains = "Plot_Multiple_Images",
                                                         fields = list())
Plot_Time_Invariance_Alt_Before_and_After$methods(
  # p_data_processors - list of data processors handed to both panels;
  #                     NULL is treated as an empty list
  # p_include_text    - draw axis labels / legends
  # p_include_main    - draw titles
  # p_ylim            - y-axis limits shared by both panels
  # ...               - forwarded to the parent initializer
  initialize = function(p_data_processors = NULL, 
                        p_include_text=TRUE, 
                        p_include_main=FALSE,
                        p_ylim = c(-4,4), ...){
    # The panels store this in a field typed "list", which cannot hold NULL
    if (is.null(p_data_processors)){
      p_data_processors <- list()
    }
    
    # Both panels differ only in the column they plot
    make_panel <- function(field_of_interest){
      Plot_Time_Invariance_Alt$new(p_data_processors   = p_data_processors, 
                                   p_include_text      = p_include_text, 
                                   p_include_main      = p_include_main,
                                   p_training_class    = "good_testing",
                                   p_field_of_interest = field_of_interest,
                                   p_ylim              = p_ylim)
    }
    plot_object1 <- make_panel("value")
    plot_object2 <- make_panel("value_norm")
    
    callSuper(p_n_images_wide=1, 
              p_n_images_tall=2, 
              p_include_text=p_include_text,
              p_include_main=p_include_main,
              p_image_list = list(plot_object1, plot_object2), ...)
  }
)

###############################################################################
#            Class: Plot_Density_PMD_and_Norm_Decoy_by_AA_Length
###############################################################################
# Plot_Density_PMD_and_Norm_Decoy_by_AA_Length: density of the PMD column
# ("value", or "value_norm" when show_norm is TRUE) for decoy rows, drawn once
# for all decoys and once per peptide-length band.
Plot_Density_PMD_and_Norm_Decoy_by_AA_Length <- setRefClass("Plot_Density_PMD_and_Norm_Decoy_by_AA_Length",
                                                            contains = "Plot_Image",
                                                            fields = list(show_norm = "logical"))
Plot_Density_PMD_and_Norm_Decoy_by_AA_Length$methods(
  # p_show_norm - plot "value_norm" instead of "value"
  # ...         - forwarded to the parent initializer
  initialize = function(p_show_norm=FALSE, ...){
    plot_title <<- "The Decoy Bump: PMD Distribution of Decoy matches by peptide length"
    show_norm  <<- p_show_norm
    callSuper(...)
  },
  # Number of decoy rows (PMD_FDR_decoy == 1) in the first data processor.
  get_n = function(){
    data_processor <- data_processors[[1]]
    data_processor$data_groups$ensure()
    data_subset <- subset(data_processor$data_groups$df, (PMD_FDR_decoy == 1))
    return(nrow(data_subset))
  },
  plot_image = function(){
    
    # Support functions for plot_density_PMD_and_norm_decoy_by_aa_length()
    
    # Attach a peptide-length band label to every row that was not used to find
    # the middle. The merge keeps only peptide lengths 6 to 50.
    add_group_peptide_length_special <- function(){
      data_processor <- data_processors[[1]]
      data_processor$data_groups$ensure()
      data_groups <- data_processor$data_groups$df # the name data_groups is a data.frame instead of a Data_Object
      data_groups <- subset(data_groups, used_to_find_middle == FALSE)
      
      df_group_definition <- data.frame(stringsAsFactors = FALSE,
                                        list(group_peptide_length_special = c("06-08", "09-10", "11-12", "13-15", "16-20", "21-50"),
                                             min                          = c(  6    ,   9    ,  11    ,  13    ,  16    ,  21    ),
                                             max                          = c(     8 ,     10 ,     12 ,     15 ,     20 ,     50 ) ))
      group_peptide_length_special     <- data.frame(list(PMD_FDR_peptide_length = 6:50))
      # For each length, the lower bound of the band it falls in
      group_peptide_length_special$min <- vapply(group_peptide_length_special$PMD_FDR_peptide_length,
                                                 FUN = function(i) max(df_group_definition$min[df_group_definition$min <= i]),
                                                 FUN.VALUE = numeric(1))
      group_peptide_length_special     <- merge(group_peptide_length_special, df_group_definition)
      
      # Drop any existing label column so the merge key is the length only
      data_groups$group_peptide_length_special <- NULL
      new_data_groups <- (merge(data_groups, 
                                group_peptide_length_special[,c("PMD_FDR_peptide_length", 
                                                                "group_peptide_length_special")]))
      return(new_data_groups)
    }
    # Normalized densities of `field_value`: one for all rows ("All decoys")
    # and one per distinct value of `field_group`, all on the same x-range.
    get_densities <- function(data_subset = NULL, field_value = NULL, field_group=NULL){
      get_density_from_subset <- function(data_subset=NULL, xlim=NULL){
        
        d_group            <- with(data_subset , density(value_of_interest, from = xlim[1], to = xlim[2], na.rm=TRUE))
        d_group            <- normalize_density(d_group)
        
        return(d_group)
      }
      
      data_temp                   <- data_subset
      data_temp$value_of_interest <- data_temp[[field_value]]
      data_temp$group_of_interest <- data_temp[[field_group]]
      
      xlim <- range(data_temp$value_of_interest, na.rm=TRUE)
      
      groups      <- sort(unique(data_temp$group_of_interest))
      
      d_group <- get_density_from_subset( data_subset=data_temp, xlim = xlim )
      densities <- list("All decoys" = d_group)
      # seq_along() so that an empty set of groups adds nothing
      for (i in seq_along(groups)){
        group <- groups[i]
        
        d_group <- get_density_from_subset( data_subset=subset(data_temp, (group_of_interest == group)), 
                                            xlim = xlim )
        densities[[group]] <- d_group
      }
      
      return(densities)
    }
    # Axis limits covering every curve in both density lists (y starts at 0).
    get_limits <- function(densities_a = NULL, densities_b = NULL){
      xlim <- c()
      ylim <- c(0)
      for (single_density in densities_a){
        xlim <- range(c(xlim, single_density$x))
        ylim <- range(c(ylim, single_density$y))
      }
      for (single_density in densities_b){
        xlim <- range(c(xlim, single_density$x))
        ylim <- range(c(ylim, single_density$y))
      }
      
      return(list(xlim=xlim, ylim=ylim))
    }
    # Draw the "All decoys" curve, then one curve per group that has more than
    # 100 rows, then the zero lines and the legend.
    # var_value, include_peak_dots and dataset_name are accepted but unused.
    plot_distributions <- function(data_groups = NULL, xlim=NULL, ylim=NULL, densities = NULL, field_group= NULL, field_value = "value", xlab_modifier = "", var_value= NULL, include_peak_dots=TRUE, dataset_name = NULL, ...){
      data_groups$group_of_interest <- data_groups[[field_group]]
      data_groups$value_of_interest <- data_groups[[field_value]]
      
      # Main body of plot_decoy_distribution_by_field_of_interest()
      FIXED_LWD <- 3
      
      groups <- sort(unique(data_groups$group_of_interest))
      n      <- length(groups)
      
      # One colour / line type per group
      df_leg <- data.frame(stringsAsFactors = FALSE,
                           list(leg = groups,
                                col = rainbow_with_fixed_intensity(n = n, goal_intensity_0_1 = 0.4),
                                lty = rep(1:6, length.out=n),
                                lwd = rep(FIXED_LWD , n)) )
      
      d <- densities[["All decoys"]]
      
      xlab <- sprintf("Precursor Mass Discrepancy%s (ppm)", xlab_modifier)
      ylab <- "Density"
      
      if (!include_text){
        xlab <- ""
        ylab <- ""
      }
      plot(d, lwd=FIXED_LWD * scale, main=main, xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, xaxt="n", yaxt="n")
      
      for (local_group in groups){
        data_subset <- subset(data_groups, group_of_interest == local_group)
        data_info   <- subset(df_leg     , leg               == local_group)
        col <- data_info$col[1]
        lty <- data_info$lty[1]
        lwd <- data_info$lwd[1]
        # Groups with 100 rows or fewer are listed in the legend but not drawn
        if (nrow(data_subset) > 100){
          d <- densities[[local_group]]
          lines(d, col=col, lty=lty, lwd=lwd * scale)
        }
      }
      abline(v=0, h=0, lwd=scale)
      leg <- list(title = "Peptide length (aa)", 
                  leg = c("All decoys"     , df_leg$leg),
                  col = c(col2hex("black") , df_leg$col),
                  lty = c(1                , df_leg$lty),
                  lwd = c(FIXED_LWD        , df_leg$lwd)
      )
      if (include_text){
        legend_object <- Legend_Object$new(leg, scale)
        legend_object$show()
      }
      
      box(lwd=scale) #box around plot area
      
    }
    
    # Main body for plot_density_PMD_and_norm_decoy_by_aa_length()
    
    data_mod <- add_group_peptide_length_special()
    data_mod <- subset(data_mod, PMD_FDR_decoy==1)
    
    densities_a <- get_densities(data_subset = data_mod, field_value = "value"     , field_group = "group_peptide_length_special")
    densities_b <- get_densities(data_subset = data_mod, field_value = "value_norm", field_group = "group_peptide_length_special")
    
    data_processor <- data_processors[[1]]
    dataset_name <- data_processor$info$collection_name()
    
    # Limits cover both variants so the two versions of the figure are comparable
    limits <- get_limits(densities_a, densities_b)
    xlim   <- limits$xlim
    ylim   <- limits$ylim
    
    if (show_norm){
      plot_distributions(data_groups = data_mod, densities=densities_b, field_value = "value_norm", xlab_modifier = " - normalized", field_group = "group_peptide_length_special", dataset_name=dataset_name, xlim=xlim, ylim=ylim)
    } else {
      plot_distributions(data_groups = data_mod, densities=densities_a, field_value = "value"     , xlab_modifier = ""             , field_group = "group_peptide_length_special", dataset_name=dataset_name, xlim=xlim, ylim=ylim)
    }
  }
  
)

###############################################################################
#            Class: Plot_Bad_CI
###############################################################################
# Plot_Bad_CI: cuts the "value" column of the "bad_long" rows into `breaks`
# regions and draws, per region, a box whose width is the region's value range
# and whose height is the 95% credible interval returned by
# credible_interval() for the region's share of the rows.
Plot_Bad_CI <- setRefClass("Plot_Bad_CI",
                           contains = "Plot_Image",
                           fields = list(breaks = "numeric",
                                         ylim   = "numeric"))
Plot_Bad_CI$methods(
  # p_breaks - passed to cut() as `breaks`; 1/breaks is drawn as a reference line
  # p_ylim   - y-axis limits; NULL means "derive from the boxes when plotting"
  # ...      - forwarded to the parent initializer
  initialize = function(p_breaks=20, p_ylim=NULL, ...){
    if (is.null(p_ylim)){
      ylim <<- numeric(0)
    } else {
      ylim <<- p_ylim
    }
    breaks <<- p_breaks
    plot_title <<- "Credible Intervals for proportion within range - bad"
    callSuper(...)
  },
  # The first (and only used) data processor.
  data_processor = function(){
    return(data_processors[[1]])
  },
  # Number of decoy rows (PMD_FDR_decoy == 1).
  get_n = function(){
    data_processor()$data_groups$ensure()
    return(nrow(subset(data_processor()$data_groups$df, (PMD_FDR_decoy == 1))))
  },
  plot_image = function(){
    data_processor()$data_groups$ensure()
    data_groups <- data_processor()$data_groups$df
    data_decoy <- subset(data_groups, data_groups$group_training_class == "bad_long")
    data_decoy$region <- cut(x = data_decoy$value, breaks = breaks)
    
    N <- nrow(data_decoy)
    # Element `which_bound` of the credible interval for a region holding
    # length(x) of the N rows (1 is returned as the lower bound, 2 as the upper)
    find_ci_bound <- function(x, which_bound){
      ci <- credible_interval(length(x), N, precision = 0.001, alpha=0.05)
      return(ci[which_bound])
    }
    find_lower_ci_bound <- function(x) find_ci_bound(x, 1)
    find_upper_ci_bound <- function(x) find_ci_bound(x, 2)
    
    xleft   <- aggregate(value~region, data=data_decoy, FUN=min)
    xright  <- aggregate(value~region, data=data_decoy, FUN=max)
    ytop    <- aggregate(value~region, data=data_decoy, FUN=find_upper_ci_bound)
    ybottom <- aggregate(value~region, data=data_decoy, FUN=find_lower_ci_bound)
    
    xleft   <- rename_column(xleft  , "value", "xleft"  )
    xright  <- rename_column(xright , "value", "xright" )
    ytop    <- rename_column(ytop   , "value", "ytop"   )
    ybottom <- rename_column(ybottom, "value", "ybottom")
    
    # One row per region with all four box edges
    boxes <- merge(merge(xleft, xright), merge(ytop, ybottom))
    
    xlab <- "Precursor Mass Discrepancy (ppm)"
    ylab <- "Proportion of PSMs\nin subgroup"
    xlim <- range(data_decoy$value, na.rm = TRUE)
    get_ylim(boxes=boxes) # fills the ylim field when no valid range was supplied
    if (!include_text){
      xlab <- ""
      ylab <- ""
    }
    
    # Empty frame; the plotted points are placeholders and are not drawn
    plot(x=c(-10,10), y=c(0,1), type="n", ylim=ylim, xlim=xlim, xlab=xlab, ylab=ylab, main=main, xaxt="n", yaxt="n")
    
    with(boxes, rect(xleft=xleft, xright=xright, ytop=ytop, ybottom=ybottom, lwd=scale))
    
    abline(h=1/breaks, col="blue", lwd=scale)
  },
  # Set the ylim field from the boxes (always including 0) unless it already
  # holds a two-element range.
  get_ylim = function(boxes=NULL){
    is_valid_range <- function(r=NULL){
      return(length(r) == 2)
    }
    if (! is_valid_range(ylim)){
      ylim <<- range(c(0,boxes$ytop, boxes$ybottom))
    }
  }
  
)
###############################################################################
#            Class: Plot_Selective_Loss
###############################################################################
# Plot_Selective_Loss: for every integer threshold 1..100 and every value of
# group_decoy_proteins, plots the fraction of PSMs with
# PMD_FDR_input_score >= threshold whose PMD-derived confidence,
# 100 * (1 - i_fdr), is below that threshold.
Plot_Selective_Loss <- setRefClass("Plot_Selective_Loss",
                                   contains = "Plot_Image",
                                   fields   = list())
Plot_Selective_Loss$methods(
  initialize = function( ...){
    plot_title <<- "PMD-FDR Selectively removes Bad Hits"
    callSuper(...)
  },
  # The first entry of data_processors is the only one this plot uses.
  data_processor = function(){
    return(data_processors[[1]])
  },
  # Number of rows in the i-FDR table.
  get_n = function(){
    data_processor()$i_fdr$ensure()
    return(nrow(data_processor()$i_fdr$df))
  },
  plot_image = function(){
    # Support functions for plot_selective_loss()
    
    # Loss rate per group at a single threshold.  Returns one row per group
    # with columns group_decoy_proteins, rate_of_loss, n, Freq and (when any
    # row exists) score_threshold.
    samples_lost_by_threshold <- function(updated_i_fdr=NULL, score_threshold=NULL){
      # 3-way contingency table: Var1 = "score at/above threshold",
      # Var2 = "new confidence below threshold", third dimension = group.
      tbl <- with(updated_i_fdr, 
                  table(PMD_FDR_input_score >= score_threshold, 
                        new_confidence < score_threshold, 
                        group_decoy_proteins))
      counts <- data.frame(tbl)
      # n = number of PSMs per (group, Var1), i.e. the denominator of the rate.
      totals <- aggregate(Freq~group_decoy_proteins+Var1, data=counts, FUN=sum)
      totals <- rename_column(totals, name_before = "Freq", "n")
      counts <- merge(counts, totals)
      counts$rate_of_loss <- with(counts, Freq/n)
      lost <- subset(counts, (Var1==TRUE) & (Var2==TRUE))
      lost <- lost[,c("group_decoy_proteins", "rate_of_loss", "n", "Freq")]
      if (nrow(lost) > 0){
        lost$score_threshold <- score_threshold
      }
      return(lost)
    }
    # Stacks the per-threshold results for all requested thresholds.
    get_loss_record <- function(updated_i_fdr=NULL, score_thresholds=NULL){
      record <- data.frame()
      for (score_threshold in score_thresholds){
        record <- rbind(record, samples_lost_by_threshold(updated_i_fdr, score_threshold))
      }
      return(record)
    }
    
    # Main code for plot_selective_loss()
    
    # Same ensure() call get_n() makes, so the plot does not rely on get_n()
    # having run first.
    data_processor()$i_fdr$ensure()
    updated_i_fdr                <- data_processor()$i_fdr$df
    updated_i_fdr$new_confidence <- with(updated_i_fdr, 100 * (1-i_fdr)) #ifelse((1-i_fdr) < (PMD_FDR_input_score / 100), (1-i_fdr), (PMD_FDR_input_score/100)))
    loss_record <- get_loss_record(updated_i_fdr=updated_i_fdr, score_thresholds = 1:100)
    xlim <- with(loss_record, range(score_threshold))
    ylim <- c(0,1)
    xlab <- "Fixed Confidence threshold (PeptideShaker score)"
    ylab <- "Rate of PSM disqualification from PMD-FDR"
    lwd  <- 4
    plot(x=xlim, y=ylim, type="n", main=main, xlab=xlab, ylab=ylab)
    
    groups <- sort(unique(loss_record$group_decoy_proteins))
    n_g    <- length(groups)
    
    cols <- rainbow_with_fixed_intensity(n=n_g, goal_intensity_0_1 = 0.5, alpha = 1)
    ltys <- rep(1:6, length.out=n_g)
    
    leg     <- list(leg=groups, col=cols, lty=ltys, lwd=lwd, title="Species/Category")
    
    # seq_len() yields an empty loop when there are no groups (1:0 would not).
    for (i in seq_len(n_g)){
      lines(rate_of_loss~score_threshold, data=subset(loss_record, group_decoy_proteins==leg$leg[i]), col=leg$col[i], lwd=leg$lwd * scale, lty=leg$lty[i])
    }
    abline(h=0, v=100, lwd=scale)
    abline(h=c(0.1, 0.8), col="gray", lwd=scale)
    
    legend_object <- Legend_Object$new(leg, scale)
    legend_object$show()
  }
  
)
###############################################################################
#            Class: Plot_Selective_Loss_for_TOC
###############################################################################
# Plot_Selective_Loss_for_TOC: variant of the selective-loss plot with
# configurable axis labels, legend placement, in-plot title position and
# display names for the groups.
#
# Fields set in initialize(): xlab, ylab, title_x, title_y, legend_border,
# legend_x, legend_y, name_contaminant, name_decoy, name_human, name_pyro.
# legend_title and legend_location are declared but not assigned here.
Plot_Selective_Loss_for_TOC <- setRefClass("Plot_Selective_Loss_for_TOC",
                                           contains = "Plot_Image",
                                           fields = list(xlab="character",
                                                         ylab="character",
                                                         title_x="numeric",
                                                         title_y="numeric",
                                                         legend_border="logical",
                                                         legend_x = "numeric",
                                                         legend_y = "numeric",
                                                         legend_title="character",
                                                         legend_location = "character",
                                                         name_contaminant = "character",
                                                         name_decoy = "character",
                                                         name_human = "character",
                                                         name_pyro = "character"))
Plot_Selective_Loss_for_TOC$methods(
  initialize = function( ...){
    plot_title <<- "PMD-FDR selectively removes bad hits"
    callSuper(...)
    # Defaults are assigned after the parent initializer has run.
    xlab <<- "Confidence threshold (PeptideShaker)"
    ylab <<- "PMD Disqualification Rate"
    legend_border    <<- FALSE
    #legend_title     <<-  "Species/Category"
    title_x          <<- 50
    title_y          <<- 0.9
    legend_x         <<- 10         
    legend_y         <<- 0.75
    name_contaminant <<- "signal - contaminant"
    name_decoy       <<- "decoy - reversed"
    name_human       <<- "decoy - human"
    name_pyro        <<- "signal - pyrococcus"
  },
  # The first entry of data_processors is the only one this plot uses.
  data_processor = function(){
    return(data_processors[[1]])
  },
  # Number of rows in the i-FDR table.
  get_n = function(){
    data_processor()$i_fdr$ensure()
    return(nrow(data_processor()$i_fdr$df))
  },
  plot_image = function(){
    # Support functions for plot_selective_loss()
    
    # Loss rate per group at a single threshold: among PSMs with
    # PMD_FDR_input_score >= threshold, the fraction whose new_confidence is
    # below the threshold.
    samples_lost_by_threshold <- function(updated_i_fdr=NULL, score_threshold=NULL){
      tbl <- with(updated_i_fdr, 
                  table(PMD_FDR_input_score >= score_threshold, 
                        new_confidence < score_threshold, 
                        group_decoy_proteins))
      counts <- data.frame(tbl)
      # n = number of PSMs per (group, Var1), i.e. the denominator of the rate.
      totals <- aggregate(Freq~group_decoy_proteins+Var1, data=counts, FUN=sum)
      totals <- rename_column(totals, name_before = "Freq", "n")
      counts <- merge(counts, totals)
      counts$rate_of_loss <- with(counts, Freq/n)
      lost <- subset(counts, (Var1==TRUE) & (Var2==TRUE))
      lost <- lost[,c("group_decoy_proteins", "rate_of_loss", "n", "Freq")]
      if (nrow(lost) > 0){
        lost$score_threshold <- score_threshold
      }
      return(lost)
    }
    # Stacks the per-threshold results for all requested thresholds.
    get_loss_record <- function(updated_i_fdr=NULL, score_thresholds=NULL){
      record <- data.frame()
      for (score_threshold in score_thresholds){
        record <- rbind(record, samples_lost_by_threshold(updated_i_fdr, score_threshold))
      }
      return(record)
    }
    # Maps raw group names to their display names.  The substitutions are
    # applied one after another, so a later pattern also sees text inserted by
    # an earlier replacement.
    convert_groups <- function(groups=NULL){
      new_groups <- groups
      new_groups <- gsub(pattern = "contaminant", replacement = name_contaminant, x = new_groups)
      new_groups <- gsub(pattern = "decoy"      , replacement = name_decoy      , x = new_groups)
      new_groups <- gsub(pattern = "human"      , replacement = name_human      , x = new_groups)
      new_groups <- gsub(pattern = "pyrococcus" , replacement = name_pyro       , x = new_groups)
      
      return(new_groups)
    }
    
    # Main code for plot_selective_loss()
    
    # Same ensure() call get_n() makes, so the plot does not rely on get_n()
    # having run first.
    data_processor()$i_fdr$ensure()
    updated_i_fdr                <- data_processor()$i_fdr$df
    updated_i_fdr$new_confidence <- with(updated_i_fdr, 100 * (1-i_fdr)) #ifelse((1-i_fdr) < (PMD_FDR_input_score / 100), (1-i_fdr), (PMD_FDR_input_score/100)))
    loss_record <- get_loss_record(updated_i_fdr=updated_i_fdr, score_thresholds = 1:100)
    xlim <- with(loss_record, range(score_threshold))
    ylim <- c(0,1)
    lwd  <- 4
    # xlab / ylab come from the class fields here.
    plot(x=xlim, y=ylim, type="n", main=main, xlab=xlab, ylab=ylab)
    
    groups <- sort(unique(loss_record$group_decoy_proteins))
    n_g    <- length(groups)
    
    cols <- rainbow_with_fixed_intensity(n=n_g, goal_intensity_0_1 = 0.5, alpha = 1)
    ltys <- rep(1:6, length.out=n_g)
    bty  <- ifelse(legend_border, "o", "n")
    
    # var_name keeps the raw group values for filtering; leg holds display names.
    leg     <- list(leg=convert_groups(groups), var_name=groups, col=cols, lty=ltys, lwd=lwd, bty=bty, x=legend_x, y=legend_y)
    
    # seq_len() yields an empty loop when there are no groups (1:0 would not).
    for (i in seq_len(n_g)){
      lines(rate_of_loss~score_threshold, data=subset(loss_record, group_decoy_proteins==leg$var_name[i]), col=leg$col[i], lwd=leg$lwd * scale, lty=leg$lty[i])
    }
    abline(h=0, v=100, lwd=scale)
    abline(h=c(0.1, 0.8), col="gray", lwd=scale)
    
    legend_object <- Legend_Object$new(leg, scale)
    legend_object$show()
    # The title is drawn inside the plot area at (title_x, title_y).
    text(x=title_x, y=title_y, labels = plot_title)
  }
  
)
###############################################################################
#            Class: Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR
###############################################################################
# Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR: restricted to PSMs flagged
# is_in_1percent, orders them by PMD_FDR_input_score, splits them into
# consecutive rank groups and plots, per group, the mean i-FDR (x100) against
# the mean input score, with a box spanning the score range and
# mean +/- 2 standard errors.
Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR <- setRefClass("Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR",
                                                             contains = "Plot_Image",
                                                             fields   = list())
Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR$methods(
  initialize = function( ...){
    plot_title <<- "Precursor Mass Discrepancy i-FDR for 1% Target-Decoy FDR PSMs"
    callSuper(...)
  },
  # The first entry of data_processors is the only one this plot uses.
  data_processor = function(){
    return(data_processors[[1]])
  },
  # Number of i-FDR rows with is_in_1percent == TRUE; 0 when the 1% data is absent.
  get_n = function(){
    data_processor()$i_fdr$ensure()
    if (!one_percent_calculation_exists()){
      return(0)
    }
    in_1percent <- subset(data_processor()$i_fdr$df, is_in_1percent==TRUE)
    return(nrow(in_1percent))
  },
  # Draws the plot; stops with an error when the dataset has no 1% FDR data.
  plot_image = function(){
    if (!one_percent_calculation_exists()){
      stop(sprintf("Dataset '%s' does not include 1%% FDR data", data_processor()$info$collection_name()))
    }
    
    i_fdr        <- get_modified_fdr()
    report_good_discrepancies(i_fdr)
    data_TD_good <- get_data_TD_good(i_fdr)
    mean_results <- get_mean_results(data_TD_good)
    boxes        <- rename_columns(df = mean_results, 
                                   names_before = c("min_conf", "max_conf", "lower"  , "upper"),
                                   names_after  = c("xleft"   , "xright"  , "ybottom", "ytop" ))
    xlim <- range(boxes[,c("xleft", "xright")])
    ylim <- range(boxes[,c("ybottom", "ytop")])
    
    xlab <- "Confidence Score (Peptide Shaker)"
    ylab <- "Mean PMD i-FDR"
    if (!include_text){
      xlab <- ""
      ylab <- ""
    }
    
    plot(mean_i_fdr~mean_conf, data=mean_results, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, main=main, xaxt="n", yaxt="n", cex=scale, lwd=scale)
    with(boxes, rect(xleft = xleft, ybottom = ybottom, xright = xright, ytop = ytop, lwd=scale))
    abline(b=-1, a=100, lwd=4*scale, col="dark gray")   # reference line y = 100 - x
    abline(h=0, v=100, lwd=1*scale)
  },
  # Per conf_group summary: mean / sd / count of i_fdr, mean / min / max of
  # PMD_FDR_input_score, plus se, lower and upper (mean -/+ 2 * se).
  get_mean_results = function(data_TD_good = NULL){
    mean_i_fdr <- rename_column(aggregate(i_fdr~conf_group, data=data_TD_good, FUN=mean),
                                "i_fdr", "mean_i_fdr")
    sd_i_fdr   <- rename_column(aggregate(i_fdr~conf_group, data=data_TD_good, FUN=sd),
                                "i_fdr", "sd_i_fdr")
    n_i_fdr    <- rename_column(aggregate(i_fdr~conf_group, data=data_TD_good, FUN=length),
                                "i_fdr", "n")
    mean_conf  <- rename_column(aggregate(PMD_FDR_input_score~conf_group, data=data_TD_good, FUN=mean),
                                "PMD_FDR_input_score", "mean_conf")
    min_conf   <- rename_column(aggregate(PMD_FDR_input_score~conf_group, data=data_TD_good, FUN=min),
                                "PMD_FDR_input_score", "min_conf")
    max_conf   <- rename_column(aggregate(PMD_FDR_input_score~conf_group, data=data_TD_good, FUN=max),
                                "PMD_FDR_input_score", "max_conf")
    
    # Each merge() joins on the shared conf_group column.
    mean_results <-                     mean_i_fdr
    mean_results <- merge(mean_results, sd_i_fdr)
    mean_results <- merge(mean_results, n_i_fdr)
    mean_results <- merge(mean_results, mean_conf)
    mean_results <- merge(mean_results, min_conf)
    mean_results <- merge(mean_results, max_conf)
    
    # sd() is already the sample standard deviation (n - 1 denominator), so the
    # standard error of the mean divides by sqrt(n), not sqrt(n - 1).
    mean_results$se    <- with(mean_results, sd_i_fdr / sqrt(n))
    mean_results$lower <- with(mean_results, mean_i_fdr - 2*se)
    mean_results$upper <- with(mean_results, mean_i_fdr + 2*se)
    return(mean_results)
  },
  # Keeps the TD_good rows, orders them by input score, assigns each row to a
  # rank-based conf_group (about 100 rows per group) and rescales i_fdr by 100.
  get_data_TD_good = function(i_fdr=NULL){
    data_TD_good <- subset(i_fdr, TD_good==TRUE)
    data_TD_good <- data_TD_good[order(data_TD_good$PMD_FDR_input_score),]
    n <- nrow(data_TD_good)
    if (n == 0){
      stop("No PSMs are flagged is_in_1percent; cannot build confidence groups")
    }
    # cut() rejects a single-number `breaks` below 2, which floor(n/100)
    # produces for fewer than 200 rows; fall back to two groups in that case.
    n_groups <- max(2, floor(n/100))
    data_TD_good$conf_group <- cut(seq_len(n), breaks=n_groups)
    data_TD_good$i_fdr <- 100 * data_TD_good$i_fdr
    return(data_TD_good)
  },
  # Returns the i-FDR table with three logical columns added:
  # PMD_good (i_fdr < 0.01), TD_good (is_in_1percent) and conf_good (score == 100).
  get_modified_fdr = function(){
    data_processor()$i_fdr$ensure()
    i_fdr <- data_processor()$i_fdr$df
    i_fdr$PMD_good  <- i_fdr$i_fdr < 0.01
    i_fdr$TD_good   <- i_fdr$is_in_1percent == TRUE
    i_fdr$conf_good <- i_fdr$PMD_FDR_input_score == 100
    return(i_fdr)
  },
  # TRUE when the processor's raw_1_percent object reports that it exists.
  one_percent_calculation_exists = function(){
    data_processor()$raw_1_percent$ensure()
    return(data_processor()$raw_1_percent$exists())# "is_in_1percent" %in% colnames(data_processor()$i_fdr))
  },
  # Prints TD_good x PMD_good contingency tables for non-decoy PSMs: all
  # scores, score == 100, 99 <= score < 100, and 90 <= score < 99.
  report_good_discrepancies = function(i_fdr=NULL){
    # (The 99 <= score < 100 table used to be printed twice; it is printed once now.)
    with(subset(i_fdr,                                        (PMD_FDR_decoy == 0)), print(table(TD_good, PMD_good)))
    with(subset(i_fdr, (PMD_FDR_input_score==100)                    & (PMD_FDR_decoy == 0)), print(table(TD_good, PMD_good)))
    with(subset(i_fdr, (PMD_FDR_input_score>= 99) & (PMD_FDR_input_score<100) & (PMD_FDR_decoy == 0)), print(table(TD_good, PMD_good)))
    with(subset(i_fdr, (PMD_FDR_input_score>= 90) & (PMD_FDR_input_score< 99) & (PMD_FDR_decoy == 0)), print(table(TD_good, PMD_good)))
  }
  
)

###############################################################################
#            Class: Plot_Density_PMD_by_Score
###############################################################################
# Plot_Density_PMD_by_Score: overlays one density curve of the PMD value per
# confidence-score band (100, 95-100, 80-95, 50-80, 0-50, 0) plus whatever
# label the decoy rows already carry.
#
# Fields:
#   show_norm - TRUE plots the "value_norm" column, FALSE plots "value"
Plot_Density_PMD_by_Score <- setRefClass("Plot_Density_PMD_by_Score",
                                         contains = "Plot_Image",
                                         fields   = list(show_norm = "logical"))
Plot_Density_PMD_by_Score$methods(
  initialize = function(p_show_norm=FALSE, ...){
    show_norm  <<- p_show_norm
    plot_title <<- "PMD distribution, by Confidence ranges"
    callSuper(...)
  },
  # The first entry of data_processors is the only one this plot uses.
  data_processor = function(){
    return(data_processors[[1]])
  },
  # Number of rows in the grouped data.
  get_n = function(){
    data_processor()$data_groups$ensure()
    return(nrow(data_processor()$data_groups$df))
  },
  # Returns the grouped data with
  #   - rows where used_to_find_middle is TRUE removed,
  #   - a "value_of_interest" column copied from column `var_value`,
  #   - "group_decoy_input_score" relabelled by score band for rows with
  #     PMD_FDR_decoy == 0 (other rows keep their existing label).
  get_modified_data_groups = function(var_value = NULL){
    data_processor()$data_groups$ensure()
    data_new                   <- data_processor()$data_groups$df
    data_new                   <- subset(data_new, !used_to_find_middle )
    data_new$value_of_interest <- data_new[, var_value]
    
    # Consecutive pairs are (upper, lower) band edges; the repeated 100 and 0
    # at the ends make single-score bands for exactly 100 and exactly 0.
    cutoff_points <- c(100, 100, 95, 80, 50, 0, 0)
    n_points      <- length(cutoff_points)
    uppers        <- cutoff_points[-n_points]
    lowers        <- cutoff_points[-1]
    
    for (i in seq_along(uppers)){
      upper <- uppers[i]
      lower <- lowers[i]
      
      if (lower==upper){
        idx <- with(data_new, which(                        (PMD_FDR_input_score == upper) & (PMD_FDR_decoy == 0)))
        cat_name <- sprintf("%d", upper)
      } else {
        idx <- with(data_new, which((PMD_FDR_input_score >= lower) & (PMD_FDR_input_score <  upper) & (PMD_FDR_decoy == 0)))
        cat_name <- sprintf("%02d - %2d", lower, upper)
      }
      data_new$group_decoy_input_score[idx] <- cat_name
    }
    
    return(data_new)
  },
  plot_image = function(){
    
    # Support functions for plot_density_PMD_by_score()
    
    # Builds a list with var_value, groups, and one normalized density per
    # group (keyed by group label), all evaluated over the same x-range.
    # NOTE(review): the previous version accepted a data_subset argument (the
    # caller passed a PMD_FDR_peptide_length > 11 subset for "value_norm") but
    # overwrote it immediately, so that filter never took effect.  The unused
    # argument is gone; confirm whether the filter was meant to apply.
    get_densities <- function(var_value = NULL){
      data_subset <- get_modified_data_groups(var_value=var_value)
      from <- min(data_subset$value_of_interest, na.rm=TRUE)
      to   <- max(data_subset$value_of_interest, na.rm=TRUE)
      
      groups    <- sort(unique(data_subset$group_decoy_input_score), decreasing = TRUE)
      densities <- list(var_value = var_value, groups=groups)
      
      for (i in seq_along(groups)){
        group <- groups[i]
        
        data_group_single  <- subset(data_subset, (group_decoy_input_score == group))
        d_group            <- with(data_group_single , density(value_of_interest, from = from, to = to, na.rm = TRUE))
        densities[[group]] <- normalize_density(d_group)
      }
      
      return(densities)
    }
    # Range (always including 0) of component `axis` ("x" or "y") across every
    # group's density in both density lists.
    get_axis_range <- function(densities_a = NULL, densities_b = NULL, axis = NULL){
      lim <- 0
      for (group in densities_a$groups){
        lim <- range(lim, densities_a[[group]][[axis]], densities_b[[group]][[axis]])
      }
      return(lim)
    }
    # Draws the empty frame, then one curve per legend entry, then x = 0 and
    # (when include_text is set) the legend.
    plot_distributions <- function(densities = NULL, var_value= NULL, xlab_modifier="", xlim=NULL, ylim=NULL){
      data_groups <- get_modified_data_groups(var_value=var_value)
      groups      <- sort(unique(data_groups$group_decoy_input_score))
      
      # Middle bands in decreasing order, followed by the "0" band.
      groups_std   <- setdiff(groups, c("100", "decoy", "0") )
      groups_std   <- sort(groups_std, decreasing = TRUE)
      groups_std   <- c(groups_std, "0")
      n_std        <- length(groups_std)
      cols <- rainbow_with_fixed_intensity(n = n_std, goal_intensity_0_1 = 0.5, alpha=0.5)
      
      leg <- list(group = c("100"             , groups_std   , "decoy"                           ),
                  leg   = c("100"             , groups_std   , "All Decoys"                      ),
                  col   = c(col2hex("black")  , cols         , col2hex("purple", col_alpha = 0.5)), 
                  lwd   = c(4                 , rep(2, n_std), 4                                 ), 
                  title = "Confidence Score")
      
      xlab <- sprintf("Precursor Mass Discrepancy%s (ppm)",
                      xlab_modifier)
      ylab <- "Density"
      if (!include_text){
        xlab <- ""
        ylab <- ""
      }
      
      plot( x=xlim, y=ylim, col=leg$col[1], lwd=leg$lwd[1] * scale, main=main, xlab=xlab, ylab=ylab, xaxt="n", yaxt="n", cex=scale, type="n")#, lty=leg.lty[1], ...)
      
      include_peak_dots <- FALSE # BUGBUG: Disabling this for now.  Need to move this to class parameter
      
      for (i in seq_along(leg$group)){
        d <- densities[[leg$group[i]]]
        lines(d, col=leg$col[i], lwd=leg$lwd[i] * scale)
        if (include_peak_dots){
          x <- d$x[which.max(d$y)]
          y <- max(d$y)
          points(x=c(x,x), y=c(0,y), pch=19, col=leg$col[i], cex=scale)
        }
      }
      
      abline(v=0, lwd=scale)
      
      if (include_text){
        legend_object <- Legend_Object$new(leg, scale)
        legend_object$show()
      }
      
    }
    
    # Main body for plot_density_PMD_by_score()
    
    # Both variants are computed so the raw and normalized plots share axes.
    densities_a <- get_densities(var_value = "value")        
    densities_b <- get_densities(var_value = "value_norm")
    
    xlim <- get_axis_range(densities_a, densities_b, "x")
    ylim <- get_axis_range(densities_a, densities_b, "y")
    
    if (show_norm){
      plot_distributions(densities=densities_b, var_value = "value_norm", xlab_modifier = " - normalized", xlim=xlim, ylim=ylim)
    } else {
      plot_distributions(densities=densities_a, var_value = "value"     , xlab_modifier = ""             , xlim=xlim, ylim=ylim)
    }
  }
)
###############################################################################
#            Class: Plot_Dataset_Description
###############################################################################
# Plot_Dataset_Description: a 2-wide by 3-tall composite built from six
# sub-plots, passed to the parent in this order:
#   Plot_Time_Invariance_Alt on "value"     | Plot_Time_Invariance_Alt on "value_norm"
#   Plot_Density_PMD_by_Score (raw)         | Plot_Density_PMD_and_Norm_Decoy_by_AA_Length (raw)
#   Plot_Density_PMD_by_Score (normalized)  | Plot_Density_PMD_and_Norm_Decoy_by_AA_Length (normalized)
#
# Fields:
#   ylim_time_invariance - y-limits handed to the two time-invariance panels
Plot_Dataset_Description <- setRefClass("Plot_Dataset_Description",
                                        contains = "Plot_Multiple_Images",
                                        fields   = list(ylim_time_invariance = "numeric"))
Plot_Dataset_Description$methods(
  # p_data_processors, p_include_text and p_include_main are forwarded to every
  # panel; p_ylim_time_invariance goes to the time-invariance panels as p_ylim.
  # Remaining arguments are handed to the parent initializer.
  initialize = function(p_data_processors = NULL, 
                        p_include_text=TRUE, 
                        p_include_main=FALSE,
                        p_ylim_time_invariance = c(-4,4), ...){
    # Record the limits in the declared field (it was previously left empty).
    # NULL cannot be stored in a numeric field, so it is skipped.
    if (!is.null(p_ylim_time_invariance)){
      ylim_time_invariance <<- p_ylim_time_invariance
    }
    
    # Local builders: each creates one panel with the shared settings.
    time_invariance_panel <- function(field_of_interest){
      Plot_Time_Invariance_Alt$new(p_data_processors=p_data_processors, 
                                   p_include_text=p_include_text, 
                                   p_include_main=p_include_main,
                                   p_training_class = "good_testing",
                                   p_field_of_interest = field_of_interest,
                                   p_ylim = p_ylim_time_invariance)
    }
    score_density_panel <- function(normalized){
      Plot_Density_PMD_by_Score$new(p_data_processors=p_data_processors, 
                                    p_show_norm=normalized, 
                                    p_include_text=p_include_text, 
                                    p_include_main=p_include_main)
    }
    length_density_panel <- function(normalized){
      Plot_Density_PMD_and_Norm_Decoy_by_AA_Length$new(p_data_processors=p_data_processors, 
                                                       p_show_norm=normalized,
                                                       p_include_text=p_include_text, 
                                                       p_include_main=p_include_main)
    }
    
    # Panels are created in the same order as they appear in the list.
    plot_object_r1_c1 <- time_invariance_panel("value")
    plot_object_r1_c2 <- time_invariance_panel("value_norm")
    plot_object_r2_c1 <- score_density_panel(FALSE)
    plot_object_r2_c2 <- length_density_panel(FALSE)
    plot_object_r3_c1 <- score_density_panel(TRUE)
    plot_object_r3_c2 <- length_density_panel(TRUE)
    
    callSuper(p_n_images_wide=2, 
              p_n_images_tall=3, 
              p_include_text=p_include_text,
              p_include_main=p_include_main,
              p_image_list = list(plot_object_r1_c1, plot_object_r1_c2,
                                  plot_object_r2_c1, plot_object_r2_c2,
                                  plot_object_r3_c1, plot_object_r3_c2), ...)
    
  }
)
###############################################################################
#            Class: Plots_for_Paper
###############################################################################
# Plots_for_Paper: holds four Data_Processor objects (a-d) plus three display
# settings (include_text, include_main, mai) for generating the figures.
Plots_for_Paper <- setRefClass(
  "Plots_for_Paper",
  fields = list(
    data_processor_a = "Data_Processor",
    data_processor_b = "Data_Processor",
    data_processor_c = "Data_Processor",
    data_processor_d = "Data_Processor",
    include_text     = "logical",
    include_main     = "logical",
    mai              = "numeric"
  )
)
Plots_for_Paper$methods(
  initialize = function(){
    # One Data_Processor per dataset description object, stored in fields a-d.
    info_a <- Data_Object_Info_737_two_step$new()
    data_processor_a <<- Data_Processor$new(p_info = info_a)
    info_b <- Data_Object_Info_737_combined$new()
    data_processor_b <<- Data_Processor$new(p_info = info_b)
    info_c <- Data_Object_Pyrococcus_tr$new()
    data_processor_c <<- Data_Processor$new(p_info = info_c)
    info_d <- Data_Object_Mouse_Mutations$new()
    data_processor_d <<- Data_Processor$new(p_info = info_d)
  },
  create_plots_for_paper = function(include_main=TRUE, finalize=TRUE){
    # Prints the table/figure data, then draws every figure from D down to 3.
    # When finalize is TRUE the figures use scale 2 (4 for figures 6 and 3);
    # otherwise every figure uses scale 1.
    print_table_4_data()
    print_figure_2_data()
    
    scale_regular <- ifelse(finalize, 2, 1)
    scale_double  <- ifelse(finalize, 4, 1)
    
    plot_figure_D(p_scale=scale_regular, p_include_main = include_main)
    plot_figure_C(p_scale=scale_regular, p_include_main = include_main)
    plot_figure_B(p_scale=scale_regular, p_include_main = include_main)
    plot_figure_A(p_scale=scale_regular, p_include_main = include_main)
    plot_figure_8(p_scale=scale_regular, p_include_main = include_main)
    plot_figure_7(p_scale=scale_regular, p_include_main = include_main)
    plot_figure_6(p_scale=scale_double , p_include_main = include_main)
    plot_figure_5(p_scale=scale_regular, p_include_main = include_main)
    plot_figure_4(p_scale=scale_regular, p_include_main = include_main)
    plot_figure_3(p_scale=scale_double , p_include_main = include_main)
  },
  print_figure_2_data = function(){
    # Prints the grouping statistics computed for data_processor_a.
    grouping_stats <- create_stats_for_grouping_figure(list(data_processor_a))
    print(grouping_stats)
  },
  print_table_4_data = function(){
    # Reports comparison ranges for data_processor_a, then for data_processor_c.
    processors_a <- list(data_processor_a)
    processors_c <- list(data_processor_c)
    report_ranges_of_comparisons(processors = processors_a)
    report_ranges_of_comparisons(processors = processors_c)
  },
  plot_figure_3 = function(p_scale=NULL, p_include_main=NULL){
    # Figure 3: Plot_Compare_PMD_and_Norm_Density for data_processor_a, small window.
    # NOTE(review): the argument below is spelled p_data_processor (singular),
    # unlike the other figures' p_data_processors - confirm this is intended.
    figure <- Plot_Compare_PMD_and_Norm_Density$new(
      p_data_processor  = list(data_processor_a),
      p_show_norm       = FALSE,
      p_include_text    = TRUE,
      p_include_main    = p_include_main,
      p_display_n_psms  = FALSE
    )
    figure$plot_image_in_small_window(p_scale=p_scale)
  },
  plot_figure_4 = function(p_scale=NULL, p_include_main=NULL){
    # Figure 4: Plot_Time_Invariance_Alt_Before_and_After for data_processor_a,
    # y-range fixed to c(-4,4), large window.
    figure <- Plot_Time_Invariance_Alt_Before_and_After$new(
      p_data_processors = list(data_processor_a),
      p_include_text    = TRUE,
      p_include_main    = p_include_main,
      p_ylim            = c(-4,4)
    )
    figure$plot_image_in_large_window(window_height=4, p_scale=p_scale)
  },
  plot_figure_5 = function(p_scale=NULL, p_include_main=NULL){
    # Figure 5: Plot_Density_PMD_and_Norm_Decoy_by_AA_Length for data_processor_a,
    # large window.
    figure <- Plot_Density_PMD_and_Norm_Decoy_by_AA_Length$new(
      p_data_processors = list(data_processor_a),
      p_include_text    = TRUE,
      p_include_main    = p_include_main
    )
    figure$plot_image_in_large_window(window_height=4, p_scale=p_scale)
  },
  plot_figure_6 = function(p_scale=NULL, p_include_main=NULL){
    # Figure 6: Plot_Bad_CI for data_processor_a, small window.
    figure <- Plot_Bad_CI$new(
      p_data_processors = list(data_processor_a),
      p_include_text    = TRUE,
      p_include_main    = p_include_main
    )
    figure$plot_image_in_small_window(p_scale=p_scale)
  },
  plot_figure_7 = function(p_scale=NULL, p_include_main=NULL){
    # Figure 7: Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR for data_processor_a,
    # large window.
    figure <- Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR$new(
      p_data_processors = list(data_processor_a),
      p_include_text    = TRUE,
      p_include_main    = p_include_main
    )
    figure$plot_image_in_large_window(window_height=4, p_scale=p_scale)
  },
  plot_figure_8 = function(p_scale=NULL, p_include_main=NULL){
    # Figure 8: Plot_Selective_Loss for data_processor_c, large window.
    figure <- Plot_Selective_Loss$new(
      p_data_processors = list(data_processor_c),
      p_include_text    = TRUE,
      p_include_main    = p_include_main
    )
    figure$plot_image_in_large_window(window_height=4, p_scale=p_scale)
  },
  plot_figure_A = function(p_scale=NULL, p_include_main=NULL){
    # Figure A: Plot_Dataset_Description for data_processor_a, large window.
    figure <- Plot_Dataset_Description$new(
      p_data_processors      = list(data_processor_a),
      p_include_text         = TRUE,
      p_include_main         = p_include_main,
      p_ylim_time_invariance = c(-4,4)
    )
    figure$plot_image_in_large_window(window_height=4, p_scale=p_scale)
  },
  plot_figure_B = function(p_scale=NULL, p_include_main=NULL){
    # Figure B: builds a Plot_Dataset_Description plot for data_processor_b
    # (text enabled, time-invariance y-limits fixed at -4..4) and renders it in
    # a large window of height 4.
    #   p_scale        - forwarded to plot_image_in_large_window()
    #   p_include_main - forwarded to the plot constructor
    figure <- Plot_Dataset_Description$new(
      p_data_processors      = list(data_processor_b),
      p_include_text         = TRUE,
      p_include_main         = p_include_main,
      p_ylim_time_invariance = c(-4,4)
    )
    figure$plot_image_in_large_window(window_height=4, p_scale=p_scale)
  },
  plot_figure_C = function(p_scale=NULL, p_include_main=NULL){
    # Figure C: builds a Plot_Dataset_Description plot for data_processor_c
    # (text enabled, time-invariance y-limits fixed at -4..4) and renders it in
    # a large window of height 4.
    #   p_scale        - forwarded to plot_image_in_large_window()
    #   p_include_main - forwarded to the plot constructor
    figure <- Plot_Dataset_Description$new(
      p_data_processors      = list(data_processor_c),
      p_include_text         = TRUE,
      p_include_main         = p_include_main,
      p_ylim_time_invariance = c(-4,4)
    )
    figure$plot_image_in_large_window(window_height=4, p_scale=p_scale)
  },
  plot_figure_D = function(p_scale=NULL, p_include_main=NULL){
    # Figure D: builds a Plot_Dataset_Description plot for data_processor_d
    # (text enabled, time-invariance y-limits fixed at -4..4) and renders it in
    # a large window of height 4.
    #   p_scale        - forwarded to plot_image_in_large_window()
    #   p_include_main - forwarded to the plot constructor
    figure <- Plot_Dataset_Description$new(
      p_data_processors      = list(data_processor_d),
      p_include_text         = TRUE,
      p_include_main         = p_include_main,
      p_ylim_time_invariance = c(-4,4)
    )
    figure$plot_image_in_large_window(window_height=4, p_scale=p_scale)
  },
  create_stats_for_grouping_figure = function(processors=NULL){
    # Summarises the i-FDR table of the FIRST processor in `processors` at three
    # grouping levels and attaches an RGB colour to every summary row.
    #
    #   processors - list of processors; only processors[[1]] is used.
    #
    # Returns a data frame with one row per group holding:
    #   group                  - group label (training class, its prefix before the
    #                            first "_", or the constant "all")
    #   fdr                    - mean i_fdr of the group, capped at 1
    #   n                      - number of rows aggregated for the group
    #   PMD_FDR_peptide_length - mean peptide length of the group
    #   r, g, b                - colour channels interpolated linearly on fdr
    processor <- processors[[1]]
    processor$i_fdr$ensure()
    aug_i_fdr                      <- processor$i_fdr$df
    # Collapse the detailed training class to its prefix (everything before the first "_")
    aug_i_fdr$group_good_bad_other <- gsub("_.*", "", aug_i_fdr$group_training_class)
    # Constant grouping column so the same aggregation code yields the overall summary
    aug_i_fdr$group_null           <- "all"
    
    # Aggregates mean i_fdr, row count and mean peptide length by one grouping column
    create_agg_fdr_stats <- function(i_fdr=NULL, grouping_var_name = NULL){
      formula_fdr <- as.formula(sprintf("%s~%s", "i_fdr", grouping_var_name))
      formula_len <- as.formula(sprintf("%s~%s", "PMD_FDR_peptide_length", grouping_var_name))
      # The formula is passed positionally on purpose: the first argument of the
      # formula method of aggregate() is named `formula` before R 4.2.0 and `x`
      # from R 4.2.0 on, so naming it breaks on one side or the other.
      agg_fdr <- aggregate(formula_fdr, data=i_fdr, FUN=mean)
      agg_n   <- aggregate(formula_fdr, data=i_fdr, FUN=length)
      agg_len <- aggregate(formula_len, data=i_fdr, FUN=mean)
      agg_fdr <- rename_columns(df = agg_fdr, 
                                names_before = c(grouping_var_name, "i_fdr"), 
                                names_after  = c("group"          , "fdr"))
      agg_n   <- rename_columns(df = agg_n, 
                                names_before = c(grouping_var_name, "i_fdr"), 
                                names_after  = c("group"          , "n"))
      agg_len <- rename_columns(df = agg_len, 
                                names_before = c(grouping_var_name), 
                                names_after  = c("group"          ))
      # After renaming, "group" is the only shared column, so merge() joins on it
      agg <- merge(agg_fdr, agg_n)
      agg <- merge(agg    , agg_len)
      
      return(agg)
    }
    
    agg_detail  <- create_agg_fdr_stats(i_fdr = aug_i_fdr, grouping_var_name = "group_training_class")
    agg_grouped <- create_agg_fdr_stats(i_fdr = aug_i_fdr, grouping_var_name = "group_good_bad_other")
    agg_all     <- create_agg_fdr_stats(i_fdr = aug_i_fdr, grouping_var_name = "group_null")
    
    agg <- rbind(agg_detail, agg_grouped)
    agg <- rbind(agg, agg_all)
    
    # Cap the mean FDR at 1 so the colour interpolation below stays between its end points
    agg$fdr <- ifelse(agg$fdr < 1, agg$fdr, 1)
    
    # Linear interpolation between a0 (at x = 0) and a1 (at x = 1)
    linear_combo <- function(x=NULL, a0=NULL, a1=NULL){
      result <- (a0 * (1-x) + a1 * x)
      return(result)
    }
    
    # Colour runs from (197, 90, 17) at fdr = 0 to (47, 85, 151) at fdr = 1
    agg$r <- linear_combo(agg$fdr, a0=197, a1= 47)
    agg$g <- linear_combo(agg$fdr, a0= 90, a1= 85)
    agg$b <- linear_combo(agg$fdr, a0= 17, a1=151)
    
    return(agg)
  },
  report_ranges_of_comparisons = function(processors=NULL){
    # Prints, for the first processor in `processors`, a comparison of the mean
    # input (confidence) score with the mean i_fdr over four score bands:
    # exactly 100, [99, 100), [90, 99) and [0, 100].
    #
    #   processors - list of processors; only processors[[1]] is used.
    
    # Selects the PSMs of one score band and prints its summary.
    #   max_conf = NULL     -> rows whose score equals min_conf
    #   include_max = TRUE  -> closed interval [min_conf, max_conf]
    #   include_max = FALSE -> half-open interval [min_conf, max_conf)
    report_score_band <- function (i_fdr = NULL, min_conf=NULL, max_conf=NULL, include_max=FALSE){
      if (is.null(max_conf)) {
        band  <- subset(i_fdr, PMD_FDR_input_score == min_conf)
        label <- sprintf("Group %d", min_conf)
      } else if (include_max){
        band  <- subset(i_fdr, (PMD_FDR_input_score >= min_conf) & (PMD_FDR_input_score <= max_conf))
        label <- sprintf("Group %d through %d", min_conf, max_conf)
      } else {
        band  <- subset(i_fdr, (PMD_FDR_input_score >= min_conf) & (PMD_FDR_input_score < max_conf))
        label <- sprintf("Group %d to %d", min_conf, max_conf)
      }
      
      print(label)
      print(sprintf("    Number of PSMs: %d", nrow(band)))
      avg_score <- mean(band$PMD_FDR_input_score)
      print(sprintf("    Mean Confidence Score: %3.1f", avg_score))
      # The score is treated as a percentage, so 100 - score is printed as the g-FDR
      print(sprintf("    PeptideShaker g-FDR: %3.1f", 100-avg_score))
      avg_i_fdr <- mean(band$i_fdr)
      # i_fdr is a fraction; scaled by 100 to print on the same percentage scale
      print(sprintf("    PMD g-FDR: %3.1f", 100*avg_i_fdr))
    }
    
    first_processor <- processors[[1]]
    first_processor$i_fdr$ensure()
    fdr_table <- first_processor$i_fdr$df
    print(sprintf("PMD and Confidence comparison for -- %s",  first_processor$info$collection_name()))
    report_score_band(i_fdr = fdr_table, min_conf=100, max_conf=NULL, include_max=TRUE)
    report_score_band(i_fdr = fdr_table, min_conf= 99, max_conf=100 , include_max=FALSE)
    report_score_band(i_fdr = fdr_table, min_conf= 90, max_conf= 99 , include_max=FALSE)
    report_score_band(i_fdr = fdr_table, min_conf=  0, max_conf=100 , include_max=TRUE)
  }
)
###############################################################################
# C - 021 - PMD-FDR Wrapper - functions.R                                     #
#                                                                             #
# Creates the necessary structure to convert the PMD-FDR code into one that   #
# can run as a batch file                                                     #
#                                                                             #
###############################################################################
###############################################################################
#            Class: ModuleArgParser_PMD_FDR
###############################################################################
# Command-line parser for the PMD-FDR tool: an ArgParser that registers the
# tool's arguments when it is constructed.
ModuleArgParser_PMD_FDR <- setRefClass(
  "ModuleArgParser_PMD_FDR",
  contains = c("ArgParser"),
  fields   = list(args = "character")
)
ModuleArgParser_PMD_FDR$methods(
  # Initialises the parent parser with `description` (extra arguments are
  # forwarded to it) and then registers every PMD-FDR argument.
  # Only --psm_report is registered without a default value.
  initialize = function(description = "Computes individual and global FDR using Precursor Mass Discrepancy (PMD-FDR)", ...){
    callSuper(description=description, ...)

    # Inputs
    local_add_argument("--psm_report",
                       help    = "full name and path to the PSM report")
    local_add_argument("--psm_report_1_percent",
                       default = "",
                       help    = "full name and path to the PSM report for 1% FDR")

    # Outputs
    local_add_argument("--output_i_fdr",
                       default = "",
                       help    = "full name and path to the i-FDR output file ")
    local_add_argument("--output_g_fdr",
                       default = "",
                       help    = "full name and path to the g-FDR output file ")
    local_add_argument("--output_densities",
                       default = "",
                       help    = "full name and path to the densities output file ")
    #local_add_argument("--score_field_name"    , default = ""                  , help="name of score field (in R format)")

    # Input format selector
    local_add_argument("--input_file_type",
                       default = "PMD_FDR_input_file",
                       help    = "type of input file (currently supports: PSM_Report)")
  }
)
###############################################################################
#            Class: Data_Object_Parser
###############################################################################
# Data_Object that owns the argument parser and caches the parsed arguments.
#   parser          - the ModuleArgParser_PMD_FDR used for parsing
#   args            - explicitly supplied argument vector (empty when none was injected)
#   parsing_results - list returned by parser$parse_arguments()
Data_Object_Parser <- setRefClass("Data_Object_Parser", 
                                  contains = c("Data_Object"),
                                  fields =list(parser = "ModuleArgParser_PMD_FDR",
                                               args = "character",
                                               parsing_results = "list") )
Data_Object_Parser$methods(
  initialize = function(){
    callSuper()
    class_name <<- "Data_Object_Parser"
  },
  verify = function(){
    # Nothing to do here - parser handles verification during load
  },
  # Parses the arguments and stores the result in `parsing_results`.
  m_load_data = function(){
    if (length(args) == 0){
      # No arguments were injected: hand NULL to the parser
      # (presumably it then reads the real command line - confirm in ArgParser)
      parsing_results <<- parser$parse_arguments(NULL)
    } else {
      parsing_results <<- parser$parse_arguments(args)
    }
    
  },
  # Replaces the argument vector and marks this object dirty.
  #   p_args - character vector of arguments; NULL (the default) means "no arguments"
  set_args = function(p_args=NULL){ 
    # This is primarily used for testing.  In operation arguments will be passed automatically (through use of commandArgs)
    if (is.null(p_args)){
      # `args` is a "character" field, so NULL cannot be assigned to it directly;
      # an empty vector takes the "no arguments" branch of m_load_data()
      p_args <- character(0)
    }
    args <<- p_args
    set_dirty(TRUE)
  }
)
###############################################################################
#            Class: Data_Object_Info_Parser
###############################################################################
# Data_Object_Info variant whose values come from the parsed command-line
# arguments (held by the parent "parser" object) instead of being set directly.
#   output_i_fdr     - path of the i-FDR output file
#   output_g_fdr     - path of the g-FDR output file
#   output_densities - path of the densities output file
Data_Object_Info_Parser <- setRefClass("Data_Object_Info_Parser", 
                                       contains = c("Data_Object_Info"),
                                       fields =list(
                                         output_i_fdr = "character",
                                         output_g_fdr = "character",
                                         output_densities = "character"
                                       ) )
Data_Object_Info_Parser$methods(
  initialize = function(){
    callSuper()
    class_name <<- "Data_Object_Info_Parser"
  },
  # Checks that every expected parameter is present in the parsing results and
  # that the mandatory ones are not empty. Failures are raised through checkTrue().
  verify = function(){
    parsing_results <- get_parser()$parsing_results
    
    #   field_name  - name of the parameter (without leading dashes)
    #   check_empty - when TRUE the value must also be a non-missing, non-empty string
    check_field_exists <- function(field_name=NULL, check_empty = TRUE){
      # [[ ]] yields NULL for an absent name; [ ] would yield a length-1 list
      # that is never NULL, which made this check pass unconditionally
      field_value <- parsing_results[[field_name]]
      checkTrue(! is.null(field_value),
                msg = sprintf("Parameter %s was not passed to PMD_FDR", field_name))
      if (check_empty){
        field_text <- as.character(field_value)
        has_content <- (length(field_text) > 0) && !any(is.na(field_text)) && all(nzchar(field_text))
        checkTrue(has_content,
                  msg = sprintf("Parameter %s was passed to PMD_FDR but is empty", field_name))
      }
    }
    # Check parameters passed in
    # (the former check of a "junk" parameter was removed: no such argument is
    #  registered, and it only passed because the check could never fail)
    check_field_exists("psm_report")
    check_field_exists("psm_report_1_percent", check_empty = FALSE)
    check_field_exists("output_i_fdr"        , check_empty = FALSE)
    check_field_exists("output_g_fdr"        , check_empty = FALSE)
    check_field_exists("output_densities"    , check_empty = FALSE)
    #check_field_exists("score_field_name")
    check_field_exists("input_file_type")
  },
  # Copies the parsed arguments into this object's fields
  # (fields not declared above are presumably inherited from Data_Object_Info - confirm).
  m_load_data = function(){
    parsing_results <- get_parser()$parsing_results
    
    data_file_name               <<- as.character(parsing_results["psm_report"])
    data_file_name_1_percent_FDR <<- as.character(parsing_results["psm_report_1_percent"])
    # NOTE(review): an empty-string subscript never matches a list name in R, so
    # this does not pick up any parsed value. file_path() no longer uses a path
    # name; left unchanged - confirm nothing downstream reads data_path_name.
    data_path_name               <<- as.character(parsing_results[""])
    #experiment_name              <<- data_file_name
    #designation                  <<- ""
    output_i_fdr                 <<- as.character(parsing_results["output_i_fdr"])
    output_g_fdr                 <<- as.character(parsing_results["output_g_fdr"])
    output_densities             <<- as.character(parsing_results["output_densities"])
    
    input_file_type              <<- as.character(parsing_results["input_file_type"])
    #score_field_name             <<- as.character(parsing_results["score_field_name"])
  },
  # Registers `parser` as the parent object that supplies the parsing results.
  set_parser = function(parser){
    parents[["parser"]] <<- parser
  },
  # Returns the registered parser parent (looked up through verified_element_of_list()).
  get_parser = function(){
    return(verified_element_of_list(parents, "parser", "Data_Object_Info_Parser$parents"))
  },
  # Returns the PSM report path; stops when no file name is stored.
  file_path = function(){
    result <- data_file_name # Now assumes that full path is provided
    if (length(result) == 0){
      stop("Unable to validate file path - file name is missing")
    }
    return(result)
  },
  # Returns the path of the 1 percent FDR PSM report, or "" when none is stored.
  file_path_1_percent_FDR = function(){
    local_file_name <- get_data_file_name_1_percent_FDR()
    if (length(local_file_name) == 0){
      result <- ""
    } else {
      result <- local_file_name # path name is no longer relevant
    }
    
    # Continue even if file name is missing - not all analyses have a 1 percent FDR file; this is managed downstream
    
    # if (length(result) == 0){
    #   stop("Unable to validate file path - one or both of path name and file name (of 1 percent FDR file) are missing")
    # }
    return(result)
  },
  get_data_file_name_1_percent_FDR = function(){
    return(data_file_name_1_percent_FDR)
  },
  # Always returns the empty string.
  collection_name = function(){
    result <- ""
    return(result)
  }
  
)
###############################################################################
#            Class: Processor_PMD_FDR_for_Galaxy
# Purpose: Wrapper on tools from Project 019 to enable a Galaxy-based interface
###############################################################################
# Holds one instance of every data object in the PMD-FDR computation, wires
# them together on construction and exposes compute() to run the analysis and
# write the three result tables.
Processor_PMD_FDR_for_Galaxy <- setRefClass("Processor_PMD_FDR_for_Galaxy", 
                                            fields = list(
                                              parser         = "Data_Object_Parser",
                                              info           = "Data_Object_Info_Parser",
                                              raw_data       = "Data_Object_Raw_Data",
                                              raw_1_percent  = "Data_Object_Raw_1_Percent",
                                              data_converter = "Data_Object_Data_Converter",
                                              data_groups    = "Data_Object_Groupings",
                                              densities      = "Data_Object_Densities",
                                              alpha          = "Data_Object_Alpha",
                                              i_fdr          = "Data_Object_Individual_FDR"
                                            ))
Processor_PMD_FDR_for_Galaxy$methods(
  # Links every data object to the objects it reads from (set_*) and registers
  # it as a child of each of those objects (append_child).
  initialize = function(){
    # This initialization defines all of the dependencies between the various components
    # (Unfortunately, inheriting from Data_Processor leads to issues - I had to reimplement it here with a change to "info")
    
    # info
    info$set_parser(parser)
    parser$append_child(info)
    
    # raw_data
    raw_data$set_info(info)
    info$append_child(raw_data)
    
    # raw_1_percent
    raw_1_percent$set_info(info)
    info$append_child(raw_1_percent)
    
    # data_converter
    data_converter$set_info    (info)
    data_converter$set_raw_data(raw_data)
    info         $append_child (data_converter)
    raw_data     $append_child (data_converter)
    
    # data_groups
    data_groups$set_info          (info)
    data_groups$set_data_converter(data_converter)
    data_groups$set_raw_1_percent (raw_1_percent)
    info          $append_child   (data_groups)
    data_converter$append_child   (data_groups)
    raw_1_percent $append_child   (data_groups)
    
    # densities
    densities  $set_data_groups(data_groups)
    data_groups$append_child   (densities)
    
    # alpha
    alpha    $set_densities(densities)
    densities$append_child (alpha)
    
    # i_fdr
    i_fdr$set_data_groups(data_groups)
    i_fdr$set_densities  (densities)
    i_fdr$set_alpha      (alpha)
    data_groups  $append_child(i_fdr)
    densities    $append_child(i_fdr)
    alpha        $append_child(i_fdr)
    
  },
  # Runs the analysis by ensuring i_fdr, then writes the densities, alpha and
  # i_fdr tables to the paths stored in info (output_densities, output_g_fdr
  # and output_i_fdr respectively) through save_standard_df().
  compute = function(){
    #i_fdr is currently the lowest level object - it ultimately depends on everything else.
    i_fdr$ensure() # All pieces on which i_fdr depends are automatically verified and computed (through their verify() and ensure())
    
    # NOTE(review): the output arguments default to "" - how save_standard_df() treats an empty path is not visible here; confirm
    save_standard_df(x = densities$df, file_path = info$output_densities)
    save_standard_df(x =     alpha$df, file_path = info$output_g_fdr)
    save_standard_df(x =     i_fdr$df, file_path = info$output_i_fdr)
  }
)
###############################################################################
# D - 021 - PMD-FDR Main.R                                                    #
#                                                                             #
# File Description: Contains the base code that interprets the parameters     #
#                   and computes i-FDR and g-FDR for a mass spec project      #
#                                                                             #
###############################################################################
# Script entry point: capture the command-line arguments that follow the script
# name, hand them to a freshly built processor and run the computation.
argv <- commandArgs(trailingOnly = TRUE)

processor <- Processor_PMD_FDR_for_Galaxy$new()
processor$parser$set_args(p_args = argv)
processor$compute()