Mercurial > repos > galaxyp > pmd_fdr
diff PMD_FDR_package_for_Galaxy.R @ 0:5cc0c32d05a2 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pmd_fdr commit 00f85eca73cd8afedfefbeec94a4462455ac1a9a"
author | galaxyp |
---|---|
date | Mon, 07 Oct 2019 11:59:37 -0400 |
parents | |
children | 460edeedeb7d |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PMD_FDR_package_for_Galaxy.R Mon Oct 07 11:59:37 2019 -0400 @@ -0,0 +1,3153 @@ +############################################################################### +# PMD_FDR_package_for_Galaxy.R # +# # +# Project 021 - PMD-FDR for Galaxy-P # +# # +# Description: Computes iFDR and gFDR on PSMs as a script designed for Galaxy # +# Note that plotting code has been left in that is not used # +# in this file; this is the code I used to create figures for # +# publication. I left it in for potential development of views. # +# # +# This file was created by concatenating the following files: # +# # +# A - 005 - Parser - ArgParser.R # +# B - 019 - PMD-FDR - functions.R # +# C - 021 - PMD-FDR Wrapper - functions.R # +# D - 021 - PMD-FDR Main.R # +# # +# Required packages: argparser # +# stringr # +# RUnit # +# # +# Release date: 2019-10-05 # +# Version: 1.4 # +# # +############################################################################### +# Package currently supports the following parameters: +# +# --psm_report full name and path to the PSM report +# --psm_report_1_percent full name and path to the PSM report for 1% FDR +# --output_i_fdr full name and path to the i-FDR output file +# --output_g_fdr full name and path to the g-FDR output file +# --output_densities full name and path to the densities output file +# +############################################################################### +# A - 005 - Parser - ArgParser.R # +# # +# Description: Wrapper for argparser package, using RefClass # +# # +############################################################################### + +#install.packages("argparser") +library(argparser) + +# Class definition + +ArgParser <- setRefClass("ArgParser", + fields = c("parser")) +ArgParser$methods( + initialize = function(...){ + parser <<- arg_parser(...) + }, + local_add_argument = function(...){ + parser <<- add_argument(parser, ...) + }, + parse_arguments = function(...){ + result = parse_args(parser, ...) + return(result) + } +) + +############################################################################### +# B - 019 - PMD-FDR - functions.R # +# # +# Primary work-horse for PMD-FDR # +# # +############################################################################### +############################################################################### +####### Load libraries etc. +############################################################################### +library(stringr) +library(RUnit) + +############################################################# +####### Global values (should be parameters to module but aren't yet) +############################################################# + +MIN_GOOD_PEPTIDE_LENGTH <- 11 +MIN_ACCEPTABLE_POINTS_IN_DENSITY <- 10 + +############################################################# +####### General purpose functions +############################################################# +# Creates a more useful error report when file is not reasonable +safe_file_exists <- function(file_path){ # Still not particularly useful in cases where it is a valid directory + tryCatch( + return(file_test(op = "-f", x=file_path)), + error=function(e) {simpleError(sprintf("file path is not valid: '%s'", file_path))} + ) +} +# My standard way of loading data into data.frames +load_standard_df <- function(file_path=NULL){ + clean_field_names = function(field_names){ + result <- field_names + idx_blank <- which(result == "") + result[idx_blank] <- sprintf("<Field %d>", idx_blank) + return(result) + } + if (safe_file_exists(file_path)){ + field_names <- read_field_names(file_path, sep = "\t") + field_names <- clean_field_names(field_names) + + if (length(field_names) == 0){ + return(data.frame()) + } + data <- read.table(file = file_path, header = TRUE, sep = "\t", stringsAsFactors = FALSE, blank.lines.skip = TRUE)#, check.names = FALSE) + colnames(data) = field_names + } else { + stop(sprintf("File path does not exist: '%s'", file_path)) + } + return(data) +} +save_standard_df <- function(x=NULL, file_path=NULL){ + if (file_path != ""){ + write.table(x = x, file = file_path, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE) + } +} +rename_column <- function(df=NULL, name_before=NULL, name_after=NULL, suppressWarnings=FALSE){ + if (is.null(df)){ + stop("Dataframe (df) does not exist - unable to rename column") + } + if (name_before %in% colnames(df)){ + df[,name_after] <- df[,name_before] + df[,name_before] <- NULL + } else if (!suppressWarnings){ + warning(sprintf("'%s' is not a field in the data frame and so has not been renamed", name_before)) + } + return(df) +} +rename_columns <- function(df=NULL, names_before=NULL, names_after=NULL){ + for (i in safe_iterator(length(names_before))){ + df <- rename_column(df, names_before[i], names_after[i]) + } + return(df) +} +round_to_tolerance <- function(x=NULL, tolerance=NULL, ...){ + return(function_to_tolerance(x=x, tolerance=tolerance, FUN=round, ...)) +} +function_to_tolerance <- function(x=NULL, tolerance=NULL, FUN=NULL, ...){ + return(FUN(x/tolerance, ...) * tolerance) +} +safe_median <- function(x) median(x, na.rm=TRUE) +normalize_density <- function(d){ + # Normalizes y-values in density function + # so that the integral under the curve is 1 + # (uses rectangles to approximate area) + delta_x <- diff(range(d$x)) / length(d$x) + unnormalized_integral <- delta_x * sum(d$y) + new_d <- d + new_d$y <- with(new_d, y ) + + return(new_d) +} +if_null <- function(cond=NULL, null_result=NULL, not_null_result=NULL){ + return(switch(1+is.null(cond), + not_null_result, + null_result)) +} +rainbow_with_fixed_intensity <- function(n=NULL, goal_intensity_0_1=NULL, alpha=NULL){ + goal_intensity <- 255*goal_intensity_0_1 + hex_colors <- rainbow(n) + rgb_colors <- col2rgb(hex_colors) + df_colors <- data.frame(t(rgb_colors)) + df_colors$intensity <- with(df_colors, 0.2989*red + 0.5870*green + 0.1140*blue) + + df_colors$white_black <- with(df_colors, ifelse(intensity < goal_intensity, 255, 0)) + df_colors$mix_level <- with(df_colors, (white_black - goal_intensity) / (white_black - intensity ) ) + df_colors$new_red <- with(df_colors, mix_level*red + (1-mix_level)*white_black) + df_colors$new_green <- with(df_colors, mix_level*green + (1-mix_level)*white_black) + df_colors$new_blue <- with(df_colors, mix_level*blue + (1-mix_level)*white_black) + names_pref_new <- c("new_red", "new_green", "new_blue") + names_no_pref <- c("red", "green", "blue") + df_colors <- df_colors[,names_pref_new] + df_colors <- rename_columns(df_colors, names_before = names_pref_new, names_after = names_no_pref) + rgb_colors <-as.matrix(df_colors/255 ) + + return(rgb(rgb_colors, alpha=alpha)) +} +safe_iterator <- function(n_steps = NULL){ + if (n_steps < 1){ + result = numeric(0) + } else { + result = 1:n_steps + } + return(result) +} +col2hex <- function(cols=NULL, col_alpha=255){ + if (all(col_alpha<=1)){ + col_alpha <- round(col_alpha*255) + } + col_matrix <- t(col2rgb(cols)) + results <- rgb(col_matrix, alpha=col_alpha, maxColorValue = 255) + return(results) +} +credible_interval <- function(x=NULL, N=NULL, precision=0.001, alpha=0.05){ + # Approximates "highest posterior density interval" + # Uses exact binomial but with a finite list of potential values (1/precision) + + p <- seq(from=0, to=1, by=precision) + d <- dbinom(x = x, size = N, prob = p) + d <- d / sum(d) + df <- data.frame(p=p, d=d) + df <- df[order(-df$d),] + df$cumsum <- cumsum(df$d) + max_idx <- sum(df$cumsum < (1-alpha)) + 1 + max_idx <- min(max_idx, nrow(df)) + + lower <- min(df$p[1:max_idx]) + upper <- max(df$p[1:max_idx]) + + return(c(lower,upper)) +} +verified_element_of_list <- function(parent_list=NULL, element_name=NULL, object_name=NULL){ + if (is.null(parent_list[[element_name]])){ + if (is.null(object_name)){ + object_name = "the list" + } + stop(sprintf("Element '%s' does not yet exist in %s", element_name, object_name)) + } + return(parent_list[[element_name]]) +} +read_field_names = function(file_path=NULL, sep = "\t"){ + con = file(file_path,"r") + fields = readLines(con, n=1) + close(con) + + if (length(fields) == 0){ + return(c()) + } + fields = strsplit(x = fields, split = sep)[[1]] + return(fields) +} +check_field_name = function(input_df = NULL, name_of_input_df=NULL, field_name=NULL){ + test_succeeded <- field_name %in% colnames(input_df) + current_columns <- paste0(colnames(input_df), collapse=", ") + checkTrue(test_succeeded, + msg = sprintf("Expected fieldname '%s' in %s (but did not find it among %s)", + field_name, name_of_input_df, current_columns)) +} + +############################################################# +####### Classes for Data +############################################################# + +############################################################################### +# Class: Data_Object +############################################################################### +Data_Object <- setRefClass("Data_Object", + fields =list(m_is_dirty = "logical", + parents = "list", + children = "list", + class_name = "character")) +Data_Object$methods( + initialize = function(){ + m_is_dirty <<- TRUE + class_name <<- "Data_Object <abstract class - class_name needs to be set in subclass>" + }, + load_data = function(){ + #print(sprintf("Calling %s$load_data()", class_name)) # Useful for debugging + ensure_parents() + verify() + m_load_data() + set_dirty(new_value = FALSE) + }, + ensure = function(){ + if (m_is_dirty){ + load_data() + } + }, + set_dirty = function(new_value){ + if (new_value != m_is_dirty){ + m_is_dirty <<- new_value + set_children_dirty() + } + }, + verify = function(){ + stop(sprintf("verify() is an abstract method - define it in %s before calling load_data()", class_name)) + }, + m_load_data = function(){ + stop(sprintf("m_load_data() is an abstract method - define it in %s before calling load_data()", class_name)) + }, + append_parent = function(parent=NULL){ + parents <<- append(parents, parent) + }, + append_child = function(child=NULL){ + children <<- append(children, child) + }, + ensure_parents = function(){ + for (parent in parents){ + # print(sprintf("Calling %s$ensure()", parent$class_name)) # Useful for debugging + parent$ensure() + } + }, + set_children_dirty = function(){ + for (child in children){ + child$set_dirty(TRUE) + } + } +) +############################################################################### +# Class: Data_Object_Info +############################################################################### +Data_Object_Info <- setRefClass("Data_Object_Info", + contains = "Data_Object", + fields =list( + data_file_name_1_percent_FDR = "character", + data_file_name = "character", + data_path_name = "character", + experiment_name = "character", + designation = "character", + + input_file_type = "character" + + #score_field_name = "character" + #collection_name="character", + #dir_results="character", + #dir_dataset="character", + #dataset_designation="character", + #file_name_dataset="character", + #file_name_dataset_1_percent="character", + #experiment_name="character" + ) ) +Data_Object_Info$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Info - <Abstract class - class_name needs to be set in subclass>" + }, + verify = function(){ + checkFieldExists = function(field_name=NULL){ + field_value <- .self[[field_name]] + checkTrue(length(field_value) > 0, + sprintf("Field %s$%s has not been set (and should have been)", class_name, field_name)) + checkTrue(length(field_value) == 1, + sprintf("Field %s$%s has been set to multiple values (and should be a single value)", class_name, field_name)) + checkTrue(field_value != "", + sprintf("Field %s$%s has been set to an empty string (and should not have been)", class_name, field_name)) + } + checkFieldExists("data_file_name") + checkFieldExists("data_path_name") + checkFieldExists("experiment_name") + checkFieldExists("designation") + checkFieldExists("input_file_type") + #checkFieldExists("score_field_name") + }, + m_load_data = function(){ + # Nothing to do - this is really a data class + }, + file_path = function(){ + result <- file.path(data_path_name, data_file_name) + if (length(result) == 0){ + stop("Unable to validate file path - one or both of path name and file name are missing") + } + return(result) + }, + file_path_1_percent_FDR = function(){ + local_file_name <- get_data_file_name_1_percent_FDR() + if (length(local_file_name) == 0){ + result <- "" + } else { + result <- file.path(data_path_name, local_file_name) + } + + # Continue even if file name is missing - not all analyses have a 1 percent FDR file; this is managed downstream + + # if (length(result) == 0){ + # stop("Unable to validate file path - one or both of path name and file name (of 1 percent FDR file) are missing") + # } + return(result) + }, + get_data_file_name_1_percent_FDR = function(){ + return(data_file_name_1_percent_FDR) + }, + collection_name = function(){ + result <- sprintf("%s_%s", experiment_name, designation) + return(result) + } +) +############################################################################### +# Class: Data_Object_Info_737_two_step +############################################################################### +Data_Object_Info_737_two_step <- setRefClass("Data_Object_Info_737_two_step", + contains = "Data_Object_Info", + fields =list()) +Data_Object_Info_737_two_step$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Info_737_two_step" + #score_field_name <<- "Confidence [%]" + data_file_name_1_percent_FDR <<- "737_NS_Peptide_Shaker_PSM_Report_Multi_Stage_Two_Step.tabular" + data_file_name <<- "737_NS_Peptide_Shaker_Extended_PSM_Report_Multi_Stage_Two_Step.tabular.tabular" + data_path_name <<- file.path(".", "Data") + experiment_name <<- "Oral_737_NS" + designation <<- "two_step" + + input_file_type <<- "PSM_Report" + + #data_collection_oral_737_NS_combined$file_name_dataset_1_percent = "737_NS_Peptide_Shaker_PSM_Report_CombinedDB.tabular" + #data_collection_oral_737_NS_two_step$file_name_dataset_1_percent = "737_NS_Peptide_Shaker_PSM_Report_Multi_Stage_Two_Step.tabular" + + } +) + +############################################################################### +# Class: Data_Object_Info_737_combined +############################################################################### +Data_Object_Info_737_combined <- setRefClass("Data_Object_Info_737_combined", + contains = "Data_Object_Info", + fields =list()) +Data_Object_Info_737_combined$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Info_737_combined" + #score_field_name <<- "Confidence [%]" + data_file_name_1_percent_FDR <<- "737_NS_Peptide_Shaker_PSM_Report_CombinedDB.tabular" + data_file_name <<- "737_NS_Peptide_Shaker_Extended_PSM_Report_CombinedDB.tabular" + data_path_name <<- file.path(".", "Data") + experiment_name <<- "Oral_737_NS" + designation <<- "two_step" + + input_file_type <<- "PSM_Report" + + #data_collection_oral_737_NS_combined$file_name_dataset_1_percent = "737_NS_Peptide_Shaker_PSM_Report_CombinedDB.tabular" + #data_collection_oral_737_NS_two_step$file_name_dataset_1_percent = "737_NS_Peptide_Shaker_PSM_Report_Multi_Stage_Two_Step.tabular" + + } +) + +############################################################################### +# Class: Data_Object_Pyrococcus_tr +############################################################################### +Data_Object_Pyrococcus_tr <- setRefClass("Data_Object_Pyrococcus_tr", + contains = "Data_Object_Info", + fields =list()) +Data_Object_Pyrococcus_tr$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Pyrococcus_tr" + #score_field_name <<- "Confidence [%]" + data_file_name_1_percent_FDR <<- "" + data_file_name <<- "Pfu_traditional_Extended_PSM_Report.tabular" + data_path_name <<- file.path(".", "Data") + experiment_name <<- "Pyrococcus" + designation <<- "tr" + + input_file_type <<- "PSM_Report" + + } +) +############################################################################### +# Class: Data_Object_Mouse_Mutations +############################################################################### +Data_Object_Mouse_Mutations <- setRefClass("Data_Object_Mouse_Mutations", + contains = "Data_Object_Info", + fields =list()) +Data_Object_Mouse_Mutations$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Mouse_Mutations" + #score_field_name <<- "Confidence [%]" + data_file_name_1_percent_FDR <<- "" + data_file_name <<- "Combined_DB_Mouse_5PTM.tabular" + data_path_name <<- file.path(".", "Data") + experiment_name <<- "Mouse Mutations" + designation <<- "combined_05" + + input_file_type <<- "PSM_Report" + + } +) +############################################################################### +# Class: Data_Object_Raw_Data +############################################################################### +Data_Object_Raw_Data <- setRefClass("Data_Object_Raw_Data", + contains = "Data_Object", + fields =list(df = "data.frame")) +Data_Object_Raw_Data$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Raw_Data" + }, + verify = function(){ + # Check that file exists before using it + file_path <- get_info()$file_path() + if (! safe_file_exists(file_path)){ + stop(sprintf("Raw data file does not exist (%s)", file_path)) + } + # BUGBUG: Needs to also check the following: + # - file is tab-delimited + # - first row is a list of column names + }, + set_info = function(info){ + parents[["info"]] <<- info + }, + get_info = function(){ + return(verified_element_of_list(parents, "info", "Data_Object_Raw_Data$parents")) + }, + m_load_data = function(){ + info <- get_info() + df <<- load_standard_df(info$file_path()) + } +) +############################################################################### +# Class: Data_Object_Raw_1_Percent +############################################################################### +Data_Object_Raw_1_Percent <- setRefClass("Data_Object_Raw_1_Percent", + contains = "Data_Object", + fields =list(df = "data.frame")) +Data_Object_Raw_1_Percent$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Raw_1_Percent" + }, + set_info = function(info){ + parents[["info"]] <<- info + }, + verify = function(){ + # Do nothing - a missing file name is acceptable for this module and is dealt with in load() + }, + get_info = function(){ + return(verified_element_of_list(parents, "info", "Data_Object_Raw_1_Percent$parents")) + }, + m_load_data = function(){ + + info <- get_info() + file_path <- info$file_path_1_percent_FDR() + if (exists()){ + df <<- load_standard_df(info$file_path_1_percent_FDR()) + } # Note that failing to load is a valid state for this file, leading to not is_dirty. BUGBUG: this could lead to problems if a good file appears later + }, + exists = function(){ + + info <- get_info() + local_file_name <- info$get_data_file_name_1_percent_FDR() # Check file name not file path + + if (length(local_file_name) == 0 ){ # variable not set + result = FALSE + } else if (local_file_name == ""){ # variable set to empty string + result = FALSE + } else { + result = safe_file_exists(info$file_path_1_percent_FDR()) + } + + return(result) + } +) +############################################################################### +# Class: Data_Converter +############################################################################### +Data_Converter <- setRefClass("Data_Converter", + fields =list(class_name = "character", + file_type = "character" + ) ) +Data_Converter$methods( + initialize = function(){ + class_name <<- "Data_Converter <abstract class - class_name needs to be set in subclass>" + file_type <<- "file_type has not been set before being used <needs to be set in initialize() of subclass>" + }, + check_raw_fields = function(info=NULL, raw_data=NULL){ + stop(sprintf("check_raw_fields() is an abstract method - define it in %s before calling Data_Object_Data_Converter$load_data()", class_name)) + }, + convert_data = function(){ + stop(sprintf("convert_data() is an abstract method - define it in %s before calling Data_Object_Data_Converter$load_data()", class_name)) + } +) +############################################################################### +# Class: Data_Converter_PMD_FDR_input_file +############################################################################### +Data_Converter_PMD_FDR_input_file <- setRefClass("Data_Converter_PMD_FDR_input_file", + contains = "Data_Converter", + fields =list( + + ) ) +Data_Converter_PMD_FDR_input_file$methods( + initialize = function(){ + callSuper() + + class_name <<- "Data_Converter_PMD_FDR_input_file" + file_type <<- "PMD_FDR_file_type" + }, + check_raw_fields = function(info=NULL, raw_data=NULL){ + data_original <- raw_data$df + check_field_name(data_original, "raw_data", "PMD_FDR_input_score") + check_field_name(data_original, "raw_data", "PMD_FDR_pmd") + check_field_name(data_original, "raw_data", "PMD_FDR_spectrum_file") + check_field_name(data_original, "raw_data", "PMD_FDR_proteins") + check_field_name(data_original, "raw_data", "PMD_FDR_spectrum_title") + check_field_name(data_original, "raw_data", "PMD_FDR_sequence") + check_field_name(data_original, "raw_data", "PMD_FDR_decoy") + }, + convert_data = function(info=NULL, raw_data=NULL){ + data_new <- raw_data$df + + return(data_new) # Pass through - everything should be in order + } +) +############################################################################### +# Class: Data_Converter_PSM_Report +############################################################################### +Data_Converter_PSM_Report <- setRefClass("Data_Converter_PSM_Report", + contains = "Data_Converter", + fields =list( + + ) ) +Data_Converter_PSM_Report$methods( + initialize = function(){ + callSuper() + + class_name <<- "Data_Converter_PSM_Report" + file_type <<- "PSM_Report" + }, + check_raw_fields = function(info=NULL, raw_data=NULL){ + data_original <- raw_data$df + check_field_name(data_original, "raw_data", "Confidence [%]") + check_field_name(data_original, "raw_data", "Precursor m/z Error [ppm]") + check_field_name(data_original, "raw_data", "Spectrum File") + check_field_name(data_original, "raw_data", "Protein(s)") + check_field_name(data_original, "raw_data", "Spectrum Title") + check_field_name(data_original, "raw_data", "Decoy") + check_field_name(data_original, "raw_data", "Sequence") + + }, + convert_data = function(info=NULL, raw_data=NULL){ + data_new <- raw_data$df + + data_new$PMD_FDR_input_score <- data_new[, "Confidence [%]" ] + data_new$PMD_FDR_pmd <- data_new[, "Precursor m/z Error [ppm]"] + data_new$PMD_FDR_spectrum_file <- data_new[, "Spectrum File" ] + data_new$PMD_FDR_proteins <- data_new[, "Protein(s)" ] + data_new$PMD_FDR_spectrum_title <- data_new[, "Spectrum Title" ] + data_new$PMD_FDR_sequence <- data_new[, "Sequence" ] + data_new$PMD_FDR_decoy <- data_new[, "Decoy" ] + + return(data_new) + } +) +############################################################################### +# Class: Data_Converter_MaxQuant_Evidence +############################################################################### +Data_Converter_MaxQuant_Evidence <- setRefClass("Data_Converter_MaxQuant_Evidence", + contains = "Data_Converter", + fields =list( + + ) ) +Data_Converter_MaxQuant_Evidence$methods( + initialize = function(){ + callSuper() + + class_name <<- "Data_Converter_MaxQuant_Evidence" + file_type <<- "MaxQuant_Evidence" + }, + check_raw_fields = function(info=NULL, raw_data=NULL){ + data_original <- raw_data$df + + check_field_name(data_original, "raw_data", "PEP") + check_field_name(data_original, "raw_data", "Mass error [ppm]") + check_field_name(data_original, "raw_data", "Proteins") + check_field_name(data_original, "raw_data", "Retention time") + check_field_name(data_original, "raw_data", "Sequence") + check_field_name(data_original, "raw_data", "Reverse") + }, + convert_data = function(info=NULL, raw_data=NULL){ + data_new <- raw_data$df + + data_new$PMD_FDR_input_score <- 100 * (1 - data_new[, "PEP" ]) + data_new$PMD_FDR_pmd <- data_new[, "Mass error [ppm]"] + data_new$PMD_FDR_spectrum_file <- "<place_holder - assumes a single spectra file>" + data_new$PMD_FDR_proteins <- data_new[, "Proteins" ] + data_new$PMD_FDR_spectrum_title <- data_new[, "Retention time" ] # Used for ordering peptides - not important in MaxQuant since PMD has already been normalized effectively + data_new$PMD_FDR_sequence <- data_new[, "Sequence" ] + data_new$PMD_FDR_decoy <- ifelse( data_new[, "Reverse" ] == "+", 1, 0) + + return(data_new) + } +) + +############################################################################### +# Class: Data_Object_Data_Converter +############################################################################### +Data_Object_Data_Converter <- setRefClass("Data_Object_Data_Converter", + contains = "Data_Object", + fields =list(df = "data.frame", + data_converter = "Data_Converter")) +Data_Object_Data_Converter$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Data_Converter" + }, + currently_supported_file_types = function(){ + return(c("PSM_Report", "PMD_FDR_input_file")) + }, + verify = function(){ + info <- get_info() + raw_data <- get_raw_data() + file_type <- get_info()$input_file_type + + set_file_type(file_type) + data_converter$check_raw_fields(info=info, raw_data=raw_data) + + }, + m_load_data = function(){ + + info <- get_info() + raw_data <- get_raw_data() + file_type <- get_info()$input_file_type + + df <<- data_converter$convert_data(info=info, raw_data=raw_data) + + }, + set_file_type = function(file_type = NULL){ + if (file_type == "PSM_Report" ){ + data_converter <<- Data_Converter_PSM_Report $new() + } else if (file_type == "PMD_FDR_input_file"){ + data_converter <<- Data_Converter_PMD_FDR_input_file$new() + } else if (file_type == "MaxQuant_Evidence"){ + data_converter <<- Data_Converter_MaxQuant_Evidence $new() + } else { + stop(sprintf("File type '%s' is not currently supported by PMD-FDR module", file_type)) + } + }, + set_info = function(info){ + parents[["info"]] <<- info + }, + get_info = function(){ + return(verified_element_of_list(parents, "info", "Data_Object_Data_Converter$parents")) + }, + set_raw_data = function(raw_data){ + parents[["raw_data"]] <<- raw_data + }, + get_raw_data = function(){ + return(verified_element_of_list(parents, "raw_data", "Data_Object_Data_Converter$parents")) + } +) +############################################################################### +# Class: Data_Object_Groupings +############################################################################### +Data_Object_Groupings <- setRefClass("Data_Object_Groupings", + contains = "Data_Object", + fields =list(df = "data.frame")) +Data_Object_Groupings$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Groupings" + }, + simplify_field_name = function(x=NULL){ + result <- gsub(pattern = "PMD_FDR_", replacement = "", x = x) + return(result) + }, + verify = function(){ + data_original <- get_data_converter()$df + + check_field_name(data_original, "data_converter", "PMD_FDR_input_score") + check_field_name(data_original, "data_converter", "PMD_FDR_pmd") + check_field_name(data_original, "data_converter", "PMD_FDR_spectrum_file") + check_field_name(data_original, "data_converter", "PMD_FDR_proteins") + check_field_name(data_original, "data_converter", "PMD_FDR_spectrum_title") + check_field_name(data_original, "data_converter", "PMD_FDR_sequence") + check_field_name(data_original, "data_converter", "PMD_FDR_decoy") + + }, + m_load_data = function(){ + make_data_groups <- function(data_original=NULL){ + + # Functions supporting make_data_groups() + + standardize_fields <- function(data=NULL){ + data_new <- data + + info <- get_info() + info$ensure() + #field_name_of_score <- info$get_field_name_of_score() + + # #data_new <- rename_column(data_new, "Variable Modifications" , "ptm_list") + # data_new <- rename_column(data_new, field_name_of_score , "PMD_FDR_input_score") + # data_new <- rename_column(data_new, "Precursor m/z Error [ppm]", "PMD_FDR_pmd") + # #data_new <- rename_column(data_new, "Isotope Number" , "isotope_number") + # #data_new <- rename_column(data_new, "m/z" , "m_z") + # #data_new <- rename_column(data_new, "Measured Charge" , "charge") + # data_new <- rename_column(data_new, "Spectrum File" , "PMD_FDR_spectrum_file") + # data_new <- rename_column(data_new, "Protein(s)" , "PMD_FDR_proteins") + # data_new <- rename_column(data_new, "Spectrum Title" , "PMD_FDR_spectrum_title") + # data_new <- manage_decoy_column(data_new) + + # Now managed in Data_Converter + # data_new$PMD_FDR_input_score <- data_new[, field_name_of_score ] + # data_new$PMD_FDR_pmd <- data_new[, "Precursor m/z Error [ppm]"] + # data_new$PMD_FDR_spectrum_file <- data_new[, "Spectrum File" ] + # data_new$PMD_FDR_proteins <- data_new[, "Protein(s)" ] + # data_new$PMD_FDR_spectrum_title <- data_new[, "Spectrum Title" ] + + data_new$value <- data_new$PMD_FDR_pmd + data_new$PMD_FDR_peptide_length <- str_length(data_new$PMD_FDR_sequence) + #data_new$charge_value <- with(data_new, as.numeric(substr(charge, start=1, stop=str_length(charge)-1))) + #data_new$measured_mass <- with(data_new, m_z*charge_value) + data_new$PMD_FDR_spectrum_index <- NA + data_new$PMD_FDR_spectrum_index[order(data_new$PMD_FDR_spectrum_title, na.last = TRUE)] <- 1:nrow(data_new) + + return(data_new) + } + add_grouped_variable <- function(data_groups = data_groups, field_name_to_group = NULL, vec.length.out = NULL, vec.tolerance = NULL, value_format = NULL){ + + # Support functions for add_grouped_variable() + find_interval_vec <- function(x=NULL, length.out = NULL, tolerance = NULL){ + q <- quantile(x = x, probs = seq(from=0, to=1, length.out = length.out), na.rm=TRUE) + q <- round_to_tolerance(q, tolerance = tolerance) + return(q) + } + get_group_data_frame <- function(vec=NULL, value_format = NULL){ + n <- length(vec) + a <- vec[-n] + b <- vec[-1] + + lower <- ifelse(a == b , "eq", NA) + lower <- ifelse(is.na(lower ), "ge", lower) + upper <- ifelse(a == b , "eq", NA) + upper[n-1] <- ifelse(is.na(upper[n-1]), "le", "eq") + upper <- ifelse(is.na(upper ), "lt", upper) + group <- data.frame(list(idx=1:(n-1), a=a, b=b, lower=lower, upper=upper)) + + name_format <- sprintf("%%%s_%%%s_%%s_%%s", value_format, value_format) + group$new_var <- with(group, sprintf(name_format, a, b, lower, upper)) + + return(group) + } + merge_group_with_data <- function(data_groups = NULL, group = NULL, vec = NULL, field_name_to_group = NULL){ + field_name_new <- sprintf("group_%s", simplify_field_name(field_name_to_group)) + group_idx <- findInterval(x = data_groups[,field_name_to_group], + vec = vec, + all.inside=TRUE) + data_groups$new_var <- group$new_var[group_idx] + data_groups <- rename_column(data_groups, "new_var", field_name_new) + } + # Body of add_grouped_variable() + + vec <- find_interval_vec(x = data_groups[[field_name_to_group]], + length.out = vec.length.out, + tolerance = vec.tolerance ) + group <- get_group_data_frame(vec = vec, + value_format = value_format) + df_new <- merge_group_with_data(data_groups = data_groups, + group = group, + vec = vec, + field_name_to_group = field_name_to_group) + df_new <- add_group_decoy(df_new, field_name_to_group) + + return(df_new) + } + add_already_grouped_variable <- function(field_name_to_group = NULL, data_groups = NULL ){ + old_name <- field_name_to_group + new_name <- sprintf("group_%s", simplify_field_name(old_name)) + df_new <- data_groups + df_new[[new_name]] <- data_groups[[old_name]] + + df_new <- add_group_decoy(data_groups = df_new, field_name_to_group = field_name_to_group) + + return(df_new) + } + add_value_norm <- function(data_groups = NULL){ + + df_new <- data_groups + df_new$value_norm <- with(df_new, value - median_of_group_index) + + return(df_new) + } + add_protein_group <-function(data_groups = NULL){ + data_new <- data_groups + df_group_def <- data.frame(stringsAsFactors = FALSE, + list(pattern = c("" , "pfu_" , "cRAP"), + group_name = c("human", "pyrococcus", "contaminant"))) + for (i in 1:nrow(df_group_def)){ + idx <- grepl(pattern = df_group_def$pattern[i], + x = data_new$PMD_FDR_proteins) + data_new$group_proteins[idx] <- df_group_def$group_name[i] + } + + data_new <- add_group_decoy(data_groups = data_new, field_name_to_group = "PMD_FDR_proteins") + return(data_new) + } + add_group_decoy <- function(data_groups=NULL, field_name_to_group=NULL){ + simple_field_name <- simplify_field_name(field_name_to_group) + field_name_decoy <- sprintf("group_decoy_%s", simple_field_name) + field_name_group <- sprintf("group_%s", simple_field_name) + + data_groups[[field_name_decoy]] <- with(data_groups, ifelse(PMD_FDR_decoy, "decoy", data_groups[[field_name_group]])) + + return(data_groups) + } + add_group_training_class <- function(data_groups = NULL){ + df_new <- data_groups + + lowest_confidence_group <- min(data_groups$group_input_score) + + is_long_enough <- with(df_new, (PMD_FDR_peptide_length >= MIN_GOOD_PEPTIDE_LENGTH) ) + is_good <- with(df_new, (PMD_FDR_decoy == 0) & (PMD_FDR_input_score == 100) ) + is_bad <- with(df_new, (PMD_FDR_decoy == 1) ) + #is_used_to_train <- with(df_new, used_to_find_middle) # BUGBUG: circular definition + + idx_good <- which(is_good ) # & is_long_enough) + n_good <- length(idx_good) + idx_testing <- idx_good[c(TRUE,FALSE)] # Selects every other item + idx_training <- setdiff(idx_good, idx_testing) + + #is_good_short <- with(df_new, is_good & !is_long_enough ) + #is_good_long <- with(df_new, is_good & is_long_enough ) + is_bad_short <- with(df_new, is_bad & !is_long_enough ) + is_bad_long <- with(df_new, is_bad & is_long_enough ) + #is_good_training <- with(df_new, is_good_long & (used_to_find_middle == TRUE ) ) + #is_good_testing <- with(df_new, is_good_long & (used_to_find_middle == FALSE) ) + + df_new$group_training_class <- "other_short" # Default + df_new$group_training_class[is_long_enough ] <- "other_long" # Default (if long enough) + df_new$group_training_class[idx_training ] <- "good_training" # Length does not matter (anymore) + df_new$group_training_class[idx_testing ] <- "good_testing" # Ditto + #df_new$group_training_class[is_good_short ] <- "good_short" + df_new$group_training_class[is_bad_long ] <- "bad_long" # ...except for "bad" + df_new$group_training_class[is_bad_short ] <- "bad_short" + + df_new <- add_used_to_find_middle( data_groups = df_new ) # Guarantees consistency between duplicated definitions + + return(df_new) + } + add_used_to_find_middle <- function(data_groups = NULL){ + df_new <- data_groups + idx_used <- which(data_groups$group_training_class == "good_training") + + df_new$used_to_find_middle <- FALSE + df_new$used_to_find_middle[idx_used] <- TRUE + + return(df_new) + } + add_group_spectrum_index <- function(data_groups = NULL){ + + # Supporting functions for add_group_spectrum_index() + + get_breaks_all <- function(df_new){ + # Supporting function(s) for get_breaks_all() + + get_cut_points <- function(data_subset){ + + # Supporting function(s) for get_cut_points() + + cut_values <- function(data=NULL, minimum_segment_length=NULL){ + # using cpt.mean -- Appears to have a memory leak + #results_cpt <- cpt.mean(data=data, method="PELT", minimum_segment_length=minimum_segment_length) + #results <- results_cpt@cpts + + # Just look at the end + #results <- c(length(data)) + + # regularly spaced, slightly larger than minimum_segment_length + n_points <- length(data) + n_regions <- floor(n_points / minimum_segment_length) + n_regions <- ifelse(n_regions == 0, 1, n_regions) + results <- round(seq(1, n_points, length.out = n_regions + 1)) + results <- results[-1] + return(results) + } + remove_last <- function(x){ + return(x[-length(x)] ) + } + + # Main code of for get_cut_points() + max_idx = max(data_subset$PMD_FDR_spectrum_index) + data_sub_sub <- subset(data_subset, group_training_class == "good_training") #(PMD_FDR_input_score==100) & (PMD_FDR_decoy==0)) + minimum_segment_length = 50 + + values <- data_sub_sub$value + n_values <- length(values) + local_to_global_idx <- data_sub_sub$PMD_FDR_spectrum_index + if (n_values <= minimum_segment_length){ + result <- c() + } else { + local_idx <- cut_values(data=values, minimum_segment_length=minimum_segment_length) + result <- local_to_global_idx[local_idx] + result <- remove_last(result) + } + result <- c(result, max_idx) + return(result) + } + remove_last <- function(vec) { + return(vec[-length(vec)]) + } + + # Main code of get_breaks_all() + + breaks <- 1 + + files <- unique(df_new$PMD_FDR_spectrum_file) + + for (local_file in files){ + data_subset <- subset(df_new, (PMD_FDR_spectrum_file==local_file)) + if (nrow(data_subset) > 0){ + breaks <- c(breaks, get_cut_points(data_subset)) + } + } + breaks <- sort(unique(breaks)) + breaks <- remove_last(breaks) + breaks <- c(breaks, max(df_new$PMD_FDR_spectrum_index + 1)) + + return(breaks) + } + + # Main code of add_group_spectrum_index() + + field_name_to_group <- "PMD_FDR_spectrum_index" + + df_new <- data_groups[order(data_groups[[field_name_to_group]]),] + breaks <- get_breaks_all(df_new) + + df_new$group_spectrum_index <- cut(x = df_new[[field_name_to_group]], breaks = breaks, right = FALSE, dig.lab = 6) + df_new <- add_group_decoy(data_groups = df_new, field_name_to_group = field_name_to_group) + + return(df_new) + } + add_median_of_group_index <-function(data_groups = NULL){ + field_median <- "median_of_group_index" + data_good <- subset(data_groups, used_to_find_middle ) + med <- aggregate(value~group_spectrum_index, data=data_good, FUN=safe_median) + med <- rename_column(med, "value", field_median) + + data_groups[[field_median]] <- NULL + df_new <- merge(data_groups, med) + + return(df_new) + } + add_1_percent_to_data_groups <- function(data_groups=NULL){ + + data_new <- data_groups + + if (get_raw_1_percent()$exists()){ + # Load 1 percent file + df_1_percent <- get_raw_1_percent()$df + + # Get relevant fields + df_1_percent$is_in_1percent <- TRUE + df_1_percent <- rename_column(df_1_percent, "Spectrum Title", "PMD_FDR_spectrum_title") + df_1_percent <- df_1_percent[,c("PMD_FDR_spectrum_title", "is_in_1percent")] + + # Merge with data_groups + data_new <- merge(data_new, df_1_percent, all.x=TRUE) + data_new$is_in_1percent[is.na(data_new$is_in_1percent)] <- FALSE + } + + # Save results + return(data_new) + + } + + + # Main code of make_data_groups() + data_groups <- standardize_fields(data_original) + + data_groups <- add_grouped_variable(field_name_to_group = "PMD_FDR_input_score", + data_groups = data_groups, + vec.length.out = 14, + vec.tolerance = 1, + value_format = "03d") + + data_groups <- add_grouped_variable(field_name_to_group = "PMD_FDR_pmd", + data_groups = data_groups, + vec.length.out = 21, + vec.tolerance = 0.1, + value_format = "+05.1f") + + data_groups <- add_grouped_variable(field_name_to_group = "PMD_FDR_peptide_length", + data_groups = data_groups, + vec.length.out = 11, + vec.tolerance = 1, + value_format = "02d") + + # data_groups <- add_grouped_variable(field_name_to_group = "m_z", + # data_groups = data_groups, + # vec.length.out = 11, + # vec.tolerance = 10, + # value_format = "04.0f") + # + # data_groups <- add_grouped_variable(field_name_to_group = "measured_mass", + # data_groups = data_groups, + # vec.length.out = 11, + # vec.tolerance = 1, + # value_format = "04.0f") + # + # data_groups <- add_already_grouped_variable(field_name_to_group = "isotope_number", + # data_groups = data_groups ) + # + # data_groups <- add_already_grouped_variable(field_name_to_group = "charge", + # data_groups = data_groups ) + # + data_groups <- add_already_grouped_variable(field_name_to_group = "PMD_FDR_spectrum_file", + data_groups = data_groups ) + data_groups <- add_protein_group(data_groups = data_groups) + data_groups <- add_group_training_class( data_groups = data_groups) + data_groups <- add_group_spectrum_index( data_groups = data_groups) + data_groups <- add_median_of_group_index( data_groups = data_groups) + data_groups <- add_value_norm( data_groups = data_groups) + + # fields_of_interest <- c("PMD_FDR_input_score", "PMD_FDR_pmd", "m_z", "PMD_FDR_peptide_length", "isotope_number", "charge", "PMD_FDR_spectrum_file", "measured_mass", "PMD_FDR_spectrum_index", "PMD_FDR_proteins") + # fields_of_interest <- c("value", + # "PMD_FDR_decoy", + # "PMD_FDR_spectrum_title", + # "median_of_group_index", + # "value_norm", + # "used_to_find_middle", + # "group_training_class", + # fields_of_interest, + # sprintf("group_%s" , fields_of_interest), + # sprintf("group_decoy_%s", fields_of_interest)) + + fields_of_interest <- c("PMD_FDR_input_score", "PMD_FDR_pmd", "PMD_FDR_peptide_length", "PMD_FDR_spectrum_file", "PMD_FDR_spectrum_index", "PMD_FDR_proteins") + fields_of_interest <- c("value", + "PMD_FDR_decoy", + "PMD_FDR_spectrum_title", + "median_of_group_index", + "value_norm", + "used_to_find_middle", + "group_training_class", + fields_of_interest, + sprintf("group_%s" , simplify_field_name(fields_of_interest)), + sprintf("group_decoy_%s", simplify_field_name(fields_of_interest))) + + data_groups <- data_groups[,fields_of_interest] + data_groups <- add_1_percent_to_data_groups(data_groups) + + return(data_groups) + } + + data_original <- get_data_converter()$df #parents[[INDEX_OF_ORIGINAL_DATA]]$df + df <<- make_data_groups(data_original) + }, + set_info = function(info){ + parents[["info"]] <<- info + }, + get_info = function(){ + return(verified_element_of_list(parents, "info", "Data_Object_Groupings$parents")) + }, + set_data_converter = function(data_converter){ + parents[["data_converter"]] <<- data_converter + }, + get_data_converter = function(){ + return(verified_element_of_list(parents, "data_converter", "Data_Object_Groupings$parents")) + }, + set_raw_1_percent = function(raw_1_percent){ ############## BUGBUG: the 1% file should be using the same file type format as the standard data (but isn't) + parents[["raw_1_percent"]] <<- raw_1_percent + }, + get_raw_1_percent = function(){ + return(verified_element_of_list(parents, "raw_1_percent", "Data_Object_Groupings$parents")) + } +) +############################################################################### +# Class: Data_Object_Individual_FDR +############################################################################### +Data_Object_Individual_FDR <- setRefClass("Data_Object_Individual_FDR", + contains = "Data_Object", + fields =list(df = "data.frame")) +Data_Object_Individual_FDR$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Individual_FDR" + }, + verify = function(){ + data_groups = get_data_groups()$df + densities = get_densities()$df + alpha = get_alpha()$df + + check_field_name(data_groups, "data_groups", "value_norm") + check_field_name(data_groups, "data_groups", "group_decoy_input_score") + check_field_name(data_groups, "data_groups", "PMD_FDR_peptide_length") + check_field_name(data_groups, "data_groups", "PMD_FDR_input_score") + check_field_name(alpha, "alpha", "alpha") # BUGBUG: I'm missing a field here... + check_field_name(densities, "densities", "x") + check_field_name(densities, "densities", "t") + check_field_name(densities, "densities", "f") + + }, + set_data_groups = function(parent){ + parents[["data_groups"]] <<- parent + }, + get_data_groups = function(){ + return(verified_element_of_list(parents, "data_groups", "Data_Object_Individual_FDR$parents")) + }, + set_densities = function(parent){ + parents[["densities"]] <<- parent + }, + get_densities = function(){ + return(verified_element_of_list(parents, "densities", "Data_Object_Individual_FDR$parents")) + }, + set_alpha = function(parent){ + parents[["alpha"]] <<- parent + }, + get_alpha = function(){ + return(verified_element_of_list(parents, "alpha", "Data_Object_Individual_FDR$parents")) + }, + m_load_data = function(){ + add_FDR_to_data_groups <- function(data_groups=NULL, densities=NULL, alpha=NULL, field_value=NULL, field_decoy_group=NULL, set_decoy_to_1=FALSE){ + # Support functions for add_FDR_to_data_groups() + get_group_fdr <- function(group_stats = NULL, data_groups = NULL, densities=NULL){ + group_fdr <- apply(X = densities, MARGIN = 2, FUN = max) + df_group_fdr <- data.frame(group_fdr) + df_group_fdr <- rename_column(df_group_fdr, "group_fdr", "v") + df_group_fdr$group_of_interest <- names(group_fdr) + t <- df_group_fdr[df_group_fdr$group_of_interest == "t", "v"] + f <- df_group_fdr[df_group_fdr$group_of_interest == "f", "v"] + df_group_fdr <- subset(df_group_fdr, !(group_of_interest %in% c("x", "t", "f"))) + df_group_fdr$group_fdr <-(df_group_fdr$v - t) / (f - t) + + return(df_group_fdr) + } + + get_mode <- function(x){ + d <- density(x) + return(d$x[which.max(d$y)]) + } + + # Main code for add_FDR_to_data_groups() + + # Set up analysis + data_new <- data_groups + data_new$value_of_interest <- data_new[,field_value] + data_new$group_of_interest <- data_new[,field_decoy_group] + + data_subset <- subset(data_new, PMD_FDR_peptide_length >= 11) + + # Identify mean PMD_FDR_input_score per group + + group_input_score <- aggregate(PMD_FDR_input_score~group_of_interest, data=data_subset, FUN=mean) + group_input_score <- rename_column(group_input_score, "PMD_FDR_input_score", "group_input_score") + + #group_fdr <- get_group_fdr(data_groups = data_subset, densities=densities) + group_stats <- merge(alpha, group_input_score) + group_stats <- subset(group_stats, group_of_interest != "PMD_FDR_decoy") + + x=c(0,group_stats$group_input_score) + y=c(1,group_stats$alpha) + FUN_interp <- approxfun(x=x,y=y) + + data_new$interpolated_groupwise_FDR <- FUN_interp(data_new$PMD_FDR_input_score) + if (set_decoy_to_1){ + data_new$interpolated_groupwise_FDR[data_new$PMD_FDR_decoy == 1] <- 1 + } + + return(data_new) + } + + data_groups = get_data_groups()$df + densities = get_densities()$df + alpha = get_alpha()$df + + d_true <- densities[,c("x", "t")] + d_false <- densities[,c("x", "f")] + + i_fdr <- add_FDR_to_data_groups(data_groups = data_groups, + densities = densities, + alpha = alpha, + field_value ="value_norm", + field_decoy_group = "group_decoy_input_score") + # Derive local t + interp_t <- splinefun(x=d_true$x, y=d_true$t) #approxfun(x=d_true$x, y=d_true$y) + + # Derive local f + interp_f <- splinefun(x=d_false$x, y=d_false$f) #approxfun(x=d_true$x, y=d_true$y) + + # Derive local FDR + i_fdr$t <- interp_t(i_fdr$value_of_interest) + i_fdr$f <- interp_f(i_fdr$value_of_interest) + i_fdr$alpha <- i_fdr$interpolated_groupwise_FDR + i_fdr$i_fdr <- with(i_fdr, (alpha*f) / (alpha*f + (1-alpha)*t)) + + df <<- i_fdr + + } +) +############################################################################### +# Class: Data_Object_Densities +############################################################################### +Data_Object_Densities <- setRefClass("Data_Object_Densities", + contains = "Data_Object", + fields =list(df = "data.frame")) +Data_Object_Densities$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Densities" + }, + verify = function(){ + df_data_groups <- get_data_groups()$df + + checkTrue(nrow(df_data_groups) > 0, + msg = "data_groups data frame was empty (and should not have been)") + + check_field_name(df_data_groups, "data_groups", "value_norm") + check_field_name(df_data_groups, "data_groups", "group_decoy_input_score") + check_field_name(df_data_groups, "data_groups", "group_training_class") + }, + set_data_groups = function(parent=NULL){ + parents[["data_groups"]] <<- parent + }, + get_data_groups = function(){ + return(verified_element_of_list(parent_list = parents, element_name = "data_groups", object_name = "Data_Object_Densities$parents")) + }, + m_load_data = function(){ + + # Support functions for make_densities() + set_values_of_interest <- function(df_data_groups=NULL, field_group = NULL){ + field_value = "value_norm" + + new_data_groups <- get_data_groups()$df + new_data_groups$value_of_interest <- new_data_groups[,field_value] + new_data_groups$group_of_interest <- new_data_groups[,field_group] + #groups <- sort(unique(new_data_groups$group_of_interest)) + + return(new_data_groups) + } + get_ylim <- function(data_groups=NULL){ + ylim <- range(data_groups$value_of_interest, na.rm = TRUE) + return(ylim) + } + make_hit_density <- function(data_subset=NULL, descr_of_df=NULL, ylim=NULL){ + #stop("Data_Object_Densities$make_hit_density() is untested beyond here") + verify_density = function(data_subset=NULL, value_field=NULL, descr_of_df=NULL, ylim=NULL){ + values <- data_subset[value_field] + values <- values[! is.na(values)] + if (length(values) < MIN_ACCEPTABLE_POINTS_IN_DENSITY){ + stop (sprintf("There are too few valid %s (%d < %d) in %s to be used for calculating a density function", + value_field, + length(values), + MIN_ACCEPTABLE_POINTS_IN_DENSITY, + descr_of_df)) + } + d <- density(values, from = ylim[1], to = ylim[2]) + + return(d) + } + uniformalize_density <- function(d){ + # Reorganizes y-values of density function so that + # function is monotone increasing to mode + # and monotone decreasing afterwards + idx_mode <- which.max(d$y) + idx_lower <- 1:(idx_mode-1) + idx_upper <- idx_mode:length(d$y) + + values_lower <- d$y[idx_lower] + values_upper <- d$y[idx_upper] + + new_d <- d + new_d$y <- c(sort(values_lower, decreasing = FALSE), + sort(values_upper, decreasing = TRUE)) + + return(new_d) + } + + local_df <- subset(data_subset, + (PMD_FDR_peptide_length >= MIN_GOOD_PEPTIDE_LENGTH) & + (used_to_find_middle == FALSE)) + d <- verify_density (data_subset=local_df, value_field = "value_of_interest", descr_of_df = descr_of_df, ylim=ylim) + d <- normalize_density (d) + d <- uniformalize_density(d) + + return(d) + } + make_true_hit_density <- function(data_groups=NULL){ + d_true <- make_hit_density(data_subset = subset(data_groups, (group_training_class == "good_testing") ), + descr_of_df = "Good-testing dataset", + ylim = get_ylim(data_groups)) + return(d_true) + } + make_false_hit_density <- function(data_groups=NULL){ + d_false <- make_hit_density(data_subset = subset(data_groups, (group_training_class == "bad_long") ), + descr_of_df = "Bad-long dataset", + ylim = get_ylim(data_groups)) + + return(d_false) + } + add_v_densities <- function(data_groups=NULL, densities=NULL, field_group = NULL){ + groups <- sort(unique(data_groups$group_of_interest)) + + new_densities <- densities + + for (local_group in groups){ + d_v <- make_hit_density(data_subset = subset(data_groups, (group_of_interest == local_group)), + descr_of_df = sprintf("subset of data (where %s is '%s')", + field_group, + local_group), + ylim = get_ylim(data_groups)) + new_densities[local_group] <- d_v$y + } + + return(new_densities) + } + + # Main section of make_densities() + df_data_groups <- get_data_groups()$df + new_data_groups <- set_values_of_interest(df_data_groups, field_group = "group_decoy_input_score") + d_true <- make_true_hit_density( new_data_groups) + d_false <- make_false_hit_density(new_data_groups) + + densities <- data.frame(x=d_true$x, + t=d_true$y, + f=d_false$y) + densities <- add_v_densities(data_groups=new_data_groups, densities=densities, field_group = "group_decoy_input_score") + df <<- densities + } +) +############################################################################### +# Class: Data_Object_Alpha +############################################################################### +Data_Object_Alpha <- setRefClass("Data_Object_Alpha", + contains = "Data_Object", + fields =list(df = "data.frame")) +Data_Object_Alpha$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Alpha" + }, + verify = function(){ + densities <- get_densities()$df + + checkTrue(nrow(densities) > 0, + msg = "Densities data.frame was empty (and should not have been)") + }, + set_densities = function(parent=NULL){ + parents[["densities"]] <<- parent + }, + get_densities = function(){ + return(verified_element_of_list(parent_list = parents, element_name = "densities", object_name = "Data_Object_Alpha")) + }, + m_load_data = function(){ + + densities <- get_densities()$df + + max_of_density = apply(X = densities, MARGIN = 2, FUN = max) + df_alpha <- data.frame(stringsAsFactors = FALSE, + list(v = max_of_density, + group_of_interest = names(max_of_density))) + df_alpha <- subset(df_alpha, group_of_interest != "x") + t <- with(subset(df_alpha, group_of_interest=="t"), v) + f <- with(subset(df_alpha, group_of_interest=="f"), v) + df_alpha$alpha <- with(df_alpha, (t-v)/(t-f)) + + alpha <- df_alpha[,c("group_of_interest", "alpha")] + alpha <- subset(alpha, (group_of_interest != "t") & (group_of_interest != "f")) + + df <<- alpha + } +) +############################################################################### +# Class: Data_Processor +############################################################################### +Data_Processor <- setRefClass("Data_Processor", + fields =list(info = "Data_Object_Info", + raw_data = "Data_Object_Raw_Data", + raw_1_percent = "Data_Object_Raw_1_Percent", + data_converter = "Data_Object_Data_Converter", + data_groups = "Data_Object_Groupings", + densities = "Data_Object_Densities", + alpha = "Data_Object_Alpha", + i_fdr = "Data_Object_Individual_FDR")) +Data_Processor$methods( + initialize = function(p_info=NULL){ + if (! is.null(p_info)){ + set_info(p_info) + } + }, + set_info = function(p_info=NULL){ + # This initialization defines all of the dependencies between the various components + + info <<- p_info + + # raw_data + raw_data$set_info(info) + info$append_child(raw_data) + + # raw_1_percent + raw_1_percent$set_info(info) + info$append_child(raw_1_percent) + + # data_converter + data_converter$set_info (info) + data_converter$set_raw_data(raw_data) + info $append_child (data_converter) + raw_data $append_child (data_converter) + + # data_groups + data_groups$set_info (info) + data_groups$set_data_converter(data_converter) + data_groups$set_raw_1_percent (raw_1_percent) + info $append_child (data_groups) + data_converter$append_child (data_groups) + raw_1_percent $append_child (data_groups) + + # densities + densities $set_data_groups(data_groups) + data_groups$append_child (densities) + + # alpha + alpha $set_densities(densities) + densities$append_child (alpha) + + # i_fdr + i_fdr$set_data_groups(data_groups) + i_fdr$set_densities (densities) + i_fdr$set_alpha (alpha) + data_groups $append_child(i_fdr) + densities $append_child(i_fdr) + alpha $append_child(i_fdr) + } +) + + +############################################################# +####### Classes for Plotting +############################################################# + +############################################################################### +# Class: Plot_Image +############################################################################### +Plot_Image = setRefClass("Plot_Image", + fields = list(data_processors = "list", + plot_title = "character", + include_text = "logical", + include_main = "logical", + x.intersp = "numeric", + y.intersp = "numeric", + scale = "numeric", + main = "character", + is_image_container = "logical")) +Plot_Image$methods( + initialize = function(p_data_processors = list(), + p_include_main = TRUE, + p_include_text = TRUE, + p_is_image_container = FALSE){ + include_main <<- p_include_main + include_text <<- p_include_text + data_processors <<- p_data_processors + is_image_container <<- p_is_image_container + }, + plot_image = function(){ + plot(main="Define plot_image() for subclass") # Abstract function + }, + get_n = function(){ + stop("Need to define function get_n() for subclass") #Abstract function + }, + create_standard_main = function(){ + needs_main <- function(){ + return(include_text & include_main & !is_image_container) + } + if (needs_main()){ + collection_name <- data_processors[[1]]$info$collection_name() + main <<- sprintf("%s\n(Dataset: %s; n=%s)", plot_title, collection_name, format(get_n(), big.mark = ",")) + } + }, + plot_image_in_window = function(p_scale=NULL, window_height=NULL, window_width=NULL){ + scale <<- p_scale + SIZE_AXIS <- 2.5 * scale # in the units used by mar + SIZE_MAIN <- 2.5 * scale + SIZE_NO_MARGIN <- 0.1 * scale + FONT_SIZE <- 8 * scale + WINDOW_WIDTH <- window_width * scale + WINDOW_HEIGHT <- window_height * scale + X_INTERSP <- 0.5 * scale + 0.4 # manages legend text spacing + Y_INTERSP <- 0.5 * scale + 0.4 # manages + + if (include_main){ + mar = c(SIZE_AXIS, SIZE_AXIS, SIZE_MAIN , SIZE_NO_MARGIN) + } else { + mar = c(SIZE_AXIS, SIZE_AXIS, SIZE_NO_MARGIN, SIZE_NO_MARGIN) + } + mgp = c(SIZE_AXIS/2, SIZE_AXIS/4, 0) # Margin line (mex units) for axis title, axis labels, axis lines + ps = FONT_SIZE + x.intersp <<- X_INTERSP + y.intersp <<- Y_INTERSP + + windows(width = WINDOW_WIDTH, height=WINDOW_HEIGHT) + + old_par <- par(mar=mar, ps=ps, mgp=mgp) + create_standard_main() + + plot_image() + if (!is_image_container){ + axis(side=1, labels=include_text, tcl=-0.5, lwd=scale) + axis(side=2, labels=include_text, tcl=-0.5, lwd=scale) + box(lwd=scale) + } + par(old_par) + }, + plot_image_in_small_window = function(p_scale=1){ + plot_image_in_window(p_scale=p_scale, window_height=2, window_width=3.25) + }, + plot_image_in_large_window = function(p_scale=1, window_height=NULL){ + plot_image_in_window(p_scale=p_scale, window_height=window_height, window_width=7) + } +) +############################################################################### +# Class: Legend_Object +############################################################################### +Legend_Object = setRefClass("Legend_Object", + contains = "Plot_Image", + fields = list(user_params = "list", + scale = "numeric")) +Legend_Object$methods( + initialize = function(p_user_params = NULL, p_scale = NULL){ + if (is.null(p_user_params)){ + user_params <<- list() + } else { + user_params <<- p_user_params + } + if (is.null(p_scale)){ + stop("Legend_Object must have a valid scale") + } else { + scale <<- p_scale + } + user_params$x <<- if_null(user_params$x , "topleft", user_params$x) + user_params$y <<- if_null(user_params$y , NULL, user_params$y) + user_params$bty <<- if_null(user_params$bty , "o", user_params$bty) + user_params$lwd <<- if_null(user_params$lwd , NULL, user_params$lwd * scale) # Because we allow NULL, scale must be inside parens + user_params$seg.len <<- if_null(user_params$seg.len , 3, user_params$seg.len ) * scale + user_params$box.lwd <<- if_null(user_params$box.lwd , 1, user_params$box.lwd ) * scale + user_params$x.intersp <<- if_null(user_params$x.intersp, 0.6, user_params$x.intersp) * scale + user_params$y.intersp <<- if_null(user_params$y.intersp, 0.4, user_params$y.intersp) * scale + 0.2 + }, + show = function(){ + first_legend = legend(x = user_params$x, + y = user_params$y, + title = "", + legend = user_params$leg, + col = user_params$col, + bty = user_params$bty, + lty = user_params$lty, + lwd = user_params$lwd, + seg.len = user_params$seg.len, + box.lwd = user_params$box.lwd, + x.intersp = user_params$x.intersp, + y.intersp = user_params$y.intersp) + new_x = first_legend$rect$left + new_y = first_legend$rect$top + first_legend$rect$h * ifelse(scale==1, 0.07, 0.03 - (scale * 0.02)) #switch(scale, 0.01, -0.01, -0.03, -0.05)# (0.07 - 0.09 * ((scale-1)^2))#(0.15 - 0.08*scale)#.07 * (2 - scale) + legend(x=new_x, y=new_y, title = user_params$title, legend = "", cex=1.15, bty="n") + + } +) +############################################################################### +# Class: Plot_Multiple_Images +############################################################################### +Plot_Multiple_Images = setRefClass("Plot_Multiple_Images", + contains = "Plot_Image", + fields = list(n_images_wide = "numeric", + n_images_tall = "numeric", + image_list = "list")) +Plot_Multiple_Images$methods( + initialize = function(p_n_images_wide=1, p_n_images_tall=2, p_image_list=NULL, ...){ + n_images_wide <<- p_n_images_wide + n_images_tall <<- p_n_images_tall + image_list <<- p_image_list + #plot_title <<- "True Hit and False Hit Distributions" + + callSuper(p_is_image_container=TRUE, ...) + }, + plot_image = function(){ + # Support functions + apply_mtext <- function(letter=NULL){ + line=1.3*scale + mtext(letter, side=1, line=line, adj=0) + } + # main code + old_par <- par(mfrow=c(n_images_tall, n_images_wide)) + i=0 + n_images <- length(image_list) + + for (i in 1:n_images){ + image <- image_list[[i]] + image$create_standard_main() + image$scale <- scale + image$plot_image() + axis(side=1, labels=include_text, tcl=-0.5, lwd=scale) + axis(side=2, labels=include_text, tcl=-0.5, lwd=scale) + box(lwd=scale) + apply_mtext(letter=sprintf("(%s)", letters[i])) + + } + par(old_par) + + } +) +############################################################################### +# Class: Plot_Compare_PMD_and_Norm_Density +############################################################################### +Plot_Compare_PMD_and_Norm_Density = setRefClass("Plot_Compare_PMD_and_Norm_Density", + contains = "Plot_Image", + fields = list(show_norm = "logical", + display_n_psms = "logical")) +Plot_Compare_PMD_and_Norm_Density$methods( + initialize = function(p_show_norm=TRUE, p_display_n_psms=TRUE, ...){ + show_norm <<- p_show_norm + display_n_psms <<- p_display_n_psms + plot_title <<- "True Hit and False Hit Distributions" + + callSuper(...) + }, + plot_image = function(){ + # Support functions for plot_compare_PMD_and_norm_density() + + get_densities <- function(data_subset = NULL, var_value = NULL){ + data_subset$value_of_interest <- data_subset[,var_value] + from <- min(data_subset$value_of_interest, na.rm = TRUE) + to <- max(data_subset$value_of_interest, na.rm = TRUE) + xlim = range(data_subset$value_of_interest) + data_true <- subset(data_subset, (PMD_FDR_decoy==0) & (PMD_FDR_input_score==100)) + data_false <- subset(data_subset, (PMD_FDR_decoy==1)) + + d_true <- with(data_true , density(value_of_interest, from = from, to = to, na.rm = TRUE)) + d_false <- with(data_false, density(value_of_interest, from = from, to = to, na.rm = TRUE)) + d_true <- normalize_density(d_true) + d_false <- normalize_density(d_false) + + densities <- list(d_true=d_true, d_false=d_false, var_value = var_value, n_true = nrow(data_true), n_false = nrow(data_false)) + + return(densities) + } + get_xlim <- function(densities_a = NULL, densities_b = NULL, show_norm=NULL){ + xlim <- range(c( densities_a$d_true$x, densities_a$d_false$y)) + if (show_norm){ + xlim <- range(c(xlim, densities_b$d_true$x, densities_b$d_false$y)) + } + return(xlim) + } + get_ylim <- function(densities_a = NULL, densities_b = NULL, show_norm=NULL){ + ylim <- range(c( densities_a$d_true$y, densities_a$d_false$y)) + if (show_norm){ + ylim <- range(c(ylim, densities_b$d_true$y, densities_b$d_false$y)) + } + return(ylim) + } + plot_distributions <- function(densities = NULL, var_value= NULL, dataset_name = NULL, ...){ + leg = list() + leg$leg = c("Good", "Bad") + if (display_n_psms){ + leg$leg = sprintf("%s (%d PSMs)", + leg$leg, + c(densities$n_true, densities$n_false)) + + } + leg$col = c("black", "red") + leg$lwd = c(3 , 3) + leg$lty = c(1 , 2) + leg$title = "Hit Category" + xlab = ifelse(var_value == "value", + "PMD (ppm)", + "PMD - normalized (ppm)") + ylab = "Density" + if (!include_text){ + xlab = "" + ylab = "" + } + plot( densities$d_true , col=leg$col[1], lwd=leg$lwd[1] * scale, lty=leg$lty[1], xaxt = "n", yaxt = "n", main=main, xlab = xlab, ylab=ylab, ...) + lines(densities$d_false, col=leg$col[2], lwd=leg$lwd[2] * scale, lty=leg$lty[2]) + abline(v=0, h=0, col="gray", lwd=1*scale) + if (include_text){ + legend_object <- Legend_Object$new(leg, scale) + legend_object$show() + #legend("topleft", legend=leg.leg, col=leg.col, lwd=leg.lwd, lty=leg.lty, x.intersp = x.intersp, y.intersp = y.intersp) + } + } + + # Main code block for plot_compare_PMD_and_norm_density + data_processor <- data_processors[[1]] + data_processor$data_groups$ensure() + data_groups <- data_processor$data_groups$df + + data_subset_a <- subset(data_groups , used_to_find_middle == FALSE) + data_subset_b <- subset(data_subset_a, PMD_FDR_peptide_length > 11) + + densities_a <- get_densities(data_subset = data_subset_a, var_value = "value") + densities_b <- get_densities(data_subset = data_subset_b, var_value = "value_norm") + + xlim=get_xlim(densities_a, densities_b, show_norm = show_norm) + ylim=get_ylim(densities_a, densities_b, show_norm = show_norm) + + dataset_name <- data_processor$info$collection_name + plot_distributions( densities=densities_a, var_value = "value" , dataset_name = dataset_name, xlim=xlim, ylim=ylim) + if (show_norm){ + plot_distributions(densities=densities_b, var_value = "value_norm", dataset_name = dataset_name, xlim=xlim, ylim=ylim) + } + }, + get_n = function(){ + data_processor <- data_processors[[1]] + data_processor$data_groups$ensure() + data_subset_a <- subset(data_processor$data_groups$df , used_to_find_middle == FALSE) + data_subset_b <- subset(data_subset_a, PMD_FDR_peptide_length > 11) + + if (show_norm){ + data_subset <- data_subset_a + } else { + data_subset <- data_subset_b + } + + data_true <- subset(data_subset, (PMD_FDR_decoy==0) & (PMD_FDR_input_score==100)) + data_false <- subset(data_subset, (PMD_FDR_decoy==1)) + + return(nrow(data_true) + nrow(data_false)) + } +) + +############################################################################### +# Class: Plot_Time_Invariance_Alt +############################################################################### +Plot_Time_Invariance_Alt = setRefClass("Plot_Time_Invariance_Alt", + contains = "Plot_Image", + fields = list(show_norm = "logical", + display_n_psms = "logical", + training_class = "character", + ylim = "numeric", + field_of_interest = "character")) +Plot_Time_Invariance_Alt$methods( + initialize = function(p_ylim=NULL, p_training_class=NULL, p_field_of_interest="value_norm", ...){ + get_subset_title <- function(training_class=NULL){ + if (training_class == "bad_long"){ + subset_title="bad only" + } else if (training_class == "good_testing"){ + subset_title="good-testing only" + } else if (training_class == "good_training"){ + subset_title="good-training only" + } else if (training_class == "other"){ + subset_title="other only" + } else { + stop("Unexpected training_class in plot_time_invariance") + } + return(subset_title) + } + + ylim <<- p_ylim + training_class <<- p_training_class + field_of_interest <<- p_field_of_interest + subset_title <- get_subset_title(training_class=training_class) + backup_title <- sprintf("Middle 25%% PMD for spectra sorted by index%s", + ifelse(is.null(subset_title), + "", + sprintf(" - %s", subset_title))) + #plot_title <<- get_main(main_title=main, backup_title=backup_title, data_collection = data_collection) + plot_title <<- backup_title + + callSuper(...) + }, + plot_image = function(){ + # Support functions for plot_time_invariance() + + # Main code of plot_time_invariance() + data_subset = get_data_subset() + plot_group_spectrum_index_from_subset_boxes(data_subset = data_subset) + abline(h=0, col="blue", lwd=scale) + }, + get_data_subset = function(){ + data_processor <- data_processors[[1]] + data_processor$data_groups$ensure() + return(subset(data_processor$data_groups$df, (group_training_class==training_class))) + }, + get_n = function(){ + return(nrow(get_data_subset())) + }, + plot_group_spectrum_index_from_subset_boxes = function(data_subset = NULL){ + n_plot_groups <- 100 + + field_name_text <- ifelse(field_of_interest=="value", "PMD", "Translated PMD") + new_subset <- data_subset + new_subset$value_of_interest <- new_subset[,field_of_interest] + new_subset <- new_subset[order(new_subset$PMD_FDR_spectrum_index),] + + idxs <- round_to_tolerance(seq(from=1, to=nrow(new_subset), length.out = n_plot_groups+1), 1) + idxs_left <- idxs[-(n_plot_groups+1)] + idxs_right <- idxs[-1] - 1 + idxs_right[n_plot_groups] <- idxs_right[n_plot_groups] + 1 + + new_subset$plot_group <- NA + for (i in 1:n_plot_groups){ + new_subset$plot_group[idxs_left[i]:idxs_right[i]] <- i + } + xleft <- aggregate(PMD_FDR_spectrum_index ~plot_group, data=new_subset, FUN=min) + xright <- aggregate(PMD_FDR_spectrum_index ~plot_group, data=new_subset, FUN=max) + ybottom <- aggregate(value_of_interest~plot_group, data=new_subset, FUN=function(x){quantile(x, probs = 0.5 - (0.25/2))}) + ytop <- aggregate(value_of_interest~plot_group, data=new_subset, FUN=function(x){quantile(x, probs = 0.5 + (0.25/2))}) + boxes <- merge( rename_column(xleft , "PMD_FDR_spectrum_index" , "xleft"), + merge( rename_column(xright , "PMD_FDR_spectrum_index" , "xright"), + merge(rename_column(ybottom, "value_of_interest", "ybottom"), + rename_column(ytop , "value_of_interest", "ytop")))) + + xlab <- "Spectrum Index" + ylab <- sprintf("%s (ppm)", field_name_text ) + if (is.null(ylim)){ + ylim <<- range(new_subset$value_of_interest) + } + if (!include_text){ + xlab="" + ylab="" + } + plot(value_of_interest~PMD_FDR_spectrum_index, data=new_subset, type="n", ylim=ylim, xlab = xlab, ylab=ylab, main=main, xaxt="n", yaxt="n") + with(boxes, rect(xleft = xleft, ybottom = ybottom, xright = xright, ytop = ytop, lwd=scale)) + #points(median_of_group_index~PMD_FDR_spectrum_index, data=data_subset, cex=.5, pch=15) + axis(1, labels=include_text, lwd=scale) + axis(2, labels=include_text, lwd=scale) + box(lwd=scale) #box around plot area + } + +) +############################################################################### +# Class: Plot_Time_Invariance_Alt_Before_and_After +############################################################################### +Plot_Time_Invariance_Alt_Before_and_After = setRefClass("Plot_Time_Invariance_Alt_Before_and_After", + contains = "Plot_Multiple_Images", + fields = list()) +Plot_Time_Invariance_Alt_Before_and_After$methods( + initialize = function(p_data_processors = NULL, + p_include_text=TRUE, + p_include_main=FALSE, + p_ylim = c(-4,4), ...){ + plot_object1 <- Plot_Time_Invariance_Alt$new(p_data_processors = p_data_processors, + p_include_text=p_include_text, + p_include_main=p_include_main, + p_training_class = "good_testing", + p_field_of_interest = "value", + p_ylim = p_ylim) + + plot_object2 <- Plot_Time_Invariance_Alt$new(p_data_processors = p_data_processors, + p_include_text=p_include_text, + p_include_main=p_include_main, + p_training_class = "good_testing", + p_field_of_interest = "value_norm", + p_ylim = p_ylim) + + callSuper(p_n_images_wide=1, + p_n_images_tall=2, + p_include_text=p_include_text, + p_include_main=p_include_main, + p_image_list = list(plot_object1, plot_object2), ...) + } +) + +############################################################################### +# Class: Plot_Density_PMD_and_Norm_Decoy_by_AA_Length +############################################################################### +Plot_Density_PMD_and_Norm_Decoy_by_AA_Length = setRefClass("Plot_Density_PMD_and_Norm_Decoy_by_AA_Length", + contains = "Plot_Image", + fields = list(show_norm = "logical")) +Plot_Density_PMD_and_Norm_Decoy_by_AA_Length$methods( + initialize = function(p_show_norm=FALSE, ...){ + plot_title <<- "The Decoy Bump: PMD Distribution of Decoy matches by peptide length" + show_norm <<- p_show_norm + callSuper(...) + }, + get_n = function(){ + data_processor <- data_processors[[1]] + data_processor$data_groups$ensure() + data_subset <- subset(data_processor$data_groups$df, (PMD_FDR_decoy == 1)) + return(nrow(data_subset)) + }, + plot_image = function(){ + + # Support functions for plot_density_PMD_and_norm_decoy_by_aa_length() + + add_group_peptide_length_special <- function(){ + data_processor <- data_processors[[1]] + data_processor$data_groups$ensure() + data_groups <- data_processor$data_groups$df # the name data_groups is a data.frame instead of a Data_Object + data_groups <- subset(data_groups, used_to_find_middle == FALSE) + + df_group_definition <- data.frame(stringsAsFactors = FALSE, + list(group_peptide_length_special = c("06-08", "09-10", "11-12", "13-15", "16-20", "21-50"), + min = c( 6 , 9 , 11 , 13 , 16 , 21 ), + max = c( 8 , 10 , 12 , 15 , 20 , 50 ) )) + group_peptide_length_special <- data.frame(list(PMD_FDR_peptide_length = 6:50)) + group_peptide_length_special$min <- with(group_peptide_length_special, sapply(PMD_FDR_peptide_length, FUN = function(i) max(df_group_definition$min[df_group_definition$min <= i]))) + group_peptide_length_special <- merge(group_peptide_length_special, df_group_definition) + + data_groups$group_peptide_length_special <- NULL + new_data_groups <- (merge(data_groups, + group_peptide_length_special[,c("PMD_FDR_peptide_length", + "group_peptide_length_special")])) + return(new_data_groups) + } + get_densities <- function(data_subset = NULL, field_value = NULL, field_group=NULL){ + get_density_from_subset <- function(data_subset=NULL, xlim=NULL){ + + d_group <- with(data_subset , density(value_of_interest, from = xlim[1], to = xlim[2], na.rm=TRUE)) + d_group <- normalize_density(d_group) + + return(d_group) + } + + data_temp <- data_subset + data_temp$value_of_interest <- data_temp[[field_value]] + data_temp$group_of_interest <- data_temp[[field_group]] + + xlim = range(data_temp$value_of_interest, na.rm=TRUE) + + groups <- sort(unique(data_temp$group_of_interest)) + n_groups <- length(groups) + + d_group <- get_density_from_subset( data_subset=data_temp, xlim = xlim ) + densities <- list("All decoys" = d_group) + for (i in 1:n_groups){ + group <- groups[i] + + d_group <- get_density_from_subset( data_subset=subset(data_temp, (group_of_interest == group)), + xlim = xlim ) + densities[[group]] <- d_group + } + + return(densities) + } + get_limits <- function(densities_a = NULL, densities_b = NULL){ + xlim = c() + ylim = c(0) + for (single_density in densities_a){ + xlim=range(c(xlim, single_density$x)) + ylim=range(c(ylim, single_density$y)) + } + for (single_density in densities_b){ + xlim=range(c(xlim, single_density$x)) + ylim=range(c(ylim, single_density$y)) + } + + return(list(xlim=xlim, ylim=ylim)) + } + plot_distributions <- function(data_groups = NULL, xlim=NULL, ylim=NULL, densities = NULL, field_group= NULL, field_value = "value", xlab_modifier = "", var_value= NULL, include_peak_dots=TRUE, dataset_name = NULL, ...){ + data_groups$group_of_interest <- data_groups[[field_group]] + data_groups$value_of_interest <- data_groups[[field_value]] + + # Main body of plot_decoy_distribution_by_field_of_interest() + FIXED_LWD=3 + + groups <- sort(unique(data_groups$group_of_interest)) + n <- length(groups) + + df_leg <- data.frame(stringsAsFactors = FALSE, + list(leg = groups, + col = rainbow_with_fixed_intensity(n = n, goal_intensity_0_1 = 0.4), + lty = rep(1:6, length.out=n), + lwd = rep(FIXED_LWD , n)) ) + + d <- densities[["All decoys"]] + + xlab = sprintf("Precursor Mass Discrepancy%s (ppm)", xlab_modifier) + ylab = "Density" + + if (!include_text){ + xlab="" + ylab="" + } + plot(d, lwd=FIXED_LWD * scale, main=main, xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, xaxt="n", yaxt="n") + + ave_peak <- max(d$y) + max_peak <- 0 + + for (local_group in groups){ + data_subset <- subset(data_groups, group_of_interest == local_group) + data_info <- subset(df_leg , leg == local_group) + col <- data_info$col[1] + lty <- data_info$lty[1] + lwd <- data_info$lwd[1] + if (nrow(data_subset) > 100){ + d <- densities[[local_group]] #density(data_subset[[field_value]]) + lines(d, col=col, lty=lty, lwd=lwd * scale) + peak <- max(d$y) + max_peak <- max(max_peak, peak) + } + } + abline(v=0, h=0, lwd=scale) + leg <- list(title = "Peptide length (aa)", + leg = c("All decoys" , df_leg$leg), + col = c(col2hex("black") , df_leg$col), + lty = c(1 , df_leg$lty), + lwd = c(FIXED_LWD , df_leg$lwd) + ) + if (include_text){ + legend_object = Legend_Object$new(leg, scale) + legend_object$show() + #first_legend = legend(x="topleft", title = "", legend = leg$leg, col = leg$col, lty = leg$lty, lwd = leg$lwd, seg.len=leg$seg.len, box.lwd=leg$box.lwd, x.intersp = leg$x.intersp, y.intersp = leg$y.intersp) + #new_x = first_legend$rect$left + #new_y = first_legend$rect$top + first_legend$rect$h * .07 * (2 - scale) + #legend(x=new_x, y=new_y, title = leg$title, legend = "", cex=1.15, bty="n") + } + + box(lwd=scale) #box around plot area + + } + + # Main body for plot_density_PMD_and_norm_decoy_by_aa_length() + + data_mod <- add_group_peptide_length_special() + data_mod <- subset(data_mod, PMD_FDR_decoy==1) + + densities_a <- get_densities(data_subset = data_mod, field_value = "value" , field_group = "group_peptide_length_special") + densities_b <- get_densities(data_subset = data_mod, field_value = "value_norm", field_group = "group_peptide_length_special") + + data_processor <- data_processors[[1]] + dataset_name <- data_processor$info$collection_name() + + limits <- get_limits(densities_a, densities_b) + xlim <- limits$xlim + ylim <- limits$ylim + + if (show_norm){ + plot_distributions(data_groups = data_mod, densities=densities_b, field_value = "value_norm", xlab_modifier = " - normalized", field_group = "group_peptide_length_special", dataset_name=dataset_name, xlim=xlim, ylim=ylim) + } else { + plot_distributions(data_groups = data_mod, densities=densities_a, field_value = "value" , xlab_modifier = "" , field_group = "group_peptide_length_special", dataset_name=dataset_name, xlim=xlim, ylim=ylim) + } + } + +) + +############################################################################### +# Class: Plot_Bad_CI +############################################################################### +Plot_Bad_CI = setRefClass("Plot_Bad_CI", + contains = "Plot_Image", + fields = list(breaks = "numeric", + ylim = "numeric")) +Plot_Bad_CI$methods( + initialize = function(p_breaks=20, p_ylim=NULL, ...){ + if (is.null(p_ylim)){ + ylim <<- numeric(0) + } else { + ylim <<- p_ylim + } + breaks <<- p_breaks + plot_title <<- "Credible Intervals for proportion within range - bad" + callSuper(...) + }, + data_processor = function(){ + return(data_processors[[1]]) + }, + get_n = function(){ + data_processor()$data_groups$ensure() + return(nrow(subset(data_processor()$data_groups$df, (PMD_FDR_decoy == 1)))) + }, + plot_image = function(){ + data_processor()$data_groups$ensure() + data_groups <- data_processor()$data_groups$df + data_decoy <- subset(data_groups, data_groups$group_training_class == "bad_long") + data_decoy$region <- cut(x = data_decoy$value, breaks = breaks) + table(data_decoy$region) + regions <- unique(data_decoy$region) + + N = nrow(data_decoy) + find_lower_ci_bound <- function(x){ + ci <- credible_interval(length(x), N, precision = 0.001, alpha=0.05) + return(ci[1]) + } + find_upper_ci_bound <- function(x){ + ci <- credible_interval(length(x), N, precision = 0.001, alpha=0.05) + return(ci[2]) + } + xleft <- aggregate(value~region, data=data_decoy, FUN=min) + xright <- aggregate(value~region, data=data_decoy, FUN=max) + ytop <- aggregate(value~region, data=data_decoy, FUN=find_upper_ci_bound) + ybottom <- aggregate(value~region, data=data_decoy, FUN=find_lower_ci_bound) + + xleft <- rename_column(xleft , "value", "xleft" ) + xright <- rename_column(xright , "value", "xright" ) + ytop <- rename_column(ytop , "value", "ytop" ) + ybottom <- rename_column(ybottom, "value", "ybottom") + + boxes <- merge(merge(xleft, xright), merge(ytop, ybottom)) + + + xlab <- "Precursor Mass Discrepancy (ppm)" + ylab <- "Proportion of PSMs\nin subgroup" + xlim=range(data_decoy$value, na.rm = TRUE) + get_ylim(boxes=boxes) + if (!include_text){ + xlab="" + ylab="" + } + + plot(x=c(-10,10), y=c(0,1), type="n", ylim=ylim, xlim=xlim, xlab=xlab, ylab=ylab, main=main, xaxt="n", yaxt="n") + + with(boxes, rect(xleft=xleft, xright=xright, ytop=ytop, ybottom=ybottom, lwd=scale)) + + abline(h=1/breaks, col="blue", lwd=scale) + }, + get_ylim = function(boxes=NULL){ + is_valid_range <- function(r=NULL){ + return(length(r) == 2) + } + if (! is_valid_range(ylim)){ + ylim <<- range(c(0,boxes$ytop, boxes$ybottom)) + } + } + +) +############################################################################### +# Class: Plot_Selective_Loss +############################################################################### +Plot_Selective_Loss = setRefClass("Plot_Selective_Loss", + contains = "Plot_Image", + fields = list()) +Plot_Selective_Loss$methods( + initialize = function( ...){ + plot_title <<- "PMD-FDR Selectively removes Bad Hits" + callSuper(...) + }, + data_processor = function(){ + return(data_processors[[1]]) + }, + get_n = function(){ + data_processor()$i_fdr$ensure() + data_subset <- data_processor()$i_fdr$df + return(nrow(data_subset)) + }, + plot_image = function(){ + # Support functions for plot_selective_loss() + + samples_lost_by_threshold <- function(updated_i_fdr=NULL, score_threshold=NULL){ + data_subset <- subset(updated_i_fdr, PMD_FDR_input_score >= score_threshold) + tbl <- with(updated_i_fdr, + table(PMD_FDR_input_score >= score_threshold, + new_confidence < score_threshold, + group_decoy_proteins)) + df <- data.frame(tbl) + df_n <- aggregate(Freq~group_decoy_proteins+Var1, data=df, FUN=sum) + df_n <- rename_column(df_n, name_before = "Freq", "n") + df <- merge(df, df_n) + df$rate_of_loss <- with(df, Freq/n) + df <- subset(df, (Var1==TRUE) & (Var2==TRUE)) + df <- df[,c("group_decoy_proteins", "rate_of_loss", "n", "Freq")] + if (nrow(df) > 0){ + df$score_threshold <- score_threshold + } + return(df) + } + get_loss_record <- function(updated_i_fdr=NULL, score_thresholds=NULL){ + df=data.frame() + for (score_threshold in score_thresholds){ + df_new_loss <- samples_lost_by_threshold(updated_i_fdr, score_threshold) + df <- rbind(df, df_new_loss) + } + return(df) + } + + # Main code for plot_selective_loss() + + updated_i_fdr <- data_processor()$i_fdr$df + updated_i_fdr$new_confidence <- with(updated_i_fdr, 100 * (1-i_fdr)) #ifelse((1-i_fdr) < (PMD_FDR_input_score / 100), (1-i_fdr), (PMD_FDR_input_score/100))) + loss_record <- get_loss_record(updated_i_fdr=updated_i_fdr, score_thresholds = 1:100) + xlim <- with(loss_record, range(score_threshold)) + ylim <- c(0,1) + xlab <- "Fixed Confidence threshold (PeptideShaker score)" + ylab <- "Rate of PSM disqualification from PMD-FDR" + lwd <- 4 + plot(x=xlim, y=ylim, type="n", main=main, xlab=xlab, ylab=ylab) + + groups <- sort(unique(loss_record$group_decoy_proteins)) + n_g <- length(groups) + + cols <- rainbow_with_fixed_intensity(n=n_g, goal_intensity_0_1 = 0.5, alpha = 1) + ltys <- rep(1:6, length.out=n_g) + + leg <- list(leg=groups, col=cols, lty=ltys, lwd=lwd, title="Species/Category") + + for (i in 1:n_g){ + lines(rate_of_loss~score_threshold, data=subset(loss_record, group_decoy_proteins==leg$leg[i]), col=leg$col[i], lwd=leg$lwd * scale, lty=leg$lty[i]) + } + abline(h=0, v=100, lwd=scale) + abline(h=c(0.1, 0.8), col="gray", lwd=scale) + + #leg = list(leg=group, col=col, lty=lty, lwd=lwd) + #with(leg, legend(x = "topleft", legend = group, col = col, lty = lty, lwd = lwd, seg.len = seg.len)) + legend_object <- Legend_Object$new(leg, scale) + legend_object$show() + } + +) +############################################################################### +# Class: Plot_Selective_Loss_for_TOC +############################################################################### +Plot_Selective_Loss_for_TOC = setRefClass("Plot_Selective_Loss_for_TOC", + contains = "Plot_Image", + fields = list(xlab="character", + ylab="character", + title_x="numeric", + title_y="numeric", + legend_border="logical", + legend_x = "numeric", + legend_y = "numeric", + legend_title="character", + legend_location = "character", + name_contaminant = "character", + name_decoy = "character", + name_human = "character", + name_pyro = "character")) +Plot_Selective_Loss_for_TOC$methods( + initialize = function( ...){ + plot_title <<- "PMD-FDR selectively removes bad hits" + callSuper(...) + xlab <<- "Confidence threshold (PeptideShaker)" + ylab <<- "PMD Disqualifiction Rate" + legend_border <<- FALSE + #legend_title <<- "Species/Category" + title_x <<- 50 + title_y <<- 0.9 + legend_x <<- 10 + legend_y <<- 0.75 + name_contaminant <<- "signal - contaminant" + name_decoy <<- "decoy - reversed" + name_human <<- "decoy - human" + name_pyro <<- "signal - pyrococcus" + }, + data_processor = function(){ + return(data_processors[[1]]) + }, + get_n = function(){ + data_processor()$i_fdr$ensure() + data_subset <- data_processor()$i_fdr$df + return(nrow(data_subset)) + }, + plot_image = function(){ + # Support functions for plot_selective_loss() + + samples_lost_by_threshold <- function(updated_i_fdr=NULL, score_threshold=NULL){ + data_subset <- subset(updated_i_fdr, PMD_FDR_input_score >= score_threshold) + tbl <- with(updated_i_fdr, + table(PMD_FDR_input_score >= score_threshold, + new_confidence < score_threshold, + group_decoy_proteins)) + df <- data.frame(tbl) + df_n <- aggregate(Freq~group_decoy_proteins+Var1, data=df, FUN=sum) + df_n <- rename_column(df_n, name_before = "Freq", "n") + df <- merge(df, df_n) + df$rate_of_loss <- with(df, Freq/n) + df <- subset(df, (Var1==TRUE) & (Var2==TRUE)) + df <- df[,c("group_decoy_proteins", "rate_of_loss", "n", "Freq")] + if (nrow(df) > 0){ + df$score_threshold <- score_threshold + } + return(df) + } + get_loss_record <- function(updated_i_fdr=NULL, score_thresholds=NULL){ + df=data.frame() + for (score_threshold in score_thresholds){ + df_new_loss <- samples_lost_by_threshold(updated_i_fdr, score_threshold) + df <- rbind(df, df_new_loss) + } + return(df) + } + convert_groups <- function(groups=NULL){ + new_groups <- groups + new_groups <- gsub(pattern = "contaminant", replacement = name_contaminant, x = new_groups) + new_groups <- gsub(pattern = "decoy" , replacement = name_decoy , x = new_groups) + new_groups <- gsub(pattern = "human" , replacement = name_human , x = new_groups) + new_groups <- gsub(pattern = "pyrococcus" , replacement = name_pyro , x = new_groups) + + return(new_groups) + } + + # Main code for plot_selective_loss() + + updated_i_fdr <- data_processor()$i_fdr$df + updated_i_fdr$new_confidence <- with(updated_i_fdr, 100 * (1-i_fdr)) #ifelse((1-i_fdr) < (PMD_FDR_input_score / 100), (1-i_fdr), (PMD_FDR_input_score/100))) + loss_record <- get_loss_record(updated_i_fdr=updated_i_fdr, score_thresholds = 1:100) + xlim <- with(loss_record, range(score_threshold)) + ylim <- c(0,1) + #xlab <- "Fixed Confidence threshold (PeptideShaker score)" + #ylab <- "Rate of PSM disqualification from PMD-FDR" + lwd <- 4 + plot(x=xlim, y=ylim, type="n", main=main, xlab=xlab, ylab=ylab) + + groups <- sort(unique(loss_record$group_decoy_proteins)) + n_g <- length(groups) + + cols <- rainbow_with_fixed_intensity(n=n_g, goal_intensity_0_1 = 0.5, alpha = 1) + ltys <- rep(1:6, length.out=n_g) + bty <- ifelse(legend_border, "o", "n") + + leg <- list(leg=convert_groups(groups), var_name=groups, col=cols, lty=ltys, lwd=lwd, bty=bty, x=legend_x, y=legend_y) + #leg <- list(leg=groups, col=cols, lty=ltys, lwd=lwd, bty=bty, x=legend_x, y=legend_y) + + for (i in 1:n_g){ + lines(rate_of_loss~score_threshold, data=subset(loss_record, group_decoy_proteins==leg$var_name[i]), col=leg$col[i], lwd=leg$lwd * scale, lty=leg$lty[i]) + } + abline(h=0, v=100, lwd=scale) + abline(h=c(0.1, 0.8), col="gray", lwd=scale) + + #leg = list(leg=group, col=col, lty=lty, lwd=lwd) + #with(leg, legend(x = "topleft", legend = group, col = col, lty = lty, lwd = lwd, seg.len = seg.len)) + legend_object <- Legend_Object$new(leg, scale) + legend_object$show() + text(x=title_x, y=title_y, labels = plot_title) + } + +) +############################################################################### +# Class: Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR +############################################################################### +Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR = setRefClass("Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR", + contains = "Plot_Image", + fields = list()) +Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR$methods( + initialize = function( ...){ + plot_title <<- "Precursor Mass Discrepance i-FDR for 1% Target-Decoy FDR PSMs" + callSuper(...) + }, + data_processor = function(){ + return(data_processors[[1]]) + }, + get_n = function(){ + data_processor()$i_fdr$ensure() + if (one_percent_calculation_exists()){ + i_fdr <- data_processor()$i_fdr$df + data_subset <- subset(i_fdr, is_in_1percent==TRUE) + n <- nrow(data_subset) + } else { + n <- 0 + } + + return (n) + }, + plot_image = function(){ + if (one_percent_calculation_exists()){ + i_fdr <- get_modified_fdr() + report_good_discrepancies(i_fdr) + data_TD_good <- get_data_TD_good(i_fdr) + mean_results <- get_mean_results(data_TD_good) + boxes <- mean_results + boxes <- rename_columns(df = boxes, + names_before = c("min_conf", "max_conf", "lower" , "upper"), + names_after = c("xleft" , "xright" , "ybottom", "ytop" )) + xlim <- range(boxes[,c("xleft", "xright")]) + ylim <- range(boxes[,c("ybottom", "ytop")]) + + #head(mean_results) + + xlab = "Confidence Score (Peptide Shaker)" + ylab = "Mean PMD i-FDR" + + if (!include_text){ + xlab="" + ylab="" + } + + plot(mean_i_fdr~mean_conf, data=mean_results, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, main=main, xaxt="n", yaxt="n", cex=scale, lwd=scale) + with(boxes, rect(xleft = xleft, ybottom = ybottom, xright = xright, ytop = ytop, lwd=scale)) + abline(b=-1, a=100, lwd=4*scale, col="dark gray") + abline(h=0, v=100, lwd=1*scale) + + } else { + stop(sprintf("Dataset '%s' does not include 1%% FDR data", data_processor()$info$collection_name())) + } + }, + get_mean_results = function(data_TD_good = NULL){ + mean_i_fdr <- aggregate(i_fdr~conf_group, data=data_TD_good, FUN=mean) + mean_i_fdr <- rename_column(mean_i_fdr, "i_fdr", "mean_i_fdr") + sd_i_fdr <- aggregate(i_fdr~conf_group, data=data_TD_good, FUN=sd) + sd_i_fdr <- rename_column(sd_i_fdr, "i_fdr", "sd_i_fdr") + n_i_fdr <- aggregate(i_fdr~conf_group, data=data_TD_good, FUN=length) + n_i_fdr <- rename_column(n_i_fdr, "i_fdr", "n") + mean_conf <- aggregate(PMD_FDR_input_score~conf_group, data=data_TD_good, FUN=mean) + mean_conf <- rename_column(mean_conf, "PMD_FDR_input_score", "mean_conf") + min_conf <- aggregate(PMD_FDR_input_score~conf_group, data=data_TD_good, FUN=min) + min_conf <- rename_column(min_conf, "PMD_FDR_input_score", "min_conf") + max_conf <- aggregate(PMD_FDR_input_score~conf_group, data=data_TD_good, FUN=max) + max_conf <- rename_column(max_conf, "PMD_FDR_input_score", "max_conf") + + mean_results <- mean_i_fdr + mean_results <- merge(mean_results, sd_i_fdr) + mean_results <- merge(mean_results, n_i_fdr) + mean_results <- merge(mean_results, mean_conf) + mean_results <- merge(mean_results, min_conf) + mean_results <- merge(mean_results, max_conf) + + mean_results$se <- with(mean_results, sd_i_fdr / sqrt(n - 1)) + mean_results$lower <- with(mean_results, mean_i_fdr - 2*se) + mean_results$upper <- with(mean_results, mean_i_fdr + 2*se) + return(mean_results) + }, + get_data_TD_good = function(i_fdr=NULL){ + data_TD_good <- subset(i_fdr, TD_good==TRUE) + data_TD_good <- data_TD_good[order(data_TD_good$PMD_FDR_input_score),] + n <- nrow(data_TD_good) + data_TD_good$conf_group <- cut(1:n, breaks=floor(n/100)) + data_TD_good$i_fdr <- 100 * data_TD_good$i_fdr + return(data_TD_good) + }, + get_modified_fdr = function(){ + i_fdr <- data_processor()$i_fdr$df + i_fdr$PMD_good <- i_fdr$i_fdr < 0.01 + i_fdr$TD_good <- i_fdr$is_in_1percent == TRUE + i_fdr$conf_good <- i_fdr$PMD_FDR_input_score == 100 + return(i_fdr) + }, + one_percent_calculation_exists = function(){ + data_processor()$raw_1_percent$ensure() + return(data_processor()$raw_1_percent$exists())# "is_in_1percent" %in% colnames(data_processor()$i_fdr)) + }, + report_good_discrepancies = function(i_fdr=NULL){ + with(subset(i_fdr, (PMD_FDR_decoy == 0)), print(table(TD_good, PMD_good))) + with(subset(i_fdr, (PMD_FDR_input_score==100) & (PMD_FDR_decoy == 0)), print(table(TD_good, PMD_good))) + with(subset(i_fdr, (PMD_FDR_input_score>= 99) & (PMD_FDR_input_score<100) & (PMD_FDR_decoy == 0)), print(table(TD_good, PMD_good))) + with(subset(i_fdr, (PMD_FDR_input_score>= 99) & (PMD_FDR_input_score<100) & (PMD_FDR_decoy == 0)), print(table(TD_good, PMD_good))) + with(subset(i_fdr, (PMD_FDR_input_score>= 90) & (PMD_FDR_input_score< 99) & (PMD_FDR_decoy == 0)), print(table(TD_good, PMD_good))) + } + +) + +############################################################################### +# Class: Plot_Density_PMD_by_Score +############################################################################### +Plot_Density_PMD_by_Score = setRefClass("Plot_Density_PMD_by_Score", + contains = "Plot_Image", + fields = list(show_norm = "logical")) +Plot_Density_PMD_by_Score$methods( + initialize = function(p_show_norm=FALSE, ...){ + show_norm <<- p_show_norm + plot_title <<- "PMD distribution, by Confidence ranges" + callSuper(...) + + }, + data_processor = function(){ + return(data_processors[[1]]) + }, + get_n = function(){ + return(nrow(data_processor()$data_groups$df)) + #data_subset <- data_collection$i_fdr + #return(nrow(data_subset)) + }, + get_modified_data_groups = function(var_value = NULL){ + # Note: Filters out used_to_find_middle + # Note: Creates "value_of_interest" field + # Note: Remakes "group_decoy_input_score" field + data_new <- data_processor()$data_groups$df + data_new <- subset(data_new, !used_to_find_middle ) + data_new$value_of_interest <- data_new[, var_value] + + cutoff_points <- c(100, 100, 95, 80, 50, 0, 0) + n <- length(cutoff_points) + uppers <- cutoff_points[-n] + lowers <- cutoff_points[-1] + + for (i in 1:(n-1)){ + upper <- uppers[i] + lower <- lowers[i] + + + if (lower==upper){ + idx <- with(data_new, which( (PMD_FDR_input_score == upper) & (PMD_FDR_decoy == 0))) + cat_name <- sprintf("%d", upper) + } else { + idx <- with(data_new, which((PMD_FDR_input_score >= lower) & (PMD_FDR_input_score < upper) & (PMD_FDR_decoy == 0))) + cat_name <- sprintf("%02d - %2d", lower, upper) + } + data_new$group_decoy_input_score[idx] <- cat_name + } + + return(data_new) + }, + plot_image = function(){ + + # Support functions for plot_density_PMD_by_score() + + get_densities <- function(data_subset = NULL, var_value = NULL){ + + # Support functions for get_densities() + + # New version + + # Main body of get_densities() + + data_subset <- get_modified_data_groups(var_value=var_value) + #data_subset$value_of_interest <- data_subset[,var_value] + from <- min(data_subset$value_of_interest, na.rm=TRUE) + to <- max(data_subset$value_of_interest, na.rm=TRUE) + xlim = range(data_subset$value_of_interest, na.rm=TRUE) + + groups <- sort(unique(data_subset$group_decoy_input_score), decreasing = TRUE) + n_groups <- length(groups) + + densities <- list(var_value = var_value, groups=groups) + + for (i in 1:n_groups){ + group <- groups[i] + + data_group_single <- subset(data_subset, (group_decoy_input_score == group)) + d_group <- with(data_group_single , density(value_of_interest, from = from, to = to, na.rm = TRUE)) + d_group <- normalize_density(d_group) + + densities[[group]] <- d_group + } + + return(densities) + + } + get_xlim <- function(densities_a = NULL, densities_b = NULL){ + groups <- densities_a$groups + + xlim <- 0 + for (group in groups){ + xlim <- range(xlim, densities_a[[group]]$x, densities_b[[group]]$x) + } + + return(xlim) + + } + get_ylim <- function(densities_a = NULL, densities_b = NULL){ + groups <- densities_a$groups + + ylim <- 0 + for (group in groups){ + ylim <- range(ylim, densities_a[[group]]$y, densities_b[[group]]$y) + } + + return(ylim) + + } + plot_distributions <- function(densities = NULL, var_value= NULL,include_peak_dots=TRUE, xlab_modifier="", xlim=NULL, ylim=NULL, ...){ + data_groups <- get_modified_data_groups(var_value=var_value) + groups <- sort(unique(data_groups$group_decoy_input_score)) + n_groups <- length(groups) + + groups_std <- setdiff(groups, c("100", "decoy", "0") ) + groups_std <- sort(groups_std, decreasing = TRUE) + groups_std <- c(groups_std, "0") + n_std <- length(groups_std) + cols <- rainbow_with_fixed_intensity(n = n_std, goal_intensity_0_1 = 0.5, alpha=0.5) + + leg <- list(group = c("100" , groups_std , "decoy" ), + leg = c("100" , groups_std , "All Decoys" ), + col = c(col2hex("black") , cols , col2hex("purple", col_alpha = 0.5)), + lwd = c(4 , rep(2, n_std), 4 ), + title = "Confidence Score") + + xlab = sprintf("Precursor Mass Discrepancy%s (ppm)", + xlab_modifier) + ylab = "Density" + if (!include_text){ + xlab="" + ylab="" + } + + + plot( x=xlim, y=ylim, col=leg$col[1], lwd=leg$lwd[1] * scale, main=main, xlab=xlab, ylab=ylab, xaxt="n", yaxt="n", cex=scale, type="n")#, lty=leg.lty[1], ...) + + include_peak_dots = FALSE # BUGBUG: Disabling this for now. Need to move this to class parameter + + for (i in 1:length(leg$group)){ + group <- leg$group[i] + d <- densities[[group]] + lines(d, col=leg$col[i], lwd=leg$lwd[i] * scale) + if (include_peak_dots){ + x=d$x[which.max(d$y)] + y=max(d$y) + points(x=c(x,x), y=c(0,y), pch=19, col=leg$col[i], cex=scale) + } + } + + abline(v=0, lwd=scale) + + if (include_text){ + legend_object = Legend_Object$new(leg, scale) + legend_object$show() + } + + } + + # Main body for plot_density_PMD_by_score() + + data_groups <- data_processor()$data_groups$df + + data_subset_a <- subset(data_groups , used_to_find_middle == FALSE) + data_subset_b <- subset(data_subset_a, PMD_FDR_peptide_length > 11) + + densities_a <- get_densities(data_subset = data_subset_a, var_value = "value") + densities_b <- get_densities(data_subset = data_subset_b, var_value = "value_norm") + + xlim=get_xlim(densities_a, densities_b) + ylim=get_ylim(densities_a, densities_b) + + dataset_name <- data_processor()$info$collection_name() + if (show_norm){ + plot_distributions(densities=densities_b, var_value = "value_norm", xlab_modifier = " - normalized", xlim=xlim, ylim=ylim) + } else { + plot_distributions(densities=densities_a, var_value = "value" , xlab_modifier = "" , xlim=xlim, ylim=ylim) + } + } +) +############################################################################### +# Class: Plot_Dataset_Description +############################################################################### +Plot_Dataset_Description = setRefClass("Plot_Dataset_Description", + contains = "Plot_Multiple_Images", + fields = list(ylim_time_invariance = "numeric")) +Plot_Dataset_Description$methods( + initialize = function(p_data_processors = NULL, + p_include_text=TRUE, + p_include_main=FALSE, + p_ylim_time_invariance = c(-4,4), ...){ + plot_object_r1_c1 <- Plot_Time_Invariance_Alt$new(p_data_processors=p_data_processors, + p_include_text=p_include_text, + p_include_main=p_include_main, + p_training_class = "good_testing", + p_field_of_interest = "value", + p_ylim = p_ylim_time_invariance) + + plot_object_r1_c2 <- Plot_Time_Invariance_Alt$new(p_data_processors=p_data_processors, + p_include_text=p_include_text, + p_include_main=p_include_main, + p_training_class = "good_testing", + p_field_of_interest = "value_norm", + p_ylim = p_ylim_time_invariance) + plot_object_r2_c1 <- Plot_Density_PMD_by_Score$new(p_data_processors=p_data_processors, + p_show_norm=FALSE, + p_include_text=p_include_text, + p_include_main=p_include_main) + + plot_object_r2_c2 <- Plot_Density_PMD_and_Norm_Decoy_by_AA_Length$new(p_data_processors=p_data_processors, + p_show_norm=FALSE, + p_include_text=p_include_text, + p_include_main=p_include_main) + + plot_object_r3_c1 <- Plot_Density_PMD_by_Score$new(p_data_processors=p_data_processors, + p_show_norm=TRUE, + p_include_text=p_include_text, + p_include_main=p_include_main) + plot_object_r3_c2 <- Plot_Density_PMD_and_Norm_Decoy_by_AA_Length$new(p_data_processors=p_data_processors, + p_show_norm=TRUE, + p_include_text=p_include_text, + p_include_main=p_include_main) + callSuper(p_n_images_wide=2, + p_n_images_tall=3, + p_include_text=p_include_text, + p_include_main=p_include_main, + p_image_list = list(plot_object_r1_c1, plot_object_r1_c2, + plot_object_r2_c1, plot_object_r2_c2, + plot_object_r3_c1, plot_object_r3_c2), ...) + + } +) +############################################################################### +# Class: Plots_for_Paper +############################################################################### +Plots_for_Paper <- setRefClass("Plots_for_Paper", fields =list(data_processor_a = "Data_Processor", + data_processor_b = "Data_Processor", + data_processor_c = "Data_Processor", + data_processor_d = "Data_Processor", + include_text = "logical", + include_main = "logical", + mai = "numeric")) +Plots_for_Paper$methods( + initialize = function(){ + data_processor_a <<- Data_Processor$new(p_info = Data_Object_Info_737_two_step$new()) + data_processor_b <<- Data_Processor$new(p_info = Data_Object_Info_737_combined$new()) + data_processor_c <<- Data_Processor$new(p_info = Data_Object_Pyrococcus_tr $new()) + data_processor_d <<- Data_Processor$new(p_info = Data_Object_Mouse_Mutations $new()) + }, + create_plots_for_paper = function(include_main=TRUE, finalize=TRUE){ + print_table_4_data() + print_figure_2_data() + plot_figure_D(p_scale=ifelse(finalize, 2, 1), p_include_main = include_main) + plot_figure_C(p_scale=ifelse(finalize, 2, 1), p_include_main = include_main) + plot_figure_B(p_scale=ifelse(finalize, 2, 1), p_include_main = include_main) + plot_figure_A(p_scale=ifelse(finalize, 2, 1), p_include_main = include_main) + plot_figure_8(p_scale=ifelse(finalize, 2, 1), p_include_main = include_main) + plot_figure_7(p_scale=ifelse(finalize, 2, 1), p_include_main = include_main) + plot_figure_6(p_scale=ifelse(finalize, 4, 1), p_include_main = include_main) + plot_figure_5(p_scale=ifelse(finalize, 2, 1), p_include_main = include_main) + plot_figure_4(p_scale=ifelse(finalize, 2, 1), p_include_main = include_main) + plot_figure_3(p_scale=ifelse(finalize, 4, 1), p_include_main = include_main) + }, + print_figure_2_data = function(){ + print(create_stats_for_grouping_figure(list(data_processor_a))) + }, + print_table_4_data = function(){ + report_ranges_of_comparisons(processors = list(data_processor_a)) + report_ranges_of_comparisons(processors = list(data_processor_c)) + }, + plot_figure_3 = function(p_scale=NULL, p_include_main=NULL){ + plot_object <- Plot_Compare_PMD_and_Norm_Density$new(p_data_processor = list(data_processor_a), + p_show_norm = FALSE, + p_include_text = TRUE, + p_include_main = p_include_main, + p_display_n_psms = FALSE) + plot_object$plot_image_in_small_window(p_scale=p_scale) + }, + plot_figure_4 = function(p_scale=NULL, p_include_main=NULL){ + plot_object <- Plot_Time_Invariance_Alt_Before_and_After$new(p_data_processors = list(data_processor_a), + p_include_text=TRUE, + p_include_main=p_include_main, + p_ylim = c(-4,4)) + plot_object$plot_image_in_large_window(window_height=4, p_scale=p_scale) + + }, + plot_figure_5 = function(p_scale=NULL, p_include_main=NULL){ + plot_object <- Plot_Density_PMD_and_Norm_Decoy_by_AA_Length$new(p_data_processors = list(data_processor_a), + p_include_text=TRUE, + p_include_main=p_include_main) + plot_object$plot_image_in_large_window(window_height=4, p_scale=p_scale) + }, + plot_figure_6 = function(p_scale=NULL, p_include_main=NULL){ + plot_object <- Plot_Bad_CI$new(p_data_processors = list(data_processor_a), + p_include_text=TRUE, + p_include_main=p_include_main) + plot_object$plot_image_in_small_window(p_scale=p_scale) + }, + plot_figure_7 = function(p_scale=NULL, p_include_main=NULL){ + plot_object <- Plot_Compare_iFDR_Confidence_1_Percent_TD_FDR$new(p_data_processors = list(data_processor_a), + p_include_text=TRUE, + p_include_main=p_include_main) + plot_object$plot_image_in_large_window(window_height=4, p_scale=p_scale) + }, + plot_figure_8 = function(p_scale=NULL, p_include_main=NULL){ + plot_object <- Plot_Selective_Loss$new(p_data_processors = list(data_processor_c), + p_include_text=TRUE, + p_include_main=p_include_main) + plot_object$plot_image_in_large_window(window_height=4, p_scale=p_scale) + }, + plot_figure_A = function(p_scale=NULL, p_include_main=NULL){ + plot_object <- Plot_Dataset_Description$new(p_data_processors=list(data_processor_a), + p_include_text=TRUE, + p_include_main=p_include_main, + p_ylim_time_invariance=c(-4,4) ) + plot_object$plot_image_in_large_window(window_height=4, p_scale=p_scale) + }, + plot_figure_B = function(p_scale=NULL, p_include_main=NULL){ + plot_object <- Plot_Dataset_Description$new(p_data_processors=list(data_processor_b), + p_include_text=TRUE, + p_include_main=p_include_main, + p_ylim_time_invariance=c(-4,4) ) + plot_object$plot_image_in_large_window(window_height=4, p_scale=p_scale) + }, + plot_figure_C = function(p_scale=NULL, p_include_main=NULL){ + plot_object <- Plot_Dataset_Description$new(p_data_processors=list(data_processor_c), + p_include_text=TRUE, + p_include_main=p_include_main, + p_ylim_time_invariance=c(-4,4) ) + plot_object$plot_image_in_large_window(window_height=4, p_scale=p_scale) + }, + plot_figure_D = function(p_scale=NULL, p_include_main=NULL){ + plot_object <- Plot_Dataset_Description$new(p_data_processors=list(data_processor_d), + p_include_text=TRUE, + p_include_main=p_include_main, + p_ylim_time_invariance=c(-4,4) ) + plot_object$plot_image_in_large_window(window_height=4, p_scale=p_scale) + }, + create_stats_for_grouping_figure = function(processors=NULL){ + processor <- processors[[1]] + processor$i_fdr$ensure() + aug_i_fdr <- processor$i_fdr$df + aug_i_fdr$group_good_bad_other <- gsub("_.*", "", aug_i_fdr$group_training_class) + aug_i_fdr$group_null <- "all" + table(aug_i_fdr$group_training_class) + table(aug_i_fdr$group_good_bad_other) + table(aug_i_fdr$group_null) + + create_agg_fdr_stats <- function(i_fdr=NULL, grouping_var_name = NULL){ + formula_fdr <- as.formula(sprintf("%s~%s", "i_fdr", grouping_var_name)) + formula_len <- as.formula(sprintf("%s~%s", "PMD_FDR_peptide_length", grouping_var_name)) + agg_fdr <- aggregate(formula=formula_fdr, data=i_fdr, FUN=mean) + agg_n <- aggregate(formula=formula_fdr, data=i_fdr, FUN=length) + agg_len <- aggregate(formula=formula_len, data=i_fdr, FUN=mean) + agg_fdr <- rename_columns(df = agg_fdr, + names_before = c(grouping_var_name, "i_fdr"), + names_after = c("group" , "fdr")) + agg_n <- rename_columns(df = agg_n, + names_before = c(grouping_var_name, "i_fdr"), + names_after = c("group" , "n")) + agg_len <- rename_columns(df = agg_len, + names_before = c(grouping_var_name), + names_after = c("group" )) + agg <- merge(agg_fdr, agg_n) + agg <- merge(agg , agg_len) + + return(agg) + } + + agg_detail <- create_agg_fdr_stats(i_fdr = aug_i_fdr, grouping_var_name = "group_training_class") + agg_grouped <- create_agg_fdr_stats(i_fdr = aug_i_fdr, grouping_var_name = "group_good_bad_other") + agg_all <- create_agg_fdr_stats(i_fdr = aug_i_fdr, grouping_var_name = "group_null") + + agg <- rbind(agg_detail, agg_grouped) + agg <- rbind(agg, agg_all) + + agg$fdr <- ifelse(agg$fdr < 1, agg$fdr, 1) + + linear_combo <- function(x=NULL, a0=NULL, a1=NULL){ + result <- (a0 * (1-x) + a1 * x) + return(result) + } + + agg$r <- linear_combo(agg$fdr, a0=197, a1= 47) + agg$g <- linear_combo(agg$fdr, a0= 90, a1= 85) + agg$b <- linear_combo(agg$fdr, a0= 17, a1=151) + + return(agg) + }, + report_ranges_of_comparisons = function(processors=NULL){ + report_comparison_of_Confidence_and_PMD = function (i_fdr = NULL, min_conf=NULL, max_conf=NULL, include_max=FALSE){ + report_PMD_confidence_comparison_from_subset = function(data_subset=NULL, group_name=NULL){ + print(group_name) + print(sprintf(" Number of PSMs: %d", nrow(data_subset))) + mean_confidence <- mean(data_subset$PMD_FDR_input_score) + print(sprintf(" Mean Confidence Score: %3.1f", mean_confidence)) + print(sprintf(" PeptideShaker g-FDR: %3.1f", 100-mean_confidence)) + mean_PMD_FDR = mean(data_subset$i_fdr) + print(sprintf(" PMD g-FDR: %3.1f", 100*mean_PMD_FDR)) + #col <- col2hex("black", 0.2) + #plot(data_subset$i_fdr, pch=".", cex=2, col=col) + #abline(h=0) + } + + if (is.null(max_conf)) { + data_subset <- subset(i_fdr, PMD_FDR_input_score == min_conf) + group_name <- sprintf("Group %d", min_conf) + } else if (include_max){ + data_subset <- subset(i_fdr, (PMD_FDR_input_score >= min_conf) & (PMD_FDR_input_score <= max_conf)) + group_name <- sprintf("Group %d through %d", min_conf, max_conf) + } else { + data_subset <- subset(i_fdr, (PMD_FDR_input_score >= min_conf) & (PMD_FDR_input_score < max_conf)) + group_name <- sprintf("Group %d to %d", min_conf, max_conf) + } + + report_PMD_confidence_comparison_from_subset(data_subset=data_subset, group_name=group_name) + } + + processor <- processors[[1]] + processor$i_fdr$ensure() + i_fdr <- processor$i_fdr$df + info <- processor$info + print(sprintf("PMD and Confidence comparison for -- %s", info$collection_name())) + report_comparison_of_Confidence_and_PMD(i_fdr = i_fdr, min_conf=100, max_conf=NULL, include_max=TRUE) + report_comparison_of_Confidence_and_PMD(i_fdr = i_fdr, min_conf= 99, max_conf=100 , include_max=FALSE) + report_comparison_of_Confidence_and_PMD(i_fdr = i_fdr, min_conf= 90, max_conf= 99 , include_max=FALSE) + report_comparison_of_Confidence_and_PMD(i_fdr = i_fdr, min_conf= 0, max_conf=100 , include_max=TRUE) + } +) +############################################################################### +# C - 021 - PMD-FDR Wrapper - functions.R # +# # +# Creates the necessary structure to convert the PMD-FDR code into one that # +# can run as a batch file # +# # +############################################################################### +############################################################################### +# Class: ModuleArgParser_PMD_FDR +############################################################################### +ModuleArgParser_PMD_FDR <- setRefClass("ModuleArgParser_PMD_FDR", + contains = c("ArgParser"), + fields =list(args = "character") ) +ModuleArgParser_PMD_FDR$methods( + initialize = function(description = "Computes individual and global FDR using Precursor Mass Discrepancy (PMD-FDR)", ...){ + callSuper(description=description, ...) + local_add_argument("--psm_report" , help="full name and path to the PSM report") + local_add_argument("--psm_report_1_percent", default = "" , help="full name and path to the PSM report for 1% FDR") + local_add_argument("--output_i_fdr" , default = "" , help="full name and path to the i-FDR output file ") + local_add_argument("--output_g_fdr" , default = "" , help="full name and path to the g-FDR output file ") + local_add_argument("--output_densities" , default = "" , help="full name and path to the densities output file ") + #local_add_argument("--score_field_name" , default = "" , help="name of score field (in R format)") + local_add_argument("--input_file_type" , default = "PMD_FDR_input_file", help="type of input file (currently supports: PSM_Report)") + } +) +############################################################################### +# Class: Data_Object_Parser +############################################################################### +Data_Object_Parser <- setRefClass("Data_Object_Parser", + contains = c("Data_Object"), + fields =list(parser = "ModuleArgParser_PMD_FDR", + args = "character", + parsing_results = "list") ) +Data_Object_Parser$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Parser" + }, + verify = function(){ + # Nothing to do here - parser handles verification during load + }, + m_load_data = function(){ + if (length(args) == 0){ + parsing_results <<- parser$parse_arguments(NULL) + } else { + parsing_results <<- parser$parse_arguments(args) + } + + }, + set_args = function(p_args=NULL){ + # This is primarily used for testing. In operation arguments will be passed automatically (through use of commandArgs) + args <<- p_args + set_dirty(TRUE) + } +) +############################################################################### +# Class: Data_Object_Info_Parser +############################################################################### +Data_Object_Info_Parser <- setRefClass("Data_Object_Info_Parser", + contains = c("Data_Object_Info"), + fields =list( + output_i_fdr = "character", + output_g_fdr = "character", + output_densities = "character" + ) ) +Data_Object_Info_Parser$methods( + initialize = function(){ + callSuper() + class_name <<- "Data_Object_Info_Parser" + }, + verify = function(){ + check_field_exists = function(field_name=NULL, check_empty = TRUE){ + field_value <- get_parser()$parsing_results[field_name] + checkTrue(! is.null(field_value), + msg = sprintf("Parameter %s was not passed to PMD_FDR", field_value)) + if (check_empty){ + checkTrue(! is.null(field_value), + msg = sprintf("Parameter %s was not passed to PMD_FDR", field_value)) + } + } + # Check parameters passed in + check_field_exists("junk") + check_field_exists("psm_report") + check_field_exists("psm_report_1_percent", check_empty = FALSE) + check_field_exists("output_i_fdr" , check_empty = FALSE) + check_field_exists("output_g_fdr" , check_empty = FALSE) + check_field_exists("output_densities" , check_empty = FALSE) + #check_field_exists("score_field_name") + check_field_exists("input_file_type") + }, + m_load_data = function(){ + parsing_results <- get_parser()$parsing_results + + data_file_name <<- as.character(parsing_results["psm_report"]) + data_file_name_1_percent_FDR <<- as.character(parsing_results["psm_report_1_percent"]) + data_path_name <<- as.character(parsing_results[""]) + #experiment_name <<- data_file_name + #designation <<- "" + output_i_fdr <<- as.character(parsing_results["output_i_fdr"]) + output_g_fdr <<- as.character(parsing_results["output_g_fdr"]) + output_densities <<- as.character(parsing_results["output_densities"]) + + input_file_type <<- as.character(parsing_results["input_file_type"]) + #score_field_name <<- as.character(parsing_results["score_field_name"]) + }, + set_parser = function(parser){ + parents[["parser"]] <<- parser + }, + get_parser = function(){ + return(verified_element_of_list(parents, "parser", "Data_Object_Info_Parser$parents")) + }, + file_path = function(){ + result <- data_file_name # Now assumes that full path is provided + if (length(result) == 0){ + stop("Unable to validate file path - file name is missing") + } + return(result) + }, + file_path_1_percent_FDR = function(){ + local_file_name <- get_data_file_name_1_percent_FDR() + if (length(local_file_name) == 0){ + result <- "" + } else { + result <- local_file_name # path name is no longer relevant + } + + # Continue even if file name is missing - not all analyses have a 1 percent FDR file; this is managed downstream + + # if (length(result) == 0){ + # stop("Unable to validate file path - one or both of path name and file name (of 1 percent FDR file) are missing") + # } + return(result) + }, + get_data_file_name_1_percent_FDR = function(){ + return(data_file_name_1_percent_FDR) + }, + collection_name = function(){ + result <- "" + return(result) + } + +) +############################################################################### +# Class: Processor_PMD_FDR_for_Galaxy +# Purpose: Wrapper on tools from Project 019 to enable a Galaxy-based interface +############################################################################### +Processor_PMD_FDR_for_Galaxy <- setRefClass("Processor_PMD_FDR_for_Galaxy", + fields = list( + parser = "Data_Object_Parser", + info = "Data_Object_Info_Parser", + raw_data = "Data_Object_Raw_Data", + raw_1_percent = "Data_Object_Raw_1_Percent", + data_converter = "Data_Object_Data_Converter", + data_groups = "Data_Object_Groupings", + densities = "Data_Object_Densities", + alpha = "Data_Object_Alpha", + i_fdr = "Data_Object_Individual_FDR" + )) +Processor_PMD_FDR_for_Galaxy$methods( + initialize = function(){ + # This initialization defines all of the dependencies between the various components + # (Unfortunately, inheriting from Data_Processor leads to issues - I had to reimplement it here with a change to "info") + + # info + info$set_parser(parser) + parser$append_child(info) + + # raw_data + raw_data$set_info(info) + info$append_child(raw_data) + + # raw_1_percent + raw_1_percent$set_info(info) + info$append_child(raw_1_percent) + + # data_converter + data_converter$set_info (info) + data_converter$set_raw_data(raw_data) + info $append_child (data_converter) + raw_data $append_child (data_converter) + + # data_groups + data_groups$set_info (info) + data_groups$set_data_converter(data_converter) + data_groups$set_raw_1_percent (raw_1_percent) + info $append_child (data_groups) + data_converter$append_child (data_groups) + raw_1_percent $append_child (data_groups) + + # densities + densities $set_data_groups(data_groups) + data_groups$append_child (densities) + + # alpha + alpha $set_densities(densities) + densities$append_child (alpha) + + # i_fdr + i_fdr$set_data_groups(data_groups) + i_fdr$set_densities (densities) + i_fdr$set_alpha (alpha) + data_groups $append_child(i_fdr) + densities $append_child(i_fdr) + alpha $append_child(i_fdr) + + }, + compute = function(){ + #i_fdr is currently the lowest level object - it ultimately depends on everything else. + i_fdr$ensure() # All pieces on which i_fdr depends are automatically verified and computed (through their verify() and ensure()) + + save_standard_df(x = densities$df, file_path = info$output_densities) + save_standard_df(x = alpha$df, file_path = info$output_g_fdr) + save_standard_df(x = i_fdr$df, file_path = info$output_i_fdr) + } +) +############################################################################### +# D - 021 - PMD-FDR Main.R # +# # +# File Description: Contains the base code that interprets the parameters # +# and computes i-FDR and g-FDR for a mass spec project # +# # +############################################################################### +argv <- commandArgs(TRUE) # Saves the parameters (command code) + +processor <- Processor_PMD_FDR_for_Galaxy$new() +processor$parser$set_args(argv) +processor$compute() +