Mercurial > repos > eschen42 > w4mcorcov
diff w4mcorcov_calc.R @ 6:7bd523ca1f9a draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit cafda5095a79ce2376325b57337302f95137195d
author | eschen42 |
---|---|
date | Wed, 18 Jul 2018 12:35:55 -0400 |
parents | 50f60f94c034 |
children | 066b1f409e9f |
line wrap: on
line diff
--- a/w4mcorcov_calc.R Fri Mar 30 14:59:19 2018 -0400 +++ b/w4mcorcov_calc.R Wed Jul 18 12:35:55 2018 -0400 @@ -7,9 +7,23 @@ #### OPLS-DA algoC <- "nipals" -do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_progress = print, x_env, x_crossval_i) { +do_detail_plot <- function( + x_dataMatrix +, x_predictor +, x_is_match +, x_algorithm +, x_prefix +, x_show_labels +, x_progress = print +, x_env +, x_crossval_i +) { off <- function(x) if (x_show_labels == "0") 0 else x - if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) { + if ( x_is_match + && ncol(x_dataMatrix) > 0 + && length(unique(x_predictor))> 1 + && x_crossval_i < nrow(x_dataMatrix) + ) { my_oplsda <- opls( x = x_dataMatrix , y = x_predictor @@ -19,21 +33,47 @@ , printL = FALSE , plotL = FALSE , crossvalI = x_crossval_i + , scaleC = "none" # data have been pareto scaled outside this routine and must not be scaled again. This line fixes issue #2. ) my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] - do_s_plot <- function(parallel_x) { - my_cor_vs_cov <- cor_vs_cov( - matrix_x = x_dataMatrix - , ropls_x = my_oplsda - , parallel_x = parallel_x - ) + do_s_plot <- function( + x_env + , predictor_projection_x = TRUE + , cplot_x = FALSE + , cor_vs_cov_x = NULL + ) + { + #print(ls(x_env)) # "cplot_y" etc + #print(str(x_env$cplot_y)) # chr "covariance" + if (cplot_x) { + #print(x_env$cplot_y) # "covariance" + cplot_y_correlation <- (x_env$cplot_y == "correlation") + #print(cplot_y_correlation) # FALSE + } + if (is.null(cor_vs_cov_x)) { + my_cor_vs_cov <- cor_vs_cov( + matrix_x = x_dataMatrix + , ropls_x = my_oplsda + , predictor_projection_x = predictor_projection_x + ) + } else { + my_cor_vs_cov <- cor_vs_cov_x + } + # print("str(my_cor_vs_cov)") + # str(my_cor_vs_cov) + if (sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { + x_progress("No cor_vs_cov data produced") + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") + text(x=1, y=1, labels="too few covariance data") + return(my_cor_vs_cov) + } with( my_cor_vs_cov , { - min_x <- min(covariance) - max_x <- max(covariance) + min_x <- min(covariance, na.rm = TRUE) + max_x <- max(covariance, na.rm = TRUE) lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) covariance <- covariance / lim_x lim_x <- 1.2 @@ -42,37 +82,78 @@ # (Mehmood 2012 doi:10.1186/1748-7188-6-27) plus_cor <- correlation plus_cov <- covariance - cex <- 0.75 + cex <- 0.65 which_projection <- if (projection == 1) "t1" else "o1" which_loading <- if (projection == 1) "parallel" else "orthogonal" - if (projection == 1) { - my_xlab <- "relative covariance(feature,t1)" - my_x <- plus_cov - my_ylab <- "correlation(feature,t1) [~ parallel loading]" - my_y <- plus_cor - my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) + if (projection == 1) { # predictor-projection + vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) + if (!cplot_x) { # S-plot predictor-projection + my_xlab <- "relative covariance(feature,t1)" + my_x <- plus_cov + my_ylab <- "correlation(feature,t1)" + my_y <- plus_cor + } else { # C-plot predictor-projection + my_xlab <- "variable importance in predictor-projection" + my_x <- vip4p + if (cplot_y_correlation) { + my_ylab <- "correlation(feature,t1)" + my_y <- plus_cor + } else { + my_ylab <- "relative covariance(feature,t1)" + my_y <- plus_cov + } + } + if (cplot_x) { + lim_x <- max(my_x, na.rm = TRUE) * 1.1 + my_xlim <- c( 0, lim_x + off(0.2) ) + } else { + my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) + } my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) my_load_distal <- loadp my_load_proximal <- loado - vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) red <- as.numeric(correlation > 0) * vipcp blue <- as.numeric(correlation < 0) * vipcp alpha <- 0.1 + 0.4 * vipcp - my_col = rgb(blue = blue, red = red, green = 0, alpha = alpha) - main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) - } else { - my_xlab <- "relative covariance(feature,to1)" - my_x <- -plus_cov - my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) - my_ylab <- "correlation(feature,to1) [~ orthogonal loading]" - my_y <- plus_cor + red[is.na(red)] <- 0 + blue[is.na(blue)] <- 0 + alpha[is.na(alpha)] <- 0 + my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) + main_label <- sprintf("%s for level %s versus %s" + , x_prefix, fctr_lvl_1, fctr_lvl_2) + } else { # orthogonal projection + vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) + if (!cplot_x) { + my_xlab <- "relative covariance(feature,to1)" + my_x <- -plus_cov + } else { + my_xlab <- "variable importance in orthogonal projection" + my_x <- vip4o + } + if (!cplot_x) { # S-plot orthogonal projection + my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) + my_ylab <- "correlation(feature,to1)" + my_y <- plus_cor + } else { # C-plot orthogonal projection + lim_x <- max(my_x, na.rm = TRUE) * 1.1 + my_xlim <- c( 0, lim_x + off(0.2) ) + if (cplot_y_correlation) { + my_ylab <- "correlation(feature,to1)" + my_y <- plus_cor + } else { + my_ylab <- "relative covariance(feature,to1)" + my_y <- plus_cov + } + } my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) my_load_distal <- loado my_load_proximal <- loadp - vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) alpha <- 0.1 + 0.4 * vipco - my_col = rgb(blue = 0, red = 0, green = 0, alpha = alpha) - main_label <- sprintf("Features influencing orthogonal projection for level %s versus %s", fctr_lvl_1, fctr_lvl_2) + alpha[is.na(alpha)] <- 0 + my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) + main_label <- sprintf( + "Features influencing orthogonal projection for %s versus %s" + , fctr_lvl_1, fctr_lvl_2) } main_cex <- min(1.0, 46.0/nchar(main_label)) my_feature_label_slant <- -30 # slant feature labels 30 degrees downward @@ -92,7 +173,7 @@ ) low_x <- -0.7 * lim_x high_x <- 0.7 * lim_x - if (projection == 1) { + if (projection == 1 && !cplot_x) { text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") } @@ -109,54 +190,105 @@ names(head(sort(my_load_distal),n = n_labels)) , names(tail(sort(my_load_distal),n = n_labels)) ) - labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" )) + labels <- unname( + sapply( + X = tsv1$featureID + , FUN = function(x) if( x %in% labels_to_show ) x else "" + ) + ) x_text_offset <- 0.024 - y_text_offset <- (if (projection == 1) 1 else -1) * -0.017 + y_text_off <- 0.017 + if (!cplot_x) { # S-plot + y_text_offset <- if (projection == 1) -y_text_off else y_text_off + } else { # C-plot + y_text_offset <- + sapply( + X = (my_y > 0) + , FUN = function(x) { if (x) y_text_off else -y_text_off } + ) + } label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { - print("str(x_arg)") - print(str(x_arg)) - print("str(y_arg)") - print(str(y_arg)) - print("str(labels_arg)") - print(str(labels_arg)) - text( - y = y_arg - , x = x_arg + x_text_offset - , cex = 0.4 - , labels = labels_arg - , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels - , srt = slant_arg - , adj = 0 # left-justified - ) + # print("str(x_arg)") + # print(str(x_arg)) + # print("str(y_arg)") + # print(str(y_arg)) + # print("str(labels_arg)") + # print(str(labels_arg)) + if (length(labels_arg) > 0) { + unique_slant <- unique(slant_arg) + if (length(unique_slant) == 1) { + text( + y = y_arg + , x = x_arg + x_text_offset + , cex = 0.4 + , labels = labels_arg + , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels + , srt = slant_arg + , adj = 0 # left-justified + ) + } else { + for (slant in unique_slant) { + text( + y = y_arg[slant_arg == slant] + , x = x_arg[slant_arg == slant] + x_text_offset + , cex = 0.4 + , labels = labels_arg[slant_arg == slant] + , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels + , srt = slant + , adj = 0 # left-justified + ) + } + } + } } - my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant + if (!cplot_x) { + my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant + } else { + my_slant <- sapply( + X = (my_y > 0) + , FUN = function(x) if (x) 2 else -2 + ) * my_feature_label_slant + } if (length(my_x) > 1) { - label_features( + label_features( x_arg = my_x [my_x > 0] , y_arg = my_y [my_x > 0] - y_text_offset , labels_arg = labels[my_x > 0] - , slant_arg = -my_slant + , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) ) - label_features( - x_arg = my_x [my_x < 0] - , y_arg = my_y [my_x < 0] + y_text_offset - , labels_arg = labels[my_x < 0] + if (!cplot_x) { + label_features( + x_arg = my_x [my_x < 0] + , y_arg = my_y [my_x < 0] + y_text_offset + , labels_arg = labels[my_x < 0] + , slant_arg = my_slant + ) + } + } else { + if (!cplot_x) { + my_slant <- (if (my_x > 1) -1 else 1) * my_slant + my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset + } else { + my_slant <- (if (my_y > 1) -1 else 1) * my_slant + my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset + } + label_features( + x_arg = my_x + , y_arg = my_y_arg + , labels_arg = labels , slant_arg = my_slant ) - } else { - label_features( - x_arg = my_x - , y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset - , labels_arg = labels - , slant_arg = (if (my_x > 1) -1 else 1) * my_slant - ) } } } ) return (my_cor_vs_cov) } - my_cor_vs_cov <- do_s_plot( parallel_x = TRUE ) + my_cor_vs_cov <- do_s_plot( + x_env = x_env + , predictor_projection_x = TRUE + , cplot_x = FALSE + ) typeVc <- c("correlation", # 1 "outlier", # 2 "overview", # 3 @@ -175,6 +307,7 @@ } else { my_typevc <- c("(dummy)","overview","(dummy)") } + my_ortho_cor_vs_cov_exists <- FALSE for (my_type in my_typevc) { if (my_type %in% typeVc) { tryCatch({ @@ -187,17 +320,65 @@ , parLayL = TRUE , parEllipsesL = TRUE ) + if (my_type == "overview") { + sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) + title(sub = sub_label) + } } else { - do_s_plot( parallel_x = FALSE ) + my_ortho_cor_vs_cov <- do_s_plot( + x_env = x_env + , predictor_projection_x = FALSE + , cplot_x = FALSE + ) + my_ortho_cor_vs_cov_exists <- TRUE } }, error = function(e) { - x_progress( sprintf( "factor level %s or %s may have only one sample - %s", fctr_lvl_1, fctr_lvl_2, e$message ) ) + x_progress( + sprintf( + "factor level %s or %s may have only one sample - %s" + , fctr_lvl_1 + , fctr_lvl_2 + , e$message + ) + ) }) } else { plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") text(x=1, y=1, labels="no orthogonal projection is possible") } } + cplot_p <- x_env$cplot_p + cplot_o <- x_env$cplot_o + if (cplot_p || cplot_o) { + if (cplot_p) { + do_s_plot( + x_env = x_env + , predictor_projection_x = TRUE + , cplot_x = TRUE + , cor_vs_cov_x = my_cor_vs_cov + ) + did_plots <- 1 + } else { + did_plots <- 0 + } + if (cplot_o) { + if (my_ortho_cor_vs_cov_exists) { + do_s_plot( + x_env = x_env + , predictor_projection_x = FALSE + , cplot_x = TRUE + , cor_vs_cov_x = my_ortho_cor_vs_cov + ) + } else { + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") + text(x=1, y=1, labels="no orthogonal projection is possible") + } + did_plots <- 1 + did_plots + } + if (did_plots == 1) { + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n", fg = "white") + } + } return (my_cor_vs_cov) } else { return (NULL) @@ -205,7 +386,14 @@ } # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 -corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) { +corcov_calc <- function( + calc_env +, failure_action = stop +, progress_action = function(x) { } +, corcov_tsv_action = function(t) { } +, salience_tsv_action = function(t) { } +, extra_plots = c() +) { if ( ! is.environment(calc_env) ) { failure_action("corcov_calc: fatal error - 'calc_env' is not an environment") return ( FALSE ) @@ -235,18 +423,20 @@ # arg/env checking if (!(facC %in% names(smpl_metadata))) { - failure_action(sprintf("bad parameter! Factor name '%s' not found in sampleMetadata", facC)) + failure_action( + sprintf("bad parameter! Factor name '%s' not found in sampleMetadata" + , facC)) return ( FALSE ) } mz <- vrbl_metadata$mz names(mz) <- vrbl_metadata$variableMetadata mz_lookup <- function(feature) unname(mz[feature]) - + rt <- vrbl_metadata$rt names(rt) <- vrbl_metadata$variableMetadata rt_lookup <- function(feature) unname(rt[feature]) - + # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) salience_df <- calc_env$salience_df <- w4msalience( data_matrix = data_matrix @@ -322,7 +512,10 @@ # for each column name, extract the parts of the name matched by 'col_pattern', if any the_colnames <- colnames(vrbl_metadata) if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) { - failure_action(sprintf("bad parameter! variableMetadata must contain results of W4M Univariate test '%s'.", tesC)) + failure_action( + sprintf( + "bad parameter! variableMetadata must contain results of W4M Univariate test '%s'." + , tesC)) return ( FALSE ) } col_matches <- lapply( @@ -349,21 +542,36 @@ } } level_union <- unique(sort(level_union)) - overall_significant <- 1 == ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) + overall_significant <- 1 == ( + if (intersample_sig_col %in% colnames(vrbl_metadata)) { + vrbl_metadata[,intersample_sig_col] + } else { + 1 + } + ) if ( length(level_union) > 2 ) { chosen_samples <- smpl_metadata_facC %in% level_union chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) col_selector <- vrbl_metadata_names[ overall_significant ] my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] plot_action <- function(fctr_lvl_1, fctr_lvl_2) { - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) - predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) + progress_action( + sprintf("calculating/plotting contrast of %s vs. %s" + , fctr_lvl_1, fctr_lvl_2)) + predictor <- sapply( + X = chosen_facC + , FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 + ) my_cor_cov <- do_detail_plot( x_dataMatrix = my_matrix , x_predictor = predictor , x_is_match = is_match , x_algorithm = algoC - , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" + , x_prefix = if (pairSigFeatOnly) { + "Significantly contrasting features" + } else { + "Significant features" + } , x_show_labels = labelFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) @@ -375,7 +583,10 @@ my_tsv <- my_cor_cov$tsv1 my_tsv$mz <- mz_lookup(my_tsv$featureID) my_tsv$rt <- rt_lookup(my_tsv$featureID) - my_tsv["level1Level2Sig"] <- vrbl_metadata[ match(my_tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] + my_tsv["level1Level2Sig"] <- vrbl_metadata[ + match(my_tsv$featureID, vrbl_metadata_names) + , vrbl_metadata_col + ] tsv <<- my_tsv corcov_tsv_action(tsv) did_plot <<- TRUE @@ -404,20 +615,34 @@ fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) + progress_action( + sprintf("calculating/plotting contrast of %s vs. %s" + , fctr_lvl_1, fctr_lvl_2)) # choose only samples with one of the two factors for this column chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) predictor <- smpl_metadata_facC[chosen_samples] # extract only the significantly-varying features and the chosen samples - fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) - col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ] + fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * + ( if (intersample_sig_col %in% colnames(vrbl_metadata)) { + vrbl_metadata[,intersample_sig_col] + } else { + 1 + } + ) + col_selector <- vrbl_metadata_names[ + if ( pairSigFeatOnly ) fully_significant else overall_significant + ] my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] my_cor_cov <- do_detail_plot( x_dataMatrix = my_matrix , x_predictor = predictor , x_is_match = is_match , x_algorithm = algoC - , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" + , x_prefix = if (pairSigFeatOnly) { + "Significantly contrasting features" + } else { + "Significant features" + } , x_show_labels = labelFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) @@ -429,7 +654,10 @@ tsv <- my_cor_cov$tsv1 tsv$mz <- mz_lookup(tsv$featureID) tsv$rt <- rt_lookup(tsv$featureID) - tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] + tsv["level1Level2Sig"] <- vrbl_metadata[ + match(tsv$featureID, vrbl_metadata_names) + , vrbl_metadata_col + ] corcov_tsv_action(tsv) did_plot <- TRUE } @@ -444,7 +672,7 @@ completed <- c() lapply( X = level_union - , FUN = function(x) { + , FUN = function(x) { fctr_lvl_1 <- x[1] fctr_lvl_2 <- { if ( fctr_lvl_1 %in% completed ) @@ -455,12 +683,20 @@ } chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) fctr_lvl_2 <- "other" - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) + progress_action( + sprintf("calculating/plotting contrast of %s vs. %s" + , fctr_lvl_1, fctr_lvl_2) + ) if (length(unique(chosen_samples)) < 1) { progress_action("NOTHING TO PLOT...") } else { chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) - predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) + predictor <- sapply( + X = chosen_facC + , FUN = function(fac) { + if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 + } + ) my_matrix <- scdm[ chosen_samples, , drop = FALSE ] # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) @@ -494,11 +730,13 @@ utils::combn( x = level_union , m = 2 - , FUN = function(x) { + , FUN = function(x) { fctr_lvl_1 <- x[1] fctr_lvl_2 <- x[2] chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) + progress_action( + sprintf("calculating/plotting contrast of %s vs. %s" + , fctr_lvl_1, fctr_lvl_2)) if (length(unique(chosen_samples)) < 1) { progress_action("NOTHING TO PLOT...") } else { @@ -536,7 +774,10 @@ } } if (!did_plot) { - failure_action(sprintf("bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV)) + failure_action( + sprintf( + "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'" + , facC, originalLevCSV)) return ( FALSE ) } return ( TRUE ) @@ -547,31 +788,41 @@ # Wiklund_2008 doi:10.1021/ac0713510 # Galindo_Prieto_2014 doi:10.1002/cem.2627 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R -cor_vs_cov <- function(matrix_x, ropls_x, parallel_x = TRUE) { +cor_vs_cov <- function( + matrix_x, ropls_x +, predictor_projection_x = TRUE +) { x_class <- class(ropls_x) - if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) - stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) ) + if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) + stop( + paste( + "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " + , as.character(x_class) + ) + ) } result <- list() - result$projection <- projection <- if (parallel_x) 1 else 2 + result$projection <- projection <- if (predictor_projection_x) 1 else 2 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS if ( ropls_x@suppLs$algoC == "nipals") { # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) - if (parallel_x) + if (predictor_projection_x) score_matrix <- ropls_x@scoreMN else score_matrix <- ropls_x@orthoScoreMN score_matrix_transposed <- t(score_matrix) score_matrix_magnitude <- mag(score_matrix) - result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) - result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) + result$covariance <- + score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) + result$correlation <- + score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) } else { # WARNING - untested code - I don't have test data to exercise this branch # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F - if (parallel_x) + if (predictor_projection_x) score_matrix <- ropls_x@scoreMN else score_matrix <- ropls_x@orthoScoreMN @@ -612,9 +863,20 @@ result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) superresult <- list() if (length(result$vip4o) == 0) result$vip4o <- NA - greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 ) - superresult$tsv1 <- data.frame( - featureID = names(ropls_x@vipVn) + greaterLevel <- sapply( + X = result$correlation + , FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 + ) + + # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 + featureID <- names(ropls_x@vipVn) + greaterLevel <- greaterLevel[featureID] + result$correlation <- result$correlation[featureID] + result$covariance <- result$covariance[featureID] + # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 + + tsv1 <- data.frame( + featureID = featureID , factorLevel1 = result$level1 , factorLevel2 = result$level2 , greaterLevel = greaterLevel @@ -627,7 +889,10 @@ , loado = result$loado , row.names = NULL ) - rownames(superresult$tsv1) <- superresult$tsv1$featureID + tsv1 <- tsv1[!is.na(tsv1$correlation),] + tsv1 <- tsv1[!is.na(tsv1$covariance),] + superresult$tsv1 <- tsv1 + rownames(superresult$tsv1) <- tsv1$featureID superresult$projection <- result$projection superresult$covariance <- result$covariance superresult$correlation <- result$correlation @@ -638,7 +903,7 @@ superresult$details <- result result$superresult <- superresult # Include thise in case future consumers of this routine want to use it in currently unanticipated ways - result$oplsda <- ropls_x + result$oplsda <- ropls_x result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways return (superresult) }