Mercurial > repos > eschen42 > w4mcorcov

diff w4mcorcov_calc.R @ 6:7bd523ca1f9a draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit cafda5095a79ce2376325b57337302f95137195d
author: eschen42
date: Wed, 18 Jul 2018 12:35:55 -0400
parents: 50f60f94c034
children: 066b1f409e9f
--- a/w4mcorcov_calc.R	Fri Mar 30 14:59:19 2018 -0400
+++ b/w4mcorcov_calc.R	Wed Jul 18 12:35:55 2018 -0400
@@ -7,9 +7,23 @@
 #### OPLS-DA
 algoC <- "nipals"
 
-do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_progress = print, x_env, x_crossval_i) {
+do_detail_plot <- function(
+  x_dataMatrix
+, x_predictor
+, x_is_match
+, x_algorithm
+, x_prefix
+, x_show_labels
+, x_progress = print
+, x_env
+, x_crossval_i
+) {
   off <- function(x) if (x_show_labels == "0") 0 else x
-  if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) {
+  if ( x_is_match
+      && ncol(x_dataMatrix) > 0
+      && length(unique(x_predictor))> 1
+      && x_crossval_i < nrow(x_dataMatrix)
+  ) {
     my_oplsda <- opls(
         x      = x_dataMatrix
       , y      = x_predictor
@@ -19,21 +33,47 @@
       , printL = FALSE
       , plotL  = FALSE
       , crossvalI = x_crossval_i
+      , scaleC = "none" # data have been pareto scaled outside this routine and must not be scaled again. This line fixes issue #2.
       )
     my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
     fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
     fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
-    do_s_plot <- function(parallel_x) {
-      my_cor_vs_cov <- cor_vs_cov(
-          matrix_x   = x_dataMatrix
-        , ropls_x    = my_oplsda
-        , parallel_x = parallel_x
-        )
+    do_s_plot <- function(
+        x_env
+      , predictor_projection_x   = TRUE
+      , cplot_x      = FALSE
+      , cor_vs_cov_x = NULL
+      )
+    {
+      #print(ls(x_env))               # "cplot_y" etc
+      #print(str(x_env$cplot_y))      # chr "covariance"
+      if (cplot_x) {
+        #print(x_env$cplot_y)         # "covariance"
+        cplot_y_correlation <- (x_env$cplot_y == "correlation")
+        #print(cplot_y_correlation)   # FALSE
+      }
+      if (is.null(cor_vs_cov_x)) {
+        my_cor_vs_cov <- cor_vs_cov(
+            matrix_x   = x_dataMatrix
+          , ropls_x    = my_oplsda
+          , predictor_projection_x = predictor_projection_x
+          )
+      } else {
+        my_cor_vs_cov <- cor_vs_cov_x
+      }
+      # print("str(my_cor_vs_cov)")
+      # str(my_cor_vs_cov)
+      if (sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
+        x_progress("No cor_vs_cov data produced")
+        plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
+        text(x=1, y=1, labels="too few covariance data")
+        return(my_cor_vs_cov)
+      }
       with(
         my_cor_vs_cov
       , {
-          min_x <- min(covariance)
-          max_x <- max(covariance)
+          min_x <- min(covariance, na.rm = TRUE)
+          max_x <- max(covariance, na.rm = TRUE)
           lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
           covariance <- covariance / lim_x
           lim_x <- 1.2
@@ -42,37 +82,78 @@
           #   (Mehmood 2012 doi:10.1186/1748-7188-6-27)
           plus_cor <- correlation
           plus_cov <- covariance
-          cex <- 0.75
+          cex <- 0.65
           which_projection <- if (projection == 1) "t1" else "o1"
           which_loading <- if (projection == 1) "parallel" else "orthogonal"
-          if (projection == 1) {
-            my_xlab <- "relative covariance(feature,t1)"
-            my_x <- plus_cov
-            my_ylab <- "correlation(feature,t1) [~ parallel loading]"
-            my_y <- plus_cor
-            my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
+          if (projection == 1) { # predictor-projection
+            vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
+            if (!cplot_x) { # S-plot predictor-projection
+              my_xlab <- "relative covariance(feature,t1)"
+              my_x <- plus_cov
+              my_ylab <- "correlation(feature,t1)"
+              my_y <- plus_cor
+            } else { # C-plot predictor-projection
+              my_xlab <- "variable importance in predictor-projection"
+              my_x <- vip4p
+              if (cplot_y_correlation) {
+                my_ylab <- "correlation(feature,t1)"
+                my_y <- plus_cor
+              } else {
+                my_ylab <- "relative covariance(feature,t1)"
+                my_y <- plus_cov
+              }
+            }
+            if (cplot_x) {
+              lim_x <- max(my_x, na.rm = TRUE) * 1.1
+              my_xlim <- c( 0, lim_x + off(0.2) )
+            } else {
+              my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
+            }
             my_ylim <- c( -1.0   - off(0.2), 1.0   + off(0.2) )
             my_load_distal <- loadp
             my_load_proximal <- loado
-            vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
             red  <- as.numeric(correlation > 0) * vipcp
             blue <- as.numeric(correlation < 0) * vipcp
             alpha <- 0.1 + 0.4 * vipcp
-            my_col = rgb(blue = blue, red = red, green = 0, alpha = alpha)
-            main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2)
-          } else {
-            my_xlab <- "relative covariance(feature,to1)"
-            my_x <- -plus_cov
-            my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
-            my_ylab <- "correlation(feature,to1) [~ orthogonal loading]"
-            my_y <- plus_cor
+            red[is.na(red)] <- 0
+            blue[is.na(blue)] <- 0
+            alpha[is.na(alpha)] <- 0
+            my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha)
+            main_label <- sprintf("%s for level %s versus %s"
+                                 , x_prefix, fctr_lvl_1, fctr_lvl_2)
+          } else { # orthogonal projection
+            vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
+            if (!cplot_x) {
+              my_xlab <- "relative covariance(feature,to1)"
+              my_x <- -plus_cov
+            } else {
+              my_xlab <- "variable importance in orthogonal projection"
+              my_x <- vip4o
+            }
+            if (!cplot_x) { # S-plot orthogonal projection
+              my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
+              my_ylab <- "correlation(feature,to1)"
+              my_y <- plus_cor
+            } else { # C-plot orthogonal projection
+              lim_x <- max(my_x, na.rm = TRUE) * 1.1
+              my_xlim <- c( 0, lim_x + off(0.2) )
+              if (cplot_y_correlation) {
+                my_ylab <- "correlation(feature,to1)"
+                my_y <- plus_cor
+              } else {
+                my_ylab <- "relative covariance(feature,to1)"
+                my_y <- plus_cov
+              }
+            }
             my_ylim <- c( -1.0   - off(0.2), 1.0   + off(0.2) )
             my_load_distal <- loado
             my_load_proximal <- loadp
-            vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
             alpha <- 0.1 + 0.4 * vipco
-            my_col = rgb(blue = 0, red = 0, green = 0, alpha = alpha)
-            main_label <- sprintf("Features influencing orthogonal projection for level %s versus %s", fctr_lvl_1, fctr_lvl_2)
+            alpha[is.na(alpha)] <- 0
+            my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha)
+            main_label <- sprintf(
+              "Features influencing orthogonal projection for %s versus %s"
+            , fctr_lvl_1, fctr_lvl_2)
           }
           main_cex <- min(1.0, 46.0/nchar(main_label))
           my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
@@ -92,7 +173,7 @@
           )
           low_x <- -0.7 * lim_x
           high_x <- 0.7 * lim_x
-          if (projection == 1) {
+          if (projection == 1 && !cplot_x) {
             text(x = low_x, y = -0.05, labels =  fctr_lvl_1, col = "blue")
             text(x = high_x, y = 0.05, labels =  fctr_lvl_2, col = "red")
           }
@@ -109,54 +190,105 @@
               names(head(sort(my_load_distal),n = n_labels))
             , names(tail(sort(my_load_distal),n = n_labels))
             )
-            labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" ))
+            labels <- unname(
+              sapply(
+                X = tsv1$featureID
+              , FUN = function(x) if( x %in% labels_to_show ) x else ""
+              )
+            )
             x_text_offset <- 0.024
-            y_text_offset <- (if (projection == 1) 1 else -1) * -0.017
+            y_text_off <- 0.017
+            if (!cplot_x) { # S-plot
+              y_text_offset <- if (projection == 1) -y_text_off else y_text_off
+            } else { # C-plot
+              y_text_offset <-
+                sapply(
+                  X = (my_y > 0)
+                , FUN = function(x) { if (x) y_text_off else -y_text_off }
+                )
+            }
             label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
-              print("str(x_arg)")
-              print(str(x_arg))
-              print("str(y_arg)")
-              print(str(y_arg))
-              print("str(labels_arg)")
-              print(str(labels_arg))
-              text(
-                y = y_arg
-              , x = x_arg + x_text_offset
-              , cex = 0.4
-              , labels = labels_arg
-              , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
-              , srt = slant_arg
-              , adj = 0   # left-justified
-              )
+              # print("str(x_arg)")
+              # print(str(x_arg))
+              # print("str(y_arg)")
+              # print(str(y_arg))
+              # print("str(labels_arg)")
+              # print(str(labels_arg))
+              if (length(labels_arg) > 0) {
+                unique_slant <- unique(slant_arg)
+                if (length(unique_slant) == 1) {
+                  text(
+                    y = y_arg
+                  , x = x_arg + x_text_offset
+                  , cex = 0.4
+                  , labels = labels_arg
+                  , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
+                  , srt = slant_arg
+                  , adj = 0   # left-justified
+                  )
+                } else {
+                  for (slant in unique_slant) {
+                    text(
+                      y = y_arg[slant_arg == slant]
+                    , x = x_arg[slant_arg == slant] + x_text_offset
+                    , cex = 0.4
+                    , labels = labels_arg[slant_arg == slant]
+                    , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
+                    , srt = slant
+                    , adj = 0   # left-justified
+                    )
+                  }
+                }
+              }
             }
-            my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
+            if (!cplot_x) {
+              my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
+            } else {
+              my_slant <- sapply(
+                            X = (my_y > 0)
+                          , FUN = function(x) if (x) 2 else -2
+                          ) * my_feature_label_slant
+            }
             if (length(my_x) > 1) {
-              label_features( 
+              label_features(
                 x_arg      = my_x  [my_x > 0]
               , y_arg      = my_y  [my_x > 0] - y_text_offset
               , labels_arg = labels[my_x > 0]
-              , slant_arg = -my_slant
+              , slant_arg = (if (!cplot_x) -my_slant else (my_slant))
               )
-              label_features( 
-                x_arg      = my_x  [my_x < 0]
-              , y_arg      = my_y  [my_x < 0] + y_text_offset
-              , labels_arg = labels[my_x < 0]
+              if (!cplot_x) {
+                label_features(
+                  x_arg      = my_x  [my_x < 0]
+                , y_arg      = my_y  [my_x < 0] + y_text_offset
+                , labels_arg = labels[my_x < 0]
+                , slant_arg = my_slant
+                )
+              }
+            } else {
+              if (!cplot_x) {
+                my_slant <- (if (my_x > 1) -1 else 1) * my_slant
+                my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
+              } else {
+                my_slant <- (if (my_y > 1) -1 else 1) * my_slant
+                my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset
+              }
+              label_features(
+                x_arg = my_x
+              , y_arg = my_y_arg
+              , labels_arg = labels
               , slant_arg = my_slant
               )
-            } else {
-              label_features( 
-                x_arg = my_x
-              , y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
-              , labels_arg = labels
-              , slant_arg = (if (my_x > 1) -1 else 1) * my_slant
-              )
             }
           }
         }
       )
       return (my_cor_vs_cov)
     }
-    my_cor_vs_cov <- do_s_plot( parallel_x = TRUE )
+    my_cor_vs_cov <- do_s_plot(
+      x_env = x_env
+    , predictor_projection_x = TRUE
+    , cplot_x = FALSE
+    )
     typeVc <- c("correlation",      # 1
                 "outlier",          # 2
                 "overview",         # 3
@@ -175,6 +307,7 @@
     } else {
       my_typevc <- c("(dummy)","overview","(dummy)")
     }
+    my_ortho_cor_vs_cov_exists <- FALSE
     for (my_type in my_typevc) {
       if (my_type %in% typeVc) {
         tryCatch({
@@ -187,17 +320,65 @@
              , parLayL      = TRUE
              , parEllipsesL = TRUE
              )
+             if (my_type == "overview") {
+               sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2)
+               title(sub = sub_label)
+             }
           } else {
-            do_s_plot( parallel_x = FALSE )
+            my_ortho_cor_vs_cov <- do_s_plot(
+              x_env = x_env
+            , predictor_projection_x = FALSE
+            , cplot_x = FALSE
+            )
+            my_ortho_cor_vs_cov_exists <- TRUE
           }
         }, error = function(e) {
-          x_progress( sprintf( "factor level %s or %s may have only one sample - %s", fctr_lvl_1, fctr_lvl_2, e$message ) )
+          x_progress(
+            sprintf(
+              "factor level %s or %s may have only one sample - %s"
+            , fctr_lvl_1
+            , fctr_lvl_2
+            , e$message
+            )
+          )
         })
       } else {
         plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
         text(x=1, y=1, labels="no orthogonal projection is possible")
       }
     }
+    cplot_p <- x_env$cplot_p
+    cplot_o <- x_env$cplot_o
+    if (cplot_p || cplot_o) {
+      if (cplot_p) {
+        do_s_plot(
+          x_env = x_env
+        , predictor_projection_x = TRUE
+        , cplot_x = TRUE
+        , cor_vs_cov_x = my_cor_vs_cov
+        )
+        did_plots <- 1
+      } else {
+        did_plots <- 0
+      }
+      if (cplot_o) {
+        if (my_ortho_cor_vs_cov_exists) {
+          do_s_plot(
+            x_env = x_env
+          , predictor_projection_x = FALSE
+          , cplot_x = TRUE
+          , cor_vs_cov_x = my_ortho_cor_vs_cov
+          )
+        } else {
+          plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
+          text(x=1, y=1, labels="no orthogonal projection is possible")
+        }
+        did_plots <- 1 + did_plots
+      }
+      if (did_plots == 1) {
+        plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n", fg = "white")
+      }
+    }
     return (my_cor_vs_cov)
   } else {
     return (NULL)
@@ -205,7 +386,14 @@
 }
 
 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510
-corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) {
+corcov_calc <- function(
+  calc_env
+, failure_action      = stop
+, progress_action     = function(x) { }
+, corcov_tsv_action   = function(t) { }
+, salience_tsv_action = function(t) { }
+, extra_plots         = c()
+) {
   if ( ! is.environment(calc_env) ) {
     failure_action("corcov_calc: fatal error - 'calc_env' is not an environment")
     return ( FALSE )
@@ -235,18 +423,20 @@
 
   # arg/env checking
   if (!(facC %in% names(smpl_metadata))) {
-    failure_action(sprintf("bad parameter!  Factor name '%s' not found in sampleMetadata", facC))
+    failure_action(
+      sprintf("bad parameter!  Factor name '%s' not found in sampleMetadata"
+             , facC))
     return ( FALSE )
   }
 
   mz             <- vrbl_metadata$mz
   names(mz)      <- vrbl_metadata$variableMetadata
   mz_lookup      <- function(feature) unname(mz[feature])
-  
+
   rt             <- vrbl_metadata$rt
   names(rt)      <- vrbl_metadata$variableMetadata
   rt_lookup      <- function(feature) unname(rt[feature])
-  
+
   # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv)
   salience_df <- calc_env$salience_df <- w4msalience(
     data_matrix    = data_matrix
@@ -322,7 +512,10 @@
     # for each column name, extract the parts of the name matched by 'col_pattern', if any
     the_colnames <- colnames(vrbl_metadata)
     if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) {
-      failure_action(sprintf("bad parameter!  variableMetadata must contain results of W4M Univariate test '%s'.", tesC))
+      failure_action(
+        sprintf(
+          "bad parameter!  variableMetadata must contain results of W4M Univariate test '%s'."
+        , tesC))
       return ( FALSE )
     }
     col_matches <- lapply(
@@ -349,21 +542,36 @@
       }
     }
     level_union <- unique(sort(level_union))
-    overall_significant <- 1 == ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE )
+    overall_significant <- 1 == (
+      if (intersample_sig_col %in% colnames(vrbl_metadata)) {
+        vrbl_metadata[,intersample_sig_col]
+      } else {
+        1
+      }
+    )
     if ( length(level_union) > 2 ) {
       chosen_samples <- smpl_metadata_facC %in% level_union
       chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
       col_selector <- vrbl_metadata_names[ overall_significant ]
       my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
       plot_action <- function(fctr_lvl_1, fctr_lvl_2) {
-        progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
-        predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
+        progress_action(
+          sprintf("calculating/plotting contrast of %s vs. %s"
+                 , fctr_lvl_1, fctr_lvl_2))
+        predictor <- sapply(
+          X = chosen_facC
+        , FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
+        )
         my_cor_cov <- do_detail_plot(
           x_dataMatrix  = my_matrix
         , x_predictor   = predictor
         , x_is_match    = is_match
         , x_algorithm   = algoC
-        , x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
+        , x_prefix      = if (pairSigFeatOnly) {
+                            "Significantly contrasting features"
+                          } else {
+                            "Significant features"
+                          }
         , x_show_labels = labelFeatures
         , x_progress    = progress_action
         , x_crossval_i  = min(7, length(chosen_samples))
@@ -375,7 +583,10 @@
           my_tsv <- my_cor_cov$tsv1
           my_tsv$mz <- mz_lookup(my_tsv$featureID)
           my_tsv$rt <- rt_lookup(my_tsv$featureID)
-          my_tsv["level1Level2Sig"] <- vrbl_metadata[ match(my_tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] 
+          my_tsv["level1Level2Sig"] <- vrbl_metadata[
+            match(my_tsv$featureID, vrbl_metadata_names)
+          , vrbl_metadata_col
+          ]
           tsv <<- my_tsv
           corcov_tsv_action(tsv)
           did_plot <<- TRUE
@@ -404,20 +615,34 @@
           fctr_lvl_2        <- col_match[3]               #                ^^      # Factor-level 2
           # only process this column if both factors are members of lvlCSV
           is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
-          progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+          progress_action(
+            sprintf("calculating/plotting contrast of %s vs. %s"
+                   , fctr_lvl_1, fctr_lvl_2))
           # choose only samples with one of the two factors for this column
           chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
           predictor <- smpl_metadata_facC[chosen_samples]
           # extract only the significantly-varying features and the chosen samples
-          fully_significant   <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE )
-          col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ]
+          fully_significant   <- 1 == vrbl_metadata[,vrbl_metadata_col] *
+            ( if (intersample_sig_col %in% colnames(vrbl_metadata)) {
+                vrbl_metadata[,intersample_sig_col]
+              } else {
+                1
+              }
+            )
+          col_selector <- vrbl_metadata_names[
+            if ( pairSigFeatOnly ) fully_significant else overall_significant
+          ]
           my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
           my_cor_cov <- do_detail_plot(
             x_dataMatrix  = my_matrix
           , x_predictor   = predictor
           , x_is_match    = is_match
           , x_algorithm   = algoC
-          , x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
+          , x_prefix      = if (pairSigFeatOnly) {
+                              "Significantly contrasting features"
+                            } else {
+                              "Significant features"
+                            }
           , x_show_labels = labelFeatures
           , x_progress    = progress_action
           , x_crossval_i  = min(7, length(chosen_samples))
@@ -429,7 +654,10 @@
             tsv <- my_cor_cov$tsv1
             tsv$mz <- mz_lookup(tsv$featureID)
             tsv$rt <- rt_lookup(tsv$featureID)
-            tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] 
+            tsv["level1Level2Sig"] <- vrbl_metadata[
+              match(tsv$featureID, vrbl_metadata_names)
+            , vrbl_metadata_col
+            ]
             corcov_tsv_action(tsv)
             did_plot <- TRUE
           }
@@ -444,7 +672,7 @@
         completed <- c()
         lapply(
           X = level_union
-        , FUN = function(x) { 
+        , FUN = function(x) {
             fctr_lvl_1 <- x[1]
             fctr_lvl_2 <- {
               if ( fctr_lvl_1 %in% completed )
@@ -455,12 +683,20 @@
             }
             chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
             fctr_lvl_2 <- "other"
-            progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+            progress_action(
+              sprintf("calculating/plotting contrast of %s vs. %s"
+              , fctr_lvl_1, fctr_lvl_2)
+            )
             if (length(unique(chosen_samples)) < 1) {
               progress_action("NOTHING TO PLOT...")
             } else {
               chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
-              predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
+              predictor <- sapply(
+                X = chosen_facC
+              , FUN = function(fac) {
+                  if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
+                }
+              )
               my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
               # only process this column if both factors are members of lvlCSV
               is_match <- isLevelSelected(fctr_lvl_1)
@@ -494,11 +730,13 @@
       utils::combn(
         x = level_union
       , m = 2
-      , FUN = function(x) { 
+      , FUN = function(x) {
           fctr_lvl_1 <- x[1]
           fctr_lvl_2 <- x[2]
           chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
-          progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+          progress_action(
+            sprintf("calculating/plotting contrast of %s vs. %s"
+                   , fctr_lvl_1, fctr_lvl_2))
           if (length(unique(chosen_samples)) < 1) {
             progress_action("NOTHING TO PLOT...")
           } else {
@@ -536,7 +774,10 @@
     }
   }
   if (!did_plot) {
-    failure_action(sprintf("bad parameter!  sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV))
+    failure_action(
+      sprintf(
+        "bad parameter!  sampleMetadata must have at least two levels of factor '%s' matching '%s'"
+      , facC, originalLevCSV))
     return ( FALSE )
   }
   return ( TRUE )
@@ -547,31 +788,41 @@
 #     Wiklund_2008 doi:10.1021/ac0713510
 #     Galindo_Prieto_2014 doi:10.1002/cem.2627
 #     https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R
-cor_vs_cov <- function(matrix_x, ropls_x, parallel_x = TRUE) {
+cor_vs_cov <- function(
+  matrix_x, ropls_x
+, predictor_projection_x = TRUE
+) {
   x_class <- class(ropls_x)
-  if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) 
-    stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) )
+  if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )
+    stop(
+      paste(
+        "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class "
+      , as.character(x_class)
+      )
+    )
   }
   result <- list()
-  result$projection <- projection <- if (parallel_x) 1 else 2
+  result$projection <- projection <- if (predictor_projection_x) 1 else 2
   # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
   if ( ropls_x@suppLs$algoC == "nipals") {
     # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510
     mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional))
     mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x]))
-    if (parallel_x)
+    if (predictor_projection_x)
        score_matrix <- ropls_x@scoreMN
     else
        score_matrix <- ropls_x@orthoScoreMN
     score_matrix_transposed <- t(score_matrix)
     score_matrix_magnitude <- mag(score_matrix)
-    result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
-    result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
+    result$covariance <-
+      score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
+    result$correlation <-
+      score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
   } else {
     # WARNING - untested code - I don't have test data to exercise this branch
     # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
     # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F
-    if (parallel_x)
+    if (predictor_projection_x)
        score_matrix <- ropls_x@scoreMN
     else
        score_matrix <- ropls_x@orthoScoreMN
@@ -612,9 +863,20 @@
   result$level2    <- rep.int(x = fctr_lvl_2, times = feature_count)
   superresult <- list()
   if (length(result$vip4o) == 0) result$vip4o <- NA
-  greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 )
-  superresult$tsv1 <- data.frame(
-    featureID           = names(ropls_x@vipVn)
+  greaterLevel <- sapply(
+    X = result$correlation
+  , FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
+  )
+
+  # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
+  featureID          <- names(ropls_x@vipVn)
+  greaterLevel       <- greaterLevel[featureID]
+  result$correlation <- result$correlation[featureID]
+  result$covariance  <- result$covariance[featureID]
+  # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
+
+  tsv1 <- data.frame(
+    featureID           = featureID
   , factorLevel1        = result$level1
   , factorLevel2        = result$level2
   , greaterLevel        = greaterLevel
@@ -627,7 +889,10 @@
   , loado               = result$loado
   , row.names           = NULL
   )
-  rownames(superresult$tsv1) <- superresult$tsv1$featureID
+  tsv1 <- tsv1[!is.na(tsv1$correlation),]
+  tsv1 <- tsv1[!is.na(tsv1$covariance),]
+  superresult$tsv1 <- tsv1
+  rownames(superresult$tsv1) <- tsv1$featureID
   superresult$projection <- result$projection
   superresult$covariance <- result$covariance
   superresult$correlation <- result$correlation
@@ -638,7 +903,7 @@
   superresult$details <- result
   result$superresult <- superresult
   # Include thise in case future consumers of this routine want to use it in currently unanticipated ways
-  result$oplsda    <- ropls_x          
+  result$oplsda    <- ropls_x
   result$predictor <- ropls_x@suppLs$y   # in case future consumers of this routine want to use it in currently unanticipated ways
   return (superresult)
 }
author	eschen42
date	Wed, 18 Jul 2018 12:35:55 -0400
parents	50f60f94c034
children	066b1f409e9f