Mercurial > repos > eschen42 > w4mcorcov

# center with 'colMeans()' - ref: http://gastonsanchez.com/visually-enforced/how-to/2014/01/15/Center-data-in-R/
center_colmeans <- function(x) {
  xcenter = colMeans(x)
  x - rep(xcenter, rep.int(nrow(x), ncol(x)))
}

#### OPLS-DA
algoC <- "nipals"

do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_progress = print, x_env, x_crossval_i) {
  off <- function(x) if (x_show_labels == "0") 0 else x
  if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) {
    my_oplsda <- opls(
        x      = x_dataMatrix
      , y      = x_predictor
      , algoC  = x_algorithm
      , predI  = 1
      , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0
      , printL = FALSE
      , plotL  = FALSE
      , crossvalI = x_crossval_i
      )
    my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
    fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
    fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
    do_s_plot <- function(parallel_x) {
      my_cor_vs_cov <- cor_vs_cov(
          matrix_x   = x_dataMatrix
        , ropls_x    = my_oplsda
        , parallel_x = parallel_x
        )
      with(
        my_cor_vs_cov
      , {
          min_x <- min(covariance)
          max_x <- max(covariance)
          lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
          covariance <- covariance / lim_x
          lim_x <- 1.2
          # "It is generally accepted that a variable should be selected if vj>1, [27–29],
          #   but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
          #   (Mehmood 2012 doi:10.1186/1748-7188-6-27)
          plus_cor <- correlation
          plus_cov <- covariance
          cex <- 0.75
          which_projection <- if (projection == 1) "t1" else "o1"
          which_loading <- if (projection == 1) "parallel" else "orthogonal"
          if (projection == 1) {
            my_xlab <- "relative covariance(feature,t1)"
            my_x <- plus_cov
            my_ylab <- "correlation(feature,t1) [~ parallel loading]"
            my_y <- plus_cor
            my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
            my_ylim <- c( -1.0   - off(0.2), 1.0   + off(0.2) )
            my_load_distal <- loadp
            my_load_proximal <- loado
            vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
            red  <- as.numeric(correlation > 0) * vipcp
            blue <- as.numeric(correlation < 0) * vipcp
            alpha <- 0.1 + 0.4 * vipcp
            my_col = rgb(blue = blue, red = red, green = 0, alpha = alpha)
            main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2)
          } else {
            my_xlab <- "relative covariance(feature,to1)"
            my_x <- -plus_cov
            my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
            my_ylab <- "correlation(feature,to1) [~ orthogonal loading]"
            my_y <- plus_cor
            my_ylim <- c( -1.0   - off(0.2), 1.0   + off(0.2) )
            my_load_distal <- loado
            my_load_proximal <- loadp
            vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
            alpha <- 0.1 + 0.4 * vipco
            my_col = rgb(blue = 0, red = 0, green = 0, alpha = alpha)
            main_label <- sprintf("Features influencing orthogonal projection for level %s versus %s", fctr_lvl_1, fctr_lvl_2)
          }
          main_cex <- min(1.0, 46.0/nchar(main_label))
          my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
          plot(
            y = my_y
          , x = my_x
          , type = "p"
          , xlim = my_xlim
          , ylim = my_ylim
          , xlab = my_xlab
          , ylab = my_ylab
          , main = main_label
          , cex.main = main_cex
          , cex = cex
          , pch = 16
          , col = my_col
          )
          low_x <- -0.7 * lim_x
          high_x <- 0.7 * lim_x
          if (projection == 1) {
            text(x = low_x, y = -0.05, labels =  fctr_lvl_1, col = "blue")
            text(x = high_x, y = 0.05, labels =  fctr_lvl_2, col = "red")
          }
          if ( x_show_labels != "0" ) {
            names(my_load_distal) <- tsv1$featureID
            names(my_load_proximal) <- tsv1$featureID
            if ( x_show_labels == "ALL" ) {
              n_labels <- length(my_load_distal)
            } else {
              n_labels <- as.numeric(x_show_labels)
            }
            n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
            labels_to_show <- c(
              names(head(sort(my_load_distal),n = n_labels))
            , names(tail(sort(my_load_distal),n = n_labels))
            )
            labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" ))
            x_text_offset <- 0.024
            y_text_offset <- (if (projection == 1) 1 else -1) * -0.017
            label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
              print("str(x_arg)")
              print(str(x_arg))
              print("str(y_arg)")
              print(str(y_arg))
              print("str(labels_arg)")
              print(str(labels_arg))
              text(
                y = y_arg
              , x = x_arg + x_text_offset
              , cex = 0.4
              , labels = labels_arg
              , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
              , srt = slant_arg
              , adj = 0   # left-justified
              )
            }
            my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
            if (length(my_x) > 1) {
              label_features(
                x_arg      = my_x  [my_x > 0]
              , y_arg      = my_y  [my_x > 0] - y_text_offset
              , labels_arg = labels[my_x > 0]
              , slant_arg = -my_slant
              )
              label_features(
                x_arg      = my_x  [my_x < 0]
              , y_arg      = my_y  [my_x < 0] + y_text_offset
              , labels_arg = labels[my_x < 0]
              , slant_arg = my_slant
              )
            } else {
              label_features(
                x_arg = my_x
              , y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
              , labels_arg = labels
              , slant_arg = (if (my_x > 1) -1 else 1) * my_slant
              )
            }
          }
        }
      )
      return (my_cor_vs_cov)
    }
    my_cor_vs_cov <- do_s_plot( parallel_x = TRUE )
    typeVc <- c("correlation",      # 1
                "outlier",          # 2
                "overview",         # 3
                "permutation",      # 4
                "predict-train",    # 5
                "predict-test",     # 6
                "summary",          # 7 = c(2,3,4,9)
                "x-loading",        # 8
                "x-score",          # 9
                "x-variance",       # 10
                "xy-score",         # 11
                "xy-weight"         # 12
               )                    # [c(3,8,9)] # [c(4,3,8,9)]
    if ( length(my_oplsda@orthoVipVn) > 0 ) {
      my_typevc <- typeVc[c(9,3,8)]
    } else {
      my_typevc <- c("(dummy)","overview","(dummy)")
    }
    for (my_type in my_typevc) {
      if (my_type %in% typeVc) {
        tryCatch({
          if ( my_type != "x-loading" ) {
             plot(
               x            = my_oplsda
             , typeVc       = my_type
             , parCexN      = 0.4
             , parDevNewL   = FALSE
             , parLayL      = TRUE
             , parEllipsesL = TRUE
             )
          } else {
            do_s_plot( parallel_x = FALSE )
          }
        }, error = function(e) {
          x_progress( sprintf( "factor level %s or %s may have only one sample - %s", fctr_lvl_1, fctr_lvl_2, e$message ) )
        })
      } else {
        plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
        text(x=1, y=1, labels="no orthogonal projection is possible")
      }
    }
    return (my_cor_vs_cov)
  } else {
    return (NULL)
  }
}

# S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510
corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) {
  if ( ! is.environment(calc_env) ) {
    failure_action("corcov_calc: fatal error - 'calc_env' is not an environment")
    return ( FALSE )
  }
  if ( ! is.function(corcov_tsv_action) ) {
    failure_action("corcov_calc: fatal error - 'corcov_tsv_action' is not a function")
    return ( FALSE )
  }
  if ( ! is.function(salience_tsv_action) ) {
    failure_action("salience_calc: fatal error - 'salience_tsv_action' is not a function")
    return ( FALSE )
  }

  # extract parameters from the environment
  vrbl_metadata <- calc_env$vrbl_metadata
  vrbl_metadata_names <- vrbl_metadata[,1]
  smpl_metadata <- calc_env$smpl_metadata
  data_matrix <- calc_env$data_matrix
  pairSigFeatOnly <- calc_env$pairSigFeatOnly
  facC <- calc_env$facC
  tesC <- calc_env$tesC
  # extract the levels from the environment
  originalLevCSV <- levCSV <- calc_env$levCSV
  # matchingC is one of { "none", "wildcard", "regex" }
  matchingC <- calc_env$matchingC
  labelFeatures <- calc_env$labelFeatures

  # arg/env checking
  if (!(facC %in% names(smpl_metadata))) {
    failure_action(sprintf("bad parameter!  Factor name '%s' not found in sampleMetadata", facC))
    return ( FALSE )
  }

  mz             <- vrbl_metadata$mz
  names(mz)      <- vrbl_metadata$variableMetadata
  mz_lookup      <- function(feature) unname(mz[feature])

  rt             <- vrbl_metadata$rt
  names(rt)      <- vrbl_metadata$variableMetadata
  rt_lookup      <- function(feature) unname(rt[feature])

  # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv)
  salience_df <- calc_env$salience_df <- w4msalience(
    data_matrix    = data_matrix
  , sample_class   = smpl_metadata[,facC]
  , failure_action = failure_action
  )
  salience_tsv_action({
    my_df <- data.frame(
      featureID    = salience_df$feature
    , salientLevel = salience_df$max_level
    , salientRCV   = salience_df$salient_rcv
    , salience     = salience_df$salience
    , mz           = mz_lookup(salience_df$feature)
    , rt           = rt_lookup(salience_df$feature)
    )
    my_df[order(-my_df$salience),]
  })

  # transform wildcards to regexen
  if (matchingC == "wildcard") {
    # strsplit(x = "hello,wild,world", split = ",")
    levCSV <- gsub("[.]", "[.]", levCSV)
    levCSV <- strsplit(x = levCSV, split = ",")
    levCSV <- sapply(levCSV, utils::glob2rx, trim.tail = FALSE)
    levCSV <- paste(levCSV, collapse = ",")
  }
  # function to determine whether level is a member of levCSV
  isLevelSelected <- function(lvl) {
    matchFun <- if (matchingC != "none") grepl else `==`
    return(
      Reduce(
        f = "||"
      , x = sapply(
              X = strsplit(
                    x = levCSV
                  , split = ","
                  , fixed = TRUE
                  )[[1]]
            , FUN = matchFun
            , lvl
            )
      )
    )
  }

  # transpose matrix because ropls matrix is the transpose of XCMS matrix
  # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot
  # center
  cdm <- center_colmeans(t(data_matrix))
  # pareto-scale
  my_scale <- sqrt(apply(cdm, 2, sd, na.rm=TRUE))
  scdm <- sweep(cdm, 2, my_scale, "/")

  # pattern to match column names like k10_kruskal_k4.k3_sig
  col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC)
  # column name like k10_kruskal_sig
  intersample_sig_col  <- sprintf('%s_%s_sig', facC, tesC)
  # get the facC column from sampleMetadata, dropping to one dimension
  smpl_metadata_facC <- smpl_metadata[,facC]

  # allocate a slot in the environment for the contrast_list, each element of which will be a data.frame with this structure:
  #   - feature ID
  #   - value1
  #   - value2
  #   - Wiklund_2008 correlation
  #   - Wiklund_2008 covariance
  #   - Wiklund_2008 VIP
  calc_env$contrast_list <- list()


  did_plot <- FALSE
  if (tesC != "none") {
    # for each column name, extract the parts of the name matched by 'col_pattern', if any
    the_colnames <- colnames(vrbl_metadata)
    if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) {
      failure_action(sprintf("bad parameter!  variableMetadata must contain results of W4M Univariate test '%s'.", tesC))
      return ( FALSE )
    }
    col_matches <- lapply(
      X = the_colnames,
      FUN = function(x) {
        regmatches( x, regexec(col_pattern, x) )[[1]]
      }
    )
    ## first contrast each selected level with all other levels combined into one "super-level" ##
    # process columns matching the pattern
    level_union <- c()
    for (i in 1:length(col_matches)) {
      col_match <- col_matches[[i]]
      if (length(col_match) > 0) {
        # it's an actual match; extract the pieces, e.g., k10_kruskal_k4.k3_sig
        vrbl_metadata_col <- col_match[1]               # ^^^^^^^^^^^^^^^^^^^^^  # Column name
        fctr_lvl_1        <- col_match[2]               #             ^^         # Factor-level 1
        fctr_lvl_2        <- col_match[3]               #                ^^      # Factor-level 2
        # only process this column if both factors are members of lvlCSV
        is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
        if (is_match) {
          level_union <- c(level_union, col_match[2], col_match[3])
        }
      }
    }
    level_union <- unique(sort(level_union))
    overall_significant <- 1 == ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE )
    if ( length(level_union) > 2 ) {
      chosen_samples <- smpl_metadata_facC %in% level_union
      chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
      col_selector <- vrbl_metadata_names[ overall_significant ]
      my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
      plot_action <- function(fctr_lvl_1, fctr_lvl_2) {
        progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
        predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
        my_cor_cov <- do_detail_plot(
          x_dataMatrix  = my_matrix
        , x_predictor   = predictor
        , x_is_match    = is_match
        , x_algorithm   = algoC
        , x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
        , x_show_labels = labelFeatures
        , x_progress    = progress_action
        , x_crossval_i  = min(7, length(chosen_samples))
        , x_env         = calc_env
        )
        if ( is.null(my_cor_cov) ) {
          progress_action("NOTHING TO PLOT.")
        } else {
          my_tsv <- my_cor_cov$tsv1
          my_tsv$mz <- mz_lookup(my_tsv$featureID)
          my_tsv$rt <- rt_lookup(my_tsv$featureID)
          my_tsv["level1Level2Sig"] <- vrbl_metadata[ match(my_tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
          tsv <<- my_tsv
          corcov_tsv_action(tsv)
          did_plot <<- TRUE
        }
      }
      if ( length(level_union) != 2 ) {
        fctr_lvl_2 <- "other"
        for ( fctr_lvl_1 in level_union[1:length(level_union)] ) {
          plot_action(fctr_lvl_1, fctr_lvl_2)
        }
      } else {
        plot_action(fctr_lvl_1 = level_union[1], fctr_lvl_2 = level_union[2])
      }
    }

    if ( length(level_union) > 1 ) {
      ## next, contrast each selected level with each of the other levels individually ##
      # process columns matching the pattern
      for (i in 1:length(col_matches)) {
        # for each potential match of the pattern
        col_match <- col_matches[[i]]
        if (length(col_match) > 0) {
          # it's an actual match; extract the pieces, e.g., k10_kruskal_k4.k3_sig
          vrbl_metadata_col <- col_match[1]               # ^^^^^^^^^^^^^^^^^^^^^  # Column name
          fctr_lvl_1        <- col_match[2]               #             ^^         # Factor-level 1
          fctr_lvl_2        <- col_match[3]               #                ^^      # Factor-level 2
          # only process this column if both factors are members of lvlCSV
          is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
          progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
          # choose only samples with one of the two factors for this column
          chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
          predictor <- smpl_metadata_facC[chosen_samples]
          # extract only the significantly-varying features and the chosen samples
          fully_significant   <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE )
          col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ]
          my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
          my_cor_cov <- do_detail_plot(
            x_dataMatrix  = my_matrix
          , x_predictor   = predictor
          , x_is_match    = is_match
          , x_algorithm   = algoC
          , x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
          , x_show_labels = labelFeatures
          , x_progress    = progress_action
          , x_crossval_i  = min(7, length(chosen_samples))
          , x_env         = calc_env
          )
          if ( is.null(my_cor_cov) ) {
            progress_action("NOTHING TO PLOT.")
          } else {
            tsv <- my_cor_cov$tsv1
            tsv$mz <- mz_lookup(tsv$featureID)
            tsv$rt <- rt_lookup(tsv$featureID)
            tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
            corcov_tsv_action(tsv)
            did_plot <- TRUE
          }
        }
      }
    }
  } else { # tesC == "none"
    level_union <- unique(sort(smpl_metadata_facC))
    if ( length(level_union) > 1 ) {
      if ( length(level_union) > 2 ) {
        ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ##
        completed <- c()
        lapply(
          X = level_union
        , FUN = function(x) {
            fctr_lvl_1 <- x[1]
            fctr_lvl_2 <- {
              if ( fctr_lvl_1 %in% completed )
                return("DUMMY")
              # strF(completed)
              completed <<- c(completed, fctr_lvl_1)
              setdiff(level_union, fctr_lvl_1)
            }
            chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
            fctr_lvl_2 <- "other"
            progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
            if (length(unique(chosen_samples)) < 1) {
              progress_action("NOTHING TO PLOT...")
            } else {
              chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
              predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
              my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
              # only process this column if both factors are members of lvlCSV
              is_match <- isLevelSelected(fctr_lvl_1)
              my_cor_cov <- do_detail_plot(
                x_dataMatrix  = my_matrix
              , x_predictor   = predictor
              , x_is_match    = is_match
              , x_algorithm   = algoC
              , x_prefix      = "Features"
              , x_show_labels = labelFeatures
              , x_progress    = progress_action
              , x_crossval_i  = min(7, length(chosen_samples))
              , x_env         = calc_env
              )
              if ( is.null(my_cor_cov) ) {
                progress_action("NOTHING TO PLOT")
              } else {
                tsv <- my_cor_cov$tsv1
                tsv$mz <- mz_lookup(tsv$featureID)
                tsv$rt <- rt_lookup(tsv$featureID)
                corcov_tsv_action(tsv)
                did_plot <<- TRUE
              }
            }
            "dummy" # need to return a value; otherwise combn fails with an error
          }
        )
      }
      ## pass 2 - contrast each selected level with each of the other levels individually ##
      completed <- c()
      utils::combn(
        x = level_union
      , m = 2
      , FUN = function(x) {
          fctr_lvl_1 <- x[1]
          fctr_lvl_2 <- x[2]
          chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
          progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
          if (length(unique(chosen_samples)) < 1) {
            progress_action("NOTHING TO PLOT...")
          } else {
            chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
            predictor <- chosen_facC
            my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
            # only process this column if both factors are members of lvlCSV
            is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
            my_cor_cov <- do_detail_plot(
              x_dataMatrix  = my_matrix
            , x_predictor   = predictor
            , x_is_match    = is_match
            , x_algorithm   = algoC
            , x_prefix      = "Features"
            , x_show_labels = labelFeatures
            , x_progress    = progress_action
            , x_crossval_i  = min(7, length(chosen_samples))
            , x_env         = calc_env
            )
            if ( is.null(my_cor_cov) ) {
              progress_action("NOTHING TO PLOT")
            } else {
              tsv <- my_cor_cov$tsv1
              tsv$mz <- mz_lookup(tsv$featureID)
              tsv$rt <- rt_lookup(tsv$featureID)
              corcov_tsv_action(tsv)
              did_plot <<- TRUE
            }
          }
          "dummy" # need to return a value; otherwise combn fails with an error
        }
      )
    } else {
      progress_action("NOTHING TO PLOT....")
    }
  }
  if (!did_plot) {
    failure_action(sprintf("bad parameter!  sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV))
    return ( FALSE )
  }
  return ( TRUE )
}

# Calculate data for correlation-versus-covariance plot
#   Adapted from:
#     Wiklund_2008 doi:10.1021/ac0713510
#     Galindo_Prieto_2014 doi:10.1002/cem.2627
#     https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R
cor_vs_cov <- function(matrix_x, ropls_x, parallel_x = TRUE) {
  x_class <- class(ropls_x)
  if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )
    stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) )
  }
  result <- list()
  result$projection <- projection <- if (parallel_x) 1 else 2
  # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
  if ( ropls_x@suppLs$algoC == "nipals") {
    # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510
    mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional))
    mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x]))
    if (parallel_x)
       score_matrix <- ropls_x@scoreMN
    else
       score_matrix <- ropls_x@orthoScoreMN
    score_matrix_transposed <- t(score_matrix)
    score_matrix_magnitude <- mag(score_matrix)
    result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
    result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
  } else {
    # WARNING - untested code - I don't have test data to exercise this branch
    # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
    # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F
    if (parallel_x)
       score_matrix <- ropls_x@scoreMN
    else
       score_matrix <- ropls_x@orthoScoreMN
    score_matrix_transposed <- t(score_matrix)
    cov_divisor <- nrow(matrix_x) - 1
    result$covariance <- sapply(
      X = 1:ncol(matrix_x)
    , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor
    )
    score_sd <- sapply(
      X = 1:ncol(score_matrix)
      , FUN = function(x) sd(score_matrix[,x])
    )
    # xSdVn - Numerical vector: variable standard deviations of the 'x' matrix
    xSdVn <- ropls_x@xSdVn
    result$correlation <- sapply(
      X = 1:ncol(matrix_x)
    , FUN = function(x) {
        ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) )
      }
    )
  }
  result$correlation <- result$correlation[ 1, , drop = TRUE ]
  result$covariance  <- result$covariance [ 1, , drop = TRUE ]

  # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014
  #    Length = number of features; labels = feature identifiers.  (The same is true for $correlation and $covariance.)
  result$vip4p     <- as.numeric(ropls_x@vipVn)
  result$vip4o     <- as.numeric(ropls_x@orthoVipVn)
  result$loadp     <- as.numeric(ropls_x@loadingMN)
  result$loado     <- as.numeric(ropls_x@orthoLoadingMN)
  # get the level names
  level_names      <- sort(levels(as.factor(ropls_x@suppLs$y)))
  fctr_lvl_1       <- level_names[1]
  fctr_lvl_2       <- level_names[2]
  feature_count    <- length(ropls_x@vipVn)
  result$level1    <- rep.int(x = fctr_lvl_1, times = feature_count)
  result$level2    <- rep.int(x = fctr_lvl_2, times = feature_count)
  superresult <- list()
  if (length(result$vip4o) == 0) result$vip4o <- NA
  greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 )
  superresult$tsv1 <- data.frame(
    featureID           = names(ropls_x@vipVn)
  , factorLevel1        = result$level1
  , factorLevel2        = result$level2
  , greaterLevel        = greaterLevel
  , projection          = result$projection
  , correlation         = result$correlation
  , covariance          = result$covariance
  , vip4p               = result$vip4p
  , vip4o               = result$vip4o
  , loadp               = result$loadp
  , loado               = result$loado
  , row.names           = NULL
  )
  rownames(superresult$tsv1) <- superresult$tsv1$featureID
  superresult$projection <- result$projection
  superresult$covariance <- result$covariance
  superresult$correlation <- result$correlation
  superresult$vip4p <- result$vip4p
  superresult$vip4o <- result$vip4o
  superresult$loadp <- result$loadp
  superresult$loado <- result$loado
  superresult$details <- result
  result$superresult <- superresult
  # Include thise in case future consumers of this routine want to use it in currently unanticipated ways
  result$oplsda    <- ropls_x
  result$predictor <- ropls_x@suppLs$y   # in case future consumers of this routine want to use it in currently unanticipated ways
  return (superresult)
}

# vim: sw=2 ts=2 et :
author	eschen42
date	Fri, 30 Mar 2018 14:59:19 -0400
parents	8bba31f628da
children	7bd523ca1f9a