Mercurial > repos > eschen42 > w4mcorcov
diff w4mcorcov_calc.R @ 5:50f60f94c034 draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit aff1790e25523d038a1e9528de748191c096132f
author | eschen42 |
---|---|
date | Fri, 30 Mar 2018 14:59:19 -0400 |
parents | 8bba31f628da |
children | 7bd523ca1f9a |
line wrap: on
line diff
--- a/w4mcorcov_calc.R Sun Mar 04 14:51:42 2018 -0500 +++ b/w4mcorcov_calc.R Fri Mar 30 14:59:19 2018 -0400 @@ -7,7 +7,7 @@ #### OPLS-DA algoC <- "nipals" -do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_show_loado_labels, x_progress = print, x_env, x_crossval_i) { +do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_progress = print, x_env, x_crossval_i) { off <- function(x) if (x_show_labels == "0") 0 else x if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) { my_oplsda <- opls( @@ -23,83 +23,140 @@ my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] - my_cor_vs_cov <- cor_vs_cov( - matrix_x = x_dataMatrix - , ropls_x = my_oplsda - ) - with( - my_cor_vs_cov - , { - min_x <- min(covariance) - max_x <- max(covariance) - lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) - covariance <- covariance / lim_x - lim_x <- 1.2 - main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) - main_cex <- min(1.0, 46.0/nchar(main_label)) - # "It is generally accepted that a variable should be selected if vj>1, [27–29], - # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." - # (Mehmood 2012 doi:10.1186/1748-7188-6-27) - vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) - alpha <- 0.1 + 0.4 * vipco - red <- as.numeric(correlation > 0) * vipco - blue <- as.numeric(correlation < 0) * vipco - plus_cor <- correlation - plus_cov <- covariance - cex <- 0.75 - plot( - y = plus_cor - , x = plus_cov - , type="p" - , xlim=c( -lim_x - off(0.2), lim_x + off(0.2) ) - , ylim=c( -1.0 - off(0.2), 1.0 + off(0.2) ) - , xlab = sprintf("relative covariance(feature,t1)") - , ylab = sprintf("correlation(feature,t1)") - , main = main_label - , cex.main = main_cex - , cex = cex - , pch = 16 - , col = rgb(blue = blue, red = red, green = 0, alpha = alpha) + do_s_plot <- function(parallel_x) { + my_cor_vs_cov <- cor_vs_cov( + matrix_x = x_dataMatrix + , ropls_x = my_oplsda + , parallel_x = parallel_x ) - low_x <- -0.7 * lim_x - high_x <- 0.7 * lim_x - text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") - text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") - if ( x_show_labels != "0" ) { - my_loadp <- loadp - my_loado <- loado - names(my_loadp) <- tsv1$featureID - names(my_loado) <- tsv1$featureID - if ( x_show_labels == "ALL" ) { - n_labels <- length(loadp) + with( + my_cor_vs_cov + , { + min_x <- min(covariance) + max_x <- max(covariance) + lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) + covariance <- covariance / lim_x + lim_x <- 1.2 + # "It is generally accepted that a variable should be selected if vj>1, [27–29], + # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." + # (Mehmood 2012 doi:10.1186/1748-7188-6-27) + plus_cor <- correlation + plus_cov <- covariance + cex <- 0.75 + which_projection <- if (projection == 1) "t1" else "o1" + which_loading <- if (projection == 1) "parallel" else "orthogonal" + if (projection == 1) { + my_xlab <- "relative covariance(feature,t1)" + my_x <- plus_cov + my_ylab <- "correlation(feature,t1) [~ parallel loading]" + my_y <- plus_cor + my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) + my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) + my_load_distal <- loadp + my_load_proximal <- loado + vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) + red <- as.numeric(correlation > 0) * vipcp + blue <- as.numeric(correlation < 0) * vipcp + alpha <- 0.1 + 0.4 * vipcp + my_col = rgb(blue = blue, red = red, green = 0, alpha = alpha) + main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) } else { - n_labels <- as.numeric(x_show_labels) + my_xlab <- "relative covariance(feature,to1)" + my_x <- -plus_cov + my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) + my_ylab <- "correlation(feature,to1) [~ orthogonal loading]" + my_y <- plus_cor + my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) + my_load_distal <- loado + my_load_proximal <- loadp + vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) + alpha <- 0.1 + 0.4 * vipco + my_col = rgb(blue = 0, red = 0, green = 0, alpha = alpha) + main_label <- sprintf("Features influencing orthogonal projection for level %s versus %s", fctr_lvl_1, fctr_lvl_2) } - n_labels <- min( n_labels, (1 + length(loadp)) / 2 ) - labels_to_show <- c( - names(head(sort(my_loadp),n = n_labels)) - , names(tail(sort(my_loadp),n = n_labels)) + main_cex <- min(1.0, 46.0/nchar(main_label)) + my_feature_label_slant <- -30 # slant feature labels 30 degrees downward + plot( + y = my_y + , x = my_x + , type = "p" + , xlim = my_xlim + , ylim = my_ylim + , xlab = my_xlab + , ylab = my_ylab + , main = main_label + , cex.main = main_cex + , cex = cex + , pch = 16 + , col = my_col ) - if ( x_show_loado_labels ) { - labels_to_show <- c( - labels_to_show - , names(head(sort(my_loado),n = n_labels)) - , names(tail(sort(my_loado),n = n_labels)) - ) + low_x <- -0.7 * lim_x + high_x <- 0.7 * lim_x + if (projection == 1) { + text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") + text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") } - labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" )) - text( - y = plus_cor - 0.013 - , x = plus_cov + 0.020 - , cex = 0.4 - , labels = labels - , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # rgb(blue = blue, red = red, green = 0, alpha = 0.2 + 0.8 * alpha) - , srt = -30 # slant 30 degrees downward - , adj = 0 # left-justified - ) + if ( x_show_labels != "0" ) { + names(my_load_distal) <- tsv1$featureID + names(my_load_proximal) <- tsv1$featureID + if ( x_show_labels == "ALL" ) { + n_labels <- length(my_load_distal) + } else { + n_labels <- as.numeric(x_show_labels) + } + n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) + labels_to_show <- c( + names(head(sort(my_load_distal),n = n_labels)) + , names(tail(sort(my_load_distal),n = n_labels)) + ) + labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" )) + x_text_offset <- 0.024 + y_text_offset <- (if (projection == 1) 1 else -1) * -0.017 + label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { + print("str(x_arg)") + print(str(x_arg)) + print("str(y_arg)") + print(str(y_arg)) + print("str(labels_arg)") + print(str(labels_arg)) + text( + y = y_arg + , x = x_arg + x_text_offset + , cex = 0.4 + , labels = labels_arg + , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels + , srt = slant_arg + , adj = 0 # left-justified + ) + } + my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant + if (length(my_x) > 1) { + label_features( + x_arg = my_x [my_x > 0] + , y_arg = my_y [my_x > 0] - y_text_offset + , labels_arg = labels[my_x > 0] + , slant_arg = -my_slant + ) + label_features( + x_arg = my_x [my_x < 0] + , y_arg = my_y [my_x < 0] + y_text_offset + , labels_arg = labels[my_x < 0] + , slant_arg = my_slant + ) + } else { + label_features( + x_arg = my_x + , y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset + , labels_arg = labels + , slant_arg = (if (my_x > 1) -1 else 1) * my_slant + ) + } + } } - } - ) + ) + return (my_cor_vs_cov) + } + my_cor_vs_cov <- do_s_plot( parallel_x = TRUE ) typeVc <- c("correlation", # 1 "outlier", # 2 "overview", # 3 @@ -120,28 +177,29 @@ } for (my_type in my_typevc) { if (my_type %in% typeVc) { - # print(sprintf("plotting type %s", my_type)) tryCatch({ - plot( - x = my_oplsda - , typeVc = my_type - , parCexN = 0.4 - , parDevNewL = FALSE - , parLayL = TRUE - , parEllipsesL = TRUE - ) + if ( my_type != "x-loading" ) { + plot( + x = my_oplsda + , typeVc = my_type + , parCexN = 0.4 + , parDevNewL = FALSE + , parLayL = TRUE + , parEllipsesL = TRUE + ) + } else { + do_s_plot( parallel_x = FALSE ) + } }, error = function(e) { - x_progress(sprintf("factor level %s or %s may have only one sample", fctr_lvl_1, fctr_lvl_2)) + x_progress( sprintf( "factor level %s or %s may have only one sample - %s", fctr_lvl_1, fctr_lvl_2, e$message ) ) }) } else { - # print("plotting dummy graph") plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") text(x=1, y=1, labels="no orthogonal projection is possible") } } return (my_cor_vs_cov) } else { - # x_progress(sprintf("x_is_match = %s, ncol(x_dataMatrix) = %d, length(unique(x_predictor)) = %d",x_is_match, ncol(x_dataMatrix), length(unique(x_predictor)))) return (NULL) } } @@ -174,7 +232,6 @@ # matchingC is one of { "none", "wildcard", "regex" } matchingC <- calc_env$matchingC labelFeatures <- calc_env$labelFeatures - labelOrthoFeatures <- calc_env$labelOrthoFeatures # arg/env checking if (!(facC %in% names(smpl_metadata))) { @@ -308,7 +365,6 @@ , x_algorithm = algoC , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) , x_env = calc_env @@ -349,8 +405,6 @@ # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) - # TODO delete next line displaying currently-processed column - # cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match)) # choose only samples with one of the two factors for this column chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) predictor <- smpl_metadata_facC[chosen_samples] @@ -365,7 +419,6 @@ , x_algorithm = algoC , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) , x_env = calc_env @@ -402,7 +455,6 @@ } chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) fctr_lvl_2 <- "other" - # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) if (length(unique(chosen_samples)) < 1) { progress_action("NOTHING TO PLOT...") @@ -419,7 +471,6 @@ , x_algorithm = algoC , x_prefix = "Features" , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) , x_env = calc_env @@ -434,7 +485,6 @@ did_plot <<- TRUE } } - #print("baz") "dummy" # need to return a value; otherwise combn fails with an error } ) @@ -448,7 +498,6 @@ fctr_lvl_1 <- x[1] fctr_lvl_2 <- x[2] chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) - # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) if (length(unique(chosen_samples)) < 1) { progress_action("NOTHING TO PLOT...") @@ -465,7 +514,6 @@ , x_algorithm = algoC , x_prefix = "Features" , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) , x_env = calc_env @@ -480,7 +528,6 @@ did_plot <<- TRUE } } - #print("baz") "dummy" # need to return a value; otherwise combn fails with an error } ) @@ -500,18 +547,22 @@ # Wiklund_2008 doi:10.1021/ac0713510 # Galindo_Prieto_2014 doi:10.1002/cem.2627 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R -cor_vs_cov <- function(matrix_x, ropls_x) { +cor_vs_cov <- function(matrix_x, ropls_x, parallel_x = TRUE) { x_class <- class(ropls_x) if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) ) } result <- list() + result$projection <- projection <- if (parallel_x) 1 else 2 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS if ( ropls_x@suppLs$algoC == "nipals") { # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) - score_matrix <- ropls_x@scoreMN + if (parallel_x) + score_matrix <- ropls_x@scoreMN + else + score_matrix <- ropls_x@orthoScoreMN score_matrix_transposed <- t(score_matrix) score_matrix_magnitude <- mag(score_matrix) result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) @@ -520,7 +571,10 @@ # WARNING - untested code - I don't have test data to exercise this branch # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F - score_matrix <- ropls_x@scoreMN + if (parallel_x) + score_matrix <- ropls_x@scoreMN + else + score_matrix <- ropls_x@orthoScoreMN score_matrix_transposed <- t(score_matrix) cov_divisor <- nrow(matrix_x) - 1 result$covariance <- sapply( @@ -540,8 +594,8 @@ } ) } - result$correlation <- result$correlation[1,,drop = TRUE] - result$covariance <- result$covariance[1,,drop = TRUE] + result$correlation <- result$correlation[ 1, , drop = TRUE ] + result$covariance <- result$covariance [ 1, , drop = TRUE ] # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) @@ -556,7 +610,6 @@ feature_count <- length(ropls_x@vipVn) result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) - # print(sprintf("sd(covariance) = %f; sd(correlation) = %f", sd(result$covariance), sd(result$correlation))) superresult <- list() if (length(result$vip4o) == 0) result$vip4o <- NA greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 ) @@ -565,6 +618,7 @@ , factorLevel1 = result$level1 , factorLevel2 = result$level2 , greaterLevel = greaterLevel + , projection = result$projection , correlation = result$correlation , covariance = result$covariance , vip4p = result$vip4p @@ -574,6 +628,7 @@ , row.names = NULL ) rownames(superresult$tsv1) <- superresult$tsv1$featureID + superresult$projection <- result$projection superresult$covariance <- result$covariance superresult$correlation <- result$correlation superresult$vip4p <- result$vip4p @@ -581,7 +636,6 @@ superresult$loadp <- result$loadp superresult$loado <- result$loado superresult$details <- result - # #print(superresult$tsv1) result$superresult <- superresult # Include thise in case future consumers of this routine want to use it in currently unanticipated ways result$oplsda <- ropls_x @@ -589,4 +643,4 @@ return (superresult) } - +# vim: sw=2 ts=2 et :