Mercurial > repos > eschen42 > w4mcorcov
changeset 5:50f60f94c034 draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit aff1790e25523d038a1e9528de748191c096132f
author | eschen42 |
---|---|
date | Fri, 30 Mar 2018 14:59:19 -0400 |
parents | 8bba31f628da |
children | 7bd523ca1f9a |
files | w4mcorcov.xml w4mcorcov_calc.R w4mcorcov_input.R w4mcorcov_output.R w4mcorcov_util.R w4mcorcov_wrapper.R |
diffstat | 6 files changed, 293 insertions(+), 265 deletions(-) [+] |
line wrap: on
line diff
--- a/w4mcorcov.xml Sun Mar 04 14:51:42 2018 -0500 +++ b/w4mcorcov.xml Fri Mar 30 14:59:19 2018 -0400 @@ -1,11 +1,23 @@ -<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.7"> +<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.8"> <description>OPLS-DA Contrasts of Univariate Results</description> + <macros> + <xml name="paramPairSigFeatOnly"> + <param + name="pairSigFeatOnly" + type="boolean" + checked="true" + truevalue="TRUE" + falsevalue="FALSE" + label="Retain only pairwise-significant features" + help="When this option is set to 'Yes', analysis will be performed including only features that differ significantly for the pair of levels being contrasted; when set to 'No', any feature that varies significantly across all levels will be included (i.e., exclude any feature that is not significantly different across all levels). See examples below." /> + </xml> + </macros> + <requirements> <requirement type="package">r-batch</requirement> <requirement type="package">bioconductor-ropls</requirement> - <!-- <requirement type="package">r-foreach</requirement> --> </requirements> <stdio> @@ -17,13 +29,17 @@ dataMatrix_in "$dataMatrix_in" sampleMetadata_in "$sampleMetadata_in" variableMetadata_in "$variableMetadata_in" - tesC "$tesC" facC "$facC" - pairSigFeatOnly "$pairSigFeatOnly" + #if str( $signif_test.tesC ) == "none": + tesC "none" + pairSigFeatOnly "FALSE" + #else: + tesC "$signif_test.tesC" + pairSigFeatOnly "$signif_test.pairSigFeatOnly" + #end if levCSV '$levCSV' matchingC '$matchingC' labelFeatures '$labelFeatures' - labelOrthoFeatures '$labelOrthoFeatures' contrast_detail '$contrast_detail' contrast_corcov '$contrast_corcov' contrast_salience '$contrast_salience' @@ -34,21 +50,28 @@ <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="Samples x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" /> <param name="variableMetadata_in" label="Variable metadata file (ideally from Univariate)" type="data" format="tabular" help="Features x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" /> <param name="facC" label="Factor of interest" type="text" help="REQUIRED - The name of the column of sampleMetadata corresponding to the qualitative variable used to define the contrasts. Except when the 'Univariate Significance-test' is set to 'none', this also must be a portion of the column names in the variableMetadata file."/> - <param name="tesC" label="Univariate significance-test" type="select" help="Either 'none' or the name of the statistical test that was run by the 'Univariate' tool to produce the variableMetadata file; that name must also be a portion of the column names in that file."> - <option value="none">none - Display all features from variableMetadata (rather than choosing a subset based on significance in univariate testing)</option> - <option value="ttest">ttest - Student's t-test (parametric test, qualitative factor with exactly 2 levels)</option> - <option value="anova">anova - Analysis of variance (parametric test, qualitative factor with more than 2 levels)</option> - <option value="wilcoxon">wilcoxon - Wilcoxon rank test (nonparametric test, qualitative factor with exactly 2 levels)</option> - <option value="kruskal">kruskal - Kruskal-Wallis rank test (nonparametric test, qualitative factor with more than 2 levels)</option> - </param> - <param - name="pairSigFeatOnly" - type="boolean" - checked="true" - truevalue="TRUE" - falsevalue="FALSE" - label="Retain only pairwise-significant features" - help="When 'none' is chosen as the test, all features are included in the analysis (i. e., this parameter is ignored). Otherwise, when this option is set to 'Yes', analysis will be performed including only features that differ significantly for the pair of levels being contrasted; when set to 'No', any feature that varies significantly across all levels will be included (i.e., exclude any feature that is not significantly different across all levels). See examples below."/> + <conditional name="signif_test"> + <param name="tesC" label="Univariate significance-test" type="select" help="Either 'none' or the name of the statistical test that was run by the 'Univariate' tool to produce the variableMetadata file; that name must also be a portion of the column names in that file."> + <option value="none">none - Display all features from variableMetadata (rather than choosing a subset based on significance in univariate testing)</option> + <option value="ttest">ttest - Student's t-test (parametric test, qualitative factor with exactly 2 levels)</option> + <option value="anova">anova - Analysis of variance (parametric test, qualitative factor with more than 2 levels)</option> + <option value="wilcoxon">wilcoxon - Wilcoxon rank test (nonparametric test, qualitative factor with exactly 2 levels)</option> + <option value="kruskal">kruskal - Kruskal-Wallis rank test (nonparametric test, qualitative factor with more than 2 levels)</option> + </param> + <when value="none" /> + <when value="ttest"> + <expand macro="paramPairSigFeatOnly" /> + </when> + <when value="anova"> + <expand macro="paramPairSigFeatOnly" /> + </when> + <when value="wilcoxon"> + <expand macro="paramPairSigFeatOnly" /> + </when> + <when value="kruskal"> + <expand macro="paramPairSigFeatOnly" /> + </when> + </conditional> <param name="levCSV" label="Levels of interest" type="text" value = "*" help="Comma-separated level-names (or comma-less regular expressions to match level-names) to consider in analysis; must match at least two levels; levels must be non-numeric; may include wild cards or regular expressions. Note that extra space characters will affect results - 'a,b' is correct, but 'a , b' is not and may fail or give different results."> <sanitizer> <valid initial="string.letters"> @@ -81,14 +104,6 @@ <option value="regex">use regular expressions for matching level-names</option> </param> <param name="labelFeatures" type="text" value="3" label="How many features having extreme loadings should be labelled on cov-vs.-cor plot" help="Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features or '0' to label no features; this choice has no effect on the OPLS-DA loadings plot."/> - <param - name="labelOrthoFeatures" - type="boolean" - checked="false" - truevalue="TRUE" - falsevalue="FALSE" - label="Label features having extreme orthogonal loadings" - help="When using the preceding parameter to label only features at the loading-extremess in the cor-vs.-cov plot, use 'no' here to label only features having extreme parallel loadings (loadp); this is the default. Choose 'yes' to add labels also to features having extreme orthogonal loadings (both loado and loadp); this may clutter the plot."/> </inputs> <outputs> @@ -130,7 +145,6 @@ <param name="facC" value="k10"/> <param name="pairSigFeatOnly" value="FALSE"/> <param name="labelFeatures" value="3"/> - <param name="labelOrthogonalFeatures" value="FALSE"/> <param name="levCSV" value="k[12],k[3-4]"/> <param name="matchingC" value="regex"/> <output name="contrast_corcov"> @@ -194,7 +208,6 @@ <param name="facC" value="k10"/> <param name="pairSigFeatOnly" value="TRUE"/> <param name="labelFeatures" value="3"/> - <param name="labelOrthogonalFeatures" value="TRUE"/> <param name="levCSV" value="k[12],k[3-4]"/> <param name="matchingC" value="regex"/> <output name="contrast_corcov"> @@ -254,9 +267,7 @@ <param name="variableMetadata_in" value="input_variableMetadata.tsv"/> <param name="tesC" value="none"/> <param name="facC" value="k10"/> - <param name="pairSigFeatOnly" value="TRUE"/> <param name="labelFeatures" value="3"/> - <param name="labelOrthogonalFeatures" value="FALSE"/> <param name="levCSV" value="k[12],k[3-4]"/> <param name="matchingC" value="regex"/> <output name="contrast_corcov"> @@ -312,9 +323,6 @@ </output> </test> </tests> - <!-- - .. |reg| unicode:: U+000AE .. REGISTERED SIGN - see http://docutils.sourceforge.net/docutils/parsers/rst/include/isonum.txt or /usr/share/docutils/parsers/rst/include/isonum.txt - --> <help><![CDATA[ **Run PLS-DA Contrasts of Univariate Results** @@ -425,9 +433,9 @@ | [IN] Retain only pairwise-significant features + | *Note that when 'Test' is 'none', all features are included in the analysis and this parameter is not settable.* | When **true**, for each contrast of two levels, include only those features which pass the significance threshold for that contrast. Choosing true results in an OPLS-DA model that better reflects and visualizes the difference detected by univariate analysis, with somewhat increased reliability of prediction (as assessed by cross-validation). | When **false**, include all features that pass the significance threshold when testing for difference across all factor-levels. This choice produces a plot that displays more features but is not necessarily more informative. - | *Note that when 'Test' is 'none', all features are included in the analysis and this parameter has no effect.* | [IN] Levels of interest @@ -442,10 +450,6 @@ | Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features; this choice has no effect on the OPLS-DA loadings plot. | -[IN] Label features with extreme loado - | If the previous parameter has limited the the number of features to be labelled at each of the loading-extremes, then the extreme values for both loado and loadp will be labelled when this parameter is set to 'yes'; otherwise (in the default case) only extreme values for loadp will be lableld. The default was chosen to make the plot less cluttered. - | - [OUT] Contrast-detail output PDF | Several plots for each two-projection OPLS-DA analysis: @@ -562,8 +566,6 @@ +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Number of features having extreme loadings | ALL | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Label feat. having extreme orth. loadings | Yes | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov.tsv | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience.tsv | @@ -588,8 +590,6 @@ +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Number of features having extreme loadings | 5 | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Label feat. having extreme orth. loadings | No | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov_all.tsv | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience_all.tsv | @@ -606,16 +606,12 @@ +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Univariate Significance-Test | none | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Retain only pairwise-significant features | No | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Levels of interest | k[12],k[3-4] | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Level-name matching | use regular expressions for matching level-names | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Number of features having extreme loadings | 0 | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Label feat. having extreme orth. loadings | Yes | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov_global.tsv | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience_global.tsv | @@ -632,16 +628,12 @@ +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Univariate Significance-Test | none | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Retain only pairwise-significant features | No | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Levels of interest | low,high | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Level-name matching | use regular expressions for matching level-names | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Number of features having extreme loadings | 3 | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Label feat. having extreme orth. loadings | Yes | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov_lohi.tsv | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience_lohi.tsv | @@ -659,25 +651,29 @@ Release notes ------------- +0.98.8 + +- new feature: Replace loadings plot with correlation-versus-covariance plot for orthogonal features, i.e., the consistency of features influencing within-treatment variation (which is linearly related to the loading of the orthogonal projection) versus consistency. This eliminates the need for the parameter to suppress labels for features with extreme orthogonal loadings + 0.98.7 -- bug fix: handle case of a treatment level with only one sample. +- bug fix: Handle case of a treatment level with only one sample. 0.98.6 -- bug fix: set 'crossvalI' param (of R function 'ropls::opls') to the number of samples when the there are fewer than seven samples. +- bug fix: Set 'crossvalI' param (of R function 'ropls::opls') to the number of samples when the there are fewer than seven samples. 0.98.5 -- bug fix: fit feature-labels within clipping region of cor-vs.cov plot +- bug fix: Fit feature-labels within clipping region of cor-vs.cov plot - new feature: optionally (and by default) suppress labels for features with extreme orthogonal loadings 0.98.3 -- add support for two-level factors -- add adjusted mz and rt to output tables -- allow explicitly setting the number of features with extreme loadings to be labelled on the correlation vs. covariance plot -- add loadings to corcov table +- Add support for two-level factors +- Add adjusted mz and rt to output tables +- Allow explicitly setting the number of features with extreme loadings to be labelled on the correlation vs. covariance plot +- Add loadings to corcov table 0.98.2 @@ -686,6 +682,7 @@ ]]></help> <citations> + <!-- this tool --> <citation type="doi">10.5281/zenodo.1034784</citation> <!-- Galindo_Prieto_2014 Variable influence on projection (VIP) for OPLS --> <citation type="doi">10.1002/cem.2627</citation> @@ -719,6 +716,6 @@ <citation type="doi">10.1021/ac0713510</citation> </citations> <!-- - vim:noet:sw=4:ts=4 + vim:noet:sw=2:ts=2 --> </tool>
--- a/w4mcorcov_calc.R Sun Mar 04 14:51:42 2018 -0500 +++ b/w4mcorcov_calc.R Fri Mar 30 14:59:19 2018 -0400 @@ -7,7 +7,7 @@ #### OPLS-DA algoC <- "nipals" -do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_show_loado_labels, x_progress = print, x_env, x_crossval_i) { +do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_progress = print, x_env, x_crossval_i) { off <- function(x) if (x_show_labels == "0") 0 else x if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) { my_oplsda <- opls( @@ -23,83 +23,140 @@ my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] - my_cor_vs_cov <- cor_vs_cov( - matrix_x = x_dataMatrix - , ropls_x = my_oplsda - ) - with( - my_cor_vs_cov - , { - min_x <- min(covariance) - max_x <- max(covariance) - lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) - covariance <- covariance / lim_x - lim_x <- 1.2 - main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) - main_cex <- min(1.0, 46.0/nchar(main_label)) - # "It is generally accepted that a variable should be selected if vj>1, [27–29], - # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." - # (Mehmood 2012 doi:10.1186/1748-7188-6-27) - vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) - alpha <- 0.1 + 0.4 * vipco - red <- as.numeric(correlation > 0) * vipco - blue <- as.numeric(correlation < 0) * vipco - plus_cor <- correlation - plus_cov <- covariance - cex <- 0.75 - plot( - y = plus_cor - , x = plus_cov - , type="p" - , xlim=c( -lim_x - off(0.2), lim_x + off(0.2) ) - , ylim=c( -1.0 - off(0.2), 1.0 + off(0.2) ) - , xlab = sprintf("relative covariance(feature,t1)") - , ylab = sprintf("correlation(feature,t1)") - , main = main_label - , cex.main = main_cex - , cex = cex - , pch = 16 - , col = rgb(blue = blue, red = red, green = 0, alpha = alpha) + do_s_plot <- function(parallel_x) { + my_cor_vs_cov <- cor_vs_cov( + matrix_x = x_dataMatrix + , ropls_x = my_oplsda + , parallel_x = parallel_x ) - low_x <- -0.7 * lim_x - high_x <- 0.7 * lim_x - text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") - text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") - if ( x_show_labels != "0" ) { - my_loadp <- loadp - my_loado <- loado - names(my_loadp) <- tsv1$featureID - names(my_loado) <- tsv1$featureID - if ( x_show_labels == "ALL" ) { - n_labels <- length(loadp) + with( + my_cor_vs_cov + , { + min_x <- min(covariance) + max_x <- max(covariance) + lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) + covariance <- covariance / lim_x + lim_x <- 1.2 + # "It is generally accepted that a variable should be selected if vj>1, [27–29], + # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." + # (Mehmood 2012 doi:10.1186/1748-7188-6-27) + plus_cor <- correlation + plus_cov <- covariance + cex <- 0.75 + which_projection <- if (projection == 1) "t1" else "o1" + which_loading <- if (projection == 1) "parallel" else "orthogonal" + if (projection == 1) { + my_xlab <- "relative covariance(feature,t1)" + my_x <- plus_cov + my_ylab <- "correlation(feature,t1) [~ parallel loading]" + my_y <- plus_cor + my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) + my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) + my_load_distal <- loadp + my_load_proximal <- loado + vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) + red <- as.numeric(correlation > 0) * vipcp + blue <- as.numeric(correlation < 0) * vipcp + alpha <- 0.1 + 0.4 * vipcp + my_col = rgb(blue = blue, red = red, green = 0, alpha = alpha) + main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) } else { - n_labels <- as.numeric(x_show_labels) + my_xlab <- "relative covariance(feature,to1)" + my_x <- -plus_cov + my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) + my_ylab <- "correlation(feature,to1) [~ orthogonal loading]" + my_y <- plus_cor + my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) + my_load_distal <- loado + my_load_proximal <- loadp + vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) + alpha <- 0.1 + 0.4 * vipco + my_col = rgb(blue = 0, red = 0, green = 0, alpha = alpha) + main_label <- sprintf("Features influencing orthogonal projection for level %s versus %s", fctr_lvl_1, fctr_lvl_2) } - n_labels <- min( n_labels, (1 + length(loadp)) / 2 ) - labels_to_show <- c( - names(head(sort(my_loadp),n = n_labels)) - , names(tail(sort(my_loadp),n = n_labels)) + main_cex <- min(1.0, 46.0/nchar(main_label)) + my_feature_label_slant <- -30 # slant feature labels 30 degrees downward + plot( + y = my_y + , x = my_x + , type = "p" + , xlim = my_xlim + , ylim = my_ylim + , xlab = my_xlab + , ylab = my_ylab + , main = main_label + , cex.main = main_cex + , cex = cex + , pch = 16 + , col = my_col ) - if ( x_show_loado_labels ) { - labels_to_show <- c( - labels_to_show - , names(head(sort(my_loado),n = n_labels)) - , names(tail(sort(my_loado),n = n_labels)) - ) + low_x <- -0.7 * lim_x + high_x <- 0.7 * lim_x + if (projection == 1) { + text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") + text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") } - labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" )) - text( - y = plus_cor - 0.013 - , x = plus_cov + 0.020 - , cex = 0.4 - , labels = labels - , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # rgb(blue = blue, red = red, green = 0, alpha = 0.2 + 0.8 * alpha) - , srt = -30 # slant 30 degrees downward - , adj = 0 # left-justified - ) + if ( x_show_labels != "0" ) { + names(my_load_distal) <- tsv1$featureID + names(my_load_proximal) <- tsv1$featureID + if ( x_show_labels == "ALL" ) { + n_labels <- length(my_load_distal) + } else { + n_labels <- as.numeric(x_show_labels) + } + n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) + labels_to_show <- c( + names(head(sort(my_load_distal),n = n_labels)) + , names(tail(sort(my_load_distal),n = n_labels)) + ) + labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" )) + x_text_offset <- 0.024 + y_text_offset <- (if (projection == 1) 1 else -1) * -0.017 + label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { + print("str(x_arg)") + print(str(x_arg)) + print("str(y_arg)") + print(str(y_arg)) + print("str(labels_arg)") + print(str(labels_arg)) + text( + y = y_arg + , x = x_arg + x_text_offset + , cex = 0.4 + , labels = labels_arg + , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels + , srt = slant_arg + , adj = 0 # left-justified + ) + } + my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant + if (length(my_x) > 1) { + label_features( + x_arg = my_x [my_x > 0] + , y_arg = my_y [my_x > 0] - y_text_offset + , labels_arg = labels[my_x > 0] + , slant_arg = -my_slant + ) + label_features( + x_arg = my_x [my_x < 0] + , y_arg = my_y [my_x < 0] + y_text_offset + , labels_arg = labels[my_x < 0] + , slant_arg = my_slant + ) + } else { + label_features( + x_arg = my_x + , y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset + , labels_arg = labels + , slant_arg = (if (my_x > 1) -1 else 1) * my_slant + ) + } + } } - } - ) + ) + return (my_cor_vs_cov) + } + my_cor_vs_cov <- do_s_plot( parallel_x = TRUE ) typeVc <- c("correlation", # 1 "outlier", # 2 "overview", # 3 @@ -120,28 +177,29 @@ } for (my_type in my_typevc) { if (my_type %in% typeVc) { - # print(sprintf("plotting type %s", my_type)) tryCatch({ - plot( - x = my_oplsda - , typeVc = my_type - , parCexN = 0.4 - , parDevNewL = FALSE - , parLayL = TRUE - , parEllipsesL = TRUE - ) + if ( my_type != "x-loading" ) { + plot( + x = my_oplsda + , typeVc = my_type + , parCexN = 0.4 + , parDevNewL = FALSE + , parLayL = TRUE + , parEllipsesL = TRUE + ) + } else { + do_s_plot( parallel_x = FALSE ) + } }, error = function(e) { - x_progress(sprintf("factor level %s or %s may have only one sample", fctr_lvl_1, fctr_lvl_2)) + x_progress( sprintf( "factor level %s or %s may have only one sample - %s", fctr_lvl_1, fctr_lvl_2, e$message ) ) }) } else { - # print("plotting dummy graph") plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") text(x=1, y=1, labels="no orthogonal projection is possible") } } return (my_cor_vs_cov) } else { - # x_progress(sprintf("x_is_match = %s, ncol(x_dataMatrix) = %d, length(unique(x_predictor)) = %d",x_is_match, ncol(x_dataMatrix), length(unique(x_predictor)))) return (NULL) } } @@ -174,7 +232,6 @@ # matchingC is one of { "none", "wildcard", "regex" } matchingC <- calc_env$matchingC labelFeatures <- calc_env$labelFeatures - labelOrthoFeatures <- calc_env$labelOrthoFeatures # arg/env checking if (!(facC %in% names(smpl_metadata))) { @@ -308,7 +365,6 @@ , x_algorithm = algoC , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) , x_env = calc_env @@ -349,8 +405,6 @@ # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) - # TODO delete next line displaying currently-processed column - # cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match)) # choose only samples with one of the two factors for this column chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) predictor <- smpl_metadata_facC[chosen_samples] @@ -365,7 +419,6 @@ , x_algorithm = algoC , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) , x_env = calc_env @@ -402,7 +455,6 @@ } chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) fctr_lvl_2 <- "other" - # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) if (length(unique(chosen_samples)) < 1) { progress_action("NOTHING TO PLOT...") @@ -419,7 +471,6 @@ , x_algorithm = algoC , x_prefix = "Features" , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) , x_env = calc_env @@ -434,7 +485,6 @@ did_plot <<- TRUE } } - #print("baz") "dummy" # need to return a value; otherwise combn fails with an error } ) @@ -448,7 +498,6 @@ fctr_lvl_1 <- x[1] fctr_lvl_2 <- x[2] chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) - # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) if (length(unique(chosen_samples)) < 1) { progress_action("NOTHING TO PLOT...") @@ -465,7 +514,6 @@ , x_algorithm = algoC , x_prefix = "Features" , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) , x_env = calc_env @@ -480,7 +528,6 @@ did_plot <<- TRUE } } - #print("baz") "dummy" # need to return a value; otherwise combn fails with an error } ) @@ -500,18 +547,22 @@ # Wiklund_2008 doi:10.1021/ac0713510 # Galindo_Prieto_2014 doi:10.1002/cem.2627 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R -cor_vs_cov <- function(matrix_x, ropls_x) { +cor_vs_cov <- function(matrix_x, ropls_x, parallel_x = TRUE) { x_class <- class(ropls_x) if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) ) } result <- list() + result$projection <- projection <- if (parallel_x) 1 else 2 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS if ( ropls_x@suppLs$algoC == "nipals") { # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) - score_matrix <- ropls_x@scoreMN + if (parallel_x) + score_matrix <- ropls_x@scoreMN + else + score_matrix <- ropls_x@orthoScoreMN score_matrix_transposed <- t(score_matrix) score_matrix_magnitude <- mag(score_matrix) result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) @@ -520,7 +571,10 @@ # WARNING - untested code - I don't have test data to exercise this branch # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F - score_matrix <- ropls_x@scoreMN + if (parallel_x) + score_matrix <- ropls_x@scoreMN + else + score_matrix <- ropls_x@orthoScoreMN score_matrix_transposed <- t(score_matrix) cov_divisor <- nrow(matrix_x) - 1 result$covariance <- sapply( @@ -540,8 +594,8 @@ } ) } - result$correlation <- result$correlation[1,,drop = TRUE] - result$covariance <- result$covariance[1,,drop = TRUE] + result$correlation <- result$correlation[ 1, , drop = TRUE ] + result$covariance <- result$covariance [ 1, , drop = TRUE ] # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) @@ -556,7 +610,6 @@ feature_count <- length(ropls_x@vipVn) result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) - # print(sprintf("sd(covariance) = %f; sd(correlation) = %f", sd(result$covariance), sd(result$correlation))) superresult <- list() if (length(result$vip4o) == 0) result$vip4o <- NA greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 ) @@ -565,6 +618,7 @@ , factorLevel1 = result$level1 , factorLevel2 = result$level2 , greaterLevel = greaterLevel + , projection = result$projection , correlation = result$correlation , covariance = result$covariance , vip4p = result$vip4p @@ -574,6 +628,7 @@ , row.names = NULL ) rownames(superresult$tsv1) <- superresult$tsv1$featureID + superresult$projection <- result$projection superresult$covariance <- result$covariance superresult$correlation <- result$correlation superresult$vip4p <- result$vip4p @@ -581,7 +636,6 @@ superresult$loadp <- result$loadp superresult$loado <- result$loado superresult$details <- result - # #print(superresult$tsv1) result$superresult <- superresult # Include thise in case future consumers of this routine want to use it in currently unanticipated ways result$oplsda <- ropls_x @@ -589,4 +643,4 @@ return (superresult) } - +# vim: sw=2 ts=2 et :
--- a/w4mcorcov_input.R Sun Mar 04 14:51:42 2018 -0500 +++ b/w4mcorcov_input.R Fri Mar 30 14:59:19 2018 -0400 @@ -36,7 +36,7 @@ my_failure_action( sprintf("bad parameter xcms_data_type '%s'", xcms_data_type) ) return ( FALSE ) } - if ( is.character(xcms_data_in) ){ + if ( is.character(xcms_data_in) ) { # case: xcms_data_in is a path to a file xcms_data_input_env <- read_data_frame( xcms_data_in, sprintf("%s input", xcms_data_type) ) if (!xcms_data_input_env$success) { @@ -44,30 +44,6 @@ return ( FALSE ) } return ( xcms_data_input_env$data ) - # commenting out pasted code that is not tested here - # } else if ( is.data.frame(xcms_data_in) || is.matrix(xcms_data_in) ) { - # # case: xcms_data_in is a data.frame or matrix - # return(xcms_data_in) - # } else if ( is.list(xcms_data_in) || is.environment(xcms_data_in) ) { - # # NOTE WELL: is.list succeeds for data.frame, so the is.data.frame test must appear before the is.list test - # # case: xcms_data_in is a list - # if ( ! exists(xcms_data_type, where = xcms_data_in) ) { - # my_failure_action(sprintf("%s xcms_data_in is missing member '%s'"), ifelse(is.environment(xcms_data_in),"environment","list"), xcms_data_type) - # return ( FALSE ) - # } - # prospect <- getElement(name = xcms_data_type, object = xcms_data_in) - # if ( ! is.data.frame(prospect) && ! is.matrix(prospect) ) { - # utils::str("list - str(prospect)") - # utils::str(prospect) - # if ( is.list(xcms_data_in) ) { - # my_failure_action(sprintf("the first member of xcms_data_in['%s'] is neither a data.frame nor a matrix but is a %s", xcms_data_type, typeof(prospect))) - # } else { - # my_failure_action(sprintf("the first member of xcms_data_in$%s is neither a data.frame nor a matrix but is a %s", xcms_data_type, typeof(prospect))) - # } - # return ( prospect ) - # } - # # stop("stopping here for a snapshot") - # return ( prospect ) } else { # case: xcms_data_in is invalid my_failure_action( sprintf("xcms_data_in has unexpected type %s", typeof(xcms_data_in)) )
--- a/w4mcorcov_output.R Sun Mar 04 14:51:42 2018 -0500 +++ b/w4mcorcov_output.R Fri Mar 30 14:59:19 2018 -0400 @@ -0,0 +1,77 @@ + +# turn off all plotting devices +dev.off.all <- function() { + while (!is.null(dev.list())) { dev.off() } +} + +# capture plot and write to PDF; then close any devices opened in the process +plot2pdf <- function( + file.name +, plot.function +, width = 12 +, height = 12 +) { + # capture plot and write to PDF + cur.dev <- dev.list() + filename <- file.name + pdf(file = filename, width = width, height = height) + plot.function() + # close any devices opened in the process + dev.off() + if (is.null(cur.dev)) { + dev.off.all() + } else { + while ( length(dev.list()) > length(cur.dev) ) { dev.off() } + } +} + +# print and capture plot and write to PDF; then close any devices opened in the process +# This is needed for ggplot which does not print the plot when invoked within a function. +print2pdf <- function( + file.name +, plot.function +, width = 12 +, height = 12 +) { + plot2pdf( + file.name = file.name + , width = width + , height = height + , plot.function = function() { + print(plot.function()) + } + ) +} + +iso8601.znow <- function() +{ + strftime(as.POSIXlt(Sys.time(), "UTC"), "%Y-%m-%dT%H:%M:%SZ") +} + +# pdf.name <- function(name) +# { +# paste0(name, "_", iso8601.filename.fragment(), ".pdf") +# } +# +# tsv.name <- function(name) +# { +# paste0(name, "_", iso8601.filename.fragment(), ".tsv") +# } +# + +tsv_action_factory <- function(file, colnames, append) { + return ( + function(tsv) { + write.table( + x = tsv + , file = file + , sep = "\t" + , quote = FALSE + , row.names = FALSE + , col.names = colnames + , append = append + ) + } + ) +} +
--- a/w4mcorcov_util.R Sun Mar 04 14:51:42 2018 -0500 +++ b/w4mcorcov_util.R Fri Mar 30 14:59:19 2018 -0400 @@ -21,66 +21,8 @@ return (retval) } -# turn off all plotting devices -dev.off.all <- function() { - while (!is.null(dev.list())) { dev.off() } -} - -# capture plot and write to PDF; then close any devices opened in the process -plot2pdf <- function( - file.name -, plot.function -, width = 12 -, height = 12 -) { - # capture plot and write to PDF - cur.dev <- dev.list() - filename <- file.name - pdf(file = filename, width = width, height = height) - plot.function() - # close any devices opened in the process - dev.off() - if (is.null(cur.dev)) { - dev.off.all() - } else { - while ( length(dev.list()) > length(cur.dev) ) { dev.off() } - } -} -# print and capture plot and write to PDF; then close any devices opened in the process -# This is needed for ggplot which does not print the plot when invoked within a function. -print2pdf <- function( - file.name -, plot.function -, width = 12 -, height = 12 -) { - plot2pdf( - file.name = file.name - , width = width - , height = height - , plot.function = function() { - print(plot.function()) - } - ) -} - -iso8601.znow <- function() -{ - strftime(as.POSIXlt(Sys.time(), "UTC"), "%Y-%m-%dT%H:%M:%SZ") -} - -# pdf.name <- function(name) -# { -# paste0(name, "_", iso8601.filename.fragment(), ".pdf") -# } -# -# tsv.name <- function(name) -# { -# paste0(name, "_", iso8601.filename.fragment(), ".tsv") -# } -# -# # pseudo-inverse - computational inverse non-square matrix a +# # pseudo-inverse - computational inverse of non-square matrix a # p.i <- function(a) { # solve(t(a) %*% a) %*% t(a) # }
--- a/w4mcorcov_wrapper.R Sun Mar 04 14:51:42 2018 -0500 +++ b/w4mcorcov_wrapper.R Fri Mar 30 14:59:19 2018 -0400 @@ -63,7 +63,6 @@ my_env$contrast_detail <- as.character(argVc["contrast_detail"]) my_env$contrast_corcov <- as.character(argVc["contrast_corcov"]) my_env$contrast_salience <- as.character(argVc["contrast_salience"]) -# print(sprintf("contrast_salience: %s", my_env$contrast_salience)) # other parameters @@ -73,7 +72,6 @@ my_env$levCSV <- as.character(argVc["levCSV"]) my_env$matchingC <- as.character(argVc["matchingC"]) my_env$labelFeatures <- as.character(argVc["labelFeatures"]) # number of features to label at each extreme of the loadings or 'ALL' -my_env$labelOrthoFeatures <- as.logical(argVc["labelOrthoFeatures"]) label_features <- my_env$labelFeatures labelfeatures_check <- TRUE @@ -93,22 +91,6 @@ quit(save = "no", status = 10, runLast = TRUE) } -tsv_action_factory <- function(file, colnames, append) { - return ( - function(tsv) { - write.table( - x = tsv - , file = file - , sep = "\t" - , quote = FALSE - , row.names = FALSE - , col.names = colnames - , append = append - ) - } - ) -} - corcov_tsv_colnames <- TRUE corcov_tsv_append <- FALSE corcov_tsv_action <- function(tsv) {