Mercurial > repos > eschen42 > w4mcorcov
comparison w4mcorcov_calc.R @ 5:50f60f94c034 draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit aff1790e25523d038a1e9528de748191c096132f
author | eschen42 |
---|---|
date | Fri, 30 Mar 2018 14:59:19 -0400 |
parents | 8bba31f628da |
children | 7bd523ca1f9a |
comparison
equal
deleted
inserted
replaced
4:8bba31f628da | 5:50f60f94c034 |
---|---|
5 } | 5 } |
6 | 6 |
7 #### OPLS-DA | 7 #### OPLS-DA |
8 algoC <- "nipals" | 8 algoC <- "nipals" |
9 | 9 |
10 do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_show_loado_labels, x_progress = print, x_env, x_crossval_i) { | 10 do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_progress = print, x_env, x_crossval_i) { |
11 off <- function(x) if (x_show_labels == "0") 0 else x | 11 off <- function(x) if (x_show_labels == "0") 0 else x |
12 if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) { | 12 if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) { |
13 my_oplsda <- opls( | 13 my_oplsda <- opls( |
14 x = x_dataMatrix | 14 x = x_dataMatrix |
15 , y = x_predictor | 15 , y = x_predictor |
21 , crossvalI = x_crossval_i | 21 , crossvalI = x_crossval_i |
22 ) | 22 ) |
23 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) | 23 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) |
24 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] | 24 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] |
25 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] | 25 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] |
26 my_cor_vs_cov <- cor_vs_cov( | 26 do_s_plot <- function(parallel_x) { |
27 matrix_x = x_dataMatrix | 27 my_cor_vs_cov <- cor_vs_cov( |
28 , ropls_x = my_oplsda | 28 matrix_x = x_dataMatrix |
29 , ropls_x = my_oplsda | |
30 , parallel_x = parallel_x | |
31 ) | |
32 with( | |
33 my_cor_vs_cov | |
34 , { | |
35 min_x <- min(covariance) | |
36 max_x <- max(covariance) | |
37 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) | |
38 covariance <- covariance / lim_x | |
39 lim_x <- 1.2 | |
40 # "It is generally accepted that a variable should be selected if vj>1, [27–29], | |
41 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." | |
42 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) | |
43 plus_cor <- correlation | |
44 plus_cov <- covariance | |
45 cex <- 0.75 | |
46 which_projection <- if (projection == 1) "t1" else "o1" | |
47 which_loading <- if (projection == 1) "parallel" else "orthogonal" | |
48 if (projection == 1) { | |
49 my_xlab <- "relative covariance(feature,t1)" | |
50 my_x <- plus_cov | |
51 my_ylab <- "correlation(feature,t1) [~ parallel loading]" | |
52 my_y <- plus_cor | |
53 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) | |
54 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) | |
55 my_load_distal <- loadp | |
56 my_load_proximal <- loado | |
57 vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) | |
58 red <- as.numeric(correlation > 0) * vipcp | |
59 blue <- as.numeric(correlation < 0) * vipcp | |
60 alpha <- 0.1 + 0.4 * vipcp | |
61 my_col = rgb(blue = blue, red = red, green = 0, alpha = alpha) | |
62 main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) | |
63 } else { | |
64 my_xlab <- "relative covariance(feature,to1)" | |
65 my_x <- -plus_cov | |
66 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) | |
67 my_ylab <- "correlation(feature,to1) [~ orthogonal loading]" | |
68 my_y <- plus_cor | |
69 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) | |
70 my_load_distal <- loado | |
71 my_load_proximal <- loadp | |
72 vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) | |
73 alpha <- 0.1 + 0.4 * vipco | |
74 my_col = rgb(blue = 0, red = 0, green = 0, alpha = alpha) | |
75 main_label <- sprintf("Features influencing orthogonal projection for level %s versus %s", fctr_lvl_1, fctr_lvl_2) | |
76 } | |
77 main_cex <- min(1.0, 46.0/nchar(main_label)) | |
78 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward | |
79 plot( | |
80 y = my_y | |
81 , x = my_x | |
82 , type = "p" | |
83 , xlim = my_xlim | |
84 , ylim = my_ylim | |
85 , xlab = my_xlab | |
86 , ylab = my_ylab | |
87 , main = main_label | |
88 , cex.main = main_cex | |
89 , cex = cex | |
90 , pch = 16 | |
91 , col = my_col | |
92 ) | |
93 low_x <- -0.7 * lim_x | |
94 high_x <- 0.7 * lim_x | |
95 if (projection == 1) { | |
96 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") | |
97 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") | |
98 } | |
99 if ( x_show_labels != "0" ) { | |
100 names(my_load_distal) <- tsv1$featureID | |
101 names(my_load_proximal) <- tsv1$featureID | |
102 if ( x_show_labels == "ALL" ) { | |
103 n_labels <- length(my_load_distal) | |
104 } else { | |
105 n_labels <- as.numeric(x_show_labels) | |
106 } | |
107 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) | |
108 labels_to_show <- c( | |
109 names(head(sort(my_load_distal),n = n_labels)) | |
110 , names(tail(sort(my_load_distal),n = n_labels)) | |
111 ) | |
112 labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" )) | |
113 x_text_offset <- 0.024 | |
114 y_text_offset <- (if (projection == 1) 1 else -1) * -0.017 | |
115 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { | |
116 print("str(x_arg)") | |
117 print(str(x_arg)) | |
118 print("str(y_arg)") | |
119 print(str(y_arg)) | |
120 print("str(labels_arg)") | |
121 print(str(labels_arg)) | |
122 text( | |
123 y = y_arg | |
124 , x = x_arg + x_text_offset | |
125 , cex = 0.4 | |
126 , labels = labels_arg | |
127 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
128 , srt = slant_arg | |
129 , adj = 0 # left-justified | |
130 ) | |
131 } | |
132 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant | |
133 if (length(my_x) > 1) { | |
134 label_features( | |
135 x_arg = my_x [my_x > 0] | |
136 , y_arg = my_y [my_x > 0] - y_text_offset | |
137 , labels_arg = labels[my_x > 0] | |
138 , slant_arg = -my_slant | |
139 ) | |
140 label_features( | |
141 x_arg = my_x [my_x < 0] | |
142 , y_arg = my_y [my_x < 0] + y_text_offset | |
143 , labels_arg = labels[my_x < 0] | |
144 , slant_arg = my_slant | |
145 ) | |
146 } else { | |
147 label_features( | |
148 x_arg = my_x | |
149 , y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset | |
150 , labels_arg = labels | |
151 , slant_arg = (if (my_x > 1) -1 else 1) * my_slant | |
152 ) | |
153 } | |
154 } | |
155 } | |
29 ) | 156 ) |
30 with( | 157 return (my_cor_vs_cov) |
31 my_cor_vs_cov | 158 } |
32 , { | 159 my_cor_vs_cov <- do_s_plot( parallel_x = TRUE ) |
33 min_x <- min(covariance) | |
34 max_x <- max(covariance) | |
35 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) | |
36 covariance <- covariance / lim_x | |
37 lim_x <- 1.2 | |
38 main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) | |
39 main_cex <- min(1.0, 46.0/nchar(main_label)) | |
40 # "It is generally accepted that a variable should be selected if vj>1, [27–29], | |
41 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." | |
42 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) | |
43 vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) | |
44 alpha <- 0.1 + 0.4 * vipco | |
45 red <- as.numeric(correlation > 0) * vipco | |
46 blue <- as.numeric(correlation < 0) * vipco | |
47 plus_cor <- correlation | |
48 plus_cov <- covariance | |
49 cex <- 0.75 | |
50 plot( | |
51 y = plus_cor | |
52 , x = plus_cov | |
53 , type="p" | |
54 , xlim=c( -lim_x - off(0.2), lim_x + off(0.2) ) | |
55 , ylim=c( -1.0 - off(0.2), 1.0 + off(0.2) ) | |
56 , xlab = sprintf("relative covariance(feature,t1)") | |
57 , ylab = sprintf("correlation(feature,t1)") | |
58 , main = main_label | |
59 , cex.main = main_cex | |
60 , cex = cex | |
61 , pch = 16 | |
62 , col = rgb(blue = blue, red = red, green = 0, alpha = alpha) | |
63 ) | |
64 low_x <- -0.7 * lim_x | |
65 high_x <- 0.7 * lim_x | |
66 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") | |
67 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") | |
68 if ( x_show_labels != "0" ) { | |
69 my_loadp <- loadp | |
70 my_loado <- loado | |
71 names(my_loadp) <- tsv1$featureID | |
72 names(my_loado) <- tsv1$featureID | |
73 if ( x_show_labels == "ALL" ) { | |
74 n_labels <- length(loadp) | |
75 } else { | |
76 n_labels <- as.numeric(x_show_labels) | |
77 } | |
78 n_labels <- min( n_labels, (1 + length(loadp)) / 2 ) | |
79 labels_to_show <- c( | |
80 names(head(sort(my_loadp),n = n_labels)) | |
81 , names(tail(sort(my_loadp),n = n_labels)) | |
82 ) | |
83 if ( x_show_loado_labels ) { | |
84 labels_to_show <- c( | |
85 labels_to_show | |
86 , names(head(sort(my_loado),n = n_labels)) | |
87 , names(tail(sort(my_loado),n = n_labels)) | |
88 ) | |
89 } | |
90 labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" )) | |
91 text( | |
92 y = plus_cor - 0.013 | |
93 , x = plus_cov + 0.020 | |
94 , cex = 0.4 | |
95 , labels = labels | |
96 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # rgb(blue = blue, red = red, green = 0, alpha = 0.2 + 0.8 * alpha) | |
97 , srt = -30 # slant 30 degrees downward | |
98 , adj = 0 # left-justified | |
99 ) | |
100 } | |
101 } | |
102 ) | |
103 typeVc <- c("correlation", # 1 | 160 typeVc <- c("correlation", # 1 |
104 "outlier", # 2 | 161 "outlier", # 2 |
105 "overview", # 3 | 162 "overview", # 3 |
106 "permutation", # 4 | 163 "permutation", # 4 |
107 "predict-train", # 5 | 164 "predict-train", # 5 |
118 } else { | 175 } else { |
119 my_typevc <- c("(dummy)","overview","(dummy)") | 176 my_typevc <- c("(dummy)","overview","(dummy)") |
120 } | 177 } |
121 for (my_type in my_typevc) { | 178 for (my_type in my_typevc) { |
122 if (my_type %in% typeVc) { | 179 if (my_type %in% typeVc) { |
123 # print(sprintf("plotting type %s", my_type)) | |
124 tryCatch({ | 180 tryCatch({ |
125 plot( | 181 if ( my_type != "x-loading" ) { |
126 x = my_oplsda | 182 plot( |
127 , typeVc = my_type | 183 x = my_oplsda |
128 , parCexN = 0.4 | 184 , typeVc = my_type |
129 , parDevNewL = FALSE | 185 , parCexN = 0.4 |
130 , parLayL = TRUE | 186 , parDevNewL = FALSE |
131 , parEllipsesL = TRUE | 187 , parLayL = TRUE |
132 ) | 188 , parEllipsesL = TRUE |
189 ) | |
190 } else { | |
191 do_s_plot( parallel_x = FALSE ) | |
192 } | |
133 }, error = function(e) { | 193 }, error = function(e) { |
134 x_progress(sprintf("factor level %s or %s may have only one sample", fctr_lvl_1, fctr_lvl_2)) | 194 x_progress( sprintf( "factor level %s or %s may have only one sample - %s", fctr_lvl_1, fctr_lvl_2, e$message ) ) |
135 }) | 195 }) |
136 } else { | 196 } else { |
137 # print("plotting dummy graph") | |
138 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | 197 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") |
139 text(x=1, y=1, labels="no orthogonal projection is possible") | 198 text(x=1, y=1, labels="no orthogonal projection is possible") |
140 } | 199 } |
141 } | 200 } |
142 return (my_cor_vs_cov) | 201 return (my_cor_vs_cov) |
143 } else { | 202 } else { |
144 # x_progress(sprintf("x_is_match = %s, ncol(x_dataMatrix) = %d, length(unique(x_predictor)) = %d",x_is_match, ncol(x_dataMatrix), length(unique(x_predictor)))) | |
145 return (NULL) | 203 return (NULL) |
146 } | 204 } |
147 } | 205 } |
148 | 206 |
149 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 | 207 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 |
172 # extract the levels from the environment | 230 # extract the levels from the environment |
173 originalLevCSV <- levCSV <- calc_env$levCSV | 231 originalLevCSV <- levCSV <- calc_env$levCSV |
174 # matchingC is one of { "none", "wildcard", "regex" } | 232 # matchingC is one of { "none", "wildcard", "regex" } |
175 matchingC <- calc_env$matchingC | 233 matchingC <- calc_env$matchingC |
176 labelFeatures <- calc_env$labelFeatures | 234 labelFeatures <- calc_env$labelFeatures |
177 labelOrthoFeatures <- calc_env$labelOrthoFeatures | |
178 | 235 |
179 # arg/env checking | 236 # arg/env checking |
180 if (!(facC %in% names(smpl_metadata))) { | 237 if (!(facC %in% names(smpl_metadata))) { |
181 failure_action(sprintf("bad parameter! Factor name '%s' not found in sampleMetadata", facC)) | 238 failure_action(sprintf("bad parameter! Factor name '%s' not found in sampleMetadata", facC)) |
182 return ( FALSE ) | 239 return ( FALSE ) |
306 , x_predictor = predictor | 363 , x_predictor = predictor |
307 , x_is_match = is_match | 364 , x_is_match = is_match |
308 , x_algorithm = algoC | 365 , x_algorithm = algoC |
309 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" | 366 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" |
310 , x_show_labels = labelFeatures | 367 , x_show_labels = labelFeatures |
311 , x_show_loado_labels = labelOrthoFeatures | |
312 , x_progress = progress_action | 368 , x_progress = progress_action |
313 , x_crossval_i = min(7, length(chosen_samples)) | 369 , x_crossval_i = min(7, length(chosen_samples)) |
314 , x_env = calc_env | 370 , x_env = calc_env |
315 ) | 371 ) |
316 if ( is.null(my_cor_cov) ) { | 372 if ( is.null(my_cor_cov) ) { |
347 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1 | 403 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1 |
348 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 | 404 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 |
349 # only process this column if both factors are members of lvlCSV | 405 # only process this column if both factors are members of lvlCSV |
350 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) | 406 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) |
351 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) | 407 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) |
352 # TODO delete next line displaying currently-processed column | |
353 # cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match)) | |
354 # choose only samples with one of the two factors for this column | 408 # choose only samples with one of the two factors for this column |
355 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | 409 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) |
356 predictor <- smpl_metadata_facC[chosen_samples] | 410 predictor <- smpl_metadata_facC[chosen_samples] |
357 # extract only the significantly-varying features and the chosen samples | 411 # extract only the significantly-varying features and the chosen samples |
358 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) | 412 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) |
363 , x_predictor = predictor | 417 , x_predictor = predictor |
364 , x_is_match = is_match | 418 , x_is_match = is_match |
365 , x_algorithm = algoC | 419 , x_algorithm = algoC |
366 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" | 420 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" |
367 , x_show_labels = labelFeatures | 421 , x_show_labels = labelFeatures |
368 , x_show_loado_labels = labelOrthoFeatures | |
369 , x_progress = progress_action | 422 , x_progress = progress_action |
370 , x_crossval_i = min(7, length(chosen_samples)) | 423 , x_crossval_i = min(7, length(chosen_samples)) |
371 , x_env = calc_env | 424 , x_env = calc_env |
372 ) | 425 ) |
373 if ( is.null(my_cor_cov) ) { | 426 if ( is.null(my_cor_cov) ) { |
400 completed <<- c(completed, fctr_lvl_1) | 453 completed <<- c(completed, fctr_lvl_1) |
401 setdiff(level_union, fctr_lvl_1) | 454 setdiff(level_union, fctr_lvl_1) |
402 } | 455 } |
403 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | 456 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) |
404 fctr_lvl_2 <- "other" | 457 fctr_lvl_2 <- "other" |
405 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) | |
406 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) | 458 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) |
407 if (length(unique(chosen_samples)) < 1) { | 459 if (length(unique(chosen_samples)) < 1) { |
408 progress_action("NOTHING TO PLOT...") | 460 progress_action("NOTHING TO PLOT...") |
409 } else { | 461 } else { |
410 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) | 462 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) |
417 , x_predictor = predictor | 469 , x_predictor = predictor |
418 , x_is_match = is_match | 470 , x_is_match = is_match |
419 , x_algorithm = algoC | 471 , x_algorithm = algoC |
420 , x_prefix = "Features" | 472 , x_prefix = "Features" |
421 , x_show_labels = labelFeatures | 473 , x_show_labels = labelFeatures |
422 , x_show_loado_labels = labelOrthoFeatures | |
423 , x_progress = progress_action | 474 , x_progress = progress_action |
424 , x_crossval_i = min(7, length(chosen_samples)) | 475 , x_crossval_i = min(7, length(chosen_samples)) |
425 , x_env = calc_env | 476 , x_env = calc_env |
426 ) | 477 ) |
427 if ( is.null(my_cor_cov) ) { | 478 if ( is.null(my_cor_cov) ) { |
432 tsv$rt <- rt_lookup(tsv$featureID) | 483 tsv$rt <- rt_lookup(tsv$featureID) |
433 corcov_tsv_action(tsv) | 484 corcov_tsv_action(tsv) |
434 did_plot <<- TRUE | 485 did_plot <<- TRUE |
435 } | 486 } |
436 } | 487 } |
437 #print("baz") | |
438 "dummy" # need to return a value; otherwise combn fails with an error | 488 "dummy" # need to return a value; otherwise combn fails with an error |
439 } | 489 } |
440 ) | 490 ) |
441 } | 491 } |
442 ## pass 2 - contrast each selected level with each of the other levels individually ## | 492 ## pass 2 - contrast each selected level with each of the other levels individually ## |
446 , m = 2 | 496 , m = 2 |
447 , FUN = function(x) { | 497 , FUN = function(x) { |
448 fctr_lvl_1 <- x[1] | 498 fctr_lvl_1 <- x[1] |
449 fctr_lvl_2 <- x[2] | 499 fctr_lvl_2 <- x[2] |
450 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | 500 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) |
451 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) | |
452 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) | 501 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) |
453 if (length(unique(chosen_samples)) < 1) { | 502 if (length(unique(chosen_samples)) < 1) { |
454 progress_action("NOTHING TO PLOT...") | 503 progress_action("NOTHING TO PLOT...") |
455 } else { | 504 } else { |
456 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) | 505 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) |
463 , x_predictor = predictor | 512 , x_predictor = predictor |
464 , x_is_match = is_match | 513 , x_is_match = is_match |
465 , x_algorithm = algoC | 514 , x_algorithm = algoC |
466 , x_prefix = "Features" | 515 , x_prefix = "Features" |
467 , x_show_labels = labelFeatures | 516 , x_show_labels = labelFeatures |
468 , x_show_loado_labels = labelOrthoFeatures | |
469 , x_progress = progress_action | 517 , x_progress = progress_action |
470 , x_crossval_i = min(7, length(chosen_samples)) | 518 , x_crossval_i = min(7, length(chosen_samples)) |
471 , x_env = calc_env | 519 , x_env = calc_env |
472 ) | 520 ) |
473 if ( is.null(my_cor_cov) ) { | 521 if ( is.null(my_cor_cov) ) { |
478 tsv$rt <- rt_lookup(tsv$featureID) | 526 tsv$rt <- rt_lookup(tsv$featureID) |
479 corcov_tsv_action(tsv) | 527 corcov_tsv_action(tsv) |
480 did_plot <<- TRUE | 528 did_plot <<- TRUE |
481 } | 529 } |
482 } | 530 } |
483 #print("baz") | |
484 "dummy" # need to return a value; otherwise combn fails with an error | 531 "dummy" # need to return a value; otherwise combn fails with an error |
485 } | 532 } |
486 ) | 533 ) |
487 } else { | 534 } else { |
488 progress_action("NOTHING TO PLOT....") | 535 progress_action("NOTHING TO PLOT....") |
498 # Calculate data for correlation-versus-covariance plot | 545 # Calculate data for correlation-versus-covariance plot |
499 # Adapted from: | 546 # Adapted from: |
500 # Wiklund_2008 doi:10.1021/ac0713510 | 547 # Wiklund_2008 doi:10.1021/ac0713510 |
501 # Galindo_Prieto_2014 doi:10.1002/cem.2627 | 548 # Galindo_Prieto_2014 doi:10.1002/cem.2627 |
502 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R | 549 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R |
503 cor_vs_cov <- function(matrix_x, ropls_x) { | 550 cor_vs_cov <- function(matrix_x, ropls_x, parallel_x = TRUE) { |
504 x_class <- class(ropls_x) | 551 x_class <- class(ropls_x) |
505 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) | 552 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) |
506 stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) ) | 553 stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) ) |
507 } | 554 } |
508 result <- list() | 555 result <- list() |
556 result$projection <- projection <- if (parallel_x) 1 else 2 | |
509 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS | 557 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS |
510 if ( ropls_x@suppLs$algoC == "nipals") { | 558 if ( ropls_x@suppLs$algoC == "nipals") { |
511 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 | 559 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 |
512 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) | 560 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) |
513 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) | 561 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) |
514 score_matrix <- ropls_x@scoreMN | 562 if (parallel_x) |
563 score_matrix <- ropls_x@scoreMN | |
564 else | |
565 score_matrix <- ropls_x@orthoScoreMN | |
515 score_matrix_transposed <- t(score_matrix) | 566 score_matrix_transposed <- t(score_matrix) |
516 score_matrix_magnitude <- mag(score_matrix) | 567 score_matrix_magnitude <- mag(score_matrix) |
517 result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) | 568 result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) |
518 result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) | 569 result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) |
519 } else { | 570 } else { |
520 # WARNING - untested code - I don't have test data to exercise this branch | 571 # WARNING - untested code - I don't have test data to exercise this branch |
521 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 | 572 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 |
522 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F | 573 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F |
523 score_matrix <- ropls_x@scoreMN | 574 if (parallel_x) |
575 score_matrix <- ropls_x@scoreMN | |
576 else | |
577 score_matrix <- ropls_x@orthoScoreMN | |
524 score_matrix_transposed <- t(score_matrix) | 578 score_matrix_transposed <- t(score_matrix) |
525 cov_divisor <- nrow(matrix_x) - 1 | 579 cov_divisor <- nrow(matrix_x) - 1 |
526 result$covariance <- sapply( | 580 result$covariance <- sapply( |
527 X = 1:ncol(matrix_x) | 581 X = 1:ncol(matrix_x) |
528 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor | 582 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor |
538 , FUN = function(x) { | 592 , FUN = function(x) { |
539 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) ) | 593 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) ) |
540 } | 594 } |
541 ) | 595 ) |
542 } | 596 } |
543 result$correlation <- result$correlation[1,,drop = TRUE] | 597 result$correlation <- result$correlation[ 1, , drop = TRUE ] |
544 result$covariance <- result$covariance[1,,drop = TRUE] | 598 result$covariance <- result$covariance [ 1, , drop = TRUE ] |
545 | 599 |
546 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 | 600 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 |
547 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) | 601 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) |
548 result$vip4p <- as.numeric(ropls_x@vipVn) | 602 result$vip4p <- as.numeric(ropls_x@vipVn) |
549 result$vip4o <- as.numeric(ropls_x@orthoVipVn) | 603 result$vip4o <- as.numeric(ropls_x@orthoVipVn) |
554 fctr_lvl_1 <- level_names[1] | 608 fctr_lvl_1 <- level_names[1] |
555 fctr_lvl_2 <- level_names[2] | 609 fctr_lvl_2 <- level_names[2] |
556 feature_count <- length(ropls_x@vipVn) | 610 feature_count <- length(ropls_x@vipVn) |
557 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) | 611 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) |
558 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) | 612 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) |
559 # print(sprintf("sd(covariance) = %f; sd(correlation) = %f", sd(result$covariance), sd(result$correlation))) | |
560 superresult <- list() | 613 superresult <- list() |
561 if (length(result$vip4o) == 0) result$vip4o <- NA | 614 if (length(result$vip4o) == 0) result$vip4o <- NA |
562 greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 ) | 615 greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 ) |
563 superresult$tsv1 <- data.frame( | 616 superresult$tsv1 <- data.frame( |
564 featureID = names(ropls_x@vipVn) | 617 featureID = names(ropls_x@vipVn) |
565 , factorLevel1 = result$level1 | 618 , factorLevel1 = result$level1 |
566 , factorLevel2 = result$level2 | 619 , factorLevel2 = result$level2 |
567 , greaterLevel = greaterLevel | 620 , greaterLevel = greaterLevel |
621 , projection = result$projection | |
568 , correlation = result$correlation | 622 , correlation = result$correlation |
569 , covariance = result$covariance | 623 , covariance = result$covariance |
570 , vip4p = result$vip4p | 624 , vip4p = result$vip4p |
571 , vip4o = result$vip4o | 625 , vip4o = result$vip4o |
572 , loadp = result$loadp | 626 , loadp = result$loadp |
573 , loado = result$loado | 627 , loado = result$loado |
574 , row.names = NULL | 628 , row.names = NULL |
575 ) | 629 ) |
576 rownames(superresult$tsv1) <- superresult$tsv1$featureID | 630 rownames(superresult$tsv1) <- superresult$tsv1$featureID |
631 superresult$projection <- result$projection | |
577 superresult$covariance <- result$covariance | 632 superresult$covariance <- result$covariance |
578 superresult$correlation <- result$correlation | 633 superresult$correlation <- result$correlation |
579 superresult$vip4p <- result$vip4p | 634 superresult$vip4p <- result$vip4p |
580 superresult$vip4o <- result$vip4o | 635 superresult$vip4o <- result$vip4o |
581 superresult$loadp <- result$loadp | 636 superresult$loadp <- result$loadp |
582 superresult$loado <- result$loado | 637 superresult$loado <- result$loado |
583 superresult$details <- result | 638 superresult$details <- result |
584 # #print(superresult$tsv1) | |
585 result$superresult <- superresult | 639 result$superresult <- superresult |
586 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways | 640 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways |
587 result$oplsda <- ropls_x | 641 result$oplsda <- ropls_x |
588 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways | 642 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways |
589 return (superresult) | 643 return (superresult) |
590 } | 644 } |
591 | 645 |
592 | 646 # vim: sw=2 ts=2 et : |