Mercurial > repos > eschen42 > w4mcorcov
comparison w4mcorcov_calc.R @ 13:2ae2d26e3270 draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit e89c652c0849eb1d5a1e6c9100c72c64a8d388b4
author | eschen42 |
---|---|
date | Wed, 12 Dec 2018 09:20:02 -0500 |
parents | ddaf84e15d06 |
children | 90708fdbc22d |
comparison
equal
deleted
inserted
replaced
12:ddaf84e15d06 | 13:2ae2d26e3270 |
---|---|
1 # center with 'colMeans()' - ref: http://gastonsanchez.com/visually-enforced/how-to/2014/01/15/Center-data-in-R/ | 1 # compute and output detail plots |
2 center_colmeans <- function(x) { | |
3 xcenter = colMeans(x) | |
4 x - rep(xcenter, rep.int(nrow(x), ncol(x))) | |
5 } | |
6 | |
7 #### OPLS-DA | |
8 algoC <- "nipals" | |
9 | |
10 do_detail_plot <- function( | 2 do_detail_plot <- function( |
11 x_dataMatrix | 3 x_dataMatrix |
12 , x_predictor | 4 , x_predictor |
13 , x_is_match | 5 , x_is_match |
14 , x_algorithm | 6 , x_algorithm |
19 , x_crossval_i | 11 , x_crossval_i |
20 ) { | 12 ) { |
21 off <- function(x) if (x_show_labels == "0") 0 else x | 13 off <- function(x) if (x_show_labels == "0") 0 else x |
22 if ( x_is_match | 14 if ( x_is_match |
23 && ncol(x_dataMatrix) > 0 | 15 && ncol(x_dataMatrix) > 0 |
24 && length(unique(x_predictor))> 1 | 16 && length(unique(x_predictor)) > 1 |
25 && x_crossval_i < nrow(x_dataMatrix) | 17 && x_crossval_i < nrow(x_dataMatrix) |
26 ) { | 18 ) { |
27 my_oplsda <- opls( | 19 my_oplsda <- opls( |
28 x = x_dataMatrix | 20 x = x_dataMatrix |
29 , y = x_predictor | 21 , y = x_predictor |
34 , plotL = FALSE | 26 , plotL = FALSE |
35 , crossvalI = x_crossval_i | 27 , crossvalI = x_crossval_i |
36 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. | 28 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. |
37 ) | 29 ) |
38 # strip out variables having negligible variance | 30 # strip out variables having negligible variance |
39 x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] | 31 x_dataMatrix <- x_dataMatrix[ , names(my_oplsda@vipVn), drop = FALSE] |
40 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) | 32 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) |
41 | 33 |
42 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] | 34 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] |
43 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] | 35 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] |
44 do_s_plot <- function( | 36 do_s_plot <- function( |
45 x_env | 37 x_env |
46 , predictor_projection_x = TRUE | 38 , predictor_projection_x = TRUE |
47 , cplot_x = FALSE | 39 , cplot_x = FALSE |
48 , cor_vs_cov_x = NULL | 40 , cor_vs_cov_x = NULL |
49 ) | 41 ) { |
50 { | |
51 if (cplot_x) { | 42 if (cplot_x) { |
52 cplot_y_correlation <- (x_env$cplot_y == "correlation") | 43 cplot_y_correlation <- (x_env$cplot_y == "correlation") |
44 default_lim_x <- 10 | |
45 } else { | |
46 default_lim_x <- 1.2 | |
53 } | 47 } |
54 if (is.null(cor_vs_cov_x)) { | 48 if (is.null(cor_vs_cov_x)) { |
55 my_cor_vs_cov <- cor_vs_cov( | 49 my_cor_vs_cov <- cor_vs_cov( |
56 matrix_x = x_dataMatrix | 50 matrix_x = x_dataMatrix |
57 , ropls_x = my_oplsda | 51 , ropls_x = my_oplsda |
58 , predictor_projection_x = predictor_projection_x | 52 , predictor_projection_x = predictor_projection_x |
59 , x_progress | 53 , x_progress |
54 , x_env | |
60 ) | 55 ) |
61 } else { | 56 } else { |
62 my_cor_vs_cov <- cor_vs_cov_x | 57 my_cor_vs_cov <- cor_vs_cov_x |
63 } | 58 } |
64 # str(my_cor_vs_cov) | 59 |
65 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { | 60 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { |
66 if (is.null(cor_vs_cov_x)) { | 61 if (is.null(cor_vs_cov_x)) { |
67 x_progress("No cor_vs_cov data produced") | 62 x_progress( |
63 sprintf("No cor_vs_cov data produced for level %s versus %s", fctr_lvl_1, fctr_lvl_2) | |
64 ) | |
68 } | 65 } |
69 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | 66 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") |
70 text(x=1, y=1, labels="too few covariance data") | 67 text(x=1, y=1, labels="too few covariance data") |
71 return(my_cor_vs_cov) | 68 return(my_cor_vs_cov) |
72 } | 69 } |
74 my_cor_vs_cov | 71 my_cor_vs_cov |
75 , { | 72 , { |
76 min_x <- min(covariance, na.rm = TRUE) | 73 min_x <- min(covariance, na.rm = TRUE) |
77 max_x <- max(covariance, na.rm = TRUE) | 74 max_x <- max(covariance, na.rm = TRUE) |
78 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) | 75 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) |
79 covariance <- covariance / lim_x | 76 |
80 lim_x <- 1.2 | 77 # Regarding using VIP as a guide to selecting a biomarker: |
81 # "It is generally accepted that a variable should be selected if vj>1, [27–29], | 78 # "It is generally accepted that a variable should be selected if vj>1, [27–29], |
82 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." | 79 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." |
83 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) | 80 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) |
84 plus_cor <- correlation | 81 plus_cor <- correlation |
85 plus_cov <- covariance | 82 plus_cov <- covariance |
86 cex <- 0.65 | 83 if (length(plus_cor) != 0 && length(plus_cor) != 0) { |
87 which_projection <- if (projection == 1) "t1" else "o1" | 84 cex <- 0.65 |
88 which_loading <- if (projection == 1) "parallel" else "orthogonal" | 85 if (projection == 1) { |
89 if (projection == 1) { # predictor-projection | 86 # predictor-projection |
90 vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) | 87 vipcp <- pmax(0, pmin(1, (vip4p - 0.83) / (1.21 - 0.83))) |
91 if (!cplot_x) { # S-plot predictor-projection | 88 if (!cplot_x) { |
92 my_xlab <- "relative covariance(feature,t1)" | 89 # S-plot predictor-projection |
93 my_x <- plus_cov | 90 my_xlab <- "covariance(feature,t1)" |
94 my_ylab <- "correlation(feature,t1)" | 91 my_x <- plus_cov |
95 my_y <- plus_cor | |
96 } else { # C-plot predictor-projection | |
97 my_xlab <- "variable importance in predictor-projection" | |
98 my_x <- vip4p | |
99 if (cplot_y_correlation) { | |
100 my_ylab <- "correlation(feature,t1)" | 92 my_ylab <- "correlation(feature,t1)" |
101 my_y <- plus_cor | 93 my_y <- plus_cor |
94 # X,Y limits for S-PLOT | |
95 my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3)) | |
96 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
102 } else { | 97 } else { |
103 my_ylab <- "relative covariance(feature,t1)" | 98 # C-plot predictor-projection |
104 my_y <- plus_cov | 99 my_xlab <- "variable importance in predictor-projection" |
100 my_x <- vip4p | |
101 if (cplot_y_correlation) { | |
102 my_ylab <- "correlation(feature,t1)" | |
103 my_y <- plus_cor | |
104 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
105 } else { | |
106 my_ylab <- "covariance(feature,t1)" | |
107 my_y <- plus_cov | |
108 my_ylim <- max(abs(plus_cov)) | |
109 my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) ) | |
110 } | |
111 # X,Y limits for C-plot | |
112 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | |
113 lim_x <- min(lim_x, default_lim_x) | |
114 my_xlim <- c( 0, lim_x ) # + off(0.2) ) | |
105 } | 115 } |
106 } | 116 my_load_distal <- loadp |
107 if (cplot_x) { | 117 my_load_proximal <- loado |
108 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | 118 red <- as.numeric(correlation > 0) * vipcp |
109 my_xlim <- c( 0, lim_x + off(0.2) ) | 119 blue <- as.numeric(correlation < 0) * vipcp |
120 alpha <- 0.1 + 0.4 * vipcp | |
121 red[is.na(red)] <- 0 | |
122 blue[is.na(blue)] <- 0 | |
123 alpha[is.na(alpha)] <- 0 | |
124 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) | |
125 main_label <- sprintf("%s for level %s versus %s" | |
126 , x_prefix, fctr_lvl_1, fctr_lvl_2) | |
110 } else { | 127 } else { |
111 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) | 128 # orthogonal projection |
112 } | 129 vipco <- pmax(0, pmin(1, (vip4o - 0.83) / (1.21 - 0.83))) |
113 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) | 130 if (!cplot_x) { |
114 my_load_distal <- loadp | 131 # S-PLOT orthogonal-projection |
115 my_load_proximal <- loado | 132 my_xlab <- "covariance(feature,to1)" |
116 red <- as.numeric(correlation > 0) * vipcp | 133 my_x <- -plus_cov |
117 blue <- as.numeric(correlation < 0) * vipcp | 134 # X,Y limits for S-PLOT |
118 alpha <- 0.1 + 0.4 * vipcp | 135 my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3)) |
119 red[is.na(red)] <- 0 | |
120 blue[is.na(blue)] <- 0 | |
121 alpha[is.na(alpha)] <- 0 | |
122 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) | |
123 main_label <- sprintf("%s for level %s versus %s" | |
124 , x_prefix, fctr_lvl_1, fctr_lvl_2) | |
125 } else { # orthogonal projection | |
126 vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) | |
127 if (!cplot_x) { | |
128 my_xlab <- "relative covariance(feature,to1)" | |
129 my_x <- -plus_cov | |
130 } else { | |
131 my_xlab <- "variable importance in orthogonal projection" | |
132 my_x <- vip4o | |
133 } | |
134 if (!cplot_x) { # S-plot orthogonal projection | |
135 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) | |
136 my_ylab <- "correlation(feature,to1)" | |
137 my_y <- plus_cor | |
138 } else { # C-plot orthogonal projection | |
139 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | |
140 my_xlim <- c( 0, lim_x + off(0.2) ) | |
141 if (cplot_y_correlation) { | |
142 my_ylab <- "correlation(feature,to1)" | 136 my_ylab <- "correlation(feature,to1)" |
143 my_y <- plus_cor | 137 my_y <- plus_cor |
138 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
144 } else { | 139 } else { |
145 my_ylab <- "relative covariance(feature,to1)" | 140 # C-plot orthogonal-projection |
146 my_y <- plus_cov | 141 my_xlab <- "variable importance in orthogonal projection" |
142 my_x <- vip4o | |
143 # C-plot orthogonal projection | |
144 # X,Y limits for C-plot | |
145 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | |
146 my_xlim <- c( 0, lim_x ) # + off(0.2) ) | |
147 if (cplot_y_correlation) { | |
148 my_ylab <- "correlation(feature,to1)" | |
149 my_y <- plus_cor | |
150 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
151 } else { | |
152 my_ylab <- "covariance(feature,to1)" | |
153 my_y <- plus_cov | |
154 my_ylim <- max(abs(plus_cov)) | |
155 my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) ) | |
156 } | |
147 } | 157 } |
158 my_load_distal <- loado | |
159 my_load_proximal <- loadp | |
160 alpha <- 0.1 + 0.4 * vipco | |
161 alpha[is.na(alpha)] <- 0 | |
162 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) | |
163 main_label <- sprintf( | |
164 "Features influencing orthogonal projection for %s versus %s" | |
165 , fctr_lvl_1, fctr_lvl_2) | |
148 } | 166 } |
149 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) | 167 main_cex <- min(1.0, 46.0/nchar(main_label)) |
150 my_load_distal <- loado | 168 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward |
151 my_load_proximal <- loadp | 169 my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18) |
152 alpha <- 0.1 + 0.4 * vipco | 170 if ( sum(is.infinite(my_xlim)) == 0 ) { |
153 alpha[is.na(alpha)] <- 0 | 171 plot( |
154 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) | 172 y = my_y |
155 main_label <- sprintf( | 173 , x = my_x |
156 "Features influencing orthogonal projection for %s versus %s" | 174 , type = "p" |
157 , fctr_lvl_1, fctr_lvl_2) | 175 , xlim = my_xlim |
158 } | 176 , ylim = my_ylim |
159 main_cex <- min(1.0, 46.0/nchar(main_label)) | 177 , xlab = my_xlab |
160 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward | 178 , ylab = my_ylab |
161 my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18) | 179 , main = main_label |
162 plot( | 180 , cex.main = main_cex |
163 y = my_y | 181 , cex = cex |
164 , x = my_x | 182 , pch = my_pch |
165 , type = "p" | 183 , col = my_col |
166 , xlim = my_xlim | |
167 , ylim = my_ylim | |
168 , xlab = my_xlab | |
169 , ylab = my_ylab | |
170 , main = main_label | |
171 , cex.main = main_cex | |
172 , cex = cex | |
173 , pch = my_pch | |
174 , col = my_col | |
175 ) | |
176 low_x <- -0.7 * lim_x | |
177 high_x <- 0.7 * lim_x | |
178 if (projection == 1 && !cplot_x) { | |
179 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") | |
180 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") | |
181 } | |
182 if ( x_show_labels != "0" ) { | |
183 names(my_load_distal) <- tsv1$featureID | |
184 names(my_load_proximal) <- tsv1$featureID | |
185 if ( x_show_labels == "ALL" ) { | |
186 n_labels <- length(my_load_distal) | |
187 } else { | |
188 n_labels <- as.numeric(x_show_labels) | |
189 } | |
190 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) | |
191 labels_to_show <- c( | |
192 names(head(sort(my_load_distal),n = n_labels)) | |
193 , names(tail(sort(my_load_distal),n = n_labels)) | |
194 ) | |
195 labels <- unname( | |
196 sapply( | |
197 X = tsv1$featureID | |
198 , FUN = function(x) if( x %in% labels_to_show ) x else "" | |
199 ) | 184 ) |
200 ) | 185 low_x <- -0.7 * lim_x |
201 x_text_offset <- 0.024 | 186 high_x <- 0.7 * lim_x |
202 y_text_off <- 0.017 | 187 if (projection == 1 && !cplot_x) { |
203 if (!cplot_x) { # S-plot | 188 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") |
204 y_text_offset <- if (projection == 1) -y_text_off else y_text_off | 189 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") |
205 } else { # C-plot | 190 } |
206 y_text_offset <- | 191 if ( x_show_labels != "0" ) { |
207 sapply( | 192 names(my_load_distal) <- tsv1$featureID |
208 X = (my_y > 0) | 193 names(my_load_proximal) <- tsv1$featureID |
209 , FUN = function(x) { if (x) y_text_off else -y_text_off } | 194 if ( x_show_labels == "ALL" ) { |
195 n_labels <- length(my_load_distal) | |
196 } else { | |
197 n_labels <- as.numeric(x_show_labels) | |
198 } | |
199 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) | |
200 labels_to_show <- c( | |
201 names(head(sort(my_load_distal), n = n_labels)) | |
202 , names(tail(sort(my_load_distal), n = n_labels)) | |
210 ) | 203 ) |
211 } | 204 labels <- unname( |
212 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { | 205 sapply( |
213 if (length(labels_arg) > 0) { | 206 X = tsv1$featureID |
214 unique_slant <- unique(slant_arg) | 207 , FUN = function(x) if ( x %in% labels_to_show ) x else "" |
215 if (length(unique_slant) == 1) { | |
216 text( | |
217 y = y_arg | |
218 , x = x_arg + x_text_offset | |
219 , cex = 0.4 | |
220 , labels = labels_arg | |
221 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
222 , srt = slant_arg | |
223 , adj = 0 # left-justified | |
224 ) | 208 ) |
209 ) | |
210 x_text_offset <- 0.024 | |
211 y_text_off <- 0.017 | |
212 if (!cplot_x) { | |
213 # S-plot | |
214 y_text_offset <- if (projection == 1) -y_text_off else y_text_off | |
225 } else { | 215 } else { |
226 for (slant in unique_slant) { | 216 # C-plot |
227 text( | 217 y_text_offset <- |
228 y = y_arg[slant_arg == slant] | 218 sapply( |
229 , x = x_arg[slant_arg == slant] + x_text_offset | 219 X = (my_y > 0) |
230 , cex = 0.4 | 220 , FUN = function(x) { |
231 , labels = labels_arg[slant_arg == slant] | 221 if (x) y_text_off else -y_text_off |
232 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | 222 } |
233 , srt = slant | 223 ) |
234 , adj = 0 # left-justified | 224 } |
225 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { | |
226 if (length(labels_arg) > 0) { | |
227 unique_slant <- unique(slant_arg) | |
228 if (length(unique_slant) == 1) { | |
229 text( | |
230 y = y_arg | |
231 , x = x_arg + x_text_offset | |
232 , cex = 0.4 | |
233 , labels = labels_arg | |
234 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
235 , srt = slant_arg | |
236 , adj = 0 # left-justified | |
237 ) | |
238 } else { | |
239 for (slant in unique_slant) { | |
240 text( | |
241 y = y_arg[slant_arg == slant] | |
242 , x = x_arg[slant_arg == slant] + x_text_offset | |
243 , cex = 0.4 | |
244 , labels = labels_arg[slant_arg == slant] | |
245 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
246 , srt = slant | |
247 , adj = 0 # left-justified | |
248 ) | |
249 } | |
250 } | |
251 } | |
252 } | |
253 if (!cplot_x) { | |
254 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant | |
255 } else { | |
256 my_slant <- sapply( | |
257 X = (my_y > 0) | |
258 , FUN = function(x) if (x) 2 else -2 | |
259 ) * my_feature_label_slant | |
260 } | |
261 if (length(my_x) > 1) { | |
262 label_features( | |
263 x_arg = my_x [my_x > 0] | |
264 , y_arg = my_y [my_x > 0] - y_text_offset | |
265 , labels_arg = labels[my_x > 0] | |
266 , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) | |
267 ) | |
268 if (!cplot_x) { | |
269 label_features( | |
270 x_arg = my_x [my_x < 0] | |
271 , y_arg = my_y [my_x < 0] + y_text_offset | |
272 , labels_arg = labels[my_x < 0] | |
273 , slant_arg = my_slant | |
235 ) | 274 ) |
236 } | 275 } |
237 } | 276 } else { |
238 } | 277 if (!cplot_x) { |
239 } | 278 my_slant <- (if (my_x > 1) -1 else 1) * my_slant |
240 if (!cplot_x) { | 279 my_y_arg <- my_y + (if (my_x > 1) -1 else 1) * y_text_offset |
241 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant | 280 } else { |
281 my_slant <- (if (my_y > 1) -1 else 1) * my_slant | |
282 my_y_arg <- my_y + (if (my_y > 1) -1 else 1) * y_text_offset | |
283 } | |
284 label_features( | |
285 x_arg = my_x | |
286 , y_arg = my_y_arg | |
287 , labels_arg = labels | |
288 , slant_arg = my_slant | |
289 ) | |
290 } # end if (length(my_x) > 1) | |
291 } # end if ( x_show_labels != "0" ) | |
242 } else { | 292 } else { |
243 my_slant <- sapply( | 293 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") |
244 X = (my_y > 0) | 294 text(x=1, y=1, labels="no S-plot is possible") |
245 , FUN = function(x) if (x) 2 else -2 | 295 } # end if (sum(is.infinte(my_xlim))==0) |
246 ) * my_feature_label_slant | 296 } else { |
247 } | 297 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") |
248 if (length(my_x) > 1) { | 298 text(x=1, y=1, labels="no S-plot is possible") |
249 label_features( | 299 } # end if (length(plus_cor) != 0 && length(plus_cor) != 0) |
250 x_arg = my_x [my_x > 0] | 300 } # end action |
251 , y_arg = my_y [my_x > 0] - y_text_offset | 301 ) # end with( my_cor_vs_cov, action ) |
252 , labels_arg = labels[my_x > 0] | |
253 , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) | |
254 ) | |
255 if (!cplot_x) { | |
256 label_features( | |
257 x_arg = my_x [my_x < 0] | |
258 , y_arg = my_y [my_x < 0] + y_text_offset | |
259 , labels_arg = labels[my_x < 0] | |
260 , slant_arg = my_slant | |
261 ) | |
262 } | |
263 } else { | |
264 if (!cplot_x) { | |
265 my_slant <- (if (my_x > 1) -1 else 1) * my_slant | |
266 my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset | |
267 } else { | |
268 my_slant <- (if (my_y > 1) -1 else 1) * my_slant | |
269 my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset | |
270 } | |
271 label_features( | |
272 x_arg = my_x | |
273 , y_arg = my_y_arg | |
274 , labels_arg = labels | |
275 , slant_arg = my_slant | |
276 ) | |
277 } | |
278 } | |
279 } | |
280 ) | |
281 return (my_cor_vs_cov) | 302 return (my_cor_vs_cov) |
282 } | 303 } # end function do_s_plot |
283 my_cor_vs_cov <- do_s_plot( | 304 my_cor_vs_cov <- do_s_plot( |
284 x_env = x_env | 305 x_env = x_env |
285 , predictor_projection_x = TRUE | 306 , predictor_projection_x = TRUE |
286 , cplot_x = FALSE | 307 , cplot_x = FALSE |
287 ) | 308 ) |
288 typeVc <- c("correlation", # 1 | 309 typevc <- c("correlation", # 1 |
289 "outlier", # 2 | 310 "outlier", # 2 |
290 "overview", # 3 | 311 "overview", # 3 |
291 "permutation", # 4 | 312 "permutation", # 4 |
292 "predict-train", # 5 | 313 "predict-train", # 5 |
293 "predict-test", # 6 | 314 "predict-test", # 6 |
297 "x-variance", # 10 | 318 "x-variance", # 10 |
298 "xy-score", # 11 | 319 "xy-score", # 11 |
299 "xy-weight" # 12 | 320 "xy-weight" # 12 |
300 ) # [c(3,8,9)] # [c(4,3,8,9)] | 321 ) # [c(3,8,9)] # [c(4,3,8,9)] |
301 if ( length(my_oplsda@orthoVipVn) > 0 ) { | 322 if ( length(my_oplsda@orthoVipVn) > 0 ) { |
302 my_typevc <- typeVc[c(9,3,8)] | 323 my_typevc <- typevc[c(9,3,8)] |
303 } else { | 324 } else { |
304 my_typevc <- c("(dummy)","overview","(dummy)") | 325 my_typevc <- c("(dummy)","overview","(dummy)") |
305 } | 326 } |
306 my_ortho_cor_vs_cov_exists <- FALSE | 327 my_ortho_cor_vs_cov_exists <- FALSE |
307 for (my_type in my_typevc) { | 328 for (my_type in my_typevc) { |
308 if (my_type %in% typeVc) { | 329 if (my_type %in% typevc) { |
309 tryCatch({ | 330 tryCatch({ |
310 if ( my_type != "x-loading" ) { | 331 if ( my_type != "x-loading" ) { |
311 plot( | 332 plot( |
312 x = my_oplsda | 333 x = my_oplsda |
313 , typeVc = my_type | 334 , typeVc = my_type |
314 , parCexN = 0.4 | 335 , parCexN = 0.4 |
315 , parDevNewL = FALSE | 336 , parDevNewL = FALSE |
316 , parLayL = TRUE | 337 , parLayL = TRUE |
317 , parEllipsesL = TRUE | 338 , parEllipsesL = TRUE |
318 ) | 339 ) |
319 if (my_type == "overview") { | 340 if (my_type == "overview") { |
320 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) | 341 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) |
321 title(sub = sub_label) | 342 title(sub = sub_label) |
322 } | 343 } |
323 } else { | 344 } else { |
324 my_ortho_cor_vs_cov <- do_s_plot( | 345 my_ortho_cor_vs_cov <- do_s_plot( |
325 x_env = x_env | 346 x_env = x_env |
326 , predictor_projection_x = FALSE | 347 , predictor_projection_x = FALSE |
327 , cplot_x = FALSE | 348 , cplot_x = FALSE |
328 ) | 349 ) |
329 my_ortho_cor_vs_cov_exists <- TRUE | 350 my_ortho_cor_vs_cov_exists <- TRUE |
351 } | |
330 } | 352 } |
331 }, error = function(e) { | 353 , error = function(e) { |
332 x_progress( | 354 x_progress( |
333 sprintf( | 355 sprintf( |
334 "factor level %s or %s may have only one sample - %s" | 356 "factor level %s or %s may have only one sample - %s" |
335 , fctr_lvl_1 | 357 , fctr_lvl_1 |
336 , fctr_lvl_2 | 358 , fctr_lvl_2 |
345 } | 367 } |
346 cplot_p <- x_env$cplot_p | 368 cplot_p <- x_env$cplot_p |
347 cplot_o <- x_env$cplot_o | 369 cplot_o <- x_env$cplot_o |
348 if (cplot_p || cplot_o) { | 370 if (cplot_p || cplot_o) { |
349 if (cplot_p) { | 371 if (cplot_p) { |
350 do_s_plot( | 372 if (!is.null(my_cor_vs_cov)) { |
351 x_env = x_env | 373 do_s_plot( |
352 , predictor_projection_x = TRUE | 374 x_env = x_env |
353 , cplot_x = TRUE | 375 , predictor_projection_x = TRUE |
354 , cor_vs_cov_x = my_cor_vs_cov | 376 , cplot_x = TRUE |
355 ) | 377 , cor_vs_cov_x = my_cor_vs_cov |
378 ) | |
379 } else { | |
380 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | |
381 text(x=1, y=1, labels="no predictor projection is possible") | |
382 } | |
356 did_plots <- 1 | 383 did_plots <- 1 |
357 } else { | 384 } else { |
358 did_plots <- 0 | 385 did_plots <- 0 |
359 } | 386 } |
360 if (cplot_o) { | 387 if (cplot_o) { |
403 return ( FALSE ) | 430 return ( FALSE ) |
404 } | 431 } |
405 | 432 |
406 # extract parameters from the environment | 433 # extract parameters from the environment |
407 vrbl_metadata <- calc_env$vrbl_metadata | 434 vrbl_metadata <- calc_env$vrbl_metadata |
408 vrbl_metadata_names <- vrbl_metadata[,1] | 435 vrbl_metadata_names <- vrbl_metadata[, 1] |
409 smpl_metadata <- calc_env$smpl_metadata | 436 smpl_metadata <- calc_env$smpl_metadata |
410 data_matrix <- calc_env$data_matrix | 437 data_matrix <- calc_env$data_matrix |
411 pairSigFeatOnly <- calc_env$pairSigFeatOnly | 438 pair_significant_features_only <- calc_env$pairSigFeatOnly |
412 facC <- calc_env$facC | 439 facC <- calc_env$facC |
413 tesC <- calc_env$tesC | 440 tesC <- calc_env$tesC |
414 # extract the levels from the environment | 441 # extract the levels from the environment |
415 originalLevCSV <- levCSV <- calc_env$levCSV | 442 originalLevCSV <- levCSV <- calc_env$levCSV |
416 # matchingC is one of { "none", "wildcard", "regex" } | 443 # matchingC is one of { "none", "wildcard", "regex" } |
431 | 458 |
432 rt <- vrbl_metadata$rt | 459 rt <- vrbl_metadata$rt |
433 names(rt) <- vrbl_metadata$variableMetadata | 460 names(rt) <- vrbl_metadata$variableMetadata |
434 rt_lookup <- function(feature) unname(rt[feature]) | 461 rt_lookup <- function(feature) unname(rt[feature]) |
435 | 462 |
436 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) | 463 # calculate salience_df as data.frame(feature, max_level, max_median, salience_rcv, mean_median, salience, salience_rcv) |
437 salience_df <- calc_env$salience_df <- w4msalience( | 464 salience_df <- calc_env$salience_df <- w4msalience( |
438 data_matrix = data_matrix | 465 data_matrix = data_matrix |
439 , sample_class = smpl_metadata[,facC] | 466 , sample_class = smpl_metadata[,facC] |
440 , failure_action = failure_action | 467 , failure_action = failure_action |
441 ) | 468 ) |
442 salience_tsv_action({ | 469 salience_tsv_action({ |
443 my_df <- data.frame( | 470 with ( |
444 featureID = salience_df$feature | 471 salience_df |
445 , salientLevel = salience_df$max_level | 472 , { |
446 , salientRCV = salience_df$salient_rcv | 473 my_df <<- data.frame( |
447 , salience = salience_df$salience | 474 featureID = feature |
448 , mz = mz_lookup(salience_df$feature) | 475 , salientLevel = max_level |
449 , rt = rt_lookup(salience_df$feature) | 476 , salienceRCV = salience_rcv |
450 ) | 477 , relativeSalientDistance = relative_salient_distance |
451 my_df[order(-my_df$salience),] | 478 , salience = salience |
479 , mz = mz_lookup(feature) | |
480 , rt = rt_lookup(feature) | |
481 ) | |
482 }) | |
483 my_df[order(-my_df$relativeSalientDistance),] | |
452 }) | 484 }) |
453 | 485 |
454 # transform wildcards to regexen | 486 # transform wildcards to regexen |
455 if (matchingC == "wildcard") { | 487 if (matchingC == "wildcard") { |
456 # strsplit(x = "hello,wild,world", split = ",") | 488 # strsplit(x = "hello,wild,world", split = ",") |
558 ) | 590 ) |
559 my_cor_cov <- do_detail_plot( | 591 my_cor_cov <- do_detail_plot( |
560 x_dataMatrix = my_matrix | 592 x_dataMatrix = my_matrix |
561 , x_predictor = predictor | 593 , x_predictor = predictor |
562 , x_is_match = TRUE | 594 , x_is_match = TRUE |
563 , x_algorithm = algoC | 595 , x_algorithm = "nipals" |
564 , x_prefix = if (pairSigFeatOnly) { | 596 , x_prefix = if (pair_significant_features_only) { |
565 "Significantly contrasting features" | 597 "Significantly contrasting features" |
566 } else { | 598 } else { |
567 "Significant features" | 599 "Significant features" |
568 } | 600 } |
569 , x_show_labels = labelFeatures | 601 , x_show_labels = labelFeatures |
625 } else { | 657 } else { |
626 1 | 658 1 |
627 } | 659 } |
628 ) | 660 ) |
629 col_selector <- vrbl_metadata_names[ | 661 col_selector <- vrbl_metadata_names[ |
630 if ( pairSigFeatOnly ) fully_significant else overall_significant | 662 if (pair_significant_features_only) fully_significant else overall_significant |
631 ] | 663 ] |
632 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] | 664 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] |
633 my_cor_cov <- do_detail_plot( | 665 my_cor_cov <- do_detail_plot( |
634 x_dataMatrix = my_matrix | 666 x_dataMatrix = my_matrix |
635 , x_predictor = predictor | 667 , x_predictor = predictor |
636 , x_is_match = is_match | 668 , x_is_match = is_match |
637 , x_algorithm = algoC | 669 , x_algorithm = "nipals" |
638 , x_prefix = if (pairSigFeatOnly) { | 670 , x_prefix = if (pair_significant_features_only) { |
639 "Significantly contrasting features" | 671 "Significantly contrasting features" |
640 } else { | 672 } else { |
641 "Significant features" | 673 "Significant features" |
642 } | 674 } |
643 , x_show_labels = labelFeatures | 675 , x_show_labels = labelFeatures |
666 ) | 698 ) |
667 } | 699 } |
668 } | 700 } |
669 } | 701 } |
670 } | 702 } |
671 } else { # tesC == "none" | 703 } else { |
704 # tesC == "none" | |
672 # find all the levels for factor facC in sampleMetadata | 705 # find all the levels for factor facC in sampleMetadata |
673 level_union <- unique(sort(smpl_metadata_facC)) | 706 level_union <- unique(sort(smpl_metadata_facC)) |
674 # identify the selected levels for factor facC from sampleMetadata | 707 # identify the selected levels for factor facC from sampleMetadata |
675 level_include <- sapply(X = level_union, FUN = isLevelSelected) | 708 level_include <- sapply(X = level_union, FUN = isLevelSelected) |
676 # discard the non-selected levels for factor facC | 709 # discard the non-selected levels for factor facC |
684 , FUN = function(x) { | 717 , FUN = function(x) { |
685 fctr_lvl_1 <- x[1] | 718 fctr_lvl_1 <- x[1] |
686 fctr_lvl_2 <- { | 719 fctr_lvl_2 <- { |
687 if ( fctr_lvl_1 %in% completed ) | 720 if ( fctr_lvl_1 %in% completed ) |
688 return("DUMMY") | 721 return("DUMMY") |
689 # strF(completed) | |
690 completed <<- c(completed, fctr_lvl_1) | 722 completed <<- c(completed, fctr_lvl_1) |
691 setdiff(level_union, fctr_lvl_1) | 723 setdiff(level_union, fctr_lvl_1) |
692 } | 724 } |
693 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | 725 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) |
694 fctr_lvl_2 <- "other" | 726 fctr_lvl_2 <- "other" |
715 ) | 747 ) |
716 my_cor_cov <- do_detail_plot( | 748 my_cor_cov <- do_detail_plot( |
717 x_dataMatrix = my_matrix | 749 x_dataMatrix = my_matrix |
718 , x_predictor = predictor | 750 , x_predictor = predictor |
719 , x_is_match = is_match | 751 , x_is_match = is_match |
720 , x_algorithm = algoC | 752 , x_algorithm = "nipals" |
721 , x_prefix = "Features" | 753 , x_prefix = "Features" |
722 , x_show_labels = labelFeatures | 754 , x_show_labels = labelFeatures |
723 , x_progress = progress_action | 755 , x_progress = progress_action |
724 , x_crossval_i = min(7, length(chosen_samples)) | 756 , x_crossval_i = min(7, length(chosen_samples)) |
725 , x_env = calc_env | 757 , x_env = calc_env |
768 ) | 800 ) |
769 my_cor_cov <- do_detail_plot( | 801 my_cor_cov <- do_detail_plot( |
770 x_dataMatrix = my_matrix | 802 x_dataMatrix = my_matrix |
771 , x_predictor = predictor | 803 , x_predictor = predictor |
772 , x_is_match = is_match | 804 , x_is_match = is_match |
773 , x_algorithm = algoC | 805 , x_algorithm = "nipals" |
774 , x_prefix = "Features" | 806 , x_prefix = "Features" |
775 , x_show_labels = labelFeatures | 807 , x_show_labels = labelFeatures |
776 , x_progress = progress_action | 808 , x_progress = progress_action |
777 , x_crossval_i = min(7, length(chosen_samples)) | 809 , x_crossval_i = min(7, length(chosen_samples)) |
778 , x_env = calc_env | 810 , x_env = calc_env |
802 } | 834 } |
803 } | 835 } |
804 if (!did_plot) { | 836 if (!did_plot) { |
805 failure_action( | 837 failure_action( |
806 sprintf( | 838 sprintf( |
807 "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'" | 839 "Did not plot. Does sampleMetadata have at least two levels of factor '%s' matching '%s' ?" |
808 , facC, originalLevCSV)) | 840 , facC, originalLevCSV)) |
809 return ( FALSE ) | 841 return ( FALSE ) |
810 } | 842 } |
811 return ( TRUE ) | 843 return ( TRUE ) |
812 } | 844 } |
819 cor_vs_cov <- function( | 851 cor_vs_cov <- function( |
820 matrix_x | 852 matrix_x |
821 , ropls_x | 853 , ropls_x |
822 , predictor_projection_x = TRUE | 854 , predictor_projection_x = TRUE |
823 , x_progress = print | 855 , x_progress = print |
856 , x_env | |
824 ) { | 857 ) { |
825 tryCatch({ | 858 tryCatch({ |
826 return( | 859 return( |
827 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress) | 860 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress, x_env) |
828 ) | 861 ) |
829 }, error = function(e) { | 862 } |
863 , error = function(e) { | |
830 x_progress( | 864 x_progress( |
831 sprintf( | 865 sprintf( |
832 "cor_vs_cov fatal error - %s" | 866 "cor_vs_cov fatal error - %s" |
833 , as.character(e) # e$message | 867 , as.character(e) # e$message |
834 ) | 868 ) |
840 cor_vs_cov_try <- function( | 874 cor_vs_cov_try <- function( |
841 matrix_x # rows are samples; columns, features | 875 matrix_x # rows are samples; columns, features |
842 , ropls_x # an instance of ropls::opls | 876 , ropls_x # an instance of ropls::opls |
843 , predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection | 877 , predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection |
844 , x_progress = print # function to produce progress and error messages | 878 , x_progress = print # function to produce progress and error messages |
879 , x_env | |
845 ) { | 880 ) { |
881 my_matrix_x <- matrix_x | |
882 my_matrix_x[my_matrix_x==0] <- NA | |
883 fdr_features <- x_env$fdr_features | |
884 | |
846 x_class <- class(ropls_x) | 885 x_class <- class(ropls_x) |
847 if ( !( as.character(x_class) == "opls" ) ) { | 886 if ( !( as.character(x_class) == "opls" ) ) { |
848 stop( | 887 stop( |
849 paste( | 888 paste( |
850 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " | 889 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " |
864 result <- list() | 903 result <- list() |
865 result$projection <- projection <- if (predictor_projection_x) 1 else 2 | 904 result$projection <- projection <- if (predictor_projection_x) 1 else 2 |
866 | 905 |
867 # I used equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 | 906 # I used equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 |
868 # (and not from the supplement despite the statement that, for the NIPALS algorithm, | 907 # (and not from the supplement despite the statement that, for the NIPALS algorithm, |
869 # the equations from the supplement should be used) because of the definition of the | 908 # the equations from the supplement should be used) because of the definition of the |
870 # Pearson/Galton coefficient of correlation is defined as | 909 # Pearson/Galton coefficient of correlation is defined as |
871 # $$ | 910 # $$ |
872 # \rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y} | 911 # \rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y} |
873 # $$ | 912 # $$ |
874 # as described (among other places) on Wikipedia at | 913 # as described (among other places) on Wikipedia at |
875 # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_population | 914 # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_population |
876 # The equations in the supplement said to use, for the predictive component t1, | 915 # The equations in the supplement said to use, for the predictive component t1, |
877 # \rho_{t1,X_i}= \frac{\operatorname{cov}(t1,X_i)}{(\operatorname{mag}(t1))(\operatorname{mag}(X_i))} | 916 # \rho_{t1,X_i}= \frac{\operatorname{cov}(t1,X_i)}{(\operatorname{mag}(t1))(\operatorname{mag}(X_i))} |
878 # but the results that I got were dramatically different from published results for S-PLOTs; | 917 # but the results that I got were dramatically different from published results for S-PLOTs; |
879 # perhaps my data are not centered exactly the same way that theirs were. | 918 # perhaps my data are not centered exactly the same way that theirs were. |
880 # The correlations calculated here are in agreement with those calculated with the code from | 919 # The correlations calculated here are in agreement with those calculated with the code from |
881 # page 22 of https://cran.r-project.org/web/packages/muma/muma.pdf | 920 # page 22 of https://cran.r-project.org/web/packages/muma/muma.pdf |
882 # I did transform covariance to "relative covariance" (relative to the maximum value) | 921 |
883 # to keep the figures consistent with one another. | 922 |
884 | 923 # count the features/variables (one column for each sample) |
885 # count the features (one column for each sample) | 924 # count the features/variables (one column for each sample) |
886 Nfeatures <- ncol(matrix_x) | 925 n_features <- ncol(my_matrix_x) |
887 # count the samples (one row for each sample) | 926 all_n_features <- x_env$fdr_features |
888 Nobservations <- nrow(matrix_x) | 927 if (length(grep("^[0-9][0-9]*$", all_n_features)) > 0) { |
889 # a one-dimensional magnitude function (i.e., take the vector norm) | 928 all_n_features <- as.integer(all_n_features) |
890 vector_norm <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) | 929 } else { |
891 # calculate the standard deviation for each feature | 930 all_n_features <- n_features |
892 sd_xi <- sapply(X = 1:Nfeatures, FUN = function(x) sd(matrix_x[,x])) | 931 } |
932 fdr_n_features <- max(n_features, all_n_features) | |
933 # print("n_features") | |
934 # print(n_features) | |
935 | |
936 # count the samples/observations (one row for each sample) | |
937 n_observations <- nrow(my_matrix_x) | |
938 | |
893 # choose whether to plot the predictive score vector or orthogonal score vector | 939 # choose whether to plot the predictive score vector or orthogonal score vector |
894 if (predictor_projection_x) | 940 if (predictor_projection_x) |
895 score_matrix <- ropls_x@scoreMN | 941 score_vector <- ropls_x@scoreMN |
896 else | 942 else |
897 score_matrix <- ropls_x@orthoScoreMN | 943 score_vector <- ropls_x@orthoScoreMN |
898 # transpose the score (or orthoscore) vector for use as a premultiplier in covariance calculation | 944 |
899 score_matrix_transposed <- t(score_matrix) | 945 # compute the covariance of each feature with the score vector |
900 # compute the norm of the vector (i.e., the magnitude) | |
901 score_matrix_magnitude <- vector_norm(score_matrix) | |
902 # compute the standard deviation of the vector | |
903 score_matrix_sd <- sd(score_matrix) | |
904 # compute the relative covariance of each feature with the score vector | |
905 result$covariance <- | 946 result$covariance <- |
906 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) | 947 sapply( |
948 X = 1:n_features | |
949 , FUN = function(i) { | |
950 cov(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs") | |
951 } | |
952 ) | |
953 # access covariance by feature name | |
954 names(result$covariance) <- colnames(my_matrix_x) | |
955 | |
907 # compute the correlation of each feature with the score vector | 956 # compute the correlation of each feature with the score vector |
908 result$correlation <- | 957 result$correlation <- |
909 score_matrix_transposed %*% matrix_x / ( (Nobservations - 1) * ( score_matrix_sd * sd_xi ) ) | 958 sapply( |
910 | 959 X = 1:n_features |
911 # convert covariance and correlation from one-dimensional matrices to arrays of values, | 960 , FUN = function(i) { |
912 # which are accessed by feature name below | 961 cor(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs") |
913 p1 <- result$covariance <- result$covariance [ 1, , drop = TRUE ] | 962 } |
914 # x_progress("strF(p1)") | 963 ) |
915 # x_progress(strF(p1)) | 964 # access correlation by feature name |
916 | 965 names(result$correlation) <- colnames(my_matrix_x) |
917 pcorr1 <- result$correlation <- result$correlation[ 1, , drop = TRUE ] | 966 |
918 # x_progress("pearson strF(pcorr1)") | 967 # eliminate NAs in either correlation or covariance |
919 # x_progress(strF(pcorr1)) | 968 nas <- is.na(result$correlation) | is.na(result$covariance) |
920 # x_progress(typeof(pcorr1)) | 969 featureID <- names(ropls_x@vipVn) |
921 # x_progress(str(pcorr1)) | 970 featureID <- featureID[!nas] |
922 | 971 result$correlation <- result$correlation[!nas] |
923 # # this is how to use Spearman correlation instead of pearson | 972 result$covariance <- result$covariance[!nas] |
924 # result$spearcor <- sapply( | 973 n_features <- length(featureID) |
925 # X = 1:Nfeatures | 974 |
926 # , FUN = function(i) { | |
927 # stats::cor( | |
928 # x = as.vector(score_matrix) | |
929 # , y = as.vector(matrix_x[,i]) | |
930 # # , method = "spearman" | |
931 # , method = "pearson" | |
932 # ) | |
933 # } | |
934 # ) | |
935 # names(result$spearcor) <- names(p1) | |
936 # pcorr1 <- result$spearcor | |
937 # x_progress("spearman strF(pcorr1)") | |
938 # x_progress(strF(pcorr1)) | |
939 # x_progress(typeof(pcorr1)) | |
940 # x_progress(str(pcorr1)) | |
941 # pcorr1 <- result$correlation <- result$spearcor | |
942 | |
943 # correl.ci(r, n, a = 0.05, rho = 0) | |
944 correl_pci <- lapply( | 975 correl_pci <- lapply( |
945 X = 1:Nfeatures | 976 X = 1:n_features |
946 , FUN = function(i) correl.ci(r = pcorr1[i], n = Nobservations) | 977 , FUN = function(i) { |
978 correl.ci( | |
979 r = result$correlation[i] | |
980 , n_obs = n_observations | |
981 , n_vars = fdr_n_features | |
982 ) | |
983 } | |
947 ) | 984 ) |
948 result$p_value_raw <- sapply( | 985 result$p_value_raw <- sapply( |
949 X = 1:Nfeatures | 986 X = 1:n_features |
987 , FUN = function(i) correl_pci[[i]]$p.value.raw | |
988 ) | |
989 result$p_value_raw[is.na(result$p_value_raw)] <- 1.0 | |
990 result$p_value <- sapply( | |
991 X = 1:n_features | |
950 , FUN = function(i) correl_pci[[i]]$p.value | 992 , FUN = function(i) correl_pci[[i]]$p.value |
951 ) | 993 ) |
952 result$p_value_raw[is.na(result$p_value_raw)] <- 0.0 | 994 result$p_value[is.na(result$p_value)] <- 1.0 |
953 result$ci_lower <- sapply( | 995 result$ci_lower <- sapply( |
954 X = 1:Nfeatures | 996 X = 1:n_features |
955 , FUN = function(i) correl_pci[[i]]$CI['lower'] | 997 , FUN = function(i) correl_pci[[i]]$CI["lower"] |
956 ) | 998 ) |
957 result$ci_upper <- sapply( | 999 result$ci_upper <- sapply( |
958 X = 1:Nfeatures | 1000 X = 1:n_features |
959 , FUN = function(i) correl_pci[[i]]$CI['upper'] | 1001 , FUN = function(i) correl_pci[[i]]$CI["upper"] |
960 ) | 1002 ) |
961 | 1003 |
962 | 1004 |
963 # extract "variant 4 of Variable Influence on Projection for OPLS" (see Galindo_Prieto_2014, DOI 10.1002/cem.2627) | 1005 # extract "variant 4 of Variable Influence on Projection for OPLS" (see Galindo_Prieto_2014, DOI 10.1002/cem.2627) |
964 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) | 1006 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) |
965 result$vip4p <- as.numeric(ropls_x@vipVn) | 1007 result$vip4p <- as.numeric(ropls_x@vipVn)[!nas] |
966 result$vip4o <- as.numeric(ropls_x@orthoVipVn) | 1008 result$vip4o <- as.numeric(ropls_x@orthoVipVn)[!nas] |
967 if (length(result$vip4o) == 0) result$vip4o <- NA | 1009 if (length(result$vip4o) == 0) result$vip4o <- NA |
968 # extract the loadings | 1010 # extract the loadings |
969 result$loadp <- as.numeric(ropls_x@loadingMN) | 1011 result$loadp <- as.numeric(ropls_x@loadingMN)[!nas] |
970 result$loado <- as.numeric(ropls_x@orthoLoadingMN) | 1012 result$loado <- as.numeric(ropls_x@orthoLoadingMN)[!nas] |
971 # get the level names | 1013 # get the level names |
972 level_names <- sort(levels(as.factor(ropls_x@suppLs$y))) | 1014 level_names <- sort(levels(as.factor(ropls_x@suppLs$y))) |
973 fctr_lvl_1 <- level_names[1] | 1015 fctr_lvl_1 <- level_names[1] |
974 fctr_lvl_2 <- level_names[2] | 1016 fctr_lvl_2 <- level_names[2] |
975 feature_count <- length(ropls_x@vipVn) | 1017 result$level1 <- rep.int(x = fctr_lvl_1, times = n_features) |
976 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) | 1018 result$level2 <- rep.int(x = fctr_lvl_2, times = n_features) |
977 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) | |
978 greaterLevel <- sapply( | 1019 greaterLevel <- sapply( |
979 X = result$correlation | 1020 X = result$correlation |
980 , FUN = function(my_corr) | 1021 , FUN = function(my_corr) { |
981 tryCatch({ | 1022 tryCatch({ |
982 if ( is.nan( my_corr ) ) { | 1023 if ( is.na(my_corr) || ! is.numeric( my_corr ) ) { |
983 NA | 1024 NA |
984 } else { | 1025 } else { |
985 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 | 1026 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 |
986 } | 1027 } |
987 }, error = function(e) { | 1028 } |
1029 , error = function(e) { | |
1030 print(my_corr) | |
988 x_progress( | 1031 x_progress( |
989 sprintf( | 1032 sprintf( |
990 "cor_vs_cov -> sapply: error - substituting NA - %s" | 1033 "cor_vs_cov -> sapply: error - substituting NA - %s" |
991 , as.character(e) | 1034 , as.character(e) |
992 ) | 1035 ) |
993 ) | 1036 ) |
994 NA | 1037 NA |
995 }) | 1038 } |
1039 ) | |
1040 } | |
996 ) | 1041 ) |
997 | |
998 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 | |
999 featureID <- names(ropls_x@vipVn) | |
1000 greaterLevel <- greaterLevel[featureID] | |
1001 result$correlation <- result$correlation[featureID] | |
1002 result$covariance <- result$covariance[featureID] | |
1003 # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 | |
1004 | 1042 |
1005 # build a data frame to hold the content for the tab-separated values file | 1043 # build a data frame to hold the content for the tab-separated values file |
1006 tsv1 <- data.frame( | 1044 tsv1 <- data.frame( |
1007 featureID = featureID | 1045 featureID = featureID |
1008 , factorLevel1 = result$level1 | 1046 , factorLevel1 = result$level1 |
1014 , vip4p = result$vip4p | 1052 , vip4p = result$vip4p |
1015 , vip4o = result$vip4o | 1053 , vip4o = result$vip4o |
1016 , loadp = result$loadp | 1054 , loadp = result$loadp |
1017 , loado = result$loado | 1055 , loado = result$loado |
1018 , cor_p_val_raw = result$p_value_raw | 1056 , cor_p_val_raw = result$p_value_raw |
1019 , cor_p_value = p.adjust(p = result$p_value_raw, method = "BY") | 1057 , cor_p_value = result$p_value |
1020 , cor_ci_lower = result$ci_lower | 1058 , cor_ci_lower = result$ci_lower |
1021 , cor_ci_upper = result$ci_upper | 1059 , cor_ci_upper = result$ci_upper |
1022 ) | 1060 ) |
1023 rownames(tsv1) <- tsv1$featureID | 1061 rownames(tsv1) <- tsv1$featureID |
1024 | 1062 |
1025 # build the superresult, i.e., the result returned by this function | 1063 # build the superresult, i.e., the result returned by this function |
1037 # remove any rows having NA for covariance or correlation | 1075 # remove any rows having NA for covariance or correlation |
1038 tsv1 <- tsv1[!is.na(tsv1$correlation),] | 1076 tsv1 <- tsv1[!is.na(tsv1$correlation),] |
1039 tsv1 <- tsv1[!is.na(tsv1$covariance),] | 1077 tsv1 <- tsv1[!is.na(tsv1$covariance),] |
1040 superresult$tsv1 <- tsv1 | 1078 superresult$tsv1 <- tsv1 |
1041 | 1079 |
1042 # # I did not include these but left them commentd out in case future | 1080 # # I did not include these but left them commentd out in case future |
1043 # # consumers of this routine want to use it in currently unanticipated ways | 1081 # # consumers of this routine want to use it in currently unanticipated ways |
1044 # result$superresult <- superresult | 1082 # result$superresult <- superresult |
1045 # result$oplsda <- ropls_x | 1083 # result$oplsda <- ropls_x |
1046 # result$predictor <- ropls_x@suppLs$y | 1084 # result$predictor <- ropls_x@suppLs$y |
1047 | 1085 |
1057 # title = {Multivariate data analysis in R} | 1095 # title = {Multivariate data analysis in R} |
1058 # } | 1096 # } |
1059 # which follows | 1097 # which follows |
1060 # https://en.wikipedia.org/wiki/Fisher_transformation#Definition | 1098 # https://en.wikipedia.org/wiki/Fisher_transformation#Definition |
1061 | 1099 |
1062 correl.ci <- function(r, n, a = 0.05, rho = 0) { | 1100 correl.ci <- function(r, n_obs, n_vars, a = 0.05, rho = 0) { |
1063 ## r is the calculated correlation coefficient for n pairs | 1101 ## r is the calculated correlation coefficient for n_obs pairs of observations of one variable |
1064 ## a is the significance level | 1102 ## a is the significance level |
1065 ## rho is the hypothesised correlation | 1103 ## rho is the hypothesised correlation |
1066 zh0 <- atanh(rho) # 0.5*log((1+rho)/(1-rho)), i.e., Fisher's z-transformation for Ho | 1104 zh0 <- atanh(rho) # 0.5*log((1+rho)/(1-rho)), i.e., Fisher's z-transformation for Ho |
1067 zh1 <- atanh(r) # 0.5*log((1+r)/(1-r)), i.e., Fisher's z-transformation for H1 | 1105 zh1 <- atanh(r) # 0.5*log((1+r)/(1-r)), i.e., Fisher's z-transformation for H1 |
1068 se <- (1 - r^2)/sqrt(n - 3) ## standard error for Fisher's z-transformation of Ho | 1106 se <- (1 - r^2)/sqrt(n_obs - 3) ## standard error for Fisher's z-transformation of Ho |
1069 test <- (zh1 - zh0)/se ### test statistic | 1107 test <- (zh1 - zh0)/se ### test statistic |
1070 pvalue <- 2*(1 - pnorm(abs(test))) ## p-value | 1108 pvalue <- 2*(1 - pnorm(abs(test))) ## p-value |
1071 zL <- zh1 - qnorm(1 - a/2)*se | 1109 pvalue.adj <- p.adjust(p = pvalue, method = "BY", n = n_vars) |
1072 zH <- zh1 + qnorm(1 - a/2)*se | 1110 z_L <- zh1 - qnorm(1 - a/2)*se |
1073 fishL <- tanh(zL) # (exp(2*zL)-1)/(exp(2*zL)+1), i.e., lower confidence limit | 1111 z_h <- zh1 + qnorm(1 - a/2)*se |
1074 fishH <- tanh(zH) # (exp(2*zH)-1)/(exp(2*zH)+1), i.e., upper confidence limit | 1112 fish_l <- tanh(z_L) # (exp(2*z_l)-1)/(exp(2*z_l)+1), i.e., lower confidence limit |
1075 CI <- c(fishL, fishH) | 1113 fish_h <- tanh(z_h) # (exp(2*z_h)-1)/(exp(2*z_h)+1), i.e., upper confidence limit |
1076 names(CI) <- c('lower', 'upper') | 1114 ci <- c(fish_l, fish_h) |
1077 list(correlation = r, p.value = pvalue, CI = CI) | 1115 names(ci) <- c("lower", "upper") |
1116 list( | |
1117 correlation = r | |
1118 , p.value.raw = pvalue | |
1119 , p.value = pvalue.adj | |
1120 , CI = ci | |
1121 ) | |
1078 } | 1122 } |
1079 | 1123 |
1080 # vim: sw=2 ts=2 et : | 1124 # vim: sw=2 ts=2 et ai : |