Mercurial > repos > eschen42 > w4mkmeans
comparison w4m_general_purpose_routines.R @ 2:c415b7dc6f37 draft default tip
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 3e916537da6bb37e6f3927d7a11e98e0ab6ef5ec
author | eschen42 |
---|---|
date | Mon, 05 Mar 2018 12:40:17 -0500 |
parents | 6ccbe18131a6 |
children |
comparison
equal
deleted
inserted
replaced
1:02cafb660b72 | 2:c415b7dc6f37 |
---|---|
1 ##----------------------------------------------- | |
2 ## helper functions for error detection/reporting | |
3 ##----------------------------------------------- | |
4 | |
5 # ISO 8601 date ref: https://en.wikipedia.org/wiki/ISO_8601 | |
6 iso_date <- function() { | |
7 format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z") | |
8 } | |
9 | |
10 # log-printing to stderr | |
11 log_print <- function(x, ...) { | |
12 cat( | |
13 sep="" | |
14 , file=stderr() | |
15 , iso_date() | |
16 , " " | |
17 , c(x, ...) | |
18 , "\n" | |
19 ) | |
20 } | |
21 | |
22 # format error for logging | |
23 format_error <- function(e) { | |
24 paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ") | |
25 } | |
26 | |
27 # tryCatchFunc produces a list | |
28 # func - a function that takes no arguments | |
29 # On success of func(), tryCatchFunc produces | |
30 # list(success = TRUE, value = func(), msg = "") | |
31 # On failure of func(), tryCatchFunc produces | |
32 # list(success = FALSE, value = NA, msg = "the error message") | |
33 tryCatchFunc <- function(func) { | |
34 retval <- NULL | |
35 tryCatch( | |
36 expr = { | |
37 retval <- ( list( success = TRUE, value = func(), msg = "" ) ) | |
38 } | |
39 , error = function(e) { | |
40 retval <<- list( success = FALSE, value = NA, msg = format_error(e) ) | |
41 } | |
42 ) | |
43 return (retval) | |
44 } | |
45 | |
1 # prepare.data.matrix - Prepare x.datamatrix for multivariate statistical analaysis (MVA) | 46 # prepare.data.matrix - Prepare x.datamatrix for multivariate statistical analaysis (MVA) |
2 # - Motivation: | 47 # - Motivation: |
3 # - Selection: | 48 # - Selection: |
4 # - You may want to exclude several samples from your analysis: | 49 # - You may want to exclude several samples from your analysis: |
5 # - If so, set the argument 'exclude.samples' to a vector of sample names | 50 # - If so, set the argument 'exclude.samples' to a vector of sample names |
6 # - You may want to exclude several features or features from your analysis: | 51 # - You may want to exclude several features or features from your analysis: |
7 # - If so, set the argument 'exclude.features' to a vector of feature names | 52 # - If so, set the argument 'exclude.features' to a vector of feature names |
8 # - Renaming samples: | 53 # - Renaming samples: |
9 # - You may want to rename several samples from your analysis: | 54 # - You may want to rename several samples from your analysis: |
10 # - If so, set the argument 'sample.rename.function' to a function accepting a vector | 55 # - If so, set the argument 'sample.rename.function' to a function accepting a vector |
11 # of sample names and producing a vector of strings of equivalent length | 56 # of sample names and producing a vector of strings of equivalent length |
12 # - MVA is confounded by missing values. | 57 # - MVA is confounded by missing values. |
13 # - By default, this function imputes missing values as zero. | 58 # - By default, this function imputes missing values as zero. |
14 # - For a different imputation, set the 'data.imputation' argument to a function | 59 # - For a different imputation, set the 'data.imputation' argument to a function |
15 # accepting a single matrix argument and returning a matrix of the same | 60 # accepting a single matrix argument and returning a matrix of the same |
17 # - Transformation | 62 # - Transformation |
18 # - It may be desirable to transform the intensity data to reduce the range. | 63 # - It may be desirable to transform the intensity data to reduce the range. |
19 # - By default, this function performs an eigth-root transformation: | 64 # - By default, this function performs an eigth-root transformation: |
20 # - Any root-tranformation has the advantage of never being negative. | 65 # - Any root-tranformation has the advantage of never being negative. |
21 # - Calculation of the eight-root is four times faster in my hands than log10. | 66 # - Calculation of the eight-root is four times faster in my hands than log10. |
22 # - However, it has the disadvantage that calculation of fold-differences | 67 # - However, it has the disadvantage that calculation of fold-differences |
23 # is not additive as with log-transformation. | 68 # is not additive as with log-transformation. |
24 # - Rather, you must divide the values and raise to the eighth power. | 69 # - Rather, you must divide the values and raise to the eighth power. |
25 # - For a different transformation, set the 'data.transformation' argument | 70 # - For a different transformation, set the 'data.transformation' argument |
26 # to a function accepting a single matrix argument. | 71 # to a function accepting a single matrix argument. |
27 # - The function should be written to return a matrix of the same dimensions | 72 # - The function should be written to return a matrix of the same dimensions |
105 , data.transformation = function(x) { | 150 , data.transformation = function(x) { |
106 sqrt( sqrt( sqrt(x) ) ) | 151 sqrt( sqrt( sqrt(x) ) ) |
107 } | 152 } |
108 , en = new.env() | 153 , en = new.env() |
109 ) { | 154 ) { |
155 # log to environment | |
156 if ( !exists("log", envir = en) ) { | |
157 en$log <- c() | |
158 } | |
159 enlog <- function(s) { en$log <- c(en$log, s); s } | |
160 #enlog("foo") | |
161 | |
110 # MatVar - Compute variance of rows or columns of a matrix | 162 # MatVar - Compute variance of rows or columns of a matrix |
111 # ref: http://stackoverflow.com/a/25100036 | 163 # ref: http://stackoverflow.com/a/25100036 |
112 # For row variance, dim == 1, for col variance, dim == 2 | 164 # For row variance, dim == 1, for col variance, dim == 2 |
113 MatVar <- function(x, dim = 1) { | 165 MatVar <- function(x, dim = 1) { |
114 if (dim == 1) { | 166 if (dim == 1) { |
135 } else stop("Please enter valid dimension, for rows, dim = 1; for colums, dim = 2") | 187 } else stop("Please enter valid dimension, for rows, dim = 1; for colums, dim = 2") |
136 } | 188 } |
137 | 189 |
138 nonzero.var <- function(x) { | 190 nonzero.var <- function(x) { |
139 if (nrow(x) == 0) { | 191 if (nrow(x) == 0) { |
140 print(str(x)) | |
141 stop("matrix has no rows") | 192 stop("matrix has no rows") |
142 } | 193 } |
143 if (ncol(x) == 0) { | 194 if (ncol(x) == 0) { |
144 print(str(x)) | |
145 stop("matrix has no columns") | 195 stop("matrix has no columns") |
146 } | 196 } |
147 if ( is.numeric(x) ) { | 197 if ( is.numeric(x) ) { |
148 # exclude any rows with zero variance | 198 # exclude any rows with zero variance |
149 row.vars <- MatVar(x, dim = 1) | 199 row.vars <- MatVar(x, dim = 1) |
151 nonzero.rows <- row.vars[nonzero.row.vars] | 201 nonzero.rows <- row.vars[nonzero.row.vars] |
152 if ( length(rownames(x)) != length(rownames(nonzero.rows)) ) { | 202 if ( length(rownames(x)) != length(rownames(nonzero.rows)) ) { |
153 row.names <- attr(nonzero.rows,"names") | 203 row.names <- attr(nonzero.rows,"names") |
154 x <- x[ row.names, , drop = FALSE ] | 204 x <- x[ row.names, , drop = FALSE ] |
155 } | 205 } |
156 | 206 |
157 # exclude any columns with zero variance | 207 # exclude any columns with zero variance |
158 column.vars <- MatVar(x, dim = 2) | 208 column.vars <- MatVar(x, dim = 2) |
159 nonzero.column.vars <- column.vars > 0 | 209 nonzero.column.vars <- column.vars > 0 |
160 nonzero.columns <- column.vars[nonzero.column.vars] | 210 nonzero.columns <- column.vars[nonzero.column.vars] |
161 if ( length(colnames(x)) != length(colnames(nonzero.columns)) ) { | 211 if ( length(colnames(x)) != length(colnames(nonzero.columns)) ) { |
168 | 218 |
169 if (is.null(x.matrix)) { | 219 if (is.null(x.matrix)) { |
170 stop("FATAL ERROR - prepare.data.matrix was called with null x.matrix") | 220 stop("FATAL ERROR - prepare.data.matrix was called with null x.matrix") |
171 } | 221 } |
172 | 222 |
223 enlog("prepare.data.matrix - get matrix") | |
224 | |
173 en$xpre <- x <- x.matrix | 225 en$xpre <- x <- x.matrix |
174 | 226 |
175 # exclude any samples as indicated | 227 # exclude any samples as indicated |
176 if ( !is.null(exclude.features) ) { | 228 if ( !is.null(exclude.features) ) { |
229 enlog("prepare.data.matrix - exclude any samples as indicated") | |
177 my.colnames <- colnames(x) | 230 my.colnames <- colnames(x) |
178 my.col.diff <- setdiff(my.colnames, exclude.features) | 231 my.col.diff <- setdiff(my.colnames, exclude.features) |
179 x <- x[ , my.col.diff , drop = FALSE ] | 232 x <- x[ , my.col.diff , drop = FALSE ] |
180 } | 233 } |
181 | 234 |
182 # exclude any features as indicated | 235 # exclude any features as indicated |
183 if ( !is.null(exclude.samples) ) { | 236 if ( !is.null(exclude.samples) ) { |
237 enlog("prepare.data.matrix - exclude any features as indicated") | |
184 my.rownames <- rownames(x) | 238 my.rownames <- rownames(x) |
185 my.row.diff <- setdiff(my.rownames, exclude.samples) | 239 my.row.diff <- setdiff(my.rownames, exclude.samples) |
186 x <- x[ my.row.diff, , drop = FALSE ] | 240 x <- x[ my.row.diff, , drop = FALSE ] |
187 } | 241 } |
188 | 242 |
189 # rename rows if desired | 243 # rename rows if desired |
190 if ( !is.null(sample.rename.function) ) { | 244 if ( !is.null(sample.rename.function) ) { |
245 enlog("prepare.data.matrix - rename rows if desired") | |
191 renamed <- sample.rename.function(x) | 246 renamed <- sample.rename.function(x) |
192 rownames(x) <- renamed | 247 rownames(x) <- renamed |
193 } | 248 } |
194 | 249 |
250 enlog("prepare.data.matrix - save redacted x.datamatrix to environment") | |
251 | |
195 # save redacted x.datamatrix to environment | 252 # save redacted x.datamatrix to environment |
196 en$redacted.data.matrix <- x | 253 en$redacted.data.matrix <- x |
197 | 254 |
198 # impute values missing from the x.datamatrix | 255 # impute values missing from the x.datamatrix |
199 if ( !is.null(data.imputation) ) { | 256 if ( !is.null(data.imputation) ) { |
257 enlog("prepare.data.matrix - impute values missing from the x.datamatrix") | |
200 x <- data.imputation(x) | 258 x <- data.imputation(x) |
201 } | 259 } |
202 | 260 |
203 # perform transformation if desired | 261 # perform transformation if desired |
204 if ( !is.null(data.transformation) ) { | 262 if ( !is.null(data.transformation) ) { |
263 enlog("prepare.data.matrix - perform transformation") | |
205 x <- data.transformation(x) | 264 x <- data.transformation(x) |
206 } else { | 265 } else { |
207 x <- x | 266 x <- x |
208 } | 267 } |
209 | 268 |
210 # purge rows and columns that have zero variance | 269 # purge rows and columns that have zero variance |
211 if ( is.numeric(x) ) { | 270 if ( is.numeric(x) ) { |
271 enlog("prepare.data.matrix - purge rows and columns that have zero variance") | |
212 x <- nonzero.var(x) | 272 x <- nonzero.var(x) |
213 } | 273 } |
214 | 274 |
215 # save imputed, transformed x.datamatrix to environment | 275 # save imputed, transformed x.datamatrix to environment |
216 en$imputed.transformed.data.matrix <- x | 276 en$imputed.transformed.data.matrix <- x |
217 | 277 |
218 return(x) | 278 return(x) |
219 } | 279 } |
220 | 280 |
221 | 281 # vim: sw=2 ts=2 et : |
222 ##----------------------------------------------- | |
223 ## helper functions for error detection/reporting | |
224 ##----------------------------------------------- | |
225 | |
226 # log-printing to stderr | |
227 log_print <- function(x, ...) { | |
228 cat( | |
229 format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z") | |
230 , " " | |
231 , c(x, ...) | |
232 , "\n" | |
233 , sep="" | |
234 , file=stderr() | |
235 ) | |
236 } | |
237 | |
238 # tryCatchFunc produces a list | |
239 # On success of expr(), tryCatchFunc produces | |
240 # list(success TRUE, value = expr(), msg = "") | |
241 # On failure of expr(), tryCatchFunc produces | |
242 # list(success = FALSE, value = NA, msg = "the error message") | |
243 tryCatchFunc <- function(expr) { | |
244 # format error for logging | |
245 format_error <- function(e) { | |
246 paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ") | |
247 } | |
248 my_expr <- expr | |
249 retval <- NULL | |
250 tryCatch( | |
251 expr = { | |
252 retval <- ( list( success = TRUE, value = my_expr(), msg = "" ) ) | |
253 } | |
254 , error = function(e) { | |
255 retval <<- list( success = FALSE, value = NA, msg = format_error(e) ) | |
256 } | |
257 ) | |
258 return (retval) | |
259 } | |
260 | |
261 # tryCatchProc produces a list | |
262 # On success of expr(), tryCatchProc produces | |
263 # list(success TRUE, msg = "") | |
264 # On failure of expr(), tryCatchProc produces | |
265 # list(success = FALSE, msg = "the error message") | |
266 tryCatchProc <- function(expr) { | |
267 # format error for logging | |
268 format_error <- function(e) { | |
269 paste(c("Error { message:", e$message, ", call:", e$call, "}"), collapse = " ") | |
270 } | |
271 retval <- NULL | |
272 tryCatch( | |
273 expr = { | |
274 expr() | |
275 retval <- ( list( success = TRUE, msg = "" ) ) | |
276 } | |
277 , error = function(e) { | |
278 retval <<- list( success = FALSE, msg = format_error(e) ) | |
279 } | |
280 ) | |
281 return (retval) | |
282 } | |
283 |