Mercurial > repos > eschen42 > w4mkmeans
annotate w4mkmeans_routines.R @ 1:02cafb660b72 draft
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
author | eschen42 |
---|---|
date | Wed, 09 Aug 2017 18:06:55 -0400 |
parents | 6ccbe18131a6 |
children | c415b7dc6f37 |
rev | line source |
---|---|
0
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
1 ##------------------------------------------------------------------------------------------------------ |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
2 ## these are the batch-independent and file-structure-independent routines to support the w4mkmeans tool |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
3 ##------------------------------------------------------------------------------------------------------ |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
4 |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
5 library(parallel) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
6 |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
7 w4kmeans_usage <- function() { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
8 return ( |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
9 c( |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
10 "w4mkmeans: bad input.", |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
11 "# contract:", |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
12 " required - caller will provide an environment comprising:", |
1
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
13 " log_print - a logging function with the signature function(x, ...) expecting strings as x and ...", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
14 " variableMetadata - the corresponding W4M data.frame having feature metadata", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
15 " sampleMetadata - the corresponding W4M data.frame having sample metadata", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
16 " dataMatrix - the corresponding W4M matrix", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
17 " slots - the number of parallel slots for calculating kmeans", |
0
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
18 " optional - environment may comprise:", |
1
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
19 " kfeatures - an array of integers, the k's to apply for clustering by feature (default, empty array)", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
20 " ksamples - an array of integers, the k's to apply for clustering by sample (default, empty array)", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
21 " iter.max - the maximum number of iterations when calculating a cluster (default = 10)", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
22 " nstart - how many random sets of centers should be chosen (default = 1)", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
23 " algorithm - string from c('Hartigan-Wong', 'Lloyd', 'Forgy', 'MacQueen') (default = Hartigan-Wong)", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
24 " categorical_prefix - string prefixed to the cluster number in each cluster label (default = k)", |
0
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
25 " ", |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
26 " this routine will return a list comprising:", |
1
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
27 " variableMetadata - the input variableMetadata data.frame with updates, if any", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
28 " sampleMetadata - the input sampleMetadata data.frame with updates, if any", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
29 " scores - an array of strings, each representing a line of a tsv having the following header:", |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
30 " clusterOn TAB k TAB totalSS TAB betweenSS TAB proportion" |
0
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
31 ) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
32 ) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
33 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
34 |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
35 w4mkmeans <- function(env) { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
36 # abort if 'env' is null or is not an environment |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
37 if ( is.null(env) || ! is.environment(env) ) { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
38 lapply(w4kmeans_usage(),print) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
39 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
40 # supply default arguments |
1
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
41 if ( ! exists("iter.max" , env) ) env$iter.max <- 10 |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
42 if ( ! exists("nstart" , env) ) env$nstart <- 1 |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
43 if ( ! exists("algorithm" , env) ) env$algorithm <- 'Hartigan-Wong' |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
44 if ( ! exists("categorical_prefix", env) ) env$categorical_prefix <- 'k' |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
45 if ( ! exists("ksamples" , env) ) env$ksamples <- c() |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
46 if ( ! exists("kfeatures" , env) ) env$kfeatures <- c() |
0
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
47 # check mandatory arguments |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
48 expected <- c( |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
49 "log_print" |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
50 , "variableMetadata" |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
51 , "sampleMetadata" |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
52 , "dataMatrix" |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
53 , "slots" |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
54 ) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
55 missing_from_env <- setdiff(expected, (ls(env))) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
56 if ( length(missing_from_env) > 0 ) { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
57 print(paste(c('expected environment members not found: ', as.character(missing_from_env)), collapse = ", ")) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
58 lapply(w4kmeans_usage(),print) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
59 stop("w4mkmeans: contract has been broken") |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
60 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
61 # extract parameters from 'env' |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
62 failure_action <- env$log_print |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
63 scores <- c( "clusterOn\tk\ttotalSS\tbetweenSS\tproportion" ) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
64 sampleMetadata <- env$sampleMetadata |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
65 featureMetadata <- env$variableMetadata |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
66 slots <- env$slots |
1
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
67 positive_ints <- function(a, what) { |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
68 i <- as.integer(a) # may introduce NAs by coercion |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
69 i <- i[!is.na(i)] # eliminate NAs |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
70 i <- i[i > 0] # eliminate non-positive integers |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
71 i <- unique(sort(i)) # eliminate redundancy and disorder |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
72 if (length(a)!=length(i)) { |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
73 failure_action("Some values for '", what, "' were skipped where not unique, not positive, or not convertible to an integer.") |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
74 } |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
75 return (i) # return results, if any |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
76 } |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
77 ksamples <- positive_ints(env$ksamples , "ksamples") |
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
78 kfeatures <- positive_ints(env$kfeatures, "kfeatures") |
0
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
79 |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
80 myLapply <- parLapply |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
81 # uncomment the next line to mimic parLapply, but without parallelization (for testing/experimentation) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
82 # myLapply <- function(cl, ...) lapply(...) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
83 cl <- NULL |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
84 if ( identical(myLapply, parLapply) ) { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
85 failure_action(sprintf("w4mkmeans: using parallel evaluation with %d slots", slots)) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
86 failure_action(names(cl)) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
87 cl <- makePSOCKcluster(names = slots) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
88 # from ?makePSOCKcluster: "It is good practice to shut down the workers by calling stopCluster." |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
89 clusterExport( |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
90 cl = cl |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
91 , varlist = c( |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
92 "tryCatchFunc" |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
93 , "calc_kmeans_one_dimension_one_k" |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
94 , "prepare.data.matrix" |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
95 ) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
96 ) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
97 final <- function(cl) { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
98 # from ?makePSOCKcluster: "It is good practice to shut down the workers by calling stopCluster." |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
99 if ( !is.null(cl) ) { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
100 failure_action("w4mkmeans: stopping cluster used for parallel evaluation") |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
101 stopCluster(cl) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
102 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
103 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
104 } else { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
105 failure_action("w4mkmeans: using sequential evaluation (1 slot)") |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
106 final <- function(cl) { } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
107 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
108 |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
109 tryCatch( |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
110 expr = { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
111 # These myLapply calls produce lists of lists of results: |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
112 # - The outer list has no keys and its members are accessed by index |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
113 # - The inner list has keys "clusters" and "scores" |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
114 |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
115 # for each $i in ksamples, append column 'k$i' to data frame sampleMetadata |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
116 ksamples_length <- length(ksamples) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
117 if ( ksamples_length > 0 ) { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
118 smpl_result_list <- myLapply( |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
119 cl = cl |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
120 , ksamples |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
121 , calc_kmeans_one_dimension_one_k |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
122 , env = env |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
123 , dimension = "samples" |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
124 ) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
125 for ( i in 1:ksamples_length ) { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
126 result <- smpl_result_list[[i]] |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
127 if (result$success) { |
1
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
128 sampleMetadata[sprintf("k%d",ksamples[i])] <- sprintf("%s%d", env$categorical_prefix, result$value$clusters) |
0
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
129 scores <- c(scores, result$value$scores) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
130 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
131 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
132 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
133 |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
134 # for each $i in kfeatures, append column 'k$i' to data frame featureMetadata |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
135 kfeatures_length <- length(kfeatures) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
136 if ( kfeatures_length > 0 ) { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
137 feat_result_list <- myLapply( |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
138 cl = cl |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
139 , kfeatures |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
140 , calc_kmeans_one_dimension_one_k |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
141 , env = env |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
142 , dimension = "features" |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
143 ) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
144 for ( i in 1:kfeatures_length ) { |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
145 result <- feat_result_list[[i]] |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
146 if (result$success) { |
1
02cafb660b72
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents:
0
diff
changeset
|
147 featureMetadata[sprintf("k%d",kfeatures[i])] <- sprintf("%s%d", env$categorical_prefix, result$value$clusters) |
0
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
148 scores <- c(scores, result$value$scores) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
149 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
150 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
151 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
152 |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
153 return ( |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
154 list( |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
155 variableMetadata = featureMetadata |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
156 , sampleMetadata = sampleMetadata |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
157 , scores = scores |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
158 ) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
159 ) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
160 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
161 , finally = final(cl) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
162 ) |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
163 } |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
164 |
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
# calculate k-means for features or samples
#   - recall that the dataMatrix has features in rows and samples in columns
# return value:
#   the list produced by tryCatchFunc, i.e.:
#     - on success:
#         list(success = TRUE, value = list(clusters = km$cluster, scores = scores), msg = "")
#     - on failure of the clustering step:
#         list(success = FALSE, value = NA, msg = "the error message")
#   where 'scores' is a single tab-separated string holding:
#     dimension, k, km$totss, km$betweenss, km$betweenss/km$totss
# arguments:
#   k:
#     number (coerced with as.integer), the number of clusters to make
#   env:
#     environment having dataMatrix, iter.max, nstart, algorithm, and a function log_print
#   dimension:
#     a single string, one of:
#     - "samples":  produce clusters column to add to the sampleMetadata table
#       - this is the default case
#     - "features": produce clusters column to add to the variableMetadata table
# errors:
#   stop() is called (outside of tryCatchFunc, so it propagates to the caller) when
#   env, k, or dimension fail validation
calc_kmeans_one_dimension_one_k <- function(k, env, dimension = "samples") {
  # abort if environment is not as expected
  if ( is.null(env) || ! is.environment(env) ) {
    stop("calc_kmeans_one_dimension_one_k - argument 'env' is not an environment")
  }
  if ( ! exists("log_print", env) || ! is.function(env$log_print) ) {
    stop("calc_kmeans_one_dimension_one_k - argument 'env' - environment does not include log_print or it is not a function")
  }
  # abort if k is not as expected
  if ( ! is.numeric(k) ) {
    stop(sprintf("calc_kmeans_one_dimension_one_k - expected numeric argument 'k' but type is %s", typeof(k)))
  }
  k <- as.integer(k)
  # abort if dimension is not as expected
  #   - require exactly one string; %in% never yields NA, so NA_character_ and
  #     zero-length input reach the informative message below rather than
  #     failing inside the 'if' condition itself
  if ( ! is.character(dimension)
       || length(dimension) != 1L
       || ! ( dimension %in% c("features", "samples") ) ) {
    stop("calc_kmeans_one_dimension_one_k - argument 'dimension' is neither 'features' nor 'samples'")
  }
  dm           <- env$dataMatrix
  iter.max     <- env$iter.max
  nstart       <- env$nstart
  algorithm    <- env$algorithm
  dim_features <- dimension == "features"
  # tryCatchFunc produces a list
  #   On success of expr(), tryCatchFunc produces
  #     list(success TRUE, value = expr(), msg = "")
  #   On failure of expr(), tryCatchFunc produces
  #     list(success = FALSE, value = NA, msg = "the error message")
  result_list <- tryCatchFunc( expr = function() {
    # kmeans clusters the rows of the matrix that it is given
    #   - to calculate feature-clusters, dm is used as-is
    #   - to calculate sample-clusters, dm is transposed so that samples become the rows
    # NOTE(review): this matches a dataMatrix having features in rows and samples
    #   in columns; confirm the orientation of env$dataMatrix against the caller,
    #   because if samples are actually the rows then this condition is inverted.
    if ( ! dim_features ) dm <- t(dm)
    # prepare.data.matrix is defined elsewhere; it is given an identity
    #   data.transformation here, so values are not transformed by that callback
    dm <- prepare.data.matrix( x.matrix = dm, data.transformation = function(x) { x } )
    # need to set.seed to get reproducible results from kmeans
    set.seed(4567)
    # do the k-means clustering
    #   - every argument is named so that none can be mis-assigned positionally
    km <- kmeans( x = dm, centers = k, iter.max = iter.max, nstart = nstart, algorithm = algorithm )
    # one tab-separated line: dimension, k, total SS, between-cluster SS, and their ratio
    scores <-
      sprintf("%s\t%d\t%0.5e\t%0.5e\t%0.5f"
             , dimension
             , k
             , km$totss
             , km$betweenss
             , km$betweenss/km$totss
             )
    list(clusters = km$cluster, scores = scores)
  })
  return ( result_list )
}
6ccbe18131a6
planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d
eschen42
parents:
diff
changeset
|
228 |