w4mkmeans: w4mkmeans_routines.R comparison

comparison w4mkmeans_routines.R @ 1:02cafb660b72 draft

planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a

author	eschen42
date	Wed, 09 Aug 2017 18:06:55 -0400
parents	6ccbe18131a6
children	c415b7dc6f37

comparison

equal deleted inserted replaced

-:6ccbe18131a6
+:02cafb660b72
 return (
 c(
 "w4mkmeans: bad input.",
 "# contract:",
 "    required - caller will provide an environment comprising:",
-"      log_print        - a logging function with the signature function(x, ...) expecting strings as x and ...",
+"      log_print          - a logging function with the signature function(x, ...) expecting strings as x and ...",
-"      variableMetadata - the corresponding W4M data.frame having feature metadata",
+"      variableMetadata   - the corresponding W4M data.frame having feature metadata",
-"      sampleMetdata    - the corresponding W4M data.frame having sample metadata",
+"      sampleMetadata     - the corresponding W4M data.frame having sample metadata",
-"      dataMatrix       - the corresponding W4M matrix",
+"      dataMatrix         - the corresponding W4M matrix",
-"      slots            - the number of parallel slots for calculating kmeans",
+"      slots              - the number of parallel slots for calculating kmeans",
 "    optional - environment may comprise:",
-"      kfeatures        - an array of integers, the k's to apply for clustering by feature (default, empty array)",
+"      kfeatures          - an array of integers, the k's to apply for clustering by feature (default, empty array)",
-"      ksamples         - an array of integers, the k's to apply for clustering by sample (default, empty array)",
+"      ksamples           - an array of integers, the k's to apply for clustering by sample (default, empty array)",
-"      iter.max         - the maximum number of iterations when calculating a cluster (default = 10)",
+"      iter.max           - the maximum number of iterations when calculating a cluster (default = 10)",
-"      nstart           - how many random sets of centers should be chosen (default = 1)",
+"      nstart             - how many random sets of centers should be chosen (default = 1)",
-"      algorithm        - string from c('Hartigan-Wong', 'Lloyd', 'Forgy', 'MacQueen') (default = Hartigan-Wong)",
+"      algorithm          - string from c('Hartigan-Wong', 'Lloyd', 'Forgy', 'MacQueen') (default = Hartigan-Wong)",
+"      categorical_prefix - string prepended to each cluster number written to the metadata (default = 'k')",
 "      ",
 "    this routine will return a list comprising:",
-"      variableMetadata - the input variableMetadata data.frame with updates, if any",
+"      variableMetadata   - the input variableMetadata data.frame with updates, if any",
-"      sampleMetadata   - the input sampleMetadata data.frame with updates, if any",
+"      sampleMetadata     - the input sampleMetadata data.frame with updates, if any",
-"      scores           - an array of strings, each representing a line of a tsv having the following header:",
+"      scores             - an array of strings, each representing a line of a tsv having the following header:",
-"                           clusterOn TAB k TAB totalSS TAB betweenSS TAB proportion"
+"                             clusterOn TAB k TAB totalSS TAB betweenSS TAB proportion"
 )
 )
 }
 w4mkmeans <- function(env) {
 # abort if 'env' is null or is not an environment
 if ( is.null(env) || ! is.environment(env) ) {
 lapply(w4kmeans_usage(),print)
 }
 # supply default arguments
-if ( ! exists("iter.max" , env) ) env$iter.max  <- 10
+if ( ! exists("iter.max"          , env) ) env$iter.max  <- 10
-if ( ! exists("nstart"   , env) ) env$nstart    <- 1
+if ( ! exists("nstart"            , env) ) env$nstart    <- 1
-if ( ! exists("algorithm", env) ) env$algorithm <- 'Hartigan-Wong'
+if ( ! exists("algorithm"         , env) ) env$algorithm <- 'Hartigan-Wong'
-if ( ! exists("ksamples" , env) ) env$ksamples  <- c()
+if ( ! exists("categorical_prefix", env) ) env$categorical_prefix <- 'k'
-if ( ! exists("kfeatures", env) ) env$kfeatures <- c()
+if ( ! exists("ksamples"          , env) ) env$ksamples  <- c()
+if ( ! exists("kfeatures"         , env) ) env$kfeatures <- c()
 # check mandatory arguments
 expected <- c(
 "log_print"
 , "variableMetadata"
 , "sampleMetadata"
 # extract parameters from 'env'
 failure_action  <- env$log_print
 scores          <- c( "clusterOn\tk\ttotalSS\tbetweenSS\tproportion" )
 sampleMetadata  <- env$sampleMetadata
 featureMetadata <- env$variableMetadata
-ksamples        <- as.numeric(env$ksamples)
-kfeatures       <- as.numeric(env$kfeatures)
 slots           <- env$slots
+positive_ints <- function(a, what) {
+i <- as.integer(a)    # may introduce NAs by coercion
+i <- i[!is.na(i)]     # eliminate NAs
+i <- i[i > 0]         # eliminate non-positive integers
+i <- unique(sort(i))  # eliminate redundancy and disorder
+if (length(a)!=length(i)) {
+failure_action("Some values for '", what, "' were skipped where not unique, not positive, or not convertible to an integer.")
+}
+return (i)            # return results, if any
+}
+ksamples        <- positive_ints(env$ksamples , "ksamples")
+kfeatures       <- positive_ints(env$kfeatures, "kfeatures")
 myLapply <- parLapply
 # uncomment the next line to mimic parLapply, but without parallelization (for testing/experimentation)
 # myLapply <- function(cl, ...) lapply(...)
 cl <- NULL
 , dimension = "samples"
 )
 for ( i in 1:ksamples_length ) {
 result <- smpl_result_list[[i]]
 if (result$success) {
-sampleMetadata[sprintf("k%d",ksamples[i])] <- result$value$clusters
+sampleMetadata[sprintf("k%d",ksamples[i])] <- sprintf("%s%d", env$categorical_prefix, result$value$clusters)
 scores <- c(scores, result$value$scores)
 }
 }
 }
 , dimension = "features"
 )
 for ( i in 1:kfeatures_length ) {
 result <- feat_result_list[[i]]
 if (result$success) {
-featureMetadata[sprintf("k%d",kfeatures[i])] <- result$value$clusters
+featureMetadata[sprintf("k%d",kfeatures[i])] <- sprintf("%s%d", env$categorical_prefix, result$value$clusters)
 scores <- c(scores, result$value$scores)
 }
 }
 }

Mercurial > repos > eschen42 > w4mkmeans

comparison w4mkmeans_routines.R @ 1:02cafb660b72 draft