# HG changeset patch # User guerler # Date 1398470935 14400 # Node ID 0e3df2630d9b6a8ce3e0a0aeaa5acbd26452b4df # Parent f1c9378592be9aeca7bfee2915c5c8bd6f7f3d33 Uploaded diff -r f1c9378592be -r 0e3df2630d9b histogram.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/histogram.r Fri Apr 25 20:08:55 2014 -0400 @@ -0,0 +1,76 @@ +# utilities +boundary <- function(x, increment) { + return (floor(x / increment) * increment) +} + +roundup <- function(x) { + return (sign(x) * 10^ceiling(log10(abs(x)))) +} + +# wrapper +wrapper <- function(table, columns, options) { + + # initialize output list + l <- list() + + # loop through all columns + m <- list() + for (key in names(columns)) { + # load column data + column <- as.numeric(columns[key]) + column_data <- sapply( table[column], as.numeric ) + + # collect vectors in list + m <- append(m, list(column_data)) + } + + # get min/max boundaries + min_value <- min(unlist(m)) + max_value <- max(unlist(m)) + + # identify increment + increment <- roundup((max_value - min_value) / 10) + + # fix min value + min_value <- boundary(min_value, increment) + + # fix max value + max_value <- min_value + increment * 10 + + # check if single bin is enough + if (min_value == max_value) { + l <- append(l, max_value) + for (key in seq(m)) { + l <- append(l, 1.0) + } + return (l) + } + + # fix range and bins + bin_seq = seq(min_value, max_value, by=increment) + + # add as first column + l <- append(l, list(bin_seq[2: length(bin_seq)])) + + # loop through all columns + for (key in seq(m)) { + # load column data + column_data <- m[[key]] + + # create hist data + hist_data <- hist(column_data, breaks=bin_seq, plot=FALSE) + + # normalize densities + count_sum <- sum(hist_data$counts) + if (count_sum > 0) { + hist_data$counts = hist_data$counts / count_sum + } + + # collect vectors in list + l <- append(l, list(hist_data$counts)) + } + + + # return + return (l) +}