changeset 23:0e3df2630d9b draft

Uploaded
author guerler
date Fri, 25 Apr 2014 20:08:55 -0400
parents f1c9378592be
children 27c0329b431c
files histogram.r
diffstat 1 files changed, 76 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/histogram.r	Fri Apr 25 20:08:55 2014 -0400
@@ -0,0 +1,76 @@
+# utilities
+boundary <- function(x, increment) {
+    return (floor(x / increment) * increment)
+}
+
+roundup <- function(x) {
+    return (sign(x) * 10^ceiling(log10(abs(x))))
+}
+
+# wrapper
+wrapper <- function(table, columns, options) {
+
+    # initialize output list
+    l <- list()
+
+    # loop through all columns
+    m <- list()
+    for (key in names(columns)) {
+        # load column data
+        column <- as.numeric(columns[key])
+        column_data <- sapply( table[column], as.numeric )
+
+        # collect vectors in list
+        m <- append(m, list(column_data))
+    }
+    
+    # get min/max boundaries
+    min_value <- min(unlist(m))
+    max_value <- max(unlist(m))
+    
+    # identify increment
+    increment <- roundup((max_value - min_value) / 10)
+    
+    # fix min value
+    min_value <- boundary(min_value, increment)
+    
+    # fix max value
+    max_value <- min_value + increment * 10
+    
+    # check if single bin is enough
+    if (min_value == max_value) {
+        l <- append(l, max_value)
+        for (key in seq(m)) {
+            l <- append(l, 1.0)
+        }
+        return (l)
+    }
+    
+    # fix range and bins
+    bin_seq = seq(min_value, max_value, by=increment)
+    
+    # add as first column
+    l <- append(l, list(bin_seq[2: length(bin_seq)]))
+    
+    # loop through all columns
+    for (key in seq(m)) {
+        # load column data
+        column_data <- m[[key]]
+        
+        # create hist data
+        hist_data <- hist(column_data, breaks=bin_seq, plot=FALSE)
+        
+        # normalize densities
+        count_sum <- sum(hist_data$counts)
+        if (count_sum > 0) {
+            hist_data$counts = hist_data$counts / count_sum
+        }
+
+        # collect vectors in list
+        l <- append(l, list(hist_data$counts))
+    }
+    
+
+    # return
+    return (l)
+}