diff histogram.r @ 45:b84a193361be draft

Uploaded
author guerler
date Thu, 15 May 2014 15:15:21 -0400
parents 524184c2f524
children 12eb81b00cd9
line wrap: on
line diff
--- a/histogram.r	Fri May 09 01:01:39 2014 -0400
+++ b/histogram.r	Thu May 15 15:15:21 2014 -0400
@@ -1,27 +1,6 @@
-# binsize
-min_binsize = 10
-
-# lower boundary
-lowerboundary <- function(x, increment) {
-    return (floor(x / increment) * increment)
-}
-
-# upper boundary
-upperboundary <- function(x, increment) {
-    return (ceiling(x / increment) * increment)
-}
-
-# round to decimals
-roundup <- function(x) {
-    return (sign(x) * 10^ceiling(log10(abs(x))))
-}
-
 # wrapper
 wrapper <- function(table, columns, options) {
 
-    # get binsize
-    binsize = max(as.integer(options$binsize), min_binsize)
-    
     # initialize output list
     l <- list()
 
@@ -36,43 +15,12 @@
         m <- append(m, list(column_data))
     }
     
-    # get min/max boundaries
-    min_value <- min(unlist(m))
-    max_value <- max(unlist(m))
-    
-    # identify range
-    diff <- max_value - min_value
-    
-    # identify increment
-    increment <- roundup(diff / binsize)
-    
-    # fix min value
-    min_value <- lowerboundary(min_value, increment)
-    max_value <- upperboundary(max_value, increment)
-    
-    # update range
-    diff <- max_value - min_value
-    
-    # fix bin size
-    binsize = round(diff / increment)
-    
-    # fix max value
-    max_value <- min_value + binsize * increment
-    
-    # check if single bin is enough
-    if (min_value == max_value) {
-        l <- append(l, max_value)
-        for (key in seq(m)) {
-            l <- append(l, 1.0)
-        }
-        return (l)
-    }
-    
-    # fix range and bins
-    bin_seq = seq(min_value, max_value, by=increment)
+    # identify optimal breaks
+    hist_data <- hist(unlist(m), plot=FALSE)
+    breaks <- hist_data$breaks;
     
     # add as first column
-    l <- append(l, list(bin_seq[2: length(bin_seq)]))
+    l <- append(l, list(breaks[2: length(breaks)]))
     
     # loop through all columns
     for (key in seq(m)) {
@@ -80,14 +28,14 @@
         column_data <- m[[key]]
         
         # create hist data
-        hist_data <- hist(column_data, breaks=bin_seq, plot=FALSE)
+        hist_data <- hist(column_data, breaks=breaks, plot=FALSE)
         
         # normalize densities
         count_sum <- sum(hist_data$counts)
         if (count_sum > 0) {
             hist_data$counts = hist_data$counts / count_sum
         }
-
+        
         # collect vectors in list
         l <- append(l, list(hist_data$counts))
     }