view histogram.r @ 24:27c0329b431c draft

Uploaded
author guerler
date Fri, 25 Apr 2014 20:09:09 -0400
parents 0e3df2630d9b
children 524184c2f524
line wrap: on
line source

# utilities
boundary <- function(x, increment) {
    return (floor(x / increment) * increment)
}

roundup <- function(x) {
    return (sign(x) * 10^ceiling(log10(abs(x))))
}

# wrapper
wrapper <- function(table, columns, options) {

    # initialize output list
    l <- list()

    # loop through all columns
    m <- list()
    for (key in names(columns)) {
        # load column data
        column <- as.numeric(columns[key])
        column_data <- sapply( table[column], as.numeric )

        # collect vectors in list
        m <- append(m, list(column_data))
    }
    
    # get min/max boundaries
    min_value <- min(unlist(m))
    max_value <- max(unlist(m))
    
    # identify increment
    increment <- roundup((max_value - min_value) / 10)
    
    # fix min value
    min_value <- boundary(min_value, increment)
    
    # fix max value
    max_value <- min_value + increment * 10
    
    # check if single bin is enough
    if (min_value == max_value) {
        l <- append(l, max_value)
        for (key in seq(m)) {
            l <- append(l, 1.0)
        }
        return (l)
    }
    
    # fix range and bins
    bin_seq = seq(min_value, max_value, by=increment)
    
    # add as first column
    l <- append(l, list(bin_seq[2: length(bin_seq)]))
    
    # loop through all columns
    for (key in seq(m)) {
        # load column data
        column_data <- m[[key]]
        
        # create hist data
        hist_data <- hist(column_data, breaks=bin_seq, plot=FALSE)
        
        # normalize densities
        count_sum <- sum(hist_data$counts)
        if (count_sum > 0) {
            hist_data$counts = hist_data$counts / count_sum
        }

        # collect vectors in list
        l <- append(l, list(hist_data$counts))
    }
    

    # return
    return (l)
}