comparison histogram.r @ 45:b84a193361be draft

Uploaded
author guerler
date Thu, 15 May 2014 15:15:21 -0400
parents 524184c2f524
children 12eb81b00cd9
comparison
equal deleted inserted replaced
44:9713035b1a47 45:b84a193361be
1 # binsize
2 min_binsize = 10
3
4 # lower boundary
5 lowerboundary <- function(x, increment) {
6 return (floor(x / increment) * increment)
7 }
8
9 # upper boundary
10 upperboundary <- function(x, increment) {
11 return (ceiling(x / increment) * increment)
12 }
13
14 # round to decimals
15 roundup <- function(x) {
16 return (sign(x) * 10^ceiling(log10(abs(x))))
17 }
18
19 # wrapper 1 # wrapper
20 wrapper <- function(table, columns, options) { 2 wrapper <- function(table, columns, options) {
21 3
22 # get binsize
23 binsize = max(as.integer(options$binsize), min_binsize)
24
25 # initialize output list 4 # initialize output list
26 l <- list() 5 l <- list()
27 6
28 # loop through all columns 7 # loop through all columns
29 m <- list() 8 m <- list()
34 13
35 # collect vectors in list 14 # collect vectors in list
36 m <- append(m, list(column_data)) 15 m <- append(m, list(column_data))
37 } 16 }
38 17
39 # get min/max boundaries 18 # identify optimal breaks
40 min_value <- min(unlist(m)) 19 hist_data <- hist(unlist(m), plot=FALSE)
41 max_value <- max(unlist(m)) 20 breaks <- hist_data$breaks;
42
43 # identify range
44 diff <- max_value - min_value
45
46 # identify increment
47 increment <- roundup(diff / binsize)
48
49 # fix min value
50 min_value <- lowerboundary(min_value, increment)
51 max_value <- upperboundary(max_value, increment)
52
53 # update range
54 diff <- max_value - min_value
55
56 # fix bin size
57 binsize = round(diff / increment)
58
59 # fix max value
60 max_value <- min_value + binsize * increment
61
62 # check if single bin is enough
63 if (min_value == max_value) {
64 l <- append(l, max_value)
65 for (key in seq(m)) {
66 l <- append(l, 1.0)
67 }
68 return (l)
69 }
70
71 # fix range and bins
72 bin_seq = seq(min_value, max_value, by=increment)
73 21
74 # add as first column 22 # add as first column
75 l <- append(l, list(bin_seq[2: length(bin_seq)])) 23 l <- append(l, list(breaks[2: length(breaks)]))
76 24
77 # loop through all columns 25 # loop through all columns
78 for (key in seq(m)) { 26 for (key in seq(m)) {
79 # load column data 27 # load column data
80 column_data <- m[[key]] 28 column_data <- m[[key]]
81 29
82 # create hist data 30 # create hist data
83 hist_data <- hist(column_data, breaks=bin_seq, plot=FALSE) 31 hist_data <- hist(column_data, breaks=breaks, plot=FALSE)
84 32
85 # normalize densities 33 # normalize densities
86 count_sum <- sum(hist_data$counts) 34 count_sum <- sum(hist_data$counts)
87 if (count_sum > 0) { 35 if (count_sum > 0) {
88 hist_data$counts = hist_data$counts / count_sum 36 hist_data$counts = hist_data$counts / count_sum
89 } 37 }
90 38
91 # collect vectors in list 39 # collect vectors in list
92 l <- append(l, list(hist_data$counts)) 40 l <- append(l, list(hist_data$counts))
93 } 41 }
94 42
95 # return 43 # return