Mercurial > repos > guerler > charts
comparison histogram.r @ 45:b84a193361be draft
Uploaded
author | guerler |
---|---|
date | Thu, 15 May 2014 15:15:21 -0400 |
parents | 524184c2f524 |
children | 12eb81b00cd9 |
comparison
equal
deleted
inserted
replaced
44:9713035b1a47 | 45:b84a193361be |
---|---|
1 # binsize | |
2 min_binsize = 10 | |
3 | |
4 # lower boundary | |
5 lowerboundary <- function(x, increment) { | |
6 return (floor(x / increment) * increment) | |
7 } | |
8 | |
9 # upper boundary | |
10 upperboundary <- function(x, increment) { | |
11 return (ceiling(x / increment) * increment) | |
12 } | |
13 | |
14 # round to decimals | |
15 roundup <- function(x) { | |
16 return (sign(x) * 10^ceiling(log10(abs(x)))) | |
17 } | |
18 | |
19 # wrapper | 1 # wrapper |
20 wrapper <- function(table, columns, options) { | 2 wrapper <- function(table, columns, options) { |
21 | 3 |
22 # get binsize | |
23 binsize = max(as.integer(options$binsize), min_binsize) | |
24 | |
25 # initialize output list | 4 # initialize output list |
26 l <- list() | 5 l <- list() |
27 | 6 |
28 # loop through all columns | 7 # loop through all columns |
29 m <- list() | 8 m <- list() |
34 | 13 |
35 # collect vectors in list | 14 # collect vectors in list |
36 m <- append(m, list(column_data)) | 15 m <- append(m, list(column_data)) |
37 } | 16 } |
38 | 17 |
39 # get min/max boundaries | 18 # identify optimal breaks |
40 min_value <- min(unlist(m)) | 19 hist_data <- hist(unlist(m), plot=FALSE) |
41 max_value <- max(unlist(m)) | 20 breaks <- hist_data$breaks; |
42 | |
43 # identify range | |
44 diff <- max_value - min_value | |
45 | |
46 # identify increment | |
47 increment <- roundup(diff / binsize) | |
48 | |
49 # fix min value | |
50 min_value <- lowerboundary(min_value, increment) | |
51 max_value <- upperboundary(max_value, increment) | |
52 | |
53 # update range | |
54 diff <- max_value - min_value | |
55 | |
56 # fix bin size | |
57 binsize = round(diff / increment) | |
58 | |
59 # fix max value | |
60 max_value <- min_value + binsize * increment | |
61 | |
62 # check if single bin is enough | |
63 if (min_value == max_value) { | |
64 l <- append(l, max_value) | |
65 for (key in seq(m)) { | |
66 l <- append(l, 1.0) | |
67 } | |
68 return (l) | |
69 } | |
70 | |
71 # fix range and bins | |
72 bin_seq = seq(min_value, max_value, by=increment) | |
73 | 21 |
74 # add as first column | 22 # add as first column |
75 l <- append(l, list(bin_seq[2: length(bin_seq)])) | 23 l <- append(l, list(breaks[2: length(breaks)])) |
76 | 24 |
77 # loop through all columns | 25 # loop through all columns |
78 for (key in seq(m)) { | 26 for (key in seq(m)) { |
79 # load column data | 27 # load column data |
80 column_data <- m[[key]] | 28 column_data <- m[[key]] |
81 | 29 |
82 # create hist data | 30 # create hist data |
83 hist_data <- hist(column_data, breaks=bin_seq, plot=FALSE) | 31 hist_data <- hist(column_data, breaks=breaks, plot=FALSE) |
84 | 32 |
85 # normalize densities | 33 # normalize densities |
86 count_sum <- sum(hist_data$counts) | 34 count_sum <- sum(hist_data$counts) |
87 if (count_sum > 0) { | 35 if (count_sum > 0) { |
88 hist_data$counts = hist_data$counts / count_sum | 36 hist_data$counts = hist_data$counts / count_sum |
89 } | 37 } |
90 | 38 |
91 # collect vectors in list | 39 # collect vectors in list |
92 l <- append(l, list(hist_data$counts)) | 40 l <- append(l, list(hist_data$counts)) |
93 } | 41 } |
94 | 42 |
95 # return | 43 # return |