annotate histogram.r @ 42:5bf0fc9f8b07 draft

Uploaded
author guerler
date Fri, 09 May 2014 01:01:04 -0400
parents 524184c2f524
children b84a193361be
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
36
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
1 # binsize
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
2 min_binsize = 10
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
3
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
4 # lower boundary
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
5 lowerboundary <- function(x, increment) {
23
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
6 return (floor(x / increment) * increment)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
7 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
8
36
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
9 # upper boundary
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
10 upperboundary <- function(x, increment) {
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
11 return (ceiling(x / increment) * increment)
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
12 }
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
13
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
14 # round to decimals
23
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
15 roundup <- function(x) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
16 return (sign(x) * 10^ceiling(log10(abs(x))))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
17 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
18
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
19 # wrapper
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
20 wrapper <- function(table, columns, options) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
21
36
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
22 # get binsize
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
23 binsize = max(as.integer(options$binsize), min_binsize)
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
24
23
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
25 # initialize output list
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
26 l <- list()
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
27
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
28 # loop through all columns
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
29 m <- list()
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
30 for (key in names(columns)) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
31 # load column data
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
32 column <- as.numeric(columns[key])
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
33 column_data <- sapply( table[column], as.numeric )
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
34
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
35 # collect vectors in list
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
36 m <- append(m, list(column_data))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
37 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
38
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
39 # get min/max boundaries
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
40 min_value <- min(unlist(m))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
41 max_value <- max(unlist(m))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
42
36
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
43 # identify range
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
44 diff <- max_value - min_value
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
45
23
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
46 # identify increment
36
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
47 increment <- roundup(diff / binsize)
23
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
48
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
49 # fix min value
36
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
50 min_value <- lowerboundary(min_value, increment)
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
51 max_value <- upperboundary(max_value, increment)
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
52
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
53 # update range
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
54 diff <- max_value - min_value
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
55
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
56 # fix bin size
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
57 binsize = round(diff / increment)
23
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
58
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
59 # fix max value
36
524184c2f524 Uploaded
guerler
parents: 23
diff changeset
60 max_value <- min_value + binsize * increment
23
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
61
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
62 # check if single bin is enough
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
63 if (min_value == max_value) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
64 l <- append(l, max_value)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
65 for (key in seq(m)) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
66 l <- append(l, 1.0)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
67 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
68 return (l)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
69 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
70
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
71 # fix range and bins
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
72 bin_seq = seq(min_value, max_value, by=increment)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
73
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
74 # add as first column
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
75 l <- append(l, list(bin_seq[2: length(bin_seq)]))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
76
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
77 # loop through all columns
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
78 for (key in seq(m)) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
79 # load column data
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
80 column_data <- m[[key]]
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
81
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
82 # create hist data
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
83 hist_data <- hist(column_data, breaks=bin_seq, plot=FALSE)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
84
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
85 # normalize densities
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
86 count_sum <- sum(hist_data$counts)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
87 if (count_sum > 0) {
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
88 hist_data$counts = hist_data$counts / count_sum
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
89 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
90
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
91 # collect vectors in list
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
92 l <- append(l, list(hist_data$counts))
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
93 }
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
94
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
95 # return
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
96 return (l)
0e3df2630d9b Uploaded
guerler
parents:
diff changeset
97 }