Mercurial > repos > artbio > ez_histograms
comparison ez_histograms.R @ 2:d375c9df9c34 draft default tip
planemo upload for repository https://github.com/artbio/tools-artbio/tree/main/tools/ez_histograms commit f8dffb887b68fdc2c9b35f30676539b8396abd54
author | artbio |
---|---|
date | Thu, 07 Nov 2024 15:40:40 +0000 |
parents | fbedb212982d |
children |
comparison
equal
deleted
inserted
replaced
1:fbedb212982d | 2:d375c9df9c34 |
---|---|
3 library(dplyr) | 3 library(dplyr) |
4 library(scales) | 4 library(scales) |
5 library(psych) | 5 library(psych) |
6 library(optparse) | 6 library(optparse) |
7 | 7 |
8 options(show.error.messages = FALSE, | 8 options( |
9 error = function() { | 9 show.error.messages = FALSE, |
10 cat(geterrmessage(), file = stderr()) | 10 error = function() { |
11 q("no", 1, FALSE) | 11 cat(geterrmessage(), file = stderr()) |
12 } | 12 q("no", 1, FALSE) |
13 } | |
13 ) | 14 ) |
14 | 15 |
15 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 16 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
16 warnings() | 17 warnings() |
17 | 18 |
18 option_list <- list( | 19 option_list <- list( |
19 make_option( | 20 make_option( |
20 c("-f", "--file"), | 21 c("-f", "--file"), |
21 default = NA, | 22 default = NA, |
22 type = "character", | 23 type = "character", |
23 help = "Input file that contains count values to transform" | 24 help = "Input file that contains count values to transform" |
24 ), | 25 ), |
25 make_option( | 26 make_option( |
26 c("-d", "--profile"), | 27 c("-d", "--profile"), |
27 default = "count", | 28 default = "count", |
28 type = "character", | 29 type = "character", |
29 help = "Whether y-axis shows absolute counts or density: 'count' or 'density' [default : '%default' ]" | 30 help = "Whether y-axis shows absolute counts or density: 'count' or 'density' [default : '%default' ]" |
30 ), | 31 ), |
31 make_option( | 32 make_option( |
32 "--xscale", | 33 "--xscale", |
33 default = "cartesian", | 34 default = "cartesian", |
34 type = "character", | 35 type = "character", |
35 help = "Whether x-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]" | 36 help = "Whether x-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]" |
36 ), | 37 ), |
37 make_option( | 38 make_option( |
38 "--yscale", | 39 "--yscale", |
39 default = "cartesian", | 40 default = "cartesian", |
40 type = "character", | 41 type = "character", |
41 help = "Whether y-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]" | 42 help = "Whether y-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]" |
42 ), | 43 ), |
43 make_option( | 44 make_option( |
44 c("-p", "--pdf"), | 45 c("-p", "--pdf"), |
45 default = "histograms.pdf", | 46 default = "histograms.pdf", |
46 type = "character", | 47 type = "character", |
47 help = "Output pdf file name [default : '%default' ]" | 48 help = "Output pdf file name [default : '%default' ]" |
48 ), | 49 ), |
49 make_option( | 50 make_option( |
50 c("-s", "--summary"), | 51 c("-s", "--summary"), |
51 default = "summary.tsv", | 52 default = "summary.tsv", |
52 type = "character", | 53 type = "character", |
53 help = "statistics summary file name [default : '%default' ]" | 54 help = "statistics summary file name [default : '%default' ]" |
54 ) | 55 ) |
55 ) | 56 ) |
56 | 57 |
57 opt <- parse_args(OptionParser(option_list = option_list), | 58 opt <- parse_args(OptionParser(option_list = option_list), |
58 args = commandArgs(trailingOnly = TRUE)) | 59 args = commandArgs(trailingOnly = TRUE) |
60 ) | |
59 | 61 |
60 plot_histograms <- function(mdata, profile = "count", xscale = "cartesian", yscale = "cartesian", bins = 30) { | 62 plot_histograms <- function(mdata, profile = "count", xscale = "cartesian", yscale = "cartesian", bins = 30) { |
61 if (profile == "count") { | 63 if (profile == "count") { |
62 # count histogram | 64 # count histogram |
63 p <- ggplot(mdata, aes(x = value, fill = variable, color = variable, y = after_stat(count)), show.legend = FALSE) + | 65 p <- ggplot(mdata, aes(x = value, fill = variable, color = variable, y = after_stat(count)), show.legend = FALSE) + |
64 geom_histogram(bins = bins) + theme(legend.position = "none") | 66 geom_histogram(bins = bins) + |
65 if (xscale == "cartesian") { | 67 theme(legend.position = "none") |
66 if (yscale == "log2") { | 68 if (xscale == "cartesian") { |
67 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) | 69 if (yscale == "log2") { |
68 } else { | 70 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) |
69 if (yscale == "log10") { | 71 } else { |
70 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) | 72 if (yscale == "log10") { |
73 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) | |
74 } | |
75 } | |
71 } | 76 } |
72 } | 77 if (xscale == "log2") { |
78 p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) | |
79 if (yscale == "log2") { | |
80 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) | |
81 } else { | |
82 if (yscale == "log10") { | |
83 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) | |
84 } | |
85 } | |
86 } | |
87 if (xscale == "log10") { | |
88 p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) | |
89 if (yscale == "log2") { | |
90 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) | |
91 } else { | |
92 if (yscale == "log10") { | |
93 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) | |
94 } | |
95 } | |
96 } | |
73 } | 97 } |
74 if (xscale == "log2") { | 98 |
75 p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) | 99 if (profile == "density") { |
76 if (yscale == "log2") { | 100 # density histogram |
77 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) | 101 p <- ggplot(mdata, aes(x = value, fill = variable, color = variable)) + |
78 } else { | 102 geom_density() + |
79 if (yscale == "log10") { | 103 theme(legend.position = "none") |
80 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) | 104 if (xscale == "log2") { |
105 p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) | |
81 } | 106 } |
82 } | 107 if (xscale == "log10") { |
108 p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) | |
109 } | |
83 } | 110 } |
84 if (xscale == "log10") { | 111 return(p) |
85 p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) | |
86 if (yscale == "log2") { | |
87 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) | |
88 } else { | |
89 if (yscale == "log10") { | |
90 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) | |
91 } | |
92 } | |
93 } | |
94 } | |
95 | |
96 if (profile == "density") { | |
97 # density histogram | |
98 p <- ggplot(mdata, aes(x = value, fill = variable, color = variable)) + | |
99 geom_density() + theme(legend.position = "none") | |
100 if (xscale == "log2") { | |
101 p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) | |
102 } | |
103 if (xscale == "log10") { | |
104 p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) | |
105 } | |
106 } | |
107 return(p) | |
108 } | 112 } |
109 | 113 |
110 test_header <- function(file) { | 114 test_header <- function(file) { |
111 data <- read.delim(file = file, header = FALSE, row.names = 1, nrows = 2) | 115 data <- read.delim(file = file, header = FALSE, row.names = 1, nrows = 2) |
112 if (all(is.na(as.numeric(data[1, seq_len(ncol(data))])))) { | 116 if (all(is.na(as.numeric(data[1, seq_len(ncol(data))])))) { |
113 return(TRUE) | 117 return(TRUE) |
114 } else { | 118 } else { |
115 return(FALSE) | 119 return(FALSE) |
116 } | 120 } |
117 } | 121 } |
118 | 122 |
119 ##### prepare input data | 123 ##### prepare input data |
120 | 124 |
121 data <- read.delim(file = opt$file, header = test_header(opt$file)) | 125 data <- read.delim(file = opt$file, header = test_header(opt$file)) |
122 data <- data %>% select(where(is.numeric)) # remove non numeric columns | 126 data <- data %>% select(where(is.numeric)) # remove non numeric columns |
123 mdata <- melt(data) | 127 mdata <- melt(data) |
124 | 128 |
125 ##### main | 129 ##### main |
126 | 130 |
127 # determine optimal number of bins (Sturges’ Rule) | 131 # determine optimal number of bins (Sturges’ Rule) |
130 p <- plot_histograms(mdata, profile = opt$profile, xscale = opt$xscale, bins = bins, yscale = opt$yscale) | 134 p <- plot_histograms(mdata, profile = opt$profile, xscale = opt$xscale, bins = bins, yscale = opt$yscale) |
131 | 135 |
132 # determine optimal width for the graph | 136 # determine optimal width for the graph |
133 width <- length(data) | 137 width <- length(data) |
134 width <- case_when( | 138 width <- case_when( |
135 width == 1 ~ 14 / 3, | 139 width == 1 ~ 14 / 3, |
136 width == 2 ~ (2 / 3) * 14, | 140 width == 2 ~ (2 / 3) * 14, |
137 TRUE ~ 14 | 141 TRUE ~ 14 |
138 ) | 142 ) |
139 # determine optimal height for the graph | 143 # determine optimal height for the graph |
140 height <- length(data) | 144 height <- length(data) |
141 height <- case_when( | 145 height <- case_when( |
142 height <= 3 ~ 3, | 146 height <= 3 ~ 3, |
143 height <= 6 ~ 6, | 147 height <= 6 ~ 6, |
144 TRUE ~ (floor(height / 3) + 1) * 3 | 148 TRUE ~ (floor(height / 3) + 1) * 3 |
145 ) | 149 ) |
146 # determine optimal number of col for the graph | 150 # determine optimal number of col for the graph |
147 ncol <- length(data) | 151 ncol <- length(data) |
148 ncol <- case_when( | 152 ncol <- case_when( |
149 ncol == 1 ~ 1, | 153 ncol == 1 ~ 1, |
150 ncol == 2 ~ 2, | 154 ncol == 2 ~ 2, |
151 TRUE ~ 3 | 155 TRUE ~ 3 |
152 ) | 156 ) |
153 pdf(opt$pdf, width = width, height = height) | 157 pdf(opt$pdf, width = width, height = height) |
154 print(p + facet_wrap(~variable, ncol = ncol, scales = "free")) | 158 print(p + facet_wrap(~variable, ncol = ncol, scales = "free")) |
155 dev.off() | 159 dev.off() |
156 | 160 |