comparison ez_histograms.R @ 2:d375c9df9c34 draft default tip

planemo upload for repository https://github.com/artbio/tools-artbio/tree/main/tools/ez_histograms commit f8dffb887b68fdc2c9b35f30676539b8396abd54
author artbio
date Thu, 07 Nov 2024 15:40:40 +0000
parents fbedb212982d
children
comparison
equal deleted inserted replaced
1:fbedb212982d 2:d375c9df9c34
3 library(dplyr) 3 library(dplyr)
4 library(scales) 4 library(scales)
5 library(psych) 5 library(psych)
6 library(optparse) 6 library(optparse)
7 7
8 options(show.error.messages = FALSE, 8 options(
9 error = function() { 9 show.error.messages = FALSE,
10 cat(geterrmessage(), file = stderr()) 10 error = function() {
11 q("no", 1, FALSE) 11 cat(geterrmessage(), file = stderr())
12 } 12 q("no", 1, FALSE)
13 }
13 ) 14 )
14 15
15 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 16 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
16 warnings() 17 warnings()
17 18
18 option_list <- list( 19 option_list <- list(
19 make_option( 20 make_option(
20 c("-f", "--file"), 21 c("-f", "--file"),
21 default = NA, 22 default = NA,
22 type = "character", 23 type = "character",
23 help = "Input file that contains count values to transform" 24 help = "Input file that contains count values to transform"
24 ), 25 ),
25 make_option( 26 make_option(
26 c("-d", "--profile"), 27 c("-d", "--profile"),
27 default = "count", 28 default = "count",
28 type = "character", 29 type = "character",
29 help = "Whether y-axis shows absolute counts or density: 'count' or 'density' [default : '%default' ]" 30 help = "Whether y-axis shows absolute counts or density: 'count' or 'density' [default : '%default' ]"
30 ), 31 ),
31 make_option( 32 make_option(
32 "--xscale", 33 "--xscale",
33 default = "cartesian", 34 default = "cartesian",
34 type = "character", 35 type = "character",
35 help = "Whether x-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]" 36 help = "Whether x-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]"
36 ), 37 ),
37 make_option( 38 make_option(
38 "--yscale", 39 "--yscale",
39 default = "cartesian", 40 default = "cartesian",
40 type = "character", 41 type = "character",
41 help = "Whether y-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]" 42 help = "Whether y-axis is 'cartesian', 'log2' or 'log10' [default : '%default' ]"
42 ), 43 ),
43 make_option( 44 make_option(
44 c("-p", "--pdf"), 45 c("-p", "--pdf"),
45 default = "histograms.pdf", 46 default = "histograms.pdf",
46 type = "character", 47 type = "character",
47 help = "Output pdf file name [default : '%default' ]" 48 help = "Output pdf file name [default : '%default' ]"
48 ), 49 ),
49 make_option( 50 make_option(
50 c("-s", "--summary"), 51 c("-s", "--summary"),
51 default = "summary.tsv", 52 default = "summary.tsv",
52 type = "character", 53 type = "character",
53 help = "statistics summary file name [default : '%default' ]" 54 help = "statistics summary file name [default : '%default' ]"
54 ) 55 )
55 ) 56 )
56 57
57 opt <- parse_args(OptionParser(option_list = option_list), 58 opt <- parse_args(OptionParser(option_list = option_list),
58 args = commandArgs(trailingOnly = TRUE)) 59 args = commandArgs(trailingOnly = TRUE)
60 )
59 61
60 plot_histograms <- function(mdata, profile = "count", xscale = "cartesian", yscale = "cartesian", bins = 30) { 62 plot_histograms <- function(mdata, profile = "count", xscale = "cartesian", yscale = "cartesian", bins = 30) {
61 if (profile == "count") { 63 if (profile == "count") {
62 # count histogram 64 # count histogram
63 p <- ggplot(mdata, aes(x = value, fill = variable, color = variable, y = after_stat(count)), show.legend = FALSE) + 65 p <- ggplot(mdata, aes(x = value, fill = variable, color = variable, y = after_stat(count)), show.legend = FALSE) +
64 geom_histogram(bins = bins) + theme(legend.position = "none") 66 geom_histogram(bins = bins) +
65 if (xscale == "cartesian") { 67 theme(legend.position = "none")
66 if (yscale == "log2") { 68 if (xscale == "cartesian") {
67 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) 69 if (yscale == "log2") {
68 } else { 70 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x)))
69 if (yscale == "log10") { 71 } else {
70 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) 72 if (yscale == "log10") {
73 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x)))
74 }
75 }
71 } 76 }
72 } 77 if (xscale == "log2") {
78 p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x)))
79 if (yscale == "log2") {
80 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x)))
81 } else {
82 if (yscale == "log10") {
83 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x)))
84 }
85 }
86 }
87 if (xscale == "log10") {
88 p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x)))
89 if (yscale == "log2") {
90 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x)))
91 } else {
92 if (yscale == "log10") {
93 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x)))
94 }
95 }
96 }
73 } 97 }
74 if (xscale == "log2") { 98
75 p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) 99 if (profile == "density") {
76 if (yscale == "log2") { 100 # density histogram
77 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x))) 101 p <- ggplot(mdata, aes(x = value, fill = variable, color = variable)) +
78 } else { 102 geom_density() +
79 if (yscale == "log10") { 103 theme(legend.position = "none")
80 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x))) 104 if (xscale == "log2") {
105 p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x)))
81 } 106 }
82 } 107 if (xscale == "log10") {
108 p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x)))
109 }
83 } 110 }
84 if (xscale == "log10") { 111 return(p)
85 p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x)))
86 if (yscale == "log2") {
87 p <- p + scale_y_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x)))
88 } else {
89 if (yscale == "log10") {
90 p <- p + scale_y_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x)))
91 }
92 }
93 }
94 }
95
96 if (profile == "density") {
97 # density histogram
98 p <- ggplot(mdata, aes(x = value, fill = variable, color = variable)) +
99 geom_density() + theme(legend.position = "none")
100 if (xscale == "log2") {
101 p <- p + scale_x_continuous(trans = "log2", labels = trans_format("log2", math_format(2^.x)))
102 }
103 if (xscale == "log10") {
104 p <- p + scale_x_continuous(trans = "log10", labels = trans_format("log10", math_format(10^.x)))
105 }
106 }
107 return(p)
108 } 112 }
109 113
110 test_header <- function(file) { 114 test_header <- function(file) {
111 data <- read.delim(file = file, header = FALSE, row.names = 1, nrows = 2) 115 data <- read.delim(file = file, header = FALSE, row.names = 1, nrows = 2)
112 if (all(is.na(as.numeric(data[1, seq_len(ncol(data))])))) { 116 if (all(is.na(as.numeric(data[1, seq_len(ncol(data))])))) {
113 return(TRUE) 117 return(TRUE)
114 } else { 118 } else {
115 return(FALSE) 119 return(FALSE)
116 } 120 }
117 } 121 }
118 122
119 ##### prepare input data 123 ##### prepare input data
120 124
121 data <- read.delim(file = opt$file, header = test_header(opt$file)) 125 data <- read.delim(file = opt$file, header = test_header(opt$file))
122 data <- data %>% select(where(is.numeric)) # remove non numeric columns 126 data <- data %>% select(where(is.numeric)) # remove non numeric columns
123 mdata <- melt(data) 127 mdata <- melt(data)
124 128
125 ##### main 129 ##### main
126 130
127 # determine optimal number of bins (Sturges’ Rule) 131 # determine optimal number of bins (Sturges’ Rule)
130 p <- plot_histograms(mdata, profile = opt$profile, xscale = opt$xscale, bins = bins, yscale = opt$yscale) 134 p <- plot_histograms(mdata, profile = opt$profile, xscale = opt$xscale, bins = bins, yscale = opt$yscale)
131 135
132 # determine optimal width for the graph 136 # determine optimal width for the graph
133 width <- length(data) 137 width <- length(data)
134 width <- case_when( 138 width <- case_when(
135 width == 1 ~ 14 / 3, 139 width == 1 ~ 14 / 3,
136 width == 2 ~ (2 / 3) * 14, 140 width == 2 ~ (2 / 3) * 14,
137 TRUE ~ 14 141 TRUE ~ 14
138 ) 142 )
139 # determine optimal height for the graph 143 # determine optimal height for the graph
140 height <- length(data) 144 height <- length(data)
141 height <- case_when( 145 height <- case_when(
142 height <= 3 ~ 3, 146 height <= 3 ~ 3,
143 height <= 6 ~ 6, 147 height <= 6 ~ 6,
144 TRUE ~ (floor(height / 3) + 1) * 3 148 TRUE ~ (floor(height / 3) + 1) * 3
145 ) 149 )
146 # determine optimal number of col for the graph 150 # determine optimal number of col for the graph
147 ncol <- length(data) 151 ncol <- length(data)
148 ncol <- case_when( 152 ncol <- case_when(
149 ncol == 1 ~ 1, 153 ncol == 1 ~ 1,
150 ncol == 2 ~ 2, 154 ncol == 2 ~ 2,
151 TRUE ~ 3 155 TRUE ~ 3
152 ) 156 )
153 pdf(opt$pdf, width = width, height = height) 157 pdf(opt$pdf, width = width, height = height)
154 print(p + facet_wrap(~variable, ncol = ncol, scales = "free")) 158 print(p + facet_wrap(~variable, ncol = ncol, scales = "free"))
155 dev.off() 159 dev.off()
156 160