Mercurial > repos > artbio > small_rna_maps
view small_rna_maps.r @ 34:966bc5c46efd draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit b7c7c60d608694ca4f1638e4bb0d6db5b1afa944
author | artbio |
---|---|
date | Fri, 21 Oct 2022 23:19:55 +0000 |
parents | f2e7ad3058e8 |
children |
line wrap: on
line source
## Setup R error handling to go to stderr options(show.error.messages = FALSE, error = function() { cat(geterrmessage(), file = stderr()) q("no", 1, FALSE) } ) options(warn = -1) library(RColorBrewer) library(lattice) library(latticeExtra) library(grid) library(gridExtra) library(optparse) option_list <- list( make_option(c("-i", "--ymin"), type = "double", help = "set min ylimit. e.g. '-100.0'"), make_option(c("-a", "--ymax"), type = "double", help = "set max ylimit. e.g. '100.0'"), make_option(c("-f", "--first_dataframe"), type = "character", help = "path to first dataframe"), make_option(c("-e", "--extra_dataframe"), type = "character", help = "path to additional dataframe"), make_option(c("-n", "--normalization"), type = "character", help = "space-separated normalization/size factors"), make_option("--first_plot_method", type = "character", help = "How additional data should be plotted"), make_option("--extra_plot_method", type = "character", help = "How additional data should be plotted"), make_option("--global", type = "character", help = "data should be plotted as global size distribution"), make_option("--output_pdf", type = "character", help = "path to the pdf file with plots") ) parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) args <- parse_args(parser) # data frames implementation ## first table table <- read.delim(args$first_dataframe, header = TRUE, row.names = NULL) colnames(table)[1] <- "Dataset" dropcol <- c("Strandness", "z.score") # not used by this Rscript and is dropped for backward compatibility table <- table[, !(names(table) %in% dropcol)] if (args$first_plot_method == "Counts" || args$first_plot_method == "Size") { table <- within(table, Counts[Polarity == "R"] <- (Counts[Polarity == "R"] * - 1)) # nolint } n_samples <- length(unique(table$Dataset)) samples <- unique(table$Dataset) if (args$normalization != "") { norm_factors <- as.numeric(unlist(strsplit(args$normalization, " "))) } else { norm_factors <- rep(1, n_samples) } if (args$first_plot_method == "Counts" || args$first_plot_method == "Size" || args$first_plot_method == "Coverage") { i <- 1 for (sample in samples) { # Warning Here the column is hard coded as the last column (dangerous) # because its name changes with the method table[, length(table)][table$Dataset == sample] <- table[, length(table)][table$Dataset == sample] * norm_factors[i] i <- i + 1 } } genes <- unique(table$Chromosome) per_gene_readmap <- lapply(genes, function(x) subset(table, Chromosome == x)) per_gene_limit <- lapply(genes, function(x) c(1, unique(subset(table, Chromosome == x)$Chrom_length))) n_genes <- length(per_gene_readmap) # second table if (args$extra_plot_method != "") { extra_table <- read.delim(args$extra_dataframe, header = TRUE, row.names = NULL) colnames(extra_table)[1] <- "Dataset" dropcol <- c("Strandness", "z.score") table <- table[, !(names(table) %in% dropcol)] if (args$extra_plot_method == "Counts" || args$extra_plot_method == "Size") { extra_table <- within(extra_table, Counts[Polarity == "R"] <- (Counts[Polarity == "R"] * -1)) # nolint } if (args$extra_plot_method == "Counts" || args$extra_plot_method == "Size" || args$extra_plot_method == "Coverage") { i <- 1 for (sample in samples) { extra_table[, length(extra_table)][extra_table$Dataset == sample] <- extra_table[, length(extra_table)][extra_table$Dataset == sample] * norm_factors[i] i <- i + 1 } } per_gene_size <- lapply(genes, function(x) subset(extra_table, Chromosome == x)) } ## functions globalbc <- function(df, global = "", ...) { if (global == "yes") { bc <- barchart(Counts ~ as.factor(Size) | factor(Dataset, levels = unique(Dataset)), data = df, origin = 0, horizontal = FALSE, col = c("darkblue"), scales = list(y = list(tick.number = 4, rot = 90, relation = "same", cex = 0.5, alternating = TRUE), x = list(rot = 0, cex = 0.6, tck = 0.5, alternating = c(3, 3))), xlab = list(label = bottom_first_method[[args$first_plot_method]], cex = .85), ylab = list(label = legend_first_method[[args$first_plot_method]], cex = .85), main = title_first_method[[args$first_plot_method]], layout = c(2, 6), newpage = TRUE, as.table = TRUE, aspect = 0.5, strip = strip.custom(par.strip.text = list(cex = 1), which.given = 1, bg = "lightblue"), ... ) } else { bc <- barchart(Counts ~ as.factor(Size) | factor(Dataset, levels = unique(Dataset)), data = df, origin = 0, horizontal = FALSE, group = Polarity, stack = TRUE, col = c("red", "blue"), scales = list(y = list(tick.number = 4, rot = 90, relation = "same", cex = 0.5, alternating = TRUE), x = list(rot = 0, cex = 0.6, tck = 0.5, alternating = c(3, 3))), xlab = list(label = bottom_first_method[[args$first_plot_method]], cex = .85), ylab = list(label = legend_first_method[[args$first_plot_method]], cex = .85), main = title_first_method[[args$first_plot_method]], layout = c(2, 6), newpage = TRUE, as.table = TRUE, aspect = 0.5, strip = strip.custom(par.strip.text = list(cex = 1), which.given = 1, bg = "lightblue"), ... ) } return(bc) } plot_unit <- function(df, method = args$first_plot_method, ...) { if (exists("ymin", where = args)) { min <- args$ymin } else { min <- "" } if ((exists("ymax", where = args))) { max <- args$ymax } else { max <- "" } ylimits <- c(min, max) if (method == "Counts") { p <- xyplot(Counts ~ Coordinate | factor(Dataset, levels = unique(Dataset)) + factor(Chromosome, levels = unique(Chromosome)), data = df, type = "h", lwd = 1.5, scales = list(relation = "free", x = list(rot = 0, cex = 0.7, axs = "i", tck = 0.5), y = list(tick.number = 4, rot = 90, cex = 0.7)), xlab = NULL, main = NULL, ylab = NULL, ylim = ylimits, as.table = TRUE, origin = 0, horizontal = FALSE, group = Polarity, col = c("red", "blue"), par.strip.text = list(cex = 0.7), ...) p <- combineLimits(p) } else if (method != "Size") { p <- xyplot(eval(as.name(method)) ~ Coordinate | factor(Dataset, levels = unique(Dataset)) + factor(Chromosome, levels = unique(Chromosome)), data = df, type = ifelse(method == "Coverage", "l", "p"), pch = 19, cex = 0.35, scales = list(relation = "free", x = list(rot = 0, cex = 0.7, axs = "i", tck = 0.5), y = list(tick.number = 4, rot = 90, cex = 0.7)), xlab = NULL, main = NULL, ylab = NULL, ylim = ylimits, as.table = TRUE, origin = 0, horizontal = FALSE, group = Polarity, col = c("red", "blue"), par.strip.text = list(cex = 0.7), ...) p <- combineLimits(p) } else { p <- barchart(Counts ~ as.factor(Size) | factor(Dataset, levels = unique(Dataset)) + Chromosome, data = df, origin = 0, horizontal = FALSE, group = Polarity, stack = TRUE, col = c("red", "blue"), scales = list(y = list(rot = 90, relation = "free", cex = 0.7), x = list(rot = 0, cex = 0.7, axs = "i", tck = c(1, 0))), xlab = NULL, ylab = NULL, main = NULL, as.table = TRUE, par.strip.text = list(cex = 0.6), ...) p <- combineLimits(p) } return(p) } ## function parameters par_settings_firstplot <- list(layout.heights = list(top.padding = -2, bottom.padding = -2), strip.background = list(col = c("lightblue", "lightgreen"))) par_settings_secondplot <- list(layout.heights = list(top.padding = -1, bottom.padding = -1), strip.background = list(col = c("lightblue", "lightgreen"))) title_first_method <- list(Counts = "Read Counts", Coverage = "Coverage depths", Median = "Median sizes", Mean = "Mean sizes", Size = "Size Distributions") title_extra_method <- list(Counts = "Read Counts", Coverage = "Coverage depths", Median = "Median sizes", Mean = "Mean sizes", Size = "Size Distributions") legend_first_method <- list(Counts = "Read count", Coverage = "Coverage depth", Median = "Median size", Mean = "Mean size", Size = "Read count") legend_extra_method <- list(Counts = "Read count", Coverage = "Coverage depth", Median = "Median size", Mean = "Mean size", Size = "Read count") bottom_first_method <- list(Counts = "Coordinates (nucleotides)", Coverage = "Coordinates (nucleotides)", Median = "Coordinates (nucleotides)", Mean = "Coordinates (nucleotides)", Size = "Sizes of reads") bottom_extra_method <- list(Counts = "Coordinates (nucleotides)", Coverage = "Coordinates (nucleotides)", Median = "Coordinates (nucleotides)", Mean = "Coordinates (nucleotides)", Size = "Sizes of reads") ## Plotting Functions double_plot <- function(...) { page_height <- 15 rows_per_page <- 10 graph_heights <- c(40, 30, 40, 30, 40, 30, 40, 30, 40, 30, 10) page_width <- 8.2677 * n_samples / 2 pdf(file = args$output_pdf, paper = "special", height = page_height, width = page_width) for (i in seq(1, n_genes, rows_per_page / 2)) { start <- i end <- i + rows_per_page / 2 - 1 if (end > n_genes) { end <- n_genes } if (end - start + 1 < 5) { graph_heights <- c(rep(c(40, 30), end - start + 1), 10, rep(c(40, 30), 5 - (end - start + 1))) } first_plot_list <- lapply(per_gene_readmap[start:end], function(x) update(useOuterStrips(plot_unit(x, par.settings = par_settings_secondplot), strip.left = strip.custom(par.strip.text = list(cex = 0.5))))) second_plot_list <- lapply(per_gene_size[start:end], function(x) update(useOuterStrips(plot_unit(x, method = args$extra_plot_method, par.settings = par_settings_firstplot), strip.left = strip.custom(par.strip.text = list(cex = 0.5)), strip = FALSE))) plot.list <- rbind(first_plot_list, second_plot_list) args_list <- c(plot.list, list(nrow = rows_per_page + 1, ncol = 1, heights = unit(graph_heights, rep("mm", 11)), top = textGrob(paste(title_first_method[[args$first_plot_method]], "and", title_extra_method[[args$extra_plot_method]]), gp = gpar(cex = 1), vjust = 0, just = "top"), left = textGrob(paste(legend_first_method[[args$first_plot_method]], "/", legend_extra_method[[args$extra_plot_method]]), gp = gpar(cex = 1), vjust = 0, hjust = 0, x = 1, y = (-0.38 / 4) * (end - start - (3.28 / 0.38)), rot = 90), sub = textGrob(paste(bottom_first_method[[args$first_plot_method]], "/", bottom_extra_method[[args$extra_plot_method]]), gp = gpar(cex = 1), just = "bottom", vjust = 2) ) ) do.call(grid.arrange, args_list) } devname <- dev.off() } single_plot <- function(...) { width <- 8.2677 * n_samples / 2 rows_per_page <- 8 graph_heights <- c(rep(40, 8), 10) pdf(file = args$output_pdf, paper = "special", height = 15, width = width) for (i in seq(1, n_genes, rows_per_page)) { start <- i end <- i + rows_per_page - 1 if (end > n_genes) { end <- n_genes } if (end - start + 1 < 8) { graph_heights <- c(rep(c(40), end - start + 1), 10, rep(c(40), 8 - (end - start + 1))) } first_plot_list <- lapply(per_gene_readmap[start:end], function(x) update(useOuterStrips(plot_unit(x, par.settings = par_settings_firstplot), strip.left = strip.custom(par.strip.text = list(cex = 0.5))))) plot.list <- rbind(first_plot_list) args_list <- c(plot.list, list(nrow = rows_per_page + 1, ncol = 1, heights = unit(graph_heights, rep("mm", 9)), top = textGrob(title_first_method[[args$first_plot_method]], gp = gpar(cex = 1), vjust = 0, just = "top"), left = textGrob(legend_first_method[[args$first_plot_method]], gp = gpar(cex = 1), vjust = 0, hjust = 0, x = 1, y = (-0.41 / 7) * (end - start - (6.23 / 0.41)), rot = 90), sub = textGrob(bottom_first_method[[args$first_plot_method]], gp = gpar(cex = 1), just = "bottom", vjust = 2) ) ) do.call(grid.arrange, args_list) } devname <- dev.off() } # main if (args$extra_plot_method != "") { double_plot() } if (args$extra_plot_method == "" && !exists("global", where = args)) { single_plot() } if (exists("global", where = args)) { pdf(file = args$output, paper = "special", height = 11.69) table <- within(table, Counts[Polarity == "R"] <- abs(Counts[Polarity == "R"])) # nolint library(reshape2) ml <- melt(table, id.vars = c("Dataset", "Chromosome", "Polarity", "Size")) if (args$global == "nomerge") { castml <- dcast(ml, Dataset + Polarity + Size ~ variable, function(x) sum(x)) castml <- within(castml, Counts[Polarity == "R"] <- (Counts[Polarity == "R"] * -1)) # nolint bc <- globalbc(castml, global = "no") } else { castml <- dcast(ml, Dataset + Size ~ variable, function(x) sum(x)) bc <- globalbc(castml, global = "yes") } plot(bc) devname <- dev.off() }