Mercurial > repos > iuc > crosscontamination_barcode_filter
diff scripts/contamination_plot.R @ 1:253c9448f524 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crosscontamination_barcode_filter commit f967afe562781e5c8ed4e24e9d1e0bc3ebb29401
author | iuc |
---|---|
date | Mon, 03 Jun 2019 14:55:24 -0400 |
parents | 582b7bd4ae4c |
children | 5ade5cf200da |
line wrap: on
line diff
--- a/scripts/contamination_plot.R Thu Jan 24 09:52:58 2019 -0500 +++ b/scripts/contamination_plot.R Mon Jun 03 14:55:24 2019 -0400 @@ -1,8 +1,8 @@ #!/usr/bin/env R -require(ggplot2) +suppressPackageStartupMessages(require(ggplot2)) -log10histoPlot <- function(columncounts, title=""){ +log10histoPlot <- function(title="", columncounts){ #' Log10 histogram plot #' #' @param columncounts colSums(input_matrix) @@ -17,124 +17,152 @@ return(p1) } -contaminationPlot <- function(columncounts, title = "", - indexes.plates, indexes.fullbc, indexes.truebc, - filtered=FALSE) -{ - #' Plots true and false barcodes - #' - #' - #' @param columncounts colSums(input_matrix) - #' @param title plot title - #' @param indexes.plates plate line positions - #' @param indexes.fullbc full batch line positions - #' @param indexes.truebc true batch line positions - #' @param filtered specifies whether the positions have been adjusted for true barcodes - #' @return ggplot grob - dfer <- data.frame(colcounts=columncounts) +## This is calculated by the first call to contaminationPlot +## and then re-used by the second call. +ylim.max = NULL - ## Remove indexes where plates and full barcodes mix - indexes.fullbc = indexes.fullbc[!(indexes.fullbc %in% indexes.plates)] - - nit <- length(indexes.truebc) - nif <- length(indexes.fullbc) - nip <- length(indexes.plates) - mval <- max(dfer) - - ## Aesthetics - min.height <- -200 - tf.spacing.left <- 12 - tf.spacing.right <- 12 - tf.height <- mval - 10000 - bn.height <- mval - 2000 - plate.color <- "grey" - plate.text.color <- "black" - plate.text.alpha <- 0.5 - plate.text.size <- 3 - plate.height <- 2* mval / 5 - plate.spacing <- if (filtered) 12 else 24 - plate.height.text <- plate.height - 3000 - - truebcs <- data.frame( - x=indexes.truebc, y=rep(min.height,nit), - xend=indexes.truebc, yend=rep(mval,nit) - ) - fullbcs <- data.frame( - x=indexes.fullbc, y=rep(min.height,nif), - xend=indexes.fullbc, yend=rep(mval,nif) - ) - platess <- data.frame( - x=indexes.plates, y=rep(min.height,nip), - xend=indexes.plates, yend=rep(plate.height,nip) - ) - connecting.bar <- data.frame( - xsta = min(indexes.plates), ysta = min.height, - xfin = max(indexes.plates), yfin = min.height - ) - - p1 <- ggplot() - - if (!filtered){ - p1 <- p1 + - geom_segment(data=truebcs, aes(x=x,y=y,xend=xend,yend=yend - 4000), col='grey', lty=2, size=0.2) + - geom_segment(data=fullbcs, aes(x=x,y=y,xend=xend,yend=yend), col='blue', lty=1, size=0.4, alpha=0.2) +contaminationPlot <- function(title, columndata, barcode.data, plate.data, RAW) +{ + coldata = data.frame(colsums=columndata) + maxval = max(coldata) + # Set once and once only + if (is.null(ylim.max)){ + ylim.max <<- maxval + 500 } - else { - p1 <- p1 + - geom_segment(data=truebcs, aes(x=x,y=y,xend=xend,yend=yend), col='blue', lty=1, size=0.4, alpha=0.2) - } - - p1 <- p1 + - geom_segment(data=platess, aes(x=x,y=y,xend=xend,yend=yend), col=plate.color, lty=1, size=1) + - geom_segment(data=connecting.bar, aes(x=xsta,y=ysta,xend=xfin,yend=yfin), col=plate.color, lty=1, size=1) + - geom_point( - data=dfer, aes(x=1:length(rownames(dfer)), y=dfer$colcounts), - pch = 16, cex = 1) + - theme(plot.title = element_text(hjust = 0.5), - axis.ticks.x=element_blank(), axis.ticks.y=element_blank(), - axis.text.x=element_blank()) + - labs(title=paste("Contamination Plot\n", title), y="Library Size", x="Barcode Index") + - scale_y_continuous(breaks=seq(0,mval + 10000, 10000)) + - scale_x_continuous(breaks=NULL) - ## Add true/false and batch labels - res <- lapply(indexes.truebc, function(xval){ - batch <- match(xval, indexes.truebc) + drawPlates <- function(plate.data){ # 0 384 768 - if (!filtered){ - p1 <<- p1 + - annotate("text", x=xval - tf.spacing.left, size=2, y=tf.height, angle=90, label=" true positives", color = "dark blue", alpha = 0.5) + - annotate("text", x=xval + tf.spacing.right, size=2, y=tf.height, angle=-90,label="false positives", color = "black", alpha = 0.5) + - annotate("text", x=xval , size=4, y=bn.height, angle=-90, label=paste("B",batch,sep=""), color = "grey", alpha = 0.8) - } - else { - p1 <<- p1 + - annotate("text", x=xval - 48, size=4, y=bn.height, angle=-90, label=paste("B",batch,sep=""), color = "grey", alpha = 0.8) - } - }) - - ## Add Plate labels - res <- lapply(indexes.plates, function(p){ - plate.num <- match(p, indexes.plates) - c.label <- paste("Plate", plate.num, sep="") - b.label <- paste("Plate", plate.num - 1, sep="") - - # Right label - if (plate.num < length(indexes.plates)){ - p1 <<- p1 + - annotate("text", x=p + plate.spacing, size=plate.text.size, y=plate.height.text, angle=-90, - label=c.label, color = plate.text.color, alpha=plate.text.alpha) + plate.boundaries <- plate.data["filtered.plates",] + if (RAW){ + plate.boundaries <- plate.data["unfilter.plates",] } - # Left label - if (plate.num > 1){ - p1 <<- p1 + - annotate("text", x=p - plate.spacing, size=plate.text.size, y=plate.height.text, angle=90, - label=b.label, color = plate.text.color, alpha=plate.text.alpha) - } - }) - - return(p1) -} + plate.minyval <- -200 + plate.color <- "grey" + plate.text.color <- "red" + plate.text.alpha <- 0.5 + plate.height <- 2* maxval / 5 + plate.spacing <- 12 + plate.height.text <- plate.height - 3000 + plate.text.size <- 3 + + + zzz <- lapply(names(plate.boundaries), function(plate.name){ + plate.num = as.integer(sub("P","",plate.name)) + plate.xval = plate.boundaries[[plate.name]] + + ## If not first plate -- inner boundary, print next plate name to the right + if (plate.name != names(plate.boundaries)[length(plate.boundaries)]){ + gplot <<- gplot + annotate("text", x=plate.xval + plate.spacing, label=paste("Plate", plate.num + 1), + size=plate.text.size, y=plate.height*0.7, angle=-90, + color=plate.text.color, alpha=plate.text.alpha) + } + ## If not last plate -- inner boundary, print previous name to the left + if (plate.name != names(plate.boundaries)[1]){ + gplot <<- gplot + annotate("text", x=plate.xval - plate.spacing, label=paste("Plate", plate.num), + size=plate.text.size, y=plate.height*0.7, angle=90, + color=plate.text.color, alpha=plate.text.alpha) + } + gplot <<- gplot + geom_segment(aes(x=plate.xval, xend=plate.xval, y=plate.minyval, yend=plate.height), color=plate.color, lty=1, size=1) + }) + + ## Connecting bar underneath + gplot <<- gplot + geom_segment(aes(x=min(unname(unlist(plate.boundaries))), xend=max(unname(unlist(plate.boundaries))), + y=plate.minyval, yend=plate.minyval), color=plate.color, lty=1, size=1) + } + drawBatches <- function(barcode.data, plate.data){ + batch.minyval = -200 + divide.color <- "grey" + divide.style <- 2 ## dashed + divide.size <- 0.3 + boundary.color <- "blue" + boundary.style <- 1 + boundary.size <- 0.5 + divide.text.true.color <- "blue" + divide.text.false.color <- "black" + divide.text.name.color <- "grey" + batch.text.height <- maxval * 4/5 + batch.text.tf.height <- maxval * 2/5 + batch.text.tf.spacing <- 12 + batch.text.alpha <- 0.8 + batch.text.size <- 3 + batch.label.text.size <- 6 + batch.height <- maxval * 4/5 + divide.height <- maxval * 4/5 + batch.spacing <- 24 + + boundary.pos <- barcode.data["filtered_positions",] + plate.pos <- unname(unlist(plate.data["filtered.plates",])) ## just want values + + if (RAW){ + boundary.pos <- barcode.data["unfilter_positions",] + divider.pos <- barcode.data["filter_in_unfilter",] + plate.pos <- unname(unlist(plate.data["unfilter.plates",])) ## just want values + } + + + zzz <- lapply(names(boundary.pos), function(batch.name){ + batch.num = as.integer(sub("B","",batch.name)) + batch.xval = unname(unlist(boundary.pos[[batch.name]])) + + # Plot boundary line only if not intersecting with a plate line + if (!(batch.xval %in% plate.pos)){ + gplot <<- gplot + geom_segment(aes(x=batch.xval, xend=batch.xval, y=batch.minyval, yend=batch.height), + color=boundary.color, lty=boundary.style, size=boundary.size) + } + + ## Plot labels (except P0) + if (batch.name != "B0"){ + if (RAW){ + divide.xval = unname(unlist(divider.pos[[batch.name]])) + + ## Plot the divider line + gplot <<- gplot + geom_segment(aes(x=divide.xval, xend=divide.xval, y=batch.minyval, yend=divide.height), + color=divide.color, lty=divide.style, size=divide.size) + + ## Plot the True/False labels + gplot <<- gplot + annotate("text", x=divide.xval - batch.text.tf.spacing, label="True positives", + size=batch.text.size, y=batch.text.tf.height, angle=+90, + color=divide.text.true.color, alpha=batch.text.alpha) + gplot <<- gplot + annotate("text", x=divide.xval + batch.text.tf.spacing, label="False positives", + size=batch.text.size, y=batch.text.tf.height, angle=-90, + color=divide.text.false.color, alpha=batch.text.alpha) + ## Plot the Batch names + gplot <<- gplot + annotate("text", x=divide.xval, label=batch.name, + size=batch.label.text.size, y=batch.text.height, angle=0, + color = divide.text.name.color, alpha=batch.text.alpha) + } else { + ## Plot the Batch names between blue lines + gplot <<- gplot + annotate("text", x=batch.xval, label=batch.name, + size=batch.label.text.size, y=batch.text.height, angle=0, + color = divide.text.name.color, alpha=batch.text.alpha) + } + } + }) + } + + + plotCells <- function(coldata){ + gplot <<- gplot + geom_point(data=coldata, aes(x=1:nrow(coldata), y=coldata$colsums), pch = 16, cex = 1) + } + + plotTheme <- function(){ + gplot <<- gplot + theme(plot.title = element_text(hjust = 0.5), + axis.ticks.x=element_blank(), axis.ticks.y=element_blank(), + axis.text.x=element_blank()) + + labs(title=paste("Contamination Plot\n", title), y="Library Size", x="Barcode Index") + + coord_cartesian(ylim = c(-200, ylim.max)) + + scale_x_continuous(breaks=NULL) + } + + + gplot <- ggplot() + drawPlates(plate.data) + drawBatches(barcode.data, plate.data) + plotCells(coldata) + plotTheme() + + return(gplot) +}