diff scripts/contamination_plot.R @ 1:253c9448f524 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crosscontamination_barcode_filter commit f967afe562781e5c8ed4e24e9d1e0bc3ebb29401
author iuc
date Mon, 03 Jun 2019 14:55:24 -0400
parents 582b7bd4ae4c
children 5ade5cf200da
line wrap: on
line diff
--- a/scripts/contamination_plot.R	Thu Jan 24 09:52:58 2019 -0500
+++ b/scripts/contamination_plot.R	Mon Jun 03 14:55:24 2019 -0400
@@ -1,8 +1,8 @@
 #!/usr/bin/env R
 
-require(ggplot2)
+suppressPackageStartupMessages(require(ggplot2))
 
-log10histoPlot <- function(columncounts, title=""){
+log10histoPlot <- function(title="", columncounts){
     #' Log10 histogram plot
     #'
     #' @param columncounts colSums(input_matrix)
@@ -17,124 +17,152 @@
     return(p1)
 }
 
-contaminationPlot <- function(columncounts, title = "",
-                              indexes.plates, indexes.fullbc, indexes.truebc,
-                              filtered=FALSE)
-{
-    #' Plots true and false barcodes
-    #'
-    #'
-    #' @param columncounts colSums(input_matrix)
-    #' @param title plot title
-    #' @param indexes.plates plate line positions
-    #' @param indexes.fullbc full batch line positions
-    #' @param indexes.truebc true batch line positions
-    #' @param filtered specifies whether the positions have been adjusted for true barcodes
-    #' @return ggplot grob
-    dfer <- data.frame(colcounts=columncounts)
+## This is calculated by the first call to contaminationPlot
+## and then re-used by the second call.
+ylim.max = NULL
 
-    ## Remove indexes where plates and full barcodes mix
-    indexes.fullbc = indexes.fullbc[!(indexes.fullbc %in% indexes.plates)]
-    
-    nit <- length(indexes.truebc)
-    nif <- length(indexes.fullbc)
-    nip <- length(indexes.plates)
-    mval <- max(dfer)
-  
-    ## Aesthetics
-    min.height <- -200
-    tf.spacing.left <- 12
-    tf.spacing.right <- 12   
-    tf.height <- mval - 10000
-    bn.height <- mval - 2000
-    plate.color <- "grey"
-    plate.text.color <- "black"
-    plate.text.alpha <- 0.5
-    plate.text.size <- 3
-    plate.height <- 2* mval / 5
-    plate.spacing <- if (filtered) 12 else 24
-    plate.height.text <- plate.height - 3000
-    
-    truebcs <- data.frame(
-        x=indexes.truebc, y=rep(min.height,nit),
-        xend=indexes.truebc, yend=rep(mval,nit)
-    )
-    fullbcs <- data.frame(
-        x=indexes.fullbc, y=rep(min.height,nif),
-        xend=indexes.fullbc, yend=rep(mval,nif)
-    )
-    platess <- data.frame(
-        x=indexes.plates, y=rep(min.height,nip),
-        xend=indexes.plates, yend=rep(plate.height,nip)
-    )
-    connecting.bar <- data.frame(
-        xsta = min(indexes.plates), ysta = min.height,
-        xfin = max(indexes.plates), yfin = min.height
-    )
-      
-    p1 <- ggplot()
-
-    if (!filtered){
-        p1 <- p1 +
-            geom_segment(data=truebcs, aes(x=x,y=y,xend=xend,yend=yend - 4000), col='grey', lty=2, size=0.2) + 
-            geom_segment(data=fullbcs, aes(x=x,y=y,xend=xend,yend=yend), col='blue', lty=1, size=0.4, alpha=0.2)
+contaminationPlot <- function(title, columndata, barcode.data, plate.data, RAW)
+{
+    coldata = data.frame(colsums=columndata)
+    maxval = max(coldata)
+    # Set once and once only
+    if (is.null(ylim.max)){
+        ylim.max <<- maxval + 500
     }
-    else {
-        p1 <- p1 +
-            geom_segment(data=truebcs, aes(x=x,y=y,xend=xend,yend=yend), col='blue', lty=1, size=0.4, alpha=0.2)
-    }
-    
-    p1 <- p1 +
-        geom_segment(data=platess, aes(x=x,y=y,xend=xend,yend=yend), col=plate.color, lty=1, size=1) +
-        geom_segment(data=connecting.bar, aes(x=xsta,y=ysta,xend=xfin,yend=yfin), col=plate.color, lty=1, size=1) +
-        geom_point(
-            data=dfer, aes(x=1:length(rownames(dfer)), y=dfer$colcounts),
-            pch = 16, cex = 1) +
-        theme(plot.title = element_text(hjust = 0.5),
-              axis.ticks.x=element_blank(), axis.ticks.y=element_blank(),
-              axis.text.x=element_blank()) +
-        labs(title=paste("Contamination Plot\n", title), y="Library Size", x="Barcode Index") +
-        scale_y_continuous(breaks=seq(0,mval + 10000, 10000)) +
-        scale_x_continuous(breaks=NULL)
 
-    ## Add true/false and batch labels
-    res <- lapply(indexes.truebc, function(xval){
-        batch <- match(xval, indexes.truebc)
+    drawPlates <- function(plate.data){ # 0 384 768
 
-        if (!filtered){
-            p1 <<- p1 +
-                annotate("text", x=xval - tf.spacing.left, size=2, y=tf.height, angle=90, label=" true positives", color = "dark blue", alpha = 0.5) +
-                annotate("text", x=xval + tf.spacing.right, size=2, y=tf.height, angle=-90,label="false positives", color = "black", alpha = 0.5) +
-                annotate("text", x=xval , size=4, y=bn.height, angle=-90, label=paste("B",batch,sep=""), color = "grey", alpha = 0.8)
-        }
-        else {
-            p1 <<- p1 +
-                annotate("text", x=xval - 48, size=4, y=bn.height, angle=-90, label=paste("B",batch,sep=""), color = "grey", alpha = 0.8)
-        }
-    })
-
-    ## Add Plate labels
-    res <- lapply(indexes.plates, function(p){
-        plate.num <- match(p, indexes.plates)
-        c.label <- paste("Plate", plate.num, sep="")
-        b.label <- paste("Plate", plate.num - 1, sep="")
-
-        # Right label
-        if (plate.num <  length(indexes.plates)){
-            p1 <<- p1 +
-                annotate("text", x=p + plate.spacing, size=plate.text.size, y=plate.height.text, angle=-90,
-                         label=c.label, color = plate.text.color, alpha=plate.text.alpha)
+        plate.boundaries <- plate.data["filtered.plates",]
+        if (RAW){
+            plate.boundaries <- plate.data["unfilter.plates",]
         }
 
-        # Left label
-        if (plate.num > 1){
-            p1 <<- p1 +
-                annotate("text", x=p - plate.spacing, size=plate.text.size, y=plate.height.text, angle=90,
-                         label=b.label, color = plate.text.color,  alpha=plate.text.alpha)
-        }
-    })
-        
-    return(p1)
-}
+        plate.minyval <- -200
+        plate.color <- "grey"
+        plate.text.color <- "red"
+        plate.text.alpha <- 0.5
+        plate.height <- 2* maxval / 5
+        plate.spacing <- 12
+        plate.height.text <- plate.height - 3000
+        plate.text.size <- 3
+
+
+        zzz <- lapply(names(plate.boundaries), function(plate.name){
+            plate.num = as.integer(sub("P","",plate.name))
+            plate.xval = plate.boundaries[[plate.name]]
+
+            ## If not first plate -- inner boundary, print next plate name to the right
+            if (plate.name != names(plate.boundaries)[length(plate.boundaries)]){
+                gplot <<- gplot + annotate("text", x=plate.xval + plate.spacing, label=paste("Plate", plate.num + 1),
+                                           size=plate.text.size, y=plate.height*0.7, angle=-90,
+                                           color=plate.text.color, alpha=plate.text.alpha)
+            }
+            ## If not last plate -- inner boundary, print previous name to the left
+            if (plate.name != names(plate.boundaries)[1]){
+                gplot <<- gplot + annotate("text", x=plate.xval - plate.spacing, label=paste("Plate", plate.num),
+                                           size=plate.text.size, y=plate.height*0.7, angle=90,
+                                           color=plate.text.color, alpha=plate.text.alpha)
+            }
+            gplot <<- gplot + geom_segment(aes(x=plate.xval, xend=plate.xval, y=plate.minyval, yend=plate.height), color=plate.color, lty=1, size=1)
+        })
+
+        ## Connecting bar underneath
+        gplot <<- gplot + geom_segment(aes(x=min(unname(unlist(plate.boundaries))), xend=max(unname(unlist(plate.boundaries))),
+                                           y=plate.minyval, yend=plate.minyval), color=plate.color, lty=1, size=1)
+    }
 
 
+    drawBatches <- function(barcode.data, plate.data){
+        batch.minyval = -200
+        divide.color <- "grey"
+        divide.style <- 2 ## dashed
+        divide.size <- 0.3
+        boundary.color <- "blue"
+        boundary.style <- 1
+        boundary.size <- 0.5
+        divide.text.true.color <- "blue"
+        divide.text.false.color <- "black"
+        divide.text.name.color <- "grey"
+        batch.text.height <- maxval * 4/5
+        batch.text.tf.height <- maxval * 2/5
+        batch.text.tf.spacing <- 12
+        batch.text.alpha <- 0.8
+        batch.text.size <- 3
+        batch.label.text.size <- 6
+        batch.height <- maxval * 4/5
+        divide.height <- maxval * 4/5
+        batch.spacing <- 24
+
+        boundary.pos <- barcode.data["filtered_positions",]
+        plate.pos <- unname(unlist(plate.data["filtered.plates",])) ## just want values
+
+        if (RAW){
+            boundary.pos <- barcode.data["unfilter_positions",]
+            divider.pos <- barcode.data["filter_in_unfilter",]
+            plate.pos <- unname(unlist(plate.data["unfilter.plates",])) ## just want values
+        }
+
+
+        zzz <- lapply(names(boundary.pos), function(batch.name){
+            batch.num = as.integer(sub("B","",batch.name))
+            batch.xval = unname(unlist(boundary.pos[[batch.name]]))
+
+            # Plot boundary line only if not intersecting with a plate line
+            if (!(batch.xval %in% plate.pos)){
+                gplot <<- gplot + geom_segment(aes(x=batch.xval, xend=batch.xval, y=batch.minyval, yend=batch.height),
+                                               color=boundary.color, lty=boundary.style, size=boundary.size)
+            }
+
+            ## Plot labels (except P0)
+            if (batch.name != "B0"){
+                if (RAW){
+                    divide.xval = unname(unlist(divider.pos[[batch.name]]))
+
+                    ## Plot the divider line
+                    gplot <<- gplot + geom_segment(aes(x=divide.xval, xend=divide.xval, y=batch.minyval, yend=divide.height),
+                                               color=divide.color, lty=divide.style, size=divide.size)
+
+                    ## Plot the True/False labels
+                    gplot <<- gplot + annotate("text", x=divide.xval - batch.text.tf.spacing, label="True positives",
+                                               size=batch.text.size, y=batch.text.tf.height, angle=+90,
+                                               color=divide.text.true.color, alpha=batch.text.alpha)
+                    gplot <<- gplot + annotate("text", x=divide.xval + batch.text.tf.spacing, label="False positives",
+                                               size=batch.text.size, y=batch.text.tf.height, angle=-90,
+                                               color=divide.text.false.color, alpha=batch.text.alpha)
+                    ## Plot the Batch names
+                    gplot <<- gplot + annotate("text", x=divide.xval, label=batch.name,
+                                               size=batch.label.text.size, y=batch.text.height, angle=0,
+                                               color = divide.text.name.color, alpha=batch.text.alpha)
+                } else {
+                    ## Plot the Batch names between blue lines
+                    gplot <<- gplot + annotate("text", x=batch.xval, label=batch.name,
+                                               size=batch.label.text.size, y=batch.text.height, angle=0,
+                                               color = divide.text.name.color, alpha=batch.text.alpha)
+                }
+            }
+        })
+    }
+
+
+    plotCells <- function(coldata){
+        gplot <<- gplot + geom_point(data=coldata, aes(x=1:nrow(coldata), y=coldata$colsums), pch = 16, cex = 1)
+    }
+
+    plotTheme <- function(){
+        gplot <<- gplot + theme(plot.title = element_text(hjust = 0.5),
+                                axis.ticks.x=element_blank(), axis.ticks.y=element_blank(),
+                                axis.text.x=element_blank()) +
+            labs(title=paste("Contamination Plot\n", title), y="Library Size", x="Barcode Index") +
+            coord_cartesian(ylim = c(-200, ylim.max)) +
+            scale_x_continuous(breaks=NULL)
+    }
+
+
+    gplot <- ggplot()
+    drawPlates(plate.data)
+    drawBatches(barcode.data, plate.data)
+    plotCells(coldata)
+    plotTheme()
+
+    return(gplot)
+}