diff scripts/dropletutils.Rscript @ 0:4cd9f0008d9c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit e66ab3d4fc0c1a72523e8f93447cc07cdd6816b7
author iuc
date Tue, 04 Jun 2019 17:19:52 -0400
parents
children cfe1e6c28d95
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/dropletutils.Rscript	Tue Jun 04 17:19:52 2019 -0400
@@ -0,0 +1,137 @@
+## Load in data
+args = commandArgs(trailingOnly = T)
+if (length(args) != 1){
+    stop("Please provide the config file")
+}
+
+suppressWarnings(suppressPackageStartupMessages(require(DropletUtils)))
+suppressWarnings(suppressPackageStartupMessages(require(Matrix)))
+
+source(args[1])
+
+## Helper functions
+setSparse <- function(obj){
+    return(as(obj, "dgCMatrix"))
+}
+
+writeTSV <- function(fileout, obj){
+    write.table(as.matrix(obj), file=fileout, col.names=NA, sep='\t', quote=FALSE)
+}
+
+writeOut <- function(counts, fileout, typeout){
+    if (typeout == "tsv"){
+        writeTSV(fileout, counts)
+    }
+    else if (typeout == "h5ad"){
+        write10xCounts(fileout, counts, type="HDF5", overwrite=TRUE)
+    }
+    else if (typeout == "directory"){
+        write10xCounts(fileout, counts, type="sparse", overwrite=TRUE)
+    }
+}
+
+
+read10xFiles <- function(filein, typein){
+    sce <- NULL
+    if (typein == "tsv"){
+        dat <- read.table(filein, header = TRUE,  sep='\t', stringsAsFactors=FALSE, quote="", row.names=1)
+        sce <- SingleCellExperiment(assays = list(counts = as.matrix(dat)))
+    }
+    else if (typein == "h5ad"){
+        sce <- read10xCounts(filein, col.names=T, type="HDF5")   # use barcodes.tsv as column names
+    }
+    else if (typein == "directory"){
+        sce <- read10xCounts(filein, col.names=T, type="sparse")
+    }
+    counts(sce) <- setSparse(counts(sce))
+    return(sce)
+}
+
+
+## Methods
+
+
+doEmptyDrops <- function(files, eparams, in.type="directory", out.type="h5ad", fdr_threshold = 0.01){
+    sce <- read10xFiles(files$infile, in.type)
+
+    eparams$... <- NULL ## hack
+    eparams$m = Matrix(counts(sce), sparse=TRUE)
+    
+    e.out <- do.call(emptyDrops, c(eparams))
+    
+    bar.names <- colnames(sce)
+    if (length(bar.names) != nrow(e.out)){    
+        stop("Length of barcodes and output metrics don't match.")
+    }
+    e.out <- cbind(bar.names, e.out)
+    e.out$is.Cell <- e.out$FDR <= fdr_threshold
+    e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited
+    
+    # Write to table
+    writeTSV(files$table, e.out)
+    
+    # Print to log
+    print(table(Limited=e.out$Limited, Significant=e.out$is.Cell))
+    
+    # Write to Plot
+    png(files$plot)
+    plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"),
+    xlab="Total UMI count", ylab="-Log Probability")
+    dev.off()
+    
+    # Filtered
+    called <- e.out$is.CellAndLimited
+    called[is.na(called)] <- FALSE    # replace NA's with FALSE
+    sce.filtered <- sce[,called]
+    
+    writeOut(counts(sce.filtered), files$out, out.type)
+}
+
+
+doDefaultDrops <- function(files, dparams, in.type="directory", out.type="h5ad"){
+    sce <- read10xFiles(files$infile, in.type)
+
+    dparams$m = as.matrix(counts(sce))
+    called <- do.call(defaultDrops, c(dparams))
+    print(table(called))
+        
+    # Filtered
+    sce.filtered <- sce[,called]
+    
+    writeOut(Matrix(counts(sce.filtered),sparse=TRUE), files$out, out.type)
+}
+
+
+doBarcodeRankings <- function(files, bparams, in.type="directory"){
+    sce <- read10xFiles(files$infile, in.type)
+
+    bparams$... <- NULL ## hack
+    bparams$m = as.matrix(counts(sce))
+
+    br.out <- do.call(barcodeRanks, c(bparams))
+    
+    png(files$plot)
+    plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes")
+    o <- order(br.out$rank)
+    lines(br.out$rank[o], br.out$fitted[o], col="red")
+
+    abline(h=br.out$knee, col="dodgerblue", lty=2)
+    abline(h=br.out$inflection, col="forestgreen", lty=2)
+    legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection"))
+    dev.off()
+    
+    print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection))
+}
+
+## Main
+set.seed(seed.val)
+
+if (do.method == "barcodeRankings") {
+    doBarcodeRankings(files, bparams, in.type)
+
+} else if (do.method == "defaultDrops") {
+    doDefaultDrops(files, dparams, in.type, out.type)
+
+} else if (do.method == "emptyDrops") {
+    doEmptyDrops(files, eparams, in.type, out.type, empty.fdr_threshold)
+}