# HG changeset patch # User iuc # Date 1607608206 0 # Node ID 8855361fcfc5278dc1c6b618741a2c4966c7d23e # Parent cdf4443d56250a58bd3cddf58400a8e97e9fb658 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit ed0625fe59342d14a08745996e3e32c6f922a738" diff -r cdf4443d5625 -r 8855361fcfc5 dropletutils.xml --- a/dropletutils.xml Wed Jan 29 15:07:38 2020 -0500 +++ b/dropletutils.xml Thu Dec 10 13:50:06 2020 +0000 @@ -1,9 +1,22 @@ - + Utilities for handling droplet-based single-cell RNA-seq data + + dropletutils + + + topic_0203 + topic_3168 + topic_3170 + topic_3308 + + + operation_1812 + operation_3200 + - 1.2.1 - galaxy6 + 1.10.0 + 0 tenx.input tenx.output @@ -16,9 +29,10 @@ - bioconductor-dropletutils - r-matrix - bioconductor-scater + bioconductor-dropletutils + bioconductor-scater + r-base + r-matrix fonts-conda-ecosystem ## defaults -empty.fdr_threshold = 0.01 -eparams=formals(emptyDrops) -dparams=formals(defaultDrops) -bparams=formals(barcodeRanks) +empty_fdr_threshold = 0.01 +eparams = formals(emptyDrops) +dparams = formals(defaultDrops) +bparams = formals(barcodeRanks) ## File params -in.type='$tenx_format.use' -out.type=NULL +intype='$tenx_format.use' +outtype=NULL files=list() files\$table='$table' @@ -74,10 +88,10 @@ #else if str($operation.method.use) == 'emptydrops': do.method="emptyDrops" eparams\$lower=as.integer('$operation.method.lower') -empty.fdr_threshold=as.numeric('$operation.method.fdr_thresh') +empty_fdr_threshold=as.numeric('$operation.method.fdr_thresh') #end if -out.type='$operation.outformat' +outtype='$operation.outformat' #if str($operation.outformat) == 'directory': files\$out='@TXOUT@' #else if str($operation.outformat) == 'h5': @@ -172,8 +186,8 @@ - + @@ -196,17 +210,17 @@ - + - + - + @@ -221,16 +235,16 @@ - - - + + + - + @@ -248,9 +262,9 @@ - - - + + + diff -r cdf4443d5625 -r 8855361fcfc5 scripts/dropletutils.Rscript --- a/scripts/dropletutils.Rscript Wed Jan 29 15:07:38 2020 -0500 +++ b/scripts/dropletutils.Rscript Thu Dec 10 13:50:06 2020 +0000 @@ -1,6 +1,6 @@ ## Load in data -args = commandArgs(trailingOnly = T) -if (length(args) != 1){ +args <- commandArgs(trailingOnly = T) +if (length(args) != 1) { stop("Please provide the config file") } @@ -11,57 +11,60 @@ source(args[1]) ## Helper functions -setSparse <- function(obj){ +set_sparse <- function(obj) { return(as(obj, "dgCMatrix")) } -writeTSV <- function(fileout, obj){ - write.table(as.matrix(obj), file=fileout, col.names=NA, sep='\t', quote=FALSE) +write_tsv <- function(fileout, obj) { + write.table(as.matrix(obj), file = fileout, + col.names = NA, sep = "\t", quote = FALSE) } -determineGeneIDs <- function(object){ - if (!is.null(rowData(object)$Symbol)){ +determine_geneids <- function(object) { + if (!is.null(rowData(object)$Symbol)) { return(rowData(object)$Symbol) } return(rownames(object)) } -getCounts <- function(object){ - return(Matrix(counts(object), sparse=TRUE)) +get_counts <- function(object) { + return(Matrix(counts(object), sparse = TRUE)) } -writeOut <- function(object, fileout, typeout){ - if (typeout == "tsv"){ - writeTSV(fileout, getCounts(object)) +write_out <- function(object, fileout, typeout) { + if (typeout == "tsv") { + write_tsv(fileout, get_counts(object)) } - else if (typeout == "h5"){ - write10xCounts(fileout, getCounts(object), - type="HDF5", - gene.symbol=determineGeneIDs(object), - overwrite=TRUE) + else if (typeout == "h5") { + write10xCounts(fileout, get_counts(object), + type = "HDF5", + gene.symbol = determine_geneids(object), + overwrite = TRUE) } - else if (typeout == "directory"){ - write10xCounts(fileout, getCounts(object), - type="sparse", - gene.symbol=determineGeneIDs(object), - overwrite=TRUE) + else if (typeout == "directory") { + write10xCounts(fileout, get_counts(object), + type = "sparse", + gene.symbol = determine_geneids(object), + overwrite = TRUE) } } -read10xFiles <- function(filein, typein){ +read_10x_files <- function(filein, typein) { sce <- NULL - if (typein == "tsv"){ + if (typein == "tsv") { ## Exploding memory problems occured here ## - solution is to use the readSparseCounts function from scater - sce <- SingleCellExperiment(assays = list(counts = readSparseCounts(filein))) - } - else if (typein == "h5"){ - sce <- read10xCounts(filein, col.names=T, type="HDF5") # use barcodes.tsv as column names + sce <- SingleCellExperiment(assays = + list(counts = readSparseCounts(filein))) } - else if (typein == "directory"){ - sce <- read10xCounts(filein, col.names=T, type="sparse") + else if (typein == "h5") { + # use barcodes.tsv as column names + sce <- read10xCounts(filein, col.names = T, type = "HDF5") } - counts(sce) <- setSparse(counts(sce)) + else if (typein == "directory") { + sce <- read10xCounts(filein, col.names = T, type = "sparse") + } + counts(sce) <- set_sparse(counts(sce)) return(sce) } @@ -69,97 +72,113 @@ ## Methods -doEmptyDrops <- function(files, eparams, in.type="directory", out.type="h5", fdr_threshold = 0.01){ - sce <- read10xFiles(files$infile, in.type) +do_empty_drops <- function(files, eparams, intype = "directory", + outtype = "h5", fdr_threshold = 0.01) { + sce <- read_10x_files(files$infile, intype) + + eparams$... <- NULL ## hack to remove other parameters from being + eparams$m <- Matrix(counts(sce), sparse = TRUE) + + ## Determine sensible lowerbound + m_stats <- summary(colSums(counts(sce))) + print("Cell Library Size Distribution:") + print(m_stats) - eparams$... <- NULL ## hack - eparams$m = Matrix(counts(sce), sparse=TRUE) + if (m_stats["Min."] > eparams$lower) { + print(paste0("CAUTION: Min. Lib. Size (", m_stats["Min."] + , ") < requested lowerbound (", eparams$lower, ")")) + message(paste0("Setting lowerbound to Mean: ", m_stats["Mean"])) + eparams$lower <- m_stats["Mean"] + } - e.out <- do.call(emptyDrops, c(eparams)) + e_out <- do.call(emptyDrops, c(eparams)) - bar.names <- colnames(sce) - if (length(bar.names) != nrow(e.out)){ + bar_names <- colnames(sce) + if (length(bar_names) != nrow(e_out)) { stop("Length of barcodes and output metrics don't match.") } - e.out <- cbind(bar.names, e.out) - e.out$is.Cell <- e.out$FDR <= fdr_threshold - e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited + e_out <- cbind(bar_names, e_out) + e_out$is_cell <- e_out$FDR <= fdr_threshold + e_out$is_cellandlimited <- e_out$is_cell & e_out$Limited ## Write to Plot - e.out$is.Cell[is.na(e.out$is.Cell)] <- FALSE - xlim.dat <- e.out[complete.cases(e.out),]$Total + e_out$is_cell[is.na(e_out$is_cell)] <- FALSE + xlim_dat <- e_out[complete.cases(e_out), ]$Total ## Write to table - writeTSV(files$table, e.out[complete.cases(e.out),]) + write_tsv(files$table, e_out[complete.cases(e_out), ]) png(files$plot) - plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"), - xlab="Total UMI count", ylab="-Log Probability", - xlim=c(min(xlim.dat),max(xlim.dat))) + plot(e_out$Total, -e_out$LogProb, col = ifelse(e_out$is_cell, + "red", "black"), + xlab = "Total UMI count", ylab = "-Log Probability", + xlim = c(min(xlim_dat), max(xlim_dat))) dev.off() ## Filtered called <- NULL - if (fdr_threshold != 0){ - called <- e.out$is.CellAndLimited + if (fdr_threshold != 0) { + called <- e_out$is_cellandlimited } else { - called <- e.out$is.Cell + called <- e_out$is_cell } called[is.na(called)] <- FALSE # replace NA's with FALSE - sce.filtered <- sce[,called] + sce_filtered <- sce[, called] - writeOut(sce.filtered, files$out, out.type) + write_out(sce_filtered, files$out, outtype) - message(paste("Cells:", sum(na.omit(e.out$is.Cell)))) - message(paste("Cells and Limited:", sum(na.omit(e.out$is.CellAndLimited)))) + message(paste("Cells:", sum(na.omit(e_out$is_cell)))) + message(paste("Cells and Limited:", sum(na.omit(e_out$is_cellandlimited)))) } -doDefaultDrops <- function(files, dparams, in.type="directory", out.type="h5"){ - sce <- read10xFiles(files$infile, in.type) +do_default_drops <- function(files, dparams, + intype = "directory", outtype = "h5") { + sce <- read_10x_files(files$infile, intype) - dparams$m = counts(sce) + dparams$m <- counts(sce) called <- do.call(defaultDrops, c(dparams)) # Filtered - sce.filtered <- sce[,called] + sce_filtered <- sce[, called] - writeOut(sce.filtered, files$out, out.type) + write_out(sce_filtered, files$out, outtype) message(paste("Cells:", sum(called))) } - -doBarcodeRankings <- function(files, bparams, in.type="directory"){ - sce <- read10xFiles(files$infile, in.type) +do_barcode_rankings <- function(files, bparams, intype = "directory") { + sce <- read_10x_files(files$infile, intype) bparams$... <- NULL ## hack - bparams$m = counts(sce) + bparams$m <- counts(sce) - br.out <- do.call(barcodeRanks, c(bparams)) + brout <- do.call(barcodeRanks, c(bparams)) png(files$plot) - plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes") - o <- order(br.out$rank) - lines(br.out$rank[o], br.out$fitted[o], col="red") + plot(brout$rank, brout$total, log = "xy", + xlab = "(log) Rank", ylab = "(log) Total Number of Barcodes") + o <- order(brout$rank) + lines(brout$rank[o], brout$fitted[o], col = "red") - abline(h=br.out$knee, col="dodgerblue", lty=2) - abline(h=br.out$inflection, col="forestgreen", lty=2) - legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection")) + abline(h = brout$knee, col = "dodgerblue", lty = 2) + abline(h = brout$inflection, col = "forestgreen", lty = 2) + legend("bottomleft", lty = 2, col = c("dodgerblue", "forestgreen"), + legend = c("knee", "inflection")) dev.off() - print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection)) + print(paste("knee =", brout$knee, ", inflection = ", brout$inflection)) } ## Main set.seed(seed.val) if (do.method == "barcodeRankings") { - doBarcodeRankings(files, bparams, in.type) + do_barcode_rankings(files, bparams, intype) } else if (do.method == "defaultDrops") { - doDefaultDrops(files, dparams, in.type, out.type) + do_default_drops(files, dparams, intype, outtype) } else if (do.method == "emptyDrops") { - doEmptyDrops(files, eparams, in.type, out.type, empty.fdr_threshold) + do_empty_drops(files, eparams, intype, outtype, empty_fdr_threshold) } diff -r cdf4443d5625 -r 8855361fcfc5 test-data/defs_defaultdrops.h5 Binary file test-data/defs_defaultdrops.h5 has changed diff -r cdf4443d5625 -r 8855361fcfc5 test-data/defs_emptydrops_150_0002.h5 Binary file test-data/defs_emptydrops_150_0002.h5 has changed diff -r cdf4443d5625 -r 8855361fcfc5 test-data/defs_emptydrops_150_0002.png Binary file test-data/defs_emptydrops_150_0002.png has changed diff -r cdf4443d5625 -r 8855361fcfc5 test-data/defs_emptydrops_150_0002a.h5 Binary file test-data/defs_emptydrops_150_0002a.h5 has changed diff -r cdf4443d5625 -r 8855361fcfc5 test-data/defs_emptydrops_150_0002a.png Binary file test-data/defs_emptydrops_150_0002a.png has changed