Mercurial > repos > iuc > dropletutils
comparison scripts/dropletutils.Rscript @ 6:8855361fcfc5 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit ed0625fe59342d14a08745996e3e32c6f922a738"
| author | iuc |
|---|---|
| date | Thu, 10 Dec 2020 13:50:06 +0000 |
| parents | cdf4443d5625 |
| children | 2c1200fba922 |
comparison
equal
deleted
inserted
replaced
| 5:cdf4443d5625 | 6:8855361fcfc5 |
|---|---|
| 1 ## Load in data | 1 ## Load in data |
| 2 args = commandArgs(trailingOnly = T) | 2 args <- commandArgs(trailingOnly = T) |
| 3 if (length(args) != 1){ | 3 if (length(args) != 1) { |
| 4 stop("Please provide the config file") | 4 stop("Please provide the config file") |
| 5 } | 5 } |
| 6 | 6 |
| 7 suppressWarnings(suppressPackageStartupMessages(require(DropletUtils))) | 7 suppressWarnings(suppressPackageStartupMessages(require(DropletUtils))) |
| 8 suppressWarnings(suppressPackageStartupMessages(require(Matrix))) | 8 suppressWarnings(suppressPackageStartupMessages(require(Matrix))) |
| 9 suppressWarnings(suppressPackageStartupMessages(require(scater))) | 9 suppressWarnings(suppressPackageStartupMessages(require(scater))) |
| 10 | 10 |
| 11 source(args[1]) | 11 source(args[1]) |
| 12 | 12 |
| 13 ## Helper functions | 13 ## Helper functions |
| 14 setSparse <- function(obj){ | 14 set_sparse <- function(obj) { |
| 15 return(as(obj, "dgCMatrix")) | 15 return(as(obj, "dgCMatrix")) |
| 16 } | 16 } |
| 17 | 17 |
| 18 writeTSV <- function(fileout, obj){ | 18 write_tsv <- function(fileout, obj) { |
| 19 write.table(as.matrix(obj), file=fileout, col.names=NA, sep='\t', quote=FALSE) | 19 write.table(as.matrix(obj), file = fileout, |
| 20 col.names = NA, sep = "\t", quote = FALSE) | |
| 20 } | 21 } |
| 21 | 22 |
| 22 determineGeneIDs <- function(object){ | 23 determine_geneids <- function(object) { |
| 23 if (!is.null(rowData(object)$Symbol)){ | 24 if (!is.null(rowData(object)$Symbol)) { |
| 24 return(rowData(object)$Symbol) | 25 return(rowData(object)$Symbol) |
| 25 } | 26 } |
| 26 return(rownames(object)) | 27 return(rownames(object)) |
| 27 } | 28 } |
| 28 | 29 |
| 29 getCounts <- function(object){ | 30 get_counts <- function(object) { |
| 30 return(Matrix(counts(object), sparse=TRUE)) | 31 return(Matrix(counts(object), sparse = TRUE)) |
| 31 } | 32 } |
| 32 | 33 |
| 33 writeOut <- function(object, fileout, typeout){ | 34 write_out <- function(object, fileout, typeout) { |
| 34 if (typeout == "tsv"){ | 35 if (typeout == "tsv") { |
| 35 writeTSV(fileout, getCounts(object)) | 36 write_tsv(fileout, get_counts(object)) |
| 36 } | 37 } |
| 37 else if (typeout == "h5"){ | 38 else if (typeout == "h5") { |
| 38 write10xCounts(fileout, getCounts(object), | 39 write10xCounts(fileout, get_counts(object), |
| 39 type="HDF5", | 40 type = "HDF5", |
| 40 gene.symbol=determineGeneIDs(object), | 41 gene.symbol = determine_geneids(object), |
| 41 overwrite=TRUE) | 42 overwrite = TRUE) |
| 42 } | 43 } |
| 43 else if (typeout == "directory"){ | 44 else if (typeout == "directory") { |
| 44 write10xCounts(fileout, getCounts(object), | 45 write10xCounts(fileout, get_counts(object), |
| 45 type="sparse", | 46 type = "sparse", |
| 46 gene.symbol=determineGeneIDs(object), | 47 gene.symbol = determine_geneids(object), |
| 47 overwrite=TRUE) | 48 overwrite = TRUE) |
| 48 } | 49 } |
| 49 } | 50 } |
| 50 | 51 |
| 51 read10xFiles <- function(filein, typein){ | 52 read_10x_files <- function(filein, typein) { |
| 52 sce <- NULL | 53 sce <- NULL |
| 53 if (typein == "tsv"){ | 54 if (typein == "tsv") { |
| 54 ## Exploding memory problems occured here | 55 ## Exploding memory problems occured here |
| 55 ## - solution is to use the readSparseCounts function from scater | 56 ## - solution is to use the readSparseCounts function from scater |
| 56 sce <- SingleCellExperiment(assays = list(counts = readSparseCounts(filein))) | 57 sce <- SingleCellExperiment(assays = |
| 58 list(counts = readSparseCounts(filein))) | |
| 57 } | 59 } |
| 58 else if (typein == "h5"){ | 60 else if (typein == "h5") { |
| 59 sce <- read10xCounts(filein, col.names=T, type="HDF5") # use barcodes.tsv as column names | 61 # use barcodes.tsv as column names |
| 62 sce <- read10xCounts(filein, col.names = T, type = "HDF5") | |
| 60 } | 63 } |
| 61 else if (typein == "directory"){ | 64 else if (typein == "directory") { |
| 62 sce <- read10xCounts(filein, col.names=T, type="sparse") | 65 sce <- read10xCounts(filein, col.names = T, type = "sparse") |
| 63 } | 66 } |
| 64 counts(sce) <- setSparse(counts(sce)) | 67 counts(sce) <- set_sparse(counts(sce)) |
| 65 return(sce) | 68 return(sce) |
| 66 } | 69 } |
| 67 | 70 |
| 68 | 71 |
| 69 ## Methods | 72 ## Methods |
| 70 | 73 |
| 71 | 74 |
| 72 doEmptyDrops <- function(files, eparams, in.type="directory", out.type="h5", fdr_threshold = 0.01){ | 75 do_empty_drops <- function(files, eparams, intype = "directory", |
| 73 sce <- read10xFiles(files$infile, in.type) | 76 outtype = "h5", fdr_threshold = 0.01) { |
| 77 sce <- read_10x_files(files$infile, intype) | |
| 74 | 78 |
| 75 eparams$... <- NULL ## hack | 79 eparams$... <- NULL ## hack to remove other parameters from being |
| 76 eparams$m = Matrix(counts(sce), sparse=TRUE) | 80 eparams$m <- Matrix(counts(sce), sparse = TRUE) |
| 77 | 81 |
| 78 e.out <- do.call(emptyDrops, c(eparams)) | 82 ## Determine sensible lowerbound |
| 83 m_stats <- summary(colSums(counts(sce))) | |
| 84 print("Cell Library Size Distribution:") | |
| 85 print(m_stats) | |
| 79 | 86 |
| 80 bar.names <- colnames(sce) | 87 if (m_stats["Min."] > eparams$lower) { |
| 81 if (length(bar.names) != nrow(e.out)){ | 88 print(paste0("CAUTION: Min. Lib. Size (", m_stats["Min."] |
| 89 , ") < requested lowerbound (", eparams$lower, ")")) | |
| 90 message(paste0("Setting lowerbound to Mean: ", m_stats["Mean"])) | |
| 91 eparams$lower <- m_stats["Mean"] | |
| 92 } | |
| 93 | |
| 94 e_out <- do.call(emptyDrops, c(eparams)) | |
| 95 | |
| 96 bar_names <- colnames(sce) | |
| 97 if (length(bar_names) != nrow(e_out)) { | |
| 82 stop("Length of barcodes and output metrics don't match.") | 98 stop("Length of barcodes and output metrics don't match.") |
| 83 } | 99 } |
| 84 e.out <- cbind(bar.names, e.out) | 100 e_out <- cbind(bar_names, e_out) |
| 85 e.out$is.Cell <- e.out$FDR <= fdr_threshold | 101 e_out$is_cell <- e_out$FDR <= fdr_threshold |
| 86 e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited | 102 e_out$is_cellandlimited <- e_out$is_cell & e_out$Limited |
| 87 | 103 |
| 88 ## Write to Plot | 104 ## Write to Plot |
| 89 e.out$is.Cell[is.na(e.out$is.Cell)] <- FALSE | 105 e_out$is_cell[is.na(e_out$is_cell)] <- FALSE |
| 90 xlim.dat <- e.out[complete.cases(e.out),]$Total | 106 xlim_dat <- e_out[complete.cases(e_out), ]$Total |
| 91 | 107 |
| 92 ## Write to table | 108 ## Write to table |
| 93 writeTSV(files$table, e.out[complete.cases(e.out),]) | 109 write_tsv(files$table, e_out[complete.cases(e_out), ]) |
| 94 | 110 |
| 95 png(files$plot) | 111 png(files$plot) |
| 96 plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"), | 112 plot(e_out$Total, -e_out$LogProb, col = ifelse(e_out$is_cell, |
| 97 xlab="Total UMI count", ylab="-Log Probability", | 113 "red", "black"), |
| 98 xlim=c(min(xlim.dat),max(xlim.dat))) | 114 xlab = "Total UMI count", ylab = "-Log Probability", |
| 115 xlim = c(min(xlim_dat), max(xlim_dat))) | |
| 99 dev.off() | 116 dev.off() |
| 100 | 117 |
| 101 ## Filtered | 118 ## Filtered |
| 102 called <- NULL | 119 called <- NULL |
| 103 if (fdr_threshold != 0){ | 120 if (fdr_threshold != 0) { |
| 104 called <- e.out$is.CellAndLimited | 121 called <- e_out$is_cellandlimited |
| 105 } else { | 122 } else { |
| 106 called <- e.out$is.Cell | 123 called <- e_out$is_cell |
| 107 } | 124 } |
| 108 called[is.na(called)] <- FALSE # replace NA's with FALSE | 125 called[is.na(called)] <- FALSE # replace NA's with FALSE |
| 109 sce.filtered <- sce[,called] | 126 sce_filtered <- sce[, called] |
| 110 | 127 |
| 111 writeOut(sce.filtered, files$out, out.type) | 128 write_out(sce_filtered, files$out, outtype) |
| 112 | 129 |
| 113 message(paste("Cells:", sum(na.omit(e.out$is.Cell)))) | 130 message(paste("Cells:", sum(na.omit(e_out$is_cell)))) |
| 114 message(paste("Cells and Limited:", sum(na.omit(e.out$is.CellAndLimited)))) | 131 message(paste("Cells and Limited:", sum(na.omit(e_out$is_cellandlimited)))) |
| 115 } | 132 } |
| 116 | 133 |
| 117 | 134 |
| 118 doDefaultDrops <- function(files, dparams, in.type="directory", out.type="h5"){ | 135 do_default_drops <- function(files, dparams, |
| 119 sce <- read10xFiles(files$infile, in.type) | 136 intype = "directory", outtype = "h5") { |
| 137 sce <- read_10x_files(files$infile, intype) | |
| 120 | 138 |
| 121 dparams$m = counts(sce) | 139 dparams$m <- counts(sce) |
| 122 called <- do.call(defaultDrops, c(dparams)) | 140 called <- do.call(defaultDrops, c(dparams)) |
| 123 | 141 |
| 124 # Filtered | 142 # Filtered |
| 125 sce.filtered <- sce[,called] | 143 sce_filtered <- sce[, called] |
| 126 | 144 |
| 127 writeOut(sce.filtered, files$out, out.type) | 145 write_out(sce_filtered, files$out, outtype) |
| 128 | 146 |
| 129 message(paste("Cells:", sum(called))) | 147 message(paste("Cells:", sum(called))) |
| 130 } | 148 } |
| 131 | 149 |
| 132 | 150 do_barcode_rankings <- function(files, bparams, intype = "directory") { |
| 133 doBarcodeRankings <- function(files, bparams, in.type="directory"){ | 151 sce <- read_10x_files(files$infile, intype) |
| 134 sce <- read10xFiles(files$infile, in.type) | |
| 135 | 152 |
| 136 bparams$... <- NULL ## hack | 153 bparams$... <- NULL ## hack |
| 137 bparams$m = counts(sce) | 154 bparams$m <- counts(sce) |
| 138 | 155 |
| 139 br.out <- do.call(barcodeRanks, c(bparams)) | 156 brout <- do.call(barcodeRanks, c(bparams)) |
| 140 | 157 |
| 141 png(files$plot) | 158 png(files$plot) |
| 142 plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes") | 159 plot(brout$rank, brout$total, log = "xy", |
| 143 o <- order(br.out$rank) | 160 xlab = "(log) Rank", ylab = "(log) Total Number of Barcodes") |
| 144 lines(br.out$rank[o], br.out$fitted[o], col="red") | 161 o <- order(brout$rank) |
| 162 lines(brout$rank[o], brout$fitted[o], col = "red") | |
| 145 | 163 |
| 146 abline(h=br.out$knee, col="dodgerblue", lty=2) | 164 abline(h = brout$knee, col = "dodgerblue", lty = 2) |
| 147 abline(h=br.out$inflection, col="forestgreen", lty=2) | 165 abline(h = brout$inflection, col = "forestgreen", lty = 2) |
| 148 legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection")) | 166 legend("bottomleft", lty = 2, col = c("dodgerblue", "forestgreen"), |
| 167 legend = c("knee", "inflection")) | |
| 149 dev.off() | 168 dev.off() |
| 150 | 169 |
| 151 print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection)) | 170 print(paste("knee =", brout$knee, ", inflection = ", brout$inflection)) |
| 152 } | 171 } |
| 153 | 172 |
| 154 ## Main | 173 ## Main |
| 155 set.seed(seed.val) | 174 set.seed(seed.val) |
| 156 | 175 |
| 157 if (do.method == "barcodeRankings") { | 176 if (do.method == "barcodeRankings") { |
| 158 doBarcodeRankings(files, bparams, in.type) | 177 do_barcode_rankings(files, bparams, intype) |
| 159 | 178 |
| 160 } else if (do.method == "defaultDrops") { | 179 } else if (do.method == "defaultDrops") { |
| 161 doDefaultDrops(files, dparams, in.type, out.type) | 180 do_default_drops(files, dparams, intype, outtype) |
| 162 | 181 |
| 163 } else if (do.method == "emptyDrops") { | 182 } else if (do.method == "emptyDrops") { |
| 164 doEmptyDrops(files, eparams, in.type, out.type, empty.fdr_threshold) | 183 do_empty_drops(files, eparams, intype, outtype, empty_fdr_threshold) |
| 165 } | 184 } |
