Mercurial > repos > iuc > dropletutils
changeset 2:a8aa294401be draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit 4d89eb1eb951ef094d1f77c46824d9c38be4445b"
author | iuc |
---|---|
date | Fri, 06 Sep 2019 10:56:16 -0400 |
parents | cfe1e6c28d95 |
children | f0de368eabca |
files | dropletutils.xml scripts/dropletutils.Rscript test-data/defs_emptydrops_150_0002.h5ad test-data/defs_emptydrops_150_0002.png test-data/defs_emptydrops_150_0002a.h5ad test-data/defs_emptydrops_150_0002a.png |
diffstat | 6 files changed, 42 insertions(+), 32 deletions(-) [+] |
line wrap: on
line diff
--- a/dropletutils.xml Mon Aug 26 05:06:39 2019 -0400 +++ b/dropletutils.xml Fri Sep 06 10:56:16 2019 -0400 @@ -3,7 +3,7 @@ <description>Utilities for handling droplet-based single-cell RNA-seq data</description> <macros> <token name="@PACKAGE_VERSION@" >1.2.1</token> - <token name="@GALAXY_VERSION@" >2</token> + <token name="@GALAXY_VERSION@" >3</token> <token name="@TXIN@">tenx.input</token> <token name="@TXOUT@">tenx.output</token> <xml name="test_dirin" > @@ -125,13 +125,13 @@ <option value="emptydrops">EmptyDrops</option> </param> <when value="defaultdrops"> - <param name="expected" type="integer" min="10" value="3000" label="Expected Number of Cells" help="The expected number of cells in the sample." /> - <param name="upperquant" type="float" min="0.1" max="1.0" value="0.99" label="Upper Quantile" help="The quantile of the top expected barcodes to consider." /> - <param name="lowerprop" type="float" min="0.1" max="1.0" value="0.1" label="Lower Proportion" help="The fraction of molecules of the upper quantile that must be exceeded for a cell barcode to be detected." /> + <param name="expected" type="integer" min="1" value="3000" label="Expected Number of Cells" help="The expected number of cells in the sample." /> + <param name="upperquant" type="float" min="0" max="1.0" value="0.99" label="Upper Quantile" help="The quantile of the top expected barcodes to consider." /> + <param name="lowerprop" type="float" min="0" max="1.0" value="0.1" label="Lower Proportion" help="The fraction of molecules of the upper quantile that must be exceeded for a cell barcode to be detected." /> </when> <when value="emptydrops"> - <param name="lower" type="integer" min="10" value="100" label="Lower-bound Threshold" help="The lower-bound threshold of the total UMI count at which barcodes beneath this are assumed to be empty droplets." /> - <param name="fdr_thresh" type="float" min="0" max="1" value="0.01" label="FDR Threshold" help="False Discovery Rate threshold at which droplets with significant deviations from the ambient profile are detected at this threshold." /> + <param name="lower" type="integer" min="1" value="100" label="Lower-bound Threshold" help="The lower-bound threshold of the total UMI count at which barcodes beneath this are assumed to be empty droplets." /> + <param name="fdr_thresh" type="float" min="0" max="1" value="0.01" label="FDR Threshold" help="False Discovery Rate threshold at which droplets with significant deviations from the ambient profile are detected at this threshold. If set to 0, the threshold is ignored." /> </when> </conditional> <param name="outformat" type="select" label="Format for output matrices"> @@ -141,7 +141,7 @@ </param> </when> <when value="barcode_rank"> - <param name="lower" type="integer" min="10" value="100" label="Lower Bound" help="Lower bound on the total UMI count, at or below which all barcodes are assumed to correspond to empty droplets."/> + <param name="lower" type="integer" min="1" value="100" label="Lower Bound" help="Lower bound on the total UMI count, at or below which all barcodes are assumed to correspond to empty droplets."/> </when> </conditional> <param name="seed" type="integer" value="100" label="Random Seed" /> @@ -221,8 +221,8 @@ <assert_contents> <has_n_columns n="9" /> <has_line_matching expression="^\sbar.names\sTotal\sLogProb\sPValue\sLimited\sFDR\sis.Cell\sis.CellAndLimited" /> - <has_line_matching expression="^1\sGGAAGAAGAA\s0\sNA\sNA\sNA\sNA\sNA\sNA" /> - <has_line_matching expression="^11100\sGCTGAAGCAA\s71\sNA\sNA\sNA\sNA\sNA\sNA" /> + <has_line_matching expression="^994\sGGCATTACAA\s338\s-246.922772388055\s9.99900009999e-05\sTRUE\s9.99900009999e-05\sTRUE\sTRUE" /> + <has_line_matching expression="^998\sCATGAAGCAA\s151\s-166.644236503983\s9.99900009999e-05\sTRUE\s9.99900009999e-05\sTRUE\sTRUE" /> </assert_contents> </output> <output name="plot" value="defs_emptydrops_150_0002.png" compare="sim_size" delta="100" /> @@ -248,8 +248,8 @@ <assert_contents> <has_n_columns n="9" /> <has_line_matching expression="^\sbar.names\sTotal\sLogProb\sPValue\sLimited\sFDR\sis.Cell\sis.CellAndLimited" /> - <has_line_matching expression="^1\sGGAAGAAGAA\s20\sNA\sNA\sNA\sNA\sNA\sNA" /> - <has_line_matching expression="^11100\sGCTGAAGCAA\s203\s-222.833111872316\s9.99900009999e-05\sTRUE\s0.000126279506880773\sTRUE\sTRUE" /> + <has_line_matching expression="^1100\sCCGGAAGCAA\s169\s-198.117943099773\s9.99900009999e-05\sTRUE\s0.000126279506880773\sTRUE\sTRUE" /> + <has_line_matching expression="^1114\sTCCGAAGCAA\s182\s-196.181449214729\s9.99900009999e-05\sTRUE\s0.000126279506880773\sTRUE\sTRUE" /> </assert_contents> </output> <output name="plot" value="defs_emptydrops_150_0002a.png" compare="sim_size" delta="100" />
--- a/scripts/dropletutils.Rscript Mon Aug 26 05:06:39 2019 -0400 +++ b/scripts/dropletutils.Rscript Fri Sep 06 10:56:16 2019 -0400 @@ -58,35 +58,44 @@ eparams$... <- NULL ## hack eparams$m = Matrix(counts(sce), sparse=TRUE) - + e.out <- do.call(emptyDrops, c(eparams)) - + bar.names <- colnames(sce) - if (length(bar.names) != nrow(e.out)){ + if (length(bar.names) != nrow(e.out)){ stop("Length of barcodes and output metrics don't match.") } e.out <- cbind(bar.names, e.out) e.out$is.Cell <- e.out$FDR <= fdr_threshold e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited - - # Write to table - writeTSV(files$table, e.out) - - # Print to log - print(table(Limited=e.out$Limited, Significant=e.out$is.Cell)) - - # Write to Plot + + ## Write to Plot + e.out$is.Cell[is.na(e.out$is.Cell)] <- FALSE + xlim.dat <- e.out[complete.cases(e.out),]$Total + + ## Write to table + writeTSV(files$table, e.out[complete.cases(e.out),]) + png(files$plot) plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"), - xlab="Total UMI count", ylab="-Log Probability") + xlab="Total UMI count", ylab="-Log Probability", + xlim=c(min(xlim.dat),max(xlim.dat))) dev.off() - - # Filtered - called <- e.out$is.CellAndLimited + + ## Filtered + called <- NULL + if (fdr_threshold != 0){ + called <- e.out$is.CellAndLimited + } else { + called <- e.out$is.Cell + } called[is.na(called)] <- FALSE # replace NA's with FALSE sce.filtered <- sce[,called] - + writeOut(counts(sce.filtered), files$out, out.type) + + message(paste("Cells:", sum(na.omit(e.out$is.Cell)))) + message(paste("Cells and Limited:", sum(na.omit(e.out$is.CellAndLimited)))) } @@ -95,12 +104,13 @@ dparams$m = counts(sce) called <- do.call(defaultDrops, c(dparams)) - print(table(called)) - + # Filtered sce.filtered <- sce[,called] - + writeOut(Matrix(counts(sce.filtered),sparse=TRUE), files$out, out.type) + + message(paste("Cells:", sum(called))) } @@ -111,7 +121,7 @@ bparams$m = counts(sce) br.out <- do.call(barcodeRanks, c(bparams)) - + png(files$plot) plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes") o <- order(br.out$rank) @@ -121,7 +131,7 @@ abline(h=br.out$inflection, col="forestgreen", lty=2) legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection")) dev.off() - + print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection)) }