changeset 2:a8aa294401be draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dropletutils/ commit 4d89eb1eb951ef094d1f77c46824d9c38be4445b"
author iuc
date Fri, 06 Sep 2019 10:56:16 -0400
parents cfe1e6c28d95
children f0de368eabca
files dropletutils.xml scripts/dropletutils.Rscript test-data/defs_emptydrops_150_0002.h5ad test-data/defs_emptydrops_150_0002.png test-data/defs_emptydrops_150_0002a.h5ad test-data/defs_emptydrops_150_0002a.png
diffstat 6 files changed, 42 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/dropletutils.xml	Mon Aug 26 05:06:39 2019 -0400
+++ b/dropletutils.xml	Fri Sep 06 10:56:16 2019 -0400
@@ -3,7 +3,7 @@
     <description>Utilities for handling droplet-based single-cell RNA-seq data</description>
     <macros>
         <token name="@PACKAGE_VERSION@" >1.2.1</token>
-        <token name="@GALAXY_VERSION@" >2</token>
+        <token name="@GALAXY_VERSION@" >3</token>
         <token name="@TXIN@">tenx.input</token>
         <token name="@TXOUT@">tenx.output</token>
         <xml name="test_dirin" >
@@ -125,13 +125,13 @@
                         <option value="emptydrops">EmptyDrops</option>
                     </param>
                     <when value="defaultdrops">
-                        <param name="expected" type="integer" min="10" value="3000" label="Expected Number of Cells" help="The expected number of cells in the sample." />
-                        <param name="upperquant" type="float" min="0.1" max="1.0" value="0.99" label="Upper Quantile" help="The quantile of the top expected barcodes to consider." />
-                        <param name="lowerprop" type="float" min="0.1" max="1.0" value="0.1" label="Lower Proportion" help="The fraction of molecules of the upper quantile that must be exceeded for a cell barcode to be detected." />
+                        <param name="expected" type="integer" min="1" value="3000" label="Expected Number of Cells" help="The expected number of cells in the sample." />
+                        <param name="upperquant" type="float" min="0" max="1.0" value="0.99" label="Upper Quantile" help="The quantile of the top expected barcodes to consider." />
+                        <param name="lowerprop" type="float" min="0" max="1.0" value="0.1" label="Lower Proportion" help="The fraction of molecules of the upper quantile that must be exceeded for a cell barcode to be detected." />
                     </when>
                     <when value="emptydrops">
-                        <param name="lower" type="integer" min="10" value="100" label="Lower-bound Threshold" help="The lower-bound threshold of the total UMI count at which barcodes beneath this are assumed to be empty droplets." />
-                        <param name="fdr_thresh" type="float" min="0" max="1" value="0.01" label="FDR Threshold" help="False Discovery Rate threshold at which droplets with significant deviations from the ambient profile are detected at this threshold." />
+                        <param name="lower" type="integer" min="1" value="100" label="Lower-bound Threshold" help="The lower-bound threshold of the total UMI count at which barcodes beneath this are assumed to be empty droplets." />
+                        <param name="fdr_thresh" type="float" min="0" max="1" value="0.01" label="FDR Threshold" help="False Discovery Rate threshold at which droplets with significant deviations from the ambient profile are detected at this threshold. If set to 0, the threshold is ignored." />
                     </when>
                 </conditional>
                 <param name="outformat" type="select" label="Format for output matrices">
@@ -141,7 +141,7 @@
                 </param>
             </when>
             <when value="barcode_rank">
-                <param name="lower" type="integer" min="10" value="100" label="Lower Bound" help="Lower bound on the total UMI count, at or below which all barcodes are assumed to correspond to empty droplets."/>
+                <param name="lower" type="integer" min="1" value="100" label="Lower Bound" help="Lower bound on the total UMI count, at or below which all barcodes are assumed to correspond to empty droplets."/>
             </when>
         </conditional>
         <param name="seed" type="integer" value="100" label="Random Seed" />
@@ -221,8 +221,8 @@
                 <assert_contents>
                     <has_n_columns n="9" />
                     <has_line_matching expression="^\sbar.names\sTotal\sLogProb\sPValue\sLimited\sFDR\sis.Cell\sis.CellAndLimited" />
-                    <has_line_matching expression="^1\sGGAAGAAGAA\s0\sNA\sNA\sNA\sNA\sNA\sNA" />
-                    <has_line_matching expression="^11100\sGCTGAAGCAA\s71\sNA\sNA\sNA\sNA\sNA\sNA" />
+                    <has_line_matching expression="^994\sGGCATTACAA\s338\s-246.922772388055\s9.99900009999e-05\sTRUE\s9.99900009999e-05\sTRUE\sTRUE" />
+                    <has_line_matching expression="^998\sCATGAAGCAA\s151\s-166.644236503983\s9.99900009999e-05\sTRUE\s9.99900009999e-05\sTRUE\sTRUE" />
                 </assert_contents>
             </output>
             <output name="plot" value="defs_emptydrops_150_0002.png" compare="sim_size" delta="100" />
@@ -248,8 +248,8 @@
                 <assert_contents>
                     <has_n_columns n="9" />
                     <has_line_matching expression="^\sbar.names\sTotal\sLogProb\sPValue\sLimited\sFDR\sis.Cell\sis.CellAndLimited" />
-                    <has_line_matching expression="^1\sGGAAGAAGAA\s20\sNA\sNA\sNA\sNA\sNA\sNA" />
-                    <has_line_matching expression="^11100\sGCTGAAGCAA\s203\s-222.833111872316\s9.99900009999e-05\sTRUE\s0.000126279506880773\sTRUE\sTRUE" />
+                    <has_line_matching expression="^1100\sCCGGAAGCAA\s169\s-198.117943099773\s9.99900009999e-05\sTRUE\s0.000126279506880773\sTRUE\sTRUE" />
+                    <has_line_matching expression="^1114\sTCCGAAGCAA\s182\s-196.181449214729\s9.99900009999e-05\sTRUE\s0.000126279506880773\sTRUE\sTRUE" />
                 </assert_contents>
             </output>
             <output name="plot" value="defs_emptydrops_150_0002a.png" compare="sim_size" delta="100" />
--- a/scripts/dropletutils.Rscript	Mon Aug 26 05:06:39 2019 -0400
+++ b/scripts/dropletutils.Rscript	Fri Sep 06 10:56:16 2019 -0400
@@ -58,35 +58,44 @@
 
     eparams$... <- NULL ## hack
     eparams$m = Matrix(counts(sce), sparse=TRUE)
-    
+
     e.out <- do.call(emptyDrops, c(eparams))
-    
+
     bar.names <- colnames(sce)
-    if (length(bar.names) != nrow(e.out)){    
+    if (length(bar.names) != nrow(e.out)){
         stop("Length of barcodes and output metrics don't match.")
     }
     e.out <- cbind(bar.names, e.out)
     e.out$is.Cell <- e.out$FDR <= fdr_threshold
     e.out$is.CellAndLimited <- e.out$is.Cell & e.out$Limited
-    
-    # Write to table
-    writeTSV(files$table, e.out)
-    
-    # Print to log
-    print(table(Limited=e.out$Limited, Significant=e.out$is.Cell))
-    
-    # Write to Plot
+
+    ## Write to Plot
+    e.out$is.Cell[is.na(e.out$is.Cell)] <- FALSE
+    xlim.dat <- e.out[complete.cases(e.out),]$Total
+
+    ## Write to table
+    writeTSV(files$table, e.out[complete.cases(e.out),])
+
     png(files$plot)
     plot(e.out$Total, -e.out$LogProb, col=ifelse(e.out$is.Cell, "red", "black"),
-    xlab="Total UMI count", ylab="-Log Probability")
+         xlab="Total UMI count", ylab="-Log Probability",
+         xlim=c(min(xlim.dat),max(xlim.dat)))
     dev.off()
-    
-    # Filtered
-    called <- e.out$is.CellAndLimited
+
+    ## Filtered
+    called <- NULL
+    if (fdr_threshold != 0){
+        called <- e.out$is.CellAndLimited
+    } else {
+        called <- e.out$is.Cell
+    }
     called[is.na(called)] <- FALSE    # replace NA's with FALSE
     sce.filtered <- sce[,called]
-    
+
     writeOut(counts(sce.filtered), files$out, out.type)
+
+    message(paste("Cells:", sum(na.omit(e.out$is.Cell))))
+    message(paste("Cells and Limited:", sum(na.omit(e.out$is.CellAndLimited))))
 }
 
 
@@ -95,12 +104,13 @@
 
     dparams$m = counts(sce)
     called <- do.call(defaultDrops, c(dparams))
-    print(table(called))
-        
+
     # Filtered
     sce.filtered <- sce[,called]
-    
+
     writeOut(Matrix(counts(sce.filtered),sparse=TRUE), files$out, out.type)
+
+    message(paste("Cells:", sum(called)))
 }
 
 
@@ -111,7 +121,7 @@
     bparams$m = counts(sce)
 
     br.out <- do.call(barcodeRanks, c(bparams))
-    
+
     png(files$plot)
     plot(br.out$rank, br.out$total, log="xy", xlab="(log) Rank", ylab="(log) Total Number of Barcodes")
     o <- order(br.out$rank)
@@ -121,7 +131,7 @@
     abline(h=br.out$inflection, col="forestgreen", lty=2)
     legend("bottomleft", lty=2, col=c("dodgerblue", "forestgreen"), legend=c("knee", "inflection"))
     dev.off()
-    
+
     print(paste("knee =", br.out$knee, ", inflection = ", br.out$inflection))
 }
 
Binary file test-data/defs_emptydrops_150_0002.h5ad has changed
Binary file test-data/defs_emptydrops_150_0002.png has changed
Binary file test-data/defs_emptydrops_150_0002a.h5ad has changed
Binary file test-data/defs_emptydrops_150_0002a.png has changed