Mercurial > repos > iuc > raceid_filtnormconf

--- a/macros_cluster.xml	Thu Feb 28 17:40:54 2019 -0500
+++ b/macros_cluster.xml	Mon Apr 15 17:55:17 2019 -0400
@@ -287,6 +287,7 @@
 filt.use.ccorrect = FALSE
 filt.lbatch.regexes = NULL

+filt.geqone = as.logical( '$filt.hist_geq_one' )
 filt\$mintotal = as.integer( '$filt.mintotal' )
 filt\$minexpr = as.integer( '$filt.minexpr' )
 filt\$minnumber = as.integer( '$filt.minnumber' )
--- a/raceid_filtnormconf.xml	Thu Feb 28 17:40:54 2019 -0500
+++ b/raceid_filtnormconf.xml	Mon Apr 15 17:55:17 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="raceid_filtnormconf" name="Filtering, Normalisation, and Confounder Removal using RaceID" version="@VERSION_RACEID@.@VERSION_PACKAGE@.2" >
+<tool id="raceid_filtnormconf" name="Filtering, Normalisation, and Confounder Removal using RaceID" version="@VERSION_RACEID@.@VERSION_PACKAGE@.3" >
     <description>generates a normalised and filtered count matrix of single-cell RNA data</description>
     <macros>
         <import>macros.xml</import>
@@ -7,13 +7,13 @@
     <expand macro="requirements" />
     <version_command><![CDATA[
 Rscript '$__tool_directory__/scripts/cluster.R' @GET_VERSION@
-]]></version_command>
+]]>
+    </version_command>

     <command detect_errors="exit_code"><![CDATA[
 #set bin = 'cluster.R'
 Rscript '$__tool_directory__/scripts/$bin' '$userconf' 2> '$outlog' > /dev/null
     ]]></command>
-
     <configfiles>
         <configfile name="userconf" ><![CDATA[
 @STRING2VECTOR@
@@ -28,6 +28,7 @@
             <param name="mintotal" type="integer" min="1" value="3000" label="Min Transcripts" help="The minimum total transcripts required. Cells with less than mintotal transcripts are filtered out." />
             <param name="minexpr" type="integer" min="1" value="5" label="Min Expression" help="The minimum required transcript counts of a gene in the minimum number of cells (below)" />
             <param name="minnumber" type="integer" min="1" value="5" label="Min Cells" help="The minumum number of cells for gene expression to be counted"  />
+            <param name="hist_geq_one" type="boolean" checked="false" label="Count filtered features greater than or equal to 1" help="By default features are counted if they are above zero, but RaceID adds 0.1 to all counts after normalisation to create a non-zero dataset."  />
             <expand macro="use_defaults_no" >
                 <param name="knn" type="integer" min="0" value="10" label="K-nearest-neighbours" help="Number of nearest neighbors used to infer corresponding cell types in different batches" />
                 <param name="CGenes" type="text" optional="true" label="CGenes" help="Filter out genes with correlated expression for cell type inference" >
@@ -65,24 +66,22 @@
                         <param name="logscale" type="boolean" value="false" label="Log-transform data prior to PCA or ICA" help="" />
                     </when>
                 </conditional>
-                <param name="use_log" type="boolean" checked="false" label="Output Log?" />
             </expand>
-        </section>
+        </section>
     </inputs>
     <outputs>
         <data name="outpdf" format="pdf" label="${tool.name} on ${on_string}: PDF Report" />
         <data name="outrdat" format="rdata" label="${tool.name} on ${on_string}: RDS" />
-        <data name="outlog" format="txt" label="${tool.name} on ${on_string}: Log" >
-            <filter>use_log</filter>
-        </data>
+        <data name="outlog" format="txt" label="${tool.name} on ${on_string}: Metrics"  />
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="3">
             <!-- This is a file with a single word 'test', which prompts the scripts to use the test intestinalData in the library -->
             <param name="intable" value="use.intestinal" />
             <output name="outpdf" value="intestinal.filter.pdf" compare="sim_size" delta="50" />
+            <output name="outlog" value="intestinal.filter.log" />
         </test>
-        <test>
+        <test expect_num_outputs="3">
             <!-- defaults, feeding in a matrix with reduced filtering -->
             <param name="intable" value="matrix.tabular" />
             <section name="filt" >
@@ -93,7 +92,7 @@
             <output name="outrdat" value="matrix.filter.rdat" compare="sim_size" delta="300" />
             <output name="outpdf" value="matrix.filter.pdf" compare="sim_size" delta="10" />
         </test>
-        <test>
+        <test expect_num_outputs="3">
             <!-- defaults, but manually specified. No opts, no CC. Generates identical to above -->
             <param name="intable" value="use.intestinal" />
             <section name="filt" >
@@ -108,7 +107,15 @@
             </section>
             <output name="outpdf" value="intestinal.filter.pdf" compare="sim_size" delta="50" />
         </test>
-        <test>
+        <test expect_num_outputs="3">
+            <!-- defaults, but histogram adjustment  -->
+            <param name="intable" value="use.intestinal" />
+            <section name="filt" >
+                <param name="hist_geq_one" value="true" />
+            </section>
+            <output name="outpdf" value="matrix.filter.geqone.pdf" compare="sim_size" delta="10" />
+        </test>
+        <test expect_num_outputs="3">
             <!-- Advanced. Opts, CC used  -->
             <param name="intable" value="use.intestinal" />
             <section name="filt" >
@@ -136,7 +143,7 @@
             <output name="outpdf" value="intestinal_advanced.filter.pdf" compare="sim_size" delta="150" />
         </test>
     </tests>
-        <help><![CDATA[
+    <help><![CDATA[
 RaceID3
 =======

@@ -171,6 +178,6 @@
 A PDF report will be generated giving metrics about the library size and number of features as histograms, and additional metrics relating to cell-cycle correction will be produced if that option has been selected.

 ]]>
-        </help>
-        <expand macro="citations" />
+    </help>
+    <expand macro="citations" />
 </tool>
--- a/scripts/cluster.R	Thu Feb 28 17:40:54 2019 -0500
+++ b/scripts/cluster.R	Mon Apr 15 17:55:17 2019 -0400
@@ -1,5 +1,5 @@
 #!/usr/bin/env R
-VERSION = "0.3"
+VERSION = "0.4"

 args = commandArgs(trailingOnly = T)

@@ -28,6 +28,10 @@
     filt.lib <- log10(colSums(getfdata(sc)))
     filt.feat <- log10(colSums(getfdata(sc)>0))

+    if (filt.geqone){
+        filt.feat <- log10(colSums(getfdata(sc)>=1))
+    }
+
     br <- 50
     ## Determine limits on plots based on the unfiltered data
     ## (doesn't work, R rejects limits and norm data is too different to compare to exp data
@@ -124,11 +128,12 @@

         goi <- head(rownames(dg.goi.table), genelist.plotlim)
         print(plotmarkergenes(sc, goi))
-        print(do.call(mtext, c(paste("                               Cluster ",n), test)))  ## spacing is a hack
+        buffer <- paste(rep("", 36), collapse=" ")
+        print(do.call(mtext, c(paste(buffer, "Cluster ",n), test)))  ## spacing is a hack
         test$line=-1
-        print(do.call(mtext, c(paste("                               Sig. Genes"), test)))  ## spacing is a hack
+        print(do.call(mtext, c(paste(buffer, "Sig. Genes"), test)))  ## spacing is a hack
         test$line=-2
-        print(do.call(mtext, c(paste("                               (fc > ", genelist.foldchange,")"), test)))  ## spacing is a hack
+        print(do.call(mtext, c(paste(buffer, "(fc > ", genelist.foldchange,")"), test)))  ## spacing is a hack

     })
     write.table(df, file=out.genelist, sep="\t", quote=F)
@@ -139,10 +144,10 @@
 if (use.filtnormconf){
     sc <- do.filter(sc)
     message(paste(" - Source:: genes:",nrow(sc@expdata),", cells:",ncol(sc@expdata)))
-    message(paste(" - Filter:: genes:",nrow(sc@ndata),", cells:",ncol(sc@ndata)))
+    message(paste(" - Filter:: genes:",nrow(getfdata(sc)),", cells:",ncol(getfdata(sc))))
     message(paste("         :: ",
-                  sprintf("%.1f", 100 * nrow(sc@ndata)/nrow(sc@expdata)), "% of genes remain,",
-                  sprintf("%.1f", 100 * ncol(sc@ndata)/ncol(sc@expdata)), "% of cells remain"))
+                  sprintf("%.1f", 100 * nrow(getfdata(sc))/nrow(sc@expdata)), "% of genes remain,",
+                  sprintf("%.1f", 100 * ncol(getfdata(sc))/ncol(sc@expdata)), "% of cells remain"))
 }

 if (use.cluster){
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/intestinal.filter.log	Mon Apr 15 17:55:17 2019 -0400
@@ -0,0 +1,4 @@
+Loading test data from library
+ - Source:: genes: 20268 , cells: 431
+ - Filter:: genes: 2089 , cells: 287
+         ::  10.3 % of genes remain, 66.6 % of cells remain
Binary file test-data/matrix.filter.geqone.pdf has changed