Mercurial > repos > iuc > raceid_filtnormconf
changeset 3:d55e29ac02e3 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit d94b3b8a4c7cf8c604279eb1eea24d32b3868922
| field | value |
|---|---|
| author | iuc |
| date | Mon, 15 Apr 2019 17:55:17 -0400 |
| parents | 56a093c2a3f9 |
| children | 5d5b14dbd092 |
| files | macros_cluster.xml raceid_filtnormconf.xml scripts/cluster.R test-data/intestinal.filter.log test-data/matrix.filter.geqone.pdf |
| diffstat | 5 files changed, 39 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/macros_cluster.xml Thu Feb 28 17:40:54 2019 -0500 +++ b/macros_cluster.xml Mon Apr 15 17:55:17 2019 -0400 @@ -287,6 +287,7 @@ filt.use.ccorrect = FALSE filt.lbatch.regexes = NULL +filt.geqone = as.logical( '$filt.hist_geq_one' ) filt\$mintotal = as.integer( '$filt.mintotal' ) filt\$minexpr = as.integer( '$filt.minexpr' ) filt\$minnumber = as.integer( '$filt.minnumber' )
--- a/raceid_filtnormconf.xml Thu Feb 28 17:40:54 2019 -0500 +++ b/raceid_filtnormconf.xml Mon Apr 15 17:55:17 2019 -0400 @@ -1,4 +1,4 @@ -<tool id="raceid_filtnormconf" name="Filtering, Normalisation, and Confounder Removal using RaceID" version="@VERSION_RACEID@.@VERSION_PACKAGE@.2" > +<tool id="raceid_filtnormconf" name="Filtering, Normalisation, and Confounder Removal using RaceID" version="@VERSION_RACEID@.@VERSION_PACKAGE@.3" > <description>generates a normalised and filtered count matrix of single-cell RNA data</description> <macros> <import>macros.xml</import> @@ -7,13 +7,13 @@ <expand macro="requirements" /> <version_command><![CDATA[ Rscript '$__tool_directory__/scripts/cluster.R' @GET_VERSION@ -]]></version_command> +]]> + </version_command> <command detect_errors="exit_code"><![CDATA[ #set bin = 'cluster.R' Rscript '$__tool_directory__/scripts/$bin' '$userconf' 2> '$outlog' > /dev/null ]]></command> - <configfiles> <configfile name="userconf" ><![CDATA[ @STRING2VECTOR@ @@ -28,6 +28,7 @@ <param name="mintotal" type="integer" min="1" value="3000" label="Min Transcripts" help="The minimum total transcripts required. Cells with less than mintotal transcripts are filtered out." /> <param name="minexpr" type="integer" min="1" value="5" label="Min Expression" help="The minimum required transcript counts of a gene in the minimum number of cells (below)" /> <param name="minnumber" type="integer" min="1" value="5" label="Min Cells" help="The minumum number of cells for gene expression to be counted" /> + <param name="hist_geq_one" type="boolean" checked="false" label="Count filtered features greater than or equal to 1" help="By default features are counted if they are above zero, but RaceID adds 0.1 to all counts after normalisation to create a non-zero dataset." 
/> <expand macro="use_defaults_no" > <param name="knn" type="integer" min="0" value="10" label="K-nearest-neighbours" help="Number of nearest neighbors used to infer corresponding cell types in different batches" /> <param name="CGenes" type="text" optional="true" label="CGenes" help="Filter out genes with correlated expression for cell type inference" > @@ -65,24 +66,22 @@ <param name="logscale" type="boolean" value="false" label="Log-transform data prior to PCA or ICA" help="" /> </when> </conditional> - <param name="use_log" type="boolean" checked="false" label="Output Log?" /> </expand> - </section> + </section> </inputs> <outputs> <data name="outpdf" format="pdf" label="${tool.name} on ${on_string}: PDF Report" /> <data name="outrdat" format="rdata" label="${tool.name} on ${on_string}: RDS" /> - <data name="outlog" format="txt" label="${tool.name} on ${on_string}: Log" > - <filter>use_log</filter> - </data> + <data name="outlog" format="txt" label="${tool.name} on ${on_string}: Metrics" /> </outputs> <tests> - <test> + <test expect_num_outputs="3"> <!-- This is a file with a single word 'test', which prompts the scripts to use the test intestinalData in the library --> <param name="intable" value="use.intestinal" /> <output name="outpdf" value="intestinal.filter.pdf" compare="sim_size" delta="50" /> + <output name="outlog" value="intestinal.filter.log" /> </test> - <test> + <test expect_num_outputs="3"> <!-- defaults, feeding in a matrix with reduced filtering --> <param name="intable" value="matrix.tabular" /> <section name="filt" > @@ -93,7 +92,7 @@ <output name="outrdat" value="matrix.filter.rdat" compare="sim_size" delta="300" /> <output name="outpdf" value="matrix.filter.pdf" compare="sim_size" delta="10" /> </test> - <test> + <test expect_num_outputs="3"> <!-- defaults, but manually specified. No opts, no CC. 
Generates identical to above --> <param name="intable" value="use.intestinal" /> <section name="filt" > @@ -108,7 +107,15 @@ </section> <output name="outpdf" value="intestinal.filter.pdf" compare="sim_size" delta="50" /> </test> - <test> + <test expect_num_outputs="3"> + <!-- defaults, but histogram adjustment --> + <param name="intable" value="use.intestinal" /> + <section name="filt" > + <param name="hist_geq_one" value="true" /> + </section> + <output name="outpdf" value="matrix.filter.geqone.pdf" compare="sim_size" delta="10" /> + </test> + <test expect_num_outputs="3"> <!-- Advanced. Opts, CC used --> <param name="intable" value="use.intestinal" /> <section name="filt" > @@ -136,7 +143,7 @@ <output name="outpdf" value="intestinal_advanced.filter.pdf" compare="sim_size" delta="150" /> </test> </tests> - <help><![CDATA[ + <help><![CDATA[ RaceID3 ======= @@ -171,6 +178,6 @@ A PDF report will be generated giving metrics about the library size and number of features as histograms, and additional metrics relating to cell-cycle correction will be produced if that option has been selected. ]]> - </help> - <expand macro="citations" /> + </help> + <expand macro="citations" /> </tool>
--- a/scripts/cluster.R Thu Feb 28 17:40:54 2019 -0500 +++ b/scripts/cluster.R Mon Apr 15 17:55:17 2019 -0400 @@ -1,5 +1,5 @@ #!/usr/bin/env R -VERSION = "0.3" +VERSION = "0.4" args = commandArgs(trailingOnly = T) @@ -28,6 +28,10 @@ filt.lib <- log10(colSums(getfdata(sc))) filt.feat <- log10(colSums(getfdata(sc)>0)) + if (filt.geqone){ + filt.feat <- log10(colSums(getfdata(sc)>=1)) + } + br <- 50 ## Determine limits on plots based on the unfiltered data ## (doesn't work, R rejects limits and norm data is too different to compare to exp data @@ -124,11 +128,12 @@ goi <- head(rownames(dg.goi.table), genelist.plotlim) print(plotmarkergenes(sc, goi)) - print(do.call(mtext, c(paste(" Cluster ",n), test))) ## spacing is a hack + buffer <- paste(rep("", 36), collapse=" ") + print(do.call(mtext, c(paste(buffer, "Cluster ",n), test))) ## spacing is a hack test$line=-1 - print(do.call(mtext, c(paste(" Sig. Genes"), test))) ## spacing is a hack + print(do.call(mtext, c(paste(buffer, "Sig. Genes"), test))) ## spacing is a hack test$line=-2 - print(do.call(mtext, c(paste(" (fc > ", genelist.foldchange,")"), test))) ## spacing is a hack + print(do.call(mtext, c(paste(buffer, "(fc > ", genelist.foldchange,")"), test))) ## spacing is a hack }) write.table(df, file=out.genelist, sep="\t", quote=F) @@ -139,10 +144,10 @@ if (use.filtnormconf){ sc <- do.filter(sc) message(paste(" - Source:: genes:",nrow(sc@expdata),", cells:",ncol(sc@expdata))) - message(paste(" - Filter:: genes:",nrow(sc@ndata),", cells:",ncol(sc@ndata))) + message(paste(" - Filter:: genes:",nrow(getfdata(sc)),", cells:",ncol(getfdata(sc)))) message(paste(" :: ", - sprintf("%.1f", 100 * nrow(sc@ndata)/nrow(sc@expdata)), "% of genes remain,", - sprintf("%.1f", 100 * ncol(sc@ndata)/ncol(sc@expdata)), "% of cells remain")) + sprintf("%.1f", 100 * nrow(getfdata(sc))/nrow(sc@expdata)), "% of genes remain,", + sprintf("%.1f", 100 * ncol(getfdata(sc))/ncol(sc@expdata)), "% of cells remain")) } if (use.cluster){