Mercurial > repos > iuc > raceid_filtnormconf
diff raceid_filtnormconf.xml @ 0:8dc8ff057b0f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit f880060c478d42202df5b78a81329f8af56b1138
author | iuc |
---|---|
date | Thu, 22 Nov 2018 04:44:44 -0500 |
parents | |
children | 01290f30211f |
line wrap: on
line diff
<!--
  Galaxy tool wrapper: filtering, normalisation and confounder removal with RaceID.
  Recovered from the changeset that added b/raceid_filtnormconf.xml as a new file
  (Thu Nov 22 04:44:44 2018 -0500).
-->
<tool id="raceid_filtnormconf" name="Filtering, Normalisation, and Confounder Removal using RaceID" version="@VERSION_RACEID@.@VERSION_PACKAGE@.1" >
    <description>generates a normalised and filtered count matrix of single-cell RNA data</description>
    <!-- The @...@ tokens and the macros expanded below are not defined in this file;
         presumably they come from the two imported macro files. -->
    <macros>
        <import>macros.xml</import>
        <import>macros_cluster.xml</import>
    </macros>
    <expand macro="requirements" />
    <version_command><![CDATA[
Rscript '$__tool_directory__/scripts/cluster.R' @GET_VERSION@
]]></version_command>

    <!-- Runs scripts/cluster.R on the generated 'userconf' file. Standard error is
         written to the 'outlog' dataset and standard output is discarded; job
         failure is detected from the exit code only. -->
    <command detect_errors="exit_code"><![CDATA[
#set bin = 'cluster.R'
Rscript '$__tool_directory__/scripts/$bin' '$userconf' 2> '$outlog' > /dev/null
    ]]></command>

    <!-- The configuration file handed to cluster.R is assembled entirely from two tokens. -->
    <configfiles>
        <configfile name="userconf" ><![CDATA[
@STRING2VECTOR@

@FILTNORM_CHEETAH@
]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="intable" type="data" format="tabular" label="Count Matrix" />
        <section name="filt" title="Filtering" expanded="true" >
            <param name="mintotal" type="integer" min="1" value="3000" label="Min Transcripts" help="The minimum total transcripts required. Cells with less than mintotal transcripts are filtered out." />
            <param name="minexpr" type="integer" min="1" value="5" label="Min Expression" help="The minimum required transcript counts of a gene in the minimum number of cells (below)" />
            <param name="minnumber" type="integer" min="1" value="5" label="Min Cells" help="The minimum number of cells for gene expression to be counted" />
            <!-- Everything inside this expand is only offered through the use_defaults_no
                 macro. NOTE(review): the wrapper element that macro generates is not
                 visible here; confirm how it nests these parameters. -->
            <expand macro="use_defaults_no" >
                <param name="knn" type="integer" min="0" value="10" label="K-nearest-neighbours" help="Number of nearest neighbors used to infer corresponding cell types in different batches" />
                <param name="CGenes" type="text" optional="true" label="CGenes" help="Filter out genes with correlated expression for cell type inference" >
                    <expand macro="sanitize_string_vector" />
                </param>
                <param name="FGenes" type="text" optional="true" label="FGenes" help="Explicitly filter out genes for cell type inference" >
                    <expand macro="sanitize_string_vector" />
                </param>
                <param name="LBatch_regexes" type="text" optional="true" label="Batch Regex" help="List of regexes to capture experimental batches for batch effect correction" >
                    <expand macro="sanitize_string_vector" />
                </param>
                <param name="ccor" type="float" value="0.4" label="CCor" help="Correlation coefficient used as a threshold for determining correlated genes" />
                <param name="bmode" type="select" label="Batch Mode" help="Method to regress out batch effects" >
                    <option value="RaceID" selected="true" >RaceID</option>
                    <option value="scran">SCRAN</option>
                </param>
                <!-- Cell-cycle correction is off by default; its settings only appear when enabled. -->
                <conditional name="ccc" >
                    <param name="use" type="select" label="Perform Cell-cycle correction?" >
                        <option value="yes" >Yes</option>
                        <option value="no" selected="true" >No</option>
                    </param>
                    <when value="no" />
                    <when value="yes" >
                        <param name="vset" type="text" optional="true" label="List of Gene Sets" >
                            <expand macro="sanitize_string_vector" />
                        </param>
                        <param name="pvalue" type="float" value="0.01" min="0" max="1" label="P-value Cutoff" help="P-value cutoff for determining enriched components" />
                        <param name="quant" type="float" value="0.01" min="0" max="1" label="Quantification Fraction" help="Upper and lower fraction of gene loadings used for determining enriched components" />
                        <param name="ncomp" type="integer" min="0" optional="true" label="Number of components to use" help="If left blank, the maximum number of components are used" /><!-- 0 = NULL -->
                        <!-- Boolean defaults are declared with 'checked' (as 'use_log' does below);
                             a 'value' attribute does not set the default state of a boolean param. -->
                        <param name="dimr" type="boolean" checked="true" label="Derive Components from saturation criterion" />
                        <param name="mode" type="select" label="Type of Component Analysis" help="If ICA is selected, ensure that the number of components value above is sufficiently high" >
                            <option value="pca" selected="true">PCA</option>
                            <option value="ica">ICA</option>
                        </param>
                        <param name="logscale" type="boolean" checked="false" label="Log-transform data prior to PCA or ICA" help="" />
                    </when>
                </conditional>
                <param name="use_log" type="boolean" checked="false" label="Output Log?" />
            </expand>
        </section>
    </inputs>
    <outputs>
        <data name="outpdf" format="pdf" label="${tool.name} on ${on_string}: PDF Report" />
        <data name="outrdat" format="rdata" label="${tool.name} on ${on_string}: RDS" />
        <!-- NOTE(review): 'use_log' is declared inside section 'filt' and inside the
             use_defaults_no macro; confirm that this bare name resolves when the
             filter expression is evaluated, otherwise the filter may not take effect. -->
        <data name="outlog" format="txt" label="${tool.name} on ${on_string}: Log" >
            <filter>use_log</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <!-- This is a file with a single word 'test', which prompts the scripts to use the test intestinalData in the library -->
            <param name="intable" value="use.intestinal" />
            <output name="outpdf" value="intestinal.filter.pdf" compare="sim_size" delta="50" />
        </test>
        <test>
            <!-- defaults, feeding in a matrix with reduced filtering -->
            <param name="intable" value="matrix.tabular" />
            <section name="filt" >
                <param name="mintotal" value="1050" />
                <param name="minexpr" value="1" />
                <param name="minnumber" value="3" />
            </section>
            <output name="outrdat" value="matrix.filter.rdat" compare="sim_size" delta="300" />
            <output name="outpdf" value="matrix.filter.pdf" compare="sim_size" delta="10" />
        </test>
        <test>
            <!-- defaults, but manually specified. No opts, no CC. Generates identical to above -->
            <param name="intable" value="use.intestinal" />
            <section name="filt" >
                <param name="mintotal" value="3000" />
                <param name="minexpr" value="5" />
                <param name="minnumber" value="5" />
                <expand macro="test_nondef" >
                    <param name="knn" value="10" />
                    <param name="ccor" value="0.4" />
                    <param name="bmode" value="RaceID" />
                </expand>
            </section>
            <output name="outpdf" value="intestinal.filter.pdf" compare="sim_size" delta="50" />
        </test>
        <test>
            <!-- Advanced. Opts, CC used -->
            <param name="intable" value="use.intestinal" />
            <section name="filt" >
                <param name="mintotal" value="2000" />
                <param name="minexpr" value="3" />
                <param name="minnumber" value="2" />
                <expand macro="test_nondef" >
                    <param name="knn" value="5" />
                    <param name="ccor" value="0.5" />
                    <param name="CGenes" value="Gga3,Ggact,Ggct" />
                    <param name="FGenes" value="Zxdc,Zyg11a,Zyg11b,Zyx" />
                    <param name="LBatch_regexes" value="^I5,^II5,^III5,^IV5d,^V5d" />
                    <param name="bmode" value="scran" />
                    <conditional name="ccc" >
                        <param name="use" value="yes" />
                        <param name="pvalue" value="0.05" />
                        <param name="quant" value="0.05" />
                        <param name="ncomp" value="3" />
                        <param name="dimr" value="true" />
                        <param name="mode" value="pca" />
                        <param name="logscale" value="true" />
                    </conditional>
                </expand>
            </section>
            <output name="outpdf" value="intestinal_advanced.filter.pdf" compare="sim_size" delta="150" />
        </test>
    </tests>
    <help><![CDATA[
RaceID3
=======

RaceID is a clustering algorithm for the identification of cell types from single-cell RNA-sequencing data. It was specifically designed for the detection of rare cells which correspond to outliers in conventional clustering methods.

This module performs filtering, normalisation, and batch effect removal in the same step.


Example Usage: Inspecting the Aggregated Expression for a Group of Genes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Our cells come from 5 different batches (I5,II5,III5,IV5,V5) and are labelled to reflect this (i.e. "I5_1", "I5_2", ..., "I5_129", "II5_1", ..., "V5_236" )

We wish to filter out the genes Lpca5 and Atk2 which we know in advance will saturate our analysis with unwanted expression.

We will also be interested in the cluster that contains significant expression for Apoa genes (Apoa1, Apoa1bp, Apoa2, Apoa4, Apoa5).

First, we must load in our count matrix in order to correct for batch effects, filter out unwanted genes, and compute our clusters and outliers.

* *Mode of Analysis* → **Cluster**

* *Count Matrix* → [input tabular]

* Filtering:

  * *Use Defaults?* → **No**

  * *Batch Regex* → "^I5,^II5,^III5,^IV5,^V5"

  * *FGenes* → "Lpca5,Atk2"

A PDF report will be generated giving metrics about the library size and number of features as histograms, and additional metrics relating to cell-cycle correction will be produced if that option has been selected.

]]>
    </help>
    <expand macro="citations" />
</tool>