Mercurial > repos > iuc > raceid_inspecttrajectory
diff macros_cluster.xml @ 0:e0e9b24d76aa draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit f880060c478d42202df5b78a81329f8af56b1138
author | iuc |
---|---|
date | Thu, 22 Nov 2018 04:42:18 -0500 |
parents | |
children | 4164c0da0a5d |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros_cluster.xml Thu Nov 22 04:42:18 2018 -0500 @@ -0,0 +1,403 @@ +<macros> + <macro name="cluster_inputs" > + <param name="intable" type="data" format="tabular" label="Count Matrix" /> + <section name="filt" title="Filtering" expanded="true" > + <param name="mintotal" type="integer" min="1" value="3000" label="Min Transcripts" help="The minimum total transcripts required. Cells with less than mintotal transcripts are filtered out." /> + <param name="minexpr" type="integer" min="1" value="5" label="Min Expression" help="The minimum required transcript counts of a gene in the minimum number of cells (below)" /> + <param name="minnumber" type="integer" min="1" value="5" label="Min Cells" help="The minumum number of cells for gene expression to be counted" /> + <expand macro="use_defaults_no" > + <param name="knn" type="integer" min="0" value="10" label="K-nearest-neighbours" help="Number of nearest neighbors used to infer corresponding cell types in different batches" /> + <param name="CGenes" type="text" optional="true" label="CGenes" help="Filter out genes with correlated expression for cell type inference" > + <expand macro="sanitize_string_vector" /> + </param> + <param name="FGenes" type="text" optional="true" label="FGenes" help="Explicitly filter out genes for cell type inference" > + <expand macro="sanitize_string_vector" /> + </param> + <param name="LBatch_regexes" type="text" optional="true" label="Batch Regex" help="List of regexes to capture experimental batches for batch effect correction" > + <expand macro="sanitize_string_vector" /> + </param> + <param name="ccor" type="float" value="0.4" label="CCor" help="Correlation coefficient used as a threshold for determining correlated genes" /> + <param name="bmode" type="select" label="Batch Mode" help="Method to regress out batch effects" > + <option value="RaceID" selected="true" >RaceID</option> + <option value="scran">SCRAN</option> + </param> + <conditional name="ccc" > + <param name="use" type="select" label="Perform Cell-cycle correction?" > + <option value="yes" >Yes</option> + <option value="no" selected="true" >No</option> + </param> + <when value="no" /> + <when value="yes" > + <param name="vset" type="text" optional="true" label="List of Gene Sets" > + <expand macro="sanitize_string_vector" /> + </param> + <param name="pvalue" type="float" value="0.01" min="0" max="1" label="P-value Cutoff" help="P-value cutoff for determining enriched components" /> + <param name="quant" type="float" value="0.01" min="0" max="1" label="Quantification Fraction" help="Upper and lower fraction of gene loadings use for determining enriched components" /> + <param name="ncomp" type="integer" min="0" optional="true" label="Number of components to use" help="If left blank, the maximum number of components are used" /><!-- 0 = NULL --> + <param name="dimr" type="boolean" value="true" label="Derive Components from saturation criterion" /> + <param name="mode" type="select" label="Type of Component Analysis" help="If ICA is selected, ensure that the number of components value above is sufficiently high" > + <option value="pca" selected="true">PCA</option> + <option value="ica">ICA</option> + </param> + <param name="logscale" type="boolean" value="false" label="Log-transform data prior to PCA or ICA" help="" /> + </when> + </conditional> + </expand> + </section> + <section name="clust" title="Clustering" expanded="true" > + <!-- CompDist --> + <param name="metric" type="select" label="Distance Metric" > + <option value="pearson" selected="true" >Pearson</option> + <option value="spearman">Spearman</option> + <option value="logpearson">Log Pearson</option> + <option value="euclidean">Euclidean</option> + </param> + <!-- ClustExp --> + <param name="funcluster" type="select" label="Clustering method" > + <option value="kmedoids" selected="true" >K-medoids</option> + <option value="kmeans">K-means</option> + <option value="hclust">H-Clust</option> + </param> + <expand macro="use_defaults_no" > + <!-- CompDist --> + <param name="fselect" type="boolean" value="true" label="Perform feature selection" /> + <param name="knn" type="integer" min="0" optional="true" label="KNN" help="Number of nearest neighbours for imputing gene expression" /><!-- 0: NULL --> + <!-- ClustExp --> + <param name="sat" type="boolean" checked="true" label="Saturation-based clustering?" help="Determine number of clusters on saturation point of the mean within-cluster dispersion as a function of the cluster number." /> + <param name="clustnr" type="integer" min="0" value="30" label="Max number of clusters using Saturation-by-mean" help="Max number of clusters for the derivation of the cluster number by the saturation of mean within-cluster-dispersion." /> + <param name="samp" type="integer" min="0" optional="true" label="Sample random number of cells" help="Number of random sample of cells used for the inference of cluster number and Jaccard similarity" /><!-- 0:NULL --> + <param name="cln" type="integer" min="0" optional="true" label="Number of clusters" /><!-- 0:Null --> + <param name="bootnr" type="integer" min="0" value="50" label="Number of booststrapping runs" /> + <param name="rseed" type="integer" value="17000" label="Random seed" /> + </expand> + </section> + <section name="outlier" title="Outliers" expanded="true" > + <!-- Find Outliers --> + <param name="outminc" type="integer" min="0" value="5" label="Minimum Transcripts" help="minimal transcript count of a gene in a clusters to be tested for being an outlier gene" /> + <param name="outlg" type="integer" min="1" value="2" label="Minimum Genes" help="Minimum number of outlier genes required for being an outlier cell" /> + <!-- RFCorrect --> + <param name="final" type="boolean" value="true" label="Plot Final Clusters?" help="Reclassification of cell types using out-of-bag analysis is performed based on the final clusters after outlier identification. If 'FALSE', then the cluster partition prior to outlier identification is used for reclassification." /> + <expand macro="use_defaults_no" > + <!-- Find Outliers --> + <param name="probthr" type="float" min="0" value="0.001" label="Outlier Probability Threshold" help="Probability threshold for the above specified minimum number of genes to be an outlier cell. This probability is computed from a negative binomial background model of expression in a cluster" /> + <param name="outdistquant" type="float" min="0" max="1" value="0.95" label="Outlier Distance Quantile" help="Outlier cells are merged to outlier clusters if their distance smaller than the outdistquant-quantile of the distance distribution of pairs of cells in the orginal clusters after outlier removal" /> + <!-- RFCorrect --> + <param name="nbtree" type="integer" optional="true" label="Number of trees to be built" /><!-- 0:Null --> + <param name="nbfactor" type="integer" min="0" value="5" label="Tree Factor" help="Number of trees based on the number of cells multiplied by this factor. Effective only if the number of trees parameter is set to 0" /> + <param name="rfseed" type="integer" value="12345" label="Random Seed" /> + </expand> + </section> + <section name="tsne" title="tSNE and FR" expanded="true" > + <!-- CompTSNE --> + <param name="perplexity" type="integer" min="0" value="30" label="Perplexity" help="Perplexity of the t-SNE map" /> + <!-- CompFR --> + <param name="knn" type="integer" min="0" value="10" label="KNN" help="Number of nearest neighbours used for the inference of the Fruchterman-Rheingold layout" /> + <expand macro="use_defaults_no" > + <!-- CompTSNE --> + <param name="initial_cmd" type="boolean" checked="true" label="tSNE map initialised by classical multidimensional scaling" /> + <param name="rseed_tsne" type="integer" value="15555" label="Random Seed (tSNE)" /> + <!-- CompFR --> + <param name="rseed_fr" type="integer" min="0" value="15555" label="Random Seed (FR)" /> + </expand> + </section> + <section name="extra" title="Extra Parameters" expanded="false" > + <param name="tablelim" type="integer" min="1" value="25" label="Table Limit" help="Top N genes to print per cluster" /> + <param name="plotlim" type="integer" min="1" value="10" label="Plot Limit" help="Top N genes to plot. Must be less than or equal to the Table Limit" /> + <param name="foldchange" type="float" min="0" value="1" label="Fold change" /> + <param name="pvalue" type="float" min="0" max="1" value="0.01" label="P-value Cutoff" help="P-value cutoff for the inference of differential gene expression" /> + </section> + </macro> + <macro name="cluster_tests" > + <test> + <!-- default test --> + <conditional name="tool" > + <param name="mode" value="cluster" /> + <!-- This is a file with a single word 'test', which prompts the scripts to use the test intestinalData in the library --> + <param name="intable" value="use.intestinal" /> + </conditional> + <output name="outgenelist" value="intestinal.genelist" /> + <output name="outpdf" value="intestinal.pdf" compare="sim_size" delta="50" /> + </test> + <test> + <!-- defaults, feeding in a matrix with reduced filtering --> + <conditional name="tool" > + <param name="mode" value="cluster" /> + <param name="intable" value="matrix.tabular" /> + <section name="filt" > + <param name="mintotal" value="1000" /> + <param name="minexpr" value="1" /> + <param name="minnumber" value="3" /> + </section> + <param name="use_log" value="true" /> + <output name="outgenelist" value="matrix.genelist" /> + <output name="outrdat" value="matrix.rdat" compare="sim_size" delta="15" /> + <output name="outpdf" value="matrix.pdf" compare="sim_size" delta="10" /> + <output name="outlog" value="matrix.log" /> + </conditional> + </test> + <test> + <!-- defaults, but manually specified. No opts, no CC. Generates identical to above --> + <conditional name="tool" > + <param name="mode" value="cluster" /> + <param name="intable" value="use.intestinal" /> + <section name="filt" > + <param name="mintotal" value="3000" /> + <param name="minexpr" value="5" /> + <param name="minnumber" value="5" /> + <expand macro="test_nondef" > + <param name="knn" value="10" /> + <param name="ccor" value="0.4" /> + <param name="bmode" value="RaceID" /> + </expand> + </section> + <section name="clust" > + <param name="metric" value="pearson" /> + <param name="funcluster" value="kmedoids" /> + <expand macro="test_nondef" > + <param name="fselect" value="true" /> + <param name="sat" value="true" /> + <param name="clustnr" value="30" /> + <param name="bootnr" value="50" /> + <param name="rseed" value="17000" /> + </expand> + </section> + <section name="outlier" > + <param name="outminc" value="5" /> + <param name="outlg" value="2" /> + <param name="final" value="false" /> + <expand macro="test_nondef" section_name="outlier" > + <param name="probthr" value="0.001" /> + <param name="outdistquant" value="0.95" /> + <param name="rfseed" value="12345" /> + <param name="nbfactor" value="5" /> + </expand> + </section> + <section name="tsne" > + <param name="perplexity" value="30" /> + <param name="knn" value="10" /> + <expand macro="test_nondef" section_name="tsne" > + <param name="initial_cmd" value="true" /> + <param name="rseed_tsne" value="15555" /> + <param name="rfseed_fr" value="15555" /> + </expand> + </section> + </conditional> + <output name="outgenelist" value="intestinal.genelist" /> + <output name="outpdf" value="intestinal.pdf" compare="sim_size" delta="50" /> + </test> + <test> + <!-- Advanced. Opts, CC used --> + <conditional name="tool" > + <param name="mode" value="cluster" /> + <param name="intable" value="use.intestinal" /> + <section name="filt" > + <param name="mintotal" value="2000" /> + <param name="minexpr" value="3" /> + <param name="minnumber" value="2" /> + <expand macro="test_nondef" > + <param name="knn" value="5" /> + <param name="ccor" value="0.5" /> + <param name="CGenes" value="Gga3,Ggact,Ggct" /> + <param name="FGenes" value="Zxdc,Zyg11a,Zyg11b,Zyx" /> + <param name="LBatch_regexes" value="^I5,^II5,^III5,^IV5d,^V5d" /> + <param name="bmode" value="scran" /> + <conditional name="ccc" > + <param name="use" value="yes" /> + <param name="pvalue" value="0.05" /> + <param name="quant" value="0.05" /> + <param name="ncomp" value="3" /> + <param name="dimr" value="true" /> + <param name="mode" value="pca" /> + <param name="logscale" value="true" /> + </conditional> + </expand> + </section> + <section name="clust" > + <param name="metric" value="euclidean" /> + <param name="funcluster" value="hclust" /> + <expand macro="test_nondef" > + <param name="fselect" value="false" /> + <param name="knn" value="5" /> + <param name="sat" value="false" /> + <param name="samp" value="10" /> + <param name="cln" value="10" /> + <param name="clustnr" value="10" /> + <param name="bootnr" value="30" /> + <param name="rseed" value="17000" /> + </expand> + </section> + <section name="outlier" > + <param name="outminc" value="3" /> + <param name="outlg" value="5" /> + <param name="final" value="true" /> + <expand macro="test_nondef" > + <param name="probthr" value="0.01" /> + <param name="outdistquant" value="0.5" /> + <param name="rfseed" value="12345" /> + <param name="nbfactor" value="5" /> + <param name="nbtree" value="10" /> + </expand> + </section> + <section name="tsne" > + <param name="perplexity" value="20" /> + <param name="knn" value="6" /> + <expand macro="test_nondef" > + <param name="initial_cmd" value="false" /> + <param name="rseed_tsne" value="15555" /> + <param name="rfseed_fr" value="15555" /> + </expand> + </section> + </conditional> + <output name="outgenelist" value="intestinal_advanced.genelist" /> + <output name="outpdf" value="intestinal_advanced.pdf" compare="sim_size" delta="150" /> + </test> + </macro> + <token name="@FILTNORM_CHEETAH@"><![CDATA[ +## Perform do.filter +use.filtnormconf = TRUE + +## Perform do.cluster, do.outlier, do.clustmap, mkgenelist +use.cluster = FALSE + +in.table = read.table( + '${intable}', + stringsAsFactors = F, + na.strings=c("NA", "-", "?", "."), + sep='\t', + header=TRUE, + row.names=1 +) + +## Hidden flag to use test data instead +## see: test-data/use.intestinal + +use.test.data = (names(in.table)[1] == "test") + +sc = NULL +if (use.test.data) { + sc = SCseq(intestinalData) + message("Loading test data from library") +} else { + sc = SCseq(in.table) +} + + +filt = formals(filterdata) +filt.ccc = formals(CCcorrect) +filt.use.ccorrect = FALSE +filt.lbatch.regexes = NULL + +filt\$mintotal = as.integer( '$filt.mintotal' ) +filt\$minexpr = as.integer( '$filt.minexpr' ) +filt\$minnumber = as.integer( '$filt.minnumber' ) +#if str($filt.use.def) == "no": +filt\$knn = as.integer( '$filt.use.knn' ) +filt\$ccor = as.numeric( '$filt.use.ccor' ) +filt\$bmode = as.character( '$filt.use.bmode' ) + #if $filt.use.LBatch_regexes: +filt.lbatch.regexes = string2textvector( '$filt.use.LBatch_regexes' ) + #end if + #if $filt.use.CGenes: +filt\$CGenes = string2textvector( '$filt.use.CGenes' ) + #end if + #if $filt.use.FGenes: +filt\$FGenes = string2textvector( '$filt.use.FGenes' ) + #end if + #if str($filt.use.ccc.use) == "yes" +filt.use.ccorrect = TRUE + #if $filt.use.ccc.vset: +filt.ccc\$vset = string2textvector( '$filt.use.ccc.vset' ) + #end if + #if $filt.use.ccc.ncomp: +filt.ccc\$nComp = as.integer( '$filt.use.ccc.ncomp' ) + #end if +filt.ccc\$pvalue = as.numeric( '$filt.use.ccc.pvalue' ) +filt.ccc\$quant = as.numeric( '$filt.use.ccc.quant' ) +filt.ccc\$dimR = as.logical( '$filt.use.ccc.dimr' ) +filt.ccc\$mode = as.character( '$filt.use.ccc.mode.value' ) +filt.ccc\$logscale = as.logical( '$filt.use.ccc.logscale' ) + #end if +#end if + +out.pdf = '${outpdf}' +out.rdat = '${outrdat}' + +]]></token> + <token name="@CLUSTER_CHEETAH@"><![CDATA[ + +in.rdat = readRDS('${inputrds}') + +sc = in.rdat + +## Perform do.filter +use.filtnormconf = FALSE + +## Perform do.cluster, do.outlier, do.clustmap, mkgenelist +use.cluster = TRUE + + +clust.compdist = formals(compdist) +clust.clustexp = formals(clustexp) +clust.compdist\$metric = as.character( '$clust.metric' ) +clust.clustexp\$FUNcluster = as.character( '$clust.funcluster' ) + +#if str($clust.use.def) == "no": + +clust.compdist\$FSelect = as.logical( '$clust.use.fselect' ) + #if $clust.use.knn: +clust.compdist\$knn = as.integer( '$clust.use.knn' ) + #end if +clust.clustexp\$sat = as.logical( '$clust.use.sat' ) + #if $clust.use.samp: +clust.clustexp\$samp = as.integer( '$clust.use.samp' ) + #end if + #if $clust.use.cln: +clust.clustexp\$cln = as.integer( '$clust.use.cln' ) +clust.clustexp\$clustnr = as.integer( '$clust.use.clustnr' ) +clust.clustexp\$bootnr = as.integer( '$clust.use.bootnr' ) +##clust.clustexp\$rseed = as.integer( '$clust.use.rseed' ) + #end if +#end if + +outlier.use.randomforest = FALSE +outlier.findoutliers = formals(findoutliers) +outlier.clustheatmap = formals(clustheatmap) +outlier.rfcorrect = formals(rfcorrect) + +outlier.findoutliers\$outminc = as.integer( '$outlier.outminc' ) +outlier.findoutliers\$outlg = as.integer( '$outlier.outlg' ) +outlier.rfcorrect\$final = as.logical( '$outlier.final' ) + +#if str($outlier.use.def) == "no": + #if $outlier.use.nbtree: +outlier.rfcorrect\$nbtree = as.integer( '$outlier.use.nbtree' ) + #end if +outlier.findoutliers\$probthr = as.numeric( '$outlier.use.probthr' ) +outlier.findoutliers\$outdistquant = as.numeric( '$outlier.use.outdistquant' ) +##outlier.rfcorrect\$rfseed = as.integer( '$outlier.use.rfseed' ) +outlier.rfcorrect\$nbfactor = as.integer( '$outlier.use.nbfactor' ) +#end if + +cluster.comptsne = formals(comptsne) +cluster.compfr = formals(compfr) + +cluster.comptsne\$perplexity = as.integer( '$tsne.perplexity' ) +cluster.compfr\$knn = as.integer( '$tsne.knn' ) +#if str($tsne.use.def) == "no": +cluster.comptsne\$initial_cmd = as.logical( '$tsne.use.initial_cmd' ) +cluster.comptsne\$rseed = as.integer( '$tsne.use.rseed_tsne' ) +cluster.compfr\$rseed = as.integer( '$tsne.use.rseed_fr' ) +#end if + +genelist.tablelim = as.integer( '$extra.tablelim' ) +genelist.plotlim = as.integer( '$extra.plotlim' ) +genelist.foldchange = as.integer( '$extra.foldchange' ) +genelist.pvalue = as.numeric( '$extra.pvalue' ) + +out.pdf = '${outpdf}' +out.rdat = '${outrdat}' +out.genelist = '${outgenelist}' + +]]> + </token> +</macros>