Mercurial > repos > iuc > raceid_filtnormconf
comparison raceid_filtnormconf.xml @ 0:8dc8ff057b0f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit f880060c478d42202df5b78a81329f8af56b1138
author | iuc |
---|---|
date | Thu, 22 Nov 2018 04:44:44 -0500 |
parents | |
children | 01290f30211f |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8dc8ff057b0f |
---|---|
1 <tool id="raceid_filtnormconf" name="Filtering, Normalisation, and Confounder Removal using RaceID" version="@VERSION_RACEID@.@VERSION_PACKAGE@.1" > | |
2 <description>generates a normalised and filtered count matrix of single-cell RNA data</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 <import>macros_cluster.xml</import> | |
6 </macros> | |
7 <expand macro="requirements" /> | |
8 <version_command><![CDATA[ | |
9 Rscript '$__tool_directory__/scripts/cluster.R' @GET_VERSION@ | |
10 ]]></version_command> | |
11 | |
12 <command detect_errors="exit_code"><![CDATA[ | |
13 #set bin = 'cluster.R' | |
14 Rscript '$__tool_directory__/scripts/$bin' '$userconf' 2> '$outlog' > /dev/null | |
15 ]]></command> | |
16 | |
17 <configfiles> | |
18 <configfile name="userconf" ><![CDATA[ | |
19 @STRING2VECTOR@ | |
20 | |
21 @FILTNORM_CHEETAH@ | |
22 ]]> | |
23 </configfile> | |
24 </configfiles> | |
25 <inputs> | |
26 <param name="intable" type="data" format="tabular" label="Count Matrix" /> | |
27 <section name="filt" title="Filtering" expanded="true" > | |
28 <param name="mintotal" type="integer" min="1" value="3000" label="Min Transcripts" help="The minimum total transcripts required. Cells with less than mintotal transcripts are filtered out." /> | |
29 <param name="minexpr" type="integer" min="1" value="5" label="Min Expression" help="The minimum required transcript counts of a gene in the minimum number of cells (below)" /> | |
30 <param name="minnumber" type="integer" min="1" value="5" label="Min Cells" help="The minimum number of cells for gene expression to be counted" /> | |
31 <expand macro="use_defaults_no" > | |
32 <param name="knn" type="integer" min="0" value="10" label="K-nearest-neighbours" help="Number of nearest neighbors used to infer corresponding cell types in different batches" /> | |
33 <param name="CGenes" type="text" optional="true" label="CGenes" help="Filter out genes with correlated expression for cell type inference" > | |
34 <expand macro="sanitize_string_vector" /> | |
35 </param> | |
36 <param name="FGenes" type="text" optional="true" label="FGenes" help="Explicitly filter out genes for cell type inference" > | |
37 <expand macro="sanitize_string_vector" /> | |
38 </param> | |
39 <param name="LBatch_regexes" type="text" optional="true" label="Batch Regex" help="List of regexes to capture experimental batches for batch effect correction" > | |
40 <expand macro="sanitize_string_vector" /> | |
41 </param> | |
42 <param name="ccor" type="float" value="0.4" label="CCor" help="Correlation coefficient used as a threshold for determining correlated genes" /> | |
43 <param name="bmode" type="select" label="Batch Mode" help="Method to regress out batch effects" > | |
44 <option value="RaceID" selected="true" >RaceID</option> | |
45 <option value="scran">SCRAN</option> | |
46 </param> | |
47 <conditional name="ccc" ><!-- cell-cycle correction: the options below are only offered when "use" is yes --> | |
48 <param name="use" type="select" label="Perform Cell-cycle correction?" > | |
49 <option value="yes" >Yes</option> | |
50 <option value="no" selected="true" >No</option> | |
51 </param> | |
52 <when value="no" /> | |
53 <when value="yes" > | |
54 <param name="vset" type="text" optional="true" label="List of Gene Sets" > | |
55 <expand macro="sanitize_string_vector" /> | |
56 </param> | |
57 <param name="pvalue" type="float" value="0.01" min="0" max="1" label="P-value Cutoff" help="P-value cutoff for determining enriched components" /> | |
58 <param name="quant" type="float" value="0.01" min="0" max="1" label="Quantification Fraction" help="Upper and lower fraction of gene loadings used for determining enriched components" /> | |
59 <param name="ncomp" type="integer" min="0" optional="true" label="Number of components to use" help="If left blank, the maximum number of components is used" /><!-- 0 = NULL --> | |
60 <param name="dimr" type="boolean" checked="true" label="Derive Components from saturation criterion" /><!-- default-on: boolean params take their default from "checked" --> | |
61 <param name="mode" type="select" label="Type of Component Analysis" help="If ICA is selected, ensure that the number of components value above is sufficiently high" > | |
62 <option value="pca" selected="true">PCA</option> | |
63 <option value="ica">ICA</option> | |
64 </param> | |
65 <param name="logscale" type="boolean" checked="false" label="Log-transform data prior to PCA or ICA" help="" /> | |
66 </when> | |
67 </conditional> | |
68 <param name="use_log" type="boolean" checked="false" label="Output Log?" /> | |
69 </expand> | |
70 </section> | |
71 </inputs> | |
72 <outputs> | |
73 <data name="outpdf" format="pdf" label="${tool.name} on ${on_string}: PDF Report" /> | |
74 <data name="outrdat" format="rdata" label="${tool.name} on ${on_string}: RDS" /> | |
75 <data name="outlog" format="txt" label="${tool.name} on ${on_string}: Log" > | |
76 <filter>use_log</filter> | |
77 </data> | |
78 </outputs> | |
79 <tests> | |
80 <test> | |
81 <!-- This is a file with a single word 'test', which prompts the scripts to use the test intestinalData in the library --> | |
82 <param name="intable" value="use.intestinal" /> | |
83 <output name="outpdf" value="intestinal.filter.pdf" compare="sim_size" delta="50" /> | |
84 </test> | |
85 <test> | |
86 <!-- defaults, feeding in a matrix with reduced filtering --> | |
87 <param name="intable" value="matrix.tabular" /> | |
88 <section name="filt" > | |
89 <param name="mintotal" value="1050" /> | |
90 <param name="minexpr" value="1" /> | |
91 <param name="minnumber" value="3" /> | |
92 </section> | |
93 <output name="outrdat" value="matrix.filter.rdat" compare="sim_size" delta="300" /> | |
94 <output name="outpdf" value="matrix.filter.pdf" compare="sim_size" delta="10" /> | |
95 </test> | |
96 <test> | |
97 <!-- defaults, but manually specified. No opts, no CC. Generates output identical to the first test --> | |
98 <param name="intable" value="use.intestinal" /> | |
99 <section name="filt" > | |
100 <param name="mintotal" value="3000" /> | |
101 <param name="minexpr" value="5" /> | |
102 <param name="minnumber" value="5" /> | |
103 <expand macro="test_nondef" > | |
104 <param name="knn" value="10" /> | |
105 <param name="ccor" value="0.4" /> | |
106 <param name="bmode" value="RaceID" /> | |
107 </expand> | |
108 </section> | |
109 <output name="outpdf" value="intestinal.filter.pdf" compare="sim_size" delta="50" /> | |
110 </test> | |
111 <test> | |
112 <!-- Advanced. Opts, CC used --> | |
113 <param name="intable" value="use.intestinal" /> | |
114 <section name="filt" > | |
115 <param name="mintotal" value="2000" /> | |
116 <param name="minexpr" value="3" /> | |
117 <param name="minnumber" value="2" /> | |
118 <expand macro="test_nondef" > | |
119 <param name="knn" value="5" /> | |
120 <param name="ccor" value="0.5" /> | |
121 <param name="CGenes" value="Gga3,Ggact,Ggct" /> | |
122 <param name="FGenes" value="Zxdc,Zyg11a,Zyg11b,Zyx" /> | |
123 <param name="LBatch_regexes" value="^I5,^II5,^III5,^IV5d,^V5d" /> | |
124 <param name="bmode" value="scran" /> | |
125 <conditional name="ccc" > | |
126 <param name="use" value="yes" /> | |
127 <param name="pvalue" value="0.05" /> | |
128 <param name="quant" value="0.05" /> | |
129 <param name="ncomp" value="3" /> | |
130 <param name="dimr" value="true" /> | |
131 <param name="mode" value="pca" /> | |
132 <param name="logscale" value="true" /> | |
133 </conditional> | |
134 </expand> | |
135 </section> | |
136 <output name="outpdf" value="intestinal_advanced.filter.pdf" compare="sim_size" delta="150" /> | |
137 </test> | |
138 </tests> | |
139 <help><![CDATA[ | |
140 RaceID3 | |
141 ======= | |
142 | |
143 RaceID is a clustering algorithm for the identification of cell types from single-cell RNA-sequencing data. It was specifically designed for the detection of rare cells which correspond to outliers in conventional clustering methods. | |
144 | |
145 This module performs filtering, normalisation, and batch effect removal in the same step. | |
146 | |
147 | |
148 Example Usage: Inspecting the Aggregated Expression for a Group of Genes | |
149 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
150 | |
151 Our cells come from 5 different batches (I5,II5,III5,IV5,V5) and are labelled to reflect this (i.e. "I5_1", "I5_2", ..., "I5_129", "II5_1", ..., "V5_236" ) | |
152 | |
153 We wish to filter out the genes Lpca5 and Atk2, which we know in advance will saturate our analysis with unwanted expression. | |
154 | |
155 We will also be interested in the cluster that contains significant expression for Apoa genes (Apoa1, Apoa1bp, Apoa2, Apoa4, Apoa5). | |
156 | |
157 First, we must load in our count matrix in order to correct for batch effects, filter out unwanted genes, and compute our clusters and outliers. | |
158 | |
159 * *Mode of Analysis* → **Cluster** | |
160 | |
161 * *Count Matrix* → [input tabular] | |
162 | |
163 * Filtering: | |
164 | |
165 * *Use Defaults?* → **No** | |
166 | |
167 * *Batch Regex* → "^I5,^II5,^III5,^IV5,^V5" | |
168 | |
169 * *FGenes* → "Lpca5,Atk2" | |
170 | |
171 A PDF report will be generated giving metrics about the library size and number of features as histograms, and additional metrics relating to cell-cycle correction will be produced if that option has been selected. | |
172 | |
173 ]]> | |
174 </help> | |
175 <expand macro="citations" /> | |
176 </tool> |