comparison normalize.xml @ 0:ed64c90a9b93 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
author iuc
date Mon, 04 Mar 2019 10:16:12 -0500
parents
children a9f14e2d1655
comparison
equal deleted inserted replaced
-1:000000000000 0:ed64c90a9b93
1 <tool id="scanpy_normalize" name="Normalize with scanpy" version="@galaxy_version@">
2 <description></description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="version_command"/>
<!-- Command line. The @CMD@ token is not defined in this file; presumably it comes from macros.xml. -->
<command detect_errors="exit_code"><![CDATA[
@CMD@
]]></command>
<!--
    Cheetah template for the Python script that performs the selected preprocessing step.
    Exactly one branch is rendered, chosen by $method.method; every branch calls a single
    scanpy `sc.pp.*` function on `adata` with copy=False.
    The @CMD_imports@, @CMD_read_inputs@ and @CMD_anndata_write_outputs@ tokens are not
    defined in this file (presumably in macros.xml); they are expected to provide the
    `sc`/`np` imports, the `adata` object and the output writing.
-->
<configfiles>
    <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == "pp.normalize_per_cell"
sc.pp.normalize_per_cell(
    data=adata,
    ## Optional float: test the string form so that an explicit 0 is not mistaken for "unset".
    #if str($method.counts_per_cell_after) != ''
    counts_per_cell_after=$method.counts_per_cell_after,
    #end if
    #if $method.counts_per_cell
    counts_per_cell=np.loadtxt('$method.counts_per_cell'),
    #end if
    key_n_counts='$method.key_n_counts',
    copy=False)
## The observation annotations are exported only for this method (see the output filter).
adata.obs.to_csv('$anndata_obs', sep='\t')
#elif $method.method == "pp.recipe_zheng17"
sc.pp.recipe_zheng17(
    adata=adata,
    n_top_genes=$method.n_top_genes,
    log=$method.log,
    plot=False,
    copy=False)
#elif $method.method == "pp.recipe_weinreb17"
sc.pp.recipe_weinreb17(
    adata=adata,
    log=$method.log,
    mean_threshold=$method.mean_threshold,
    cv_threshold=$method.cv_threshold,
    n_pcs=$method.n_pcs,
    svd_solver='$method.svd_solver',
    random_state=$method.random_state,
    copy=False)
#elif $method.method == "pp.recipe_seurat"
sc.pp.recipe_seurat(
    adata=adata,
    log=$method.log,
    plot=False,
    copy=False)
#elif $method.method == "pp.log1p"
sc.pp.log1p(
    data=adata,
    copy=False)
#elif $method.method == "pp.scale"
sc.pp.scale(
    data=adata,
    zero_center=$method.zero_center,
    ## Optional float: test the string form so that an explicit 0 is not mistaken for "unset".
    #if str($method.max_value) != ''
    max_value=$method.max_value,
    #end if
    copy=False)
#elif $method.method == "pp.sqrt"
sc.pp.sqrt(
    data=adata,
    copy=False)
#elif $method.method == "pp.downsample_counts"
sc.pp.downsample_counts(
    adata=adata,
    target_counts=$method.target_counts,
    random_state=$method.random_state,
    copy=False)
#end if

@CMD_anndata_write_outputs@

]]></configfile>
</configfiles>
<!--
    Tool form. The "method" conditional offers one <when> per scanpy preprocessing
    function; the parameter names inside each <when> match the keyword arguments used
    for that function in the script_file template.
-->
<inputs>
    <expand macro="inputs_anndata"/>
    <conditional name="method">
        <param argument="method" type="select" label="Method used for normalization">
            <option value="pp.normalize_per_cell">Normalize total counts per cell, using `pp.normalize_per_cell`</option>
            <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using `pp.recipe_zheng17`</option>
            <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al. (2017), using `pp.recipe_weinreb17`</option>
            <option value="pp.recipe_seurat">Normalization and filtering as of Seurat (Satija et al., 2015), using `pp.recipe_seurat`</option>
            <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option>
            <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option>
            <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option>
            <option value="pp.downsample_counts">Downsample counts, using `pp.downsample_counts`</option>
        </param>
        <when value="pp.normalize_per_cell">
            <!-- Both of the following are optional and are only passed to scanpy when set. -->
            <param argument="counts_per_cell_after" type="float" value="" optional="true" label="Counts per cell after" help="If not provided, after normalization, each cell has a total count equal to the median of the *counts_per_cell* before normalization."/>
            <param argument="counts_per_cell" type="data" format="tabular,txt" optional="true" label="Precomputed counts per cell" help=""/>
            <param argument="key_n_counts" type="text" value="n_counts" label="Name of the field in `adata.obs` where the total counts per cell will be stored" help=""/>
        </when>
        <when value="pp.recipe_zheng17">
            <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/>
            <expand macro="param_log"/>
        </when>
        <when value="pp.recipe_weinreb17">
            <expand macro="param_log"/>
            <param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/>
            <param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/>
            <param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal components" help=""/>
            <expand macro="svd_solver"/>
            <expand macro="pca_random_state"/>
        </when>
        <when value="pp.recipe_seurat">
            <expand macro="param_log"/>
        </when>
        <!-- pp.log1p takes no extra parameters. -->
        <when value="pp.log1p"/>
        <when value="pp.scale">
            <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
                label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/>
            <!-- Optional: only passed to scanpy when set. -->
            <param argument="max_value" type="float" value="" optional="true" label="Maximum value"
                help="Clip (truncate) to this value after scaling. If not set, it does not clip."/>
        </when>
        <!-- pp.sqrt takes no extra parameters. -->
        <when value="pp.sqrt"/>
        <when value="pp.downsample_counts">
            <param argument="target_counts" type="integer" min="0" value="20000"
                label="Target number of counts for downsampling" help="Cells with more counts than 'target_counts' will be downsampled to have 'target_counts' counts."/>
            <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/>
        </when>
    </conditional>
    <expand macro="anndata_output_format"/>
</inputs>
<!--
    Outputs: the AnnData dataset(s) declared by the anndata_outputs macro, plus a
    tab-separated table of the observation annotations that is only created when the
    selected method is pp.normalize_per_cell.
-->
<outputs>
    <expand macro="anndata_outputs"/>
    <data name="anndata_obs"
          format="tabular"
          label="${tool.name} on ${on_string}: Annotation of observations">
        <filter>method['method'] == 'pp.normalize_per_cell'</filter>
    </data>
</outputs>
<!--
    Functional tests: one case per method (two for pp.scale). Each case asserts that the
    expected call and arguments appear on stdout and then checks the h5ad output.
-->
<tests>
    <!-- pp.normalize_per_cell with a target count and precomputed per-cell counts; also checks the obs table. -->
    <test>
        <conditional name="input">
            <param name="format" value="h5ad"/>
            <param name="adata" value="krumsiek11.h5ad"/>
        </conditional>
        <conditional name="method">
            <param name="method" value="pp.normalize_per_cell"/>
            <param name="counts_per_cell_after" value="2"/>
            <param name="counts_per_cell" value="krumsiek11_counts_per_cell"/>
            <param name="key_n_counts" value="n_counts"/>
        </conditional>
        <param name="anndata_output_format" value="h5ad"/>
        <assert_stdout>
            <has_text_matching expression="sc.pp.normalize_per_cell"/>
            <has_text_matching expression="counts_per_cell_after=2.0"/>
            <has_text_matching expression="counts_per_cell=np.loadtxt"/>
            <has_text_matching expression="key_n_counts='n_counts'"/>
        </assert_stdout>
        <output name="anndata_out_h5ad" file="pp.normalize_per_cell.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
        <output name="anndata_obs" file="pp.normalize_per_cell.obs.krumsiek11.tabular"/>
    </test>
    <!-- pp.recipe_zheng17 -->
    <test>
        <conditional name="input">
            <param name="format" value="h5ad"/>
            <param name="adata" value="random-randint.h5ad"/>
        </conditional>
        <conditional name="method">
            <param name="method" value="pp.recipe_zheng17"/>
            <param name="n_top_genes" value="1000"/>
            <param name="log" value="True"/>
        </conditional>
        <param name="anndata_output_format" value="h5ad"/>
        <assert_stdout>
            <has_text_matching expression="sc.pp.recipe_zheng17"/>
            <has_text_matching expression="n_top_genes=1000"/>
            <has_text_matching expression="log=True"/>
        </assert_stdout>
        <output name="anndata_out_h5ad" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5" compare="sim_size"/>
    </test>
    <!-- pp.recipe_weinreb17 -->
    <test>
        <conditional name="input">
            <param name="format" value="h5ad"/>
            <param name="adata" value="paul15_subsample.h5ad"/>
        </conditional>
        <conditional name="method">
            <param name="method" value="pp.recipe_weinreb17"/>
            <param name="log" value="True"/>
            <param name="mean_threshold" value="0.01"/>
            <param name="cv_threshold" value="2.0"/>
            <param name="n_pcs" value="50"/>
            <param name="svd_solver" value="randomized"/>
            <param name="random_state" value="0"/>
        </conditional>
        <param name="anndata_output_format" value="h5ad"/>
        <assert_stdout>
            <has_text_matching expression="sc.pp.recipe_weinreb17"/>
            <has_text_matching expression="log=True"/>
            <has_text_matching expression="mean_threshold=0.01"/>
            <has_text_matching expression="cv_threshold=2.0"/>
            <has_text_matching expression="n_pcs=50"/>
            <has_text_matching expression="svd_solver='randomized'"/>
            <has_text_matching expression="random_state=0"/>
        </assert_stdout>
        <output name="anndata_out_h5ad" file="pp.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/>
    </test>
    <!-- pp.recipe_seurat, run on the expected output of the pp.recipe_zheng17 test -->
    <test>
        <conditional name="input">
            <param name="format" value="h5ad"/>
            <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad"/>
        </conditional>
        <conditional name="method">
            <param name="method" value="pp.recipe_seurat"/>
            <param name="log" value="True"/>
        </conditional>
        <param name="anndata_output_format" value="h5ad"/>
        <assert_stdout>
            <has_text_matching expression="sc.pp.recipe_seurat"/>
            <has_text_matching expression="log=True"/>
        </assert_stdout>
        <output name="anndata_out_h5ad" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5" compare="sim_size"/>
    </test>
    <!-- pp.log1p -->
    <test>
        <conditional name="input">
            <param name="format" value="h5ad"/>
            <param name="adata" value="krumsiek11.h5ad"/>
        </conditional>
        <conditional name="method">
            <param name="method" value="pp.log1p"/>
        </conditional>
        <param name="anndata_output_format" value="h5ad"/>
        <assert_stdout>
            <has_text_matching expression="sc.pp.log1p"/>
        </assert_stdout>
        <output name="anndata_out_h5ad" file="pp.log1p.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
    </test>
    <!-- pp.scale without clipping (max_value left unset) -->
    <test>
        <conditional name="input">
            <param name="format" value="h5ad"/>
            <param name="adata" value="krumsiek11.h5ad"/>
        </conditional>
        <conditional name="method">
            <param name="method" value="pp.scale"/>
            <param name="zero_center" value="true"/>
        </conditional>
        <param name="anndata_output_format" value="h5ad"/>
        <assert_stdout>
            <has_text_matching expression="sc.pp.scale"/>
            <has_text_matching expression="zero_center=True"/>
        </assert_stdout>
        <output name="anndata_out_h5ad" file="pp.scale.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
    </test>
    <!-- pp.scale with clipping at max_value -->
    <test>
        <conditional name="input">
            <param name="format" value="h5ad"/>
            <param name="adata" value="krumsiek11.h5ad"/>
        </conditional>
        <conditional name="method">
            <param name="method" value="pp.scale"/>
            <param name="zero_center" value="true"/>
            <param name="max_value" value="10"/>
        </conditional>
        <param name="anndata_output_format" value="h5ad"/>
        <assert_stdout>
            <has_text_matching expression="sc.pp.scale"/>
            <has_text_matching expression="zero_center=True"/>
            <has_text_matching expression="max_value=10.0"/>
        </assert_stdout>
        <output name="anndata_out_h5ad" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
    </test>
    <!-- pp.sqrt -->
    <test>
        <conditional name="input">
            <param name="format" value="h5ad"/>
            <param name="adata" value="krumsiek11.h5ad"/>
        </conditional>
        <conditional name="method">
            <param name="method" value="pp.sqrt"/>
        </conditional>
        <param name="anndata_output_format" value="h5ad"/>
        <assert_stdout>
            <has_text_matching expression="sc.pp.sqrt"/>
        </assert_stdout>
        <output name="anndata_out_h5ad" file="pp.sqrt.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
    </test>
    <!-- pp.downsample_counts: the output is checked for its HDF5 keys instead of against a reference file. -->
    <test>
        <conditional name="input">
            <param name="format" value="h5ad"/>
            <param name="adata" value="random-randint.h5ad"/>
        </conditional>
        <conditional name="method">
            <param name="method" value="pp.downsample_counts"/>
            <param name="target_counts" value="20000"/>
            <param name="random_state" value="0"/>
        </conditional>
        <param name="anndata_output_format" value="h5ad"/>
        <assert_stdout>
            <has_text_matching expression="sc.pp.downsample_counts"/>
            <has_text_matching expression="target_counts=20000"/>
            <has_text_matching expression="random_state=0"/>
        </assert_stdout>
        <output name="anndata_out_h5ad" ftype="h5">
            <assert_contents>
                <has_h5_keys keys="X, obs, var"/>
            </assert_contents>
        </output>
    </test>
</tests>
<!-- User-facing help in reStructuredText: one section per entry of the "method" select. -->
<help><![CDATA[
Normalize total counts per cell (`pp.normalize_per_cell`)
=========================================================

Normalize each cell by total counts over all genes, so that every cell has
the same total count after normalization.

Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.normalize_per_cell.html>`__


Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`)
==================================================================================================================

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

The recipe runs the following steps:

- only consider genes with more than 1 count
- normalize with total UMI count per cell
- select highly-variable genes
- subset the genes
- renormalize after filtering
- log transform (if needed)
- scale to unit variance and shift to zero mean

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_zheng17.html>`__


Normalization and filtering as of Weinreb et al. (2017) (`pp.recipe_weinreb17`)
===============================================================================

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_weinreb17.html>`__


Normalization and filtering as of Seurat (Satija et al., 2015) (`pp.recipe_seurat`)
===================================================================================

This uses a particular preprocessing.

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_seurat.html>`__

Logarithmize the data matrix (`pp.log1p`)
=========================================

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.log1p.html>`__

Scale data to unit variance and zero mean (`pp.scale`)
======================================================

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.scale.html>`__

Compute the square root of the data matrix (`pp.sqrt`)
======================================================

`X = sqrt(X)`

Downsample counts (`pp.downsample_counts`)
==========================================

Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This
has been implemented by M. D. Luecken.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.downsample_counts.html>`__

]]></help>
379 <expand macro="citations"/>
380 </tool>