Mercurial > repos > iuc > episcanpy_preprocess
comparison preprocess.xml @ 0:8ba672d0d8aa draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/episcanpy/ commit ce8ee43d7285503a24c7b0f55c09c513be8c66f5
author | iuc |
---|---|
date | Tue, 18 Apr 2023 13:18:50 +0000 |
parents | |
children | 29f5f25b9935 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8ba672d0d8aa |
---|---|
1 <tool id="episcanpy_preprocess" name="scATAC-seq Preprocessing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>with EpiScanpy</description> | |
<!-- Shared definitions are imported from macros.xml; the macros expanded below and the
     CMD token used by the command are presumably defined there (not visible in this file). -->
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="bio_tools"/> | |
7 <expand macro="requirements"/> | |
8 <expand macro="version_command"/> | |
<!-- The command line is supplied entirely by the CMD token; job failure is detected
     from the process exit code (detect_errors="exit_code"). -->
9 <command detect_errors="exit_code"><![CDATA[ | |
10 @CMD@ | |
11 ]]></command> | |
12 <configfiles> | |
<!-- script_file is a Cheetah template that renders the Python script for the selected method.
     It is framed by the CMD_imports, CMD_read_inputs and CMD_anndata_write_outputs tokens
     (presumably defined in macros.xml) and dispatches on $method.method:
       * pp.binarize, pp.filter_cells, pp.filter_features: call the matching esc.pp function
         on adata with copy=False; the filter branches pass exactly one min_/max_ keyword,
         chosen by $method.filter.filter.
       * nb_feat_log: stores log10 of adata.obs['nb_features'] in adata.obs['log_nb_features'].
       * pp.coverage_cells, pp.coverage_features, pp.cal_var, pp.variability_features:
         plotting calls that pass save='plot.png'.
       * pp.select_var_feature: called with show=False and no save argument, so this branch
         does not request a plot file.
     Optional values (threshold, nb_features for select_var_feature, log_mode) are only
     emitted when the corresponding parameter evaluates as true in Cheetah. -->
13 <configfile name="script_file"><![CDATA[ | |
14 @CMD_imports@ | |
15 @CMD_read_inputs@ | |
16 | |
17 #if $method.method == 'pp.binarize' | |
18 esc.pp.binarize( | |
19 adata, | |
20 copy=False) | |
21 | |
22 #else if $method.method == 'pp.filter_cells' | |
23 esc.pp.filter_cells( | |
24 adata, | |
25 #if $method.filter.filter == 'min_counts' | |
26 min_counts=$method.filter.min_counts, | |
27 #else if $method.filter.filter == 'max_counts' | |
28 max_counts=$method.filter.max_counts, | |
29 #else if $method.filter.filter == 'min_features' | |
30 min_features=$method.filter.min_features, | |
31 #else if $method.filter.filter == 'max_features' | |
32 max_features=$method.filter.max_features, | |
33 #end if | |
34 copy=False) | |
35 | |
36 #else if $method.method == 'pp.filter_features' | |
37 esc.pp.filter_features( | |
38 adata, | |
39 #if $method.filter.filter == 'min_counts' | |
40 min_counts=$method.filter.min_counts, | |
41 #else if $method.filter.filter == 'max_counts' | |
42 max_counts=$method.filter.max_counts, | |
43 #else if $method.filter.filter == 'min_cells' | |
44 min_cells=$method.filter.min_cells, | |
45 #else if $method.filter.filter == 'max_cells' | |
46 max_cells=$method.filter.max_cells, | |
47 #end if | |
48 copy=False) | |
49 | |
50 #else if $method.method == 'nb_feat_log' | |
51 adata.obs['log_nb_features'] = [np.log10(x) for x in adata.obs['nb_features']] | |
52 | |
53 #else if $method.method == 'pp.coverage_cells' | |
54 esc.pp.coverage_cells( | |
55 adata, | |
56 binary=$method.binary, | |
57 log=$method.log, | |
58 #if $method.threshold | |
59 threshold=$method.threshold, | |
60 #end if | |
61 bins=$method.bins, | |
62 save='plot.png' | |
63 ) | |
64 #else if $method.method == 'pp.coverage_features' | |
65 esc.pp.coverage_features( | |
66 adata, | |
67 binary=$method.binary, | |
68 log=$method.log, | |
69 #if $method.threshold | |
70 threshold=$method.threshold, | |
71 #end if | |
72 bins=$method.bins, | |
73 save='plot.png' | |
74 ) | |
75 | |
76 #else if $method.method == 'pp.select_var_feature' | |
77 esc.pp.select_var_feature( | |
78 adata, | |
79 min_score=$method.min_score, | |
80 show=False, | |
81 #if $method.nb_features | |
82 nb_features=$method.nb_features | |
83 #end if | |
84 ) | |
85 | |
86 #else if $method.method == 'pp.cal_var' | |
87 esc.pp.cal_var( | |
88 adata, | |
89 show=True, | |
90 color=['b', 'r'], | |
91 save='plot.png' | |
92 ) | |
93 | |
94 #else if $method.method == 'pp.variability_features' | |
95 esc.pp.variability_features( | |
96 adata, | |
97 min_score=$method.min_score, | |
98 nb_features=$method.nb_features, | |
99 #if $method.log_mode | |
100 log='$method.log_mode', | |
101 #end if | |
102 save='plot.png' | |
103 ) | |
104 | |
105 #end if | |
106 @CMD_anndata_write_outputs@ | |
107 ]]></configfile> | |
108 </configfiles> | |
109 <inputs> | |
110 <expand macro="inputs_anndata"/> | |
<!-- The "method" conditional selects the preprocessing step. Each option value must match a
     branch of the $method.method dispatch in the script template, and the parameters inside
     each <when> are the ones that branch reads. -->
111 <conditional name="method"> | |
112 <param argument="method" type="select" label="Method used for filtering"> | |
113 <option value="pp.binarize">Binarize count matrix, using 'pp.binarize'</option> | |
114 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of features expressed, using 'pp.filter_cells'</option> | |
115 <option value="pp.filter_features">Filter features based on counts and numbers of features expressed, using 'pp.filter_features'</option> | |
116 <option value="nb_feat_log">Compute log10 of nb_features</option> | |
117 <option value="pp.coverage_cells">Coverage cells: Histogram of the number of open features (in the case of ATAC-seq data) per cell, using 'pp.coverage_cells'</option> | |
118 <option value="pp.coverage_features">Coverage features: Distribution of the feature commonness in cells, using 'pp.coverage_features'</option> | |
119 <option value="pp.select_var_feature">Select the most variable features, 'pp.select_var_feature'</option> | |
120 <option value="pp.cal_var">Show distribution plots of cells sharing features and variability score 'pp.cal_var'</option> | |
121 <option value="pp.variability_features">Computes variability score to rank the most variable features across all cells, using 'pp.variability_features'</option> | |
122 </param> | |
123 <when value="pp.binarize" /> | |
124 <when value="pp.filter_cells"> | |
125 <conditional name="filter"> | |
126 <param argument="filter" type="select" label="Filter" help="Filter mode"> | |
127 <option value="min_counts">Minimum number of counts</option> | |
128 <option value="max_counts">Maximum number of counts</option> | |
129 <option value="min_features">Minimum number of features expressed</option> | |
130 <option value="max_features">Maximum number of features expressed</option> | |
131 </param> | |
132 <when value="min_counts"> | |
133 <param argument="min_counts" type="integer" min="0" value="" label="Minimum counts" help="Minimum number of counts required for a cell to pass filtering"/> | |
134 </when> | |
135 <when value="max_counts"> | |
136 <param argument="max_counts" type="integer" min="0" value="" label="Maximum counts" help="Maximum number of counts required for a cell to pass filtering"/> | |
137 </when> | |
138 <when value="min_features"> | |
139 <param argument="min_features" type="integer" min="0" value="" label="Minimum features" help="Minimum number of features expressed required for a cell to pass filtering"/> | |
140 </when> | |
141 <when value="max_features"> | |
142 <param argument="max_features" type="integer" min="0" value="" label="Maximum features" help="Maximum number of features expressed required for a cell to pass filtering"/> | |
143 </when> | |
144 </conditional> | |
145 </when> | |
146 <when value="pp.filter_features"> | |
147 <conditional name="filter"> | |
148 <param argument="filter" type="select" label="Filter"> | |
149 <option value="min_counts">Minimum number of counts</option> | |
150 <option value="max_counts">Maximum number of counts</option> | |
151 <option value="min_cells">Minimum number of cells expressed</option> | |
152 <option value="max_cells">Maximum number of cells expressed</option> | |
153 </param> | |
154 <when value="min_counts"> | |
155 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a feature to pass filtering"/> | |
156 </when> | |
157 <when value="max_counts"> | |
158 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a feature to pass filtering"/> | |
159 </when> | |
160 <when value="min_cells"> | |
161 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a feature to pass filtering"/> | |
162 </when> | |
163 <when value="max_cells"> | |
164 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a feature to pass filtering"/> | |
165 </when> | |
166 </conditional> | |
167 </when> | |
168 <when value="nb_feat_log" /> | |
169 <when value="pp.coverage_cells"> | |
170 <expand macro="coverage_params" /> | |
171 </when> | |
172 <when value="pp.coverage_features"> | |
173 <expand macro="coverage_params" /> | |
174 </when> | |
175 <when value="pp.select_var_feature"> | |
176 <param argument="min_score" type="float" min="0" max="1" value="0.5" label="Min score" help="Minimum threshold variability score to retain features" /> | |
177 <param argument="nb_features" type="integer" min="0" value="" optional="True" label="Number of features" help="Default value is None. If specified, the top most | |
178 variable features are selected. If this parameter is larger than the total number of features, it filters based on the min_score argument." /> | |
179 </when> | |
180 <when value="pp.cal_var" /> | |
181 <when value="pp.variability_features"> | |
182 <param name="min_score" type="float" min="0" max="1" value="0.5" label="Minimum score value"/> | |
183 <param name="nb_features" type="integer" min="0" value="" label="Number of features"/> | |
184 <param name="log_mode" type="select" optional="True" label="Log" help="Log mode"> | |
185 <option value="log2">Log2</option> | |
186 <option value="log10">Log10</option> | |
187 </param> | |
188 </when> | |
189 </conditional> | |
190 <expand macro="inputs_common_advanced"/> | |
191 </inputs> | |
192 <outputs> | |
193 <expand macro="anndata_outputs"/> | |
<!-- plot.png is only written by the script branches that pass save='plot.png'
     (coverage_cells, coverage_features, cal_var, variability_features). Every other method,
     including pp.select_var_feature, must be excluded here, using the exact option value. -->
194 <data name="out_png" format="png" from_work_dir="plot.png" label="${tool.name} (${method.method}) on ${on_string}"> | |
195 <filter>method['method'] != 'pp.binarize' and method['method'] != 'pp.filter_cells' and method['method'] != 'pp.filter_features' and method['method'] != 'nb_feat_log' and method['method'] != 'pp.select_var_feature'</filter> | |
196 </data> | |
197 </outputs> | |
<!-- expect_num_outputs counts anndata_out, plus hidden_output when show_log is enabled,
     plus out_png for the plotting methods. -->
198 <tests> | |
199 <test expect_num_outputs="1"> | |
200 <!-- pp.binarize --> | |
201 <param name="adata" value="krumsiek11.h5ad" /> | |
202 <conditional name="method"> | |
203 <param name="method" value="pp.binarize"/> | |
204 </conditional> | |
205 <output name="anndata_out" file="krumsiek11.pp.binarize.h5ad" ftype="h5ad" compare="sim_size"/> | |
206 </test> | |
207 <test expect_num_outputs="2"> | |
208 <!-- pp.filter_cells --> | |
209 <param name="adata" value="krumsiek11.h5ad" /> | |
210 <conditional name="method"> | |
211 <param name="method" value="pp.filter_cells"/> | |
212 <conditional name="filter"> | |
213 <param name="filter" value="min_features"/> | |
214 <param name="min_features" value="10"/> | |
215 </conditional> | |
216 </conditional> | |
217 <section name="advanced_common"> | |
218 <param name="show_log" value="true" /> | |
219 </section> | |
220 <assert_stdout> | |
221 <has_text_matching expression="395 × 11"/> | |
222 </assert_stdout> | |
223 <output name="hidden_output"> | |
224 <assert_contents> | |
225 <has_text_matching expression="esc.pp.filter_cells"/> | |
226 <has_text_matching expression="min_features=10"/> | |
227 </assert_contents> | |
228 </output> | |
229 <output name="anndata_out" file="krumsiek11.pp.filter_cells.min_features.h5ad" ftype="h5ad" compare="sim_size"/> | |
230 </test> | |
231 <test expect_num_outputs="2"> | |
232 <!-- pp.filter_features --> | |
233 <param name="adata" value="krumsiek11.h5ad" /> | |
234 <conditional name="method"> | |
235 <param name="method" value="pp.filter_features"/> | |
236 <conditional name="filter"> | |
237 <param name="filter" value="min_cells"/> | |
238 <param name="min_cells" value="600"/> | |
239 </conditional> | |
240 </conditional> | |
241 <section name="advanced_common"> | |
242 <param name="show_log" value="true" /> | |
243 </section> | |
244 <assert_stdout> | |
245 <has_text_matching expression="640 × 2"/> | |
246 </assert_stdout> | |
247 <output name="hidden_output"> | |
248 <assert_contents> | |
249 <has_text_matching expression="esc.pp.filter_features"/> | |
250 <has_text_matching expression="min_cells=600"/> | |
251 </assert_contents> | |
252 </output> | |
253 <output name="anndata_out" file="krumsiek11.pp.filter_features.min_cells.h5ad" ftype="h5ad" compare="sim_size"/> | |
254 </test> | |
255 <test expect_num_outputs="1"> | |
256 <!-- nb_feat_log --> | |
257 <param name="adata" value="krumsiek11.pp.filter_cells.min_features.h5ad" /> | |
258 <conditional name="method"> | |
259 <param name="method" value="nb_feat_log"/> | |
260 </conditional> | |
261 <assert_stdout> | |
262 <has_text_matching expression="log_nb_features"/> | |
263 <has_text_matching expression="nb_features"/> | |
264 </assert_stdout> | |
265 </test> | |
266 <test expect_num_outputs="2"> | |
267 <!-- pp.select_var_feature --> | |
268 <param name="adata" value="krumsiek11.h5ad" /> | |
269 <conditional name="method"> | |
270 <param name="method" value="pp.select_var_feature"/> | |
271 <param name="min_score" value="0.6"/> | |
272 <param name="nb_features" value="10"/> | |
273 </conditional> | |
274 <section name="advanced_common"> | |
275 <param name="show_log" value="true" /> | |
276 </section> | |
277 <assert_stdout> | |
278 <has_text_matching expression="prop_shared_cells"/> | |
279 <has_text_matching expression="variability_score"/> | |
280 </assert_stdout> | |
281 <output name="hidden_output"> | |
282 <assert_contents> | |
283 <has_text_matching expression="esc.pp.select_var_feature"/> | |
284 <has_text_matching expression="adata"/> | |
285 <has_text_matching expression="min_score=0.6"/> | |
286 <has_text_matching expression="nb_features=10"/> | |
287 </assert_contents> | |
288 </output> | |
289 <output name="anndata_out" file="krumsiek11.pp.select_var_feature.h5ad" ftype="h5ad" compare="sim_size"/> | |
290 </test> | |
291 <test expect_num_outputs="3"> | |
292 <!-- pp.cal_var --> | |
293 <param name="adata" value="krumsiek11.h5ad" /> | |
294 <conditional name="method"> | |
295 <param name="method" value="pp.cal_var"/> | |
296 </conditional> | |
297 <section name="advanced_common"> | |
298 <param name="show_log" value="true" /> | |
299 </section> | |
300 <output name="hidden_output"> | |
301 <assert_contents> | |
302 <has_text_matching expression="esc.pp.cal_var"/> | |
303 <has_text_matching expression="adata"/> | |
304 <has_text_matching expression="plot.png"/> | |
305 </assert_contents> | |
306 </output> | |
307 <output name="anndata_out" file="krumsiek11.pp.cal_var.h5ad" ftype="h5ad" compare="sim_size"/> | |
308 <output name="out_png" file="krumsiek11.pp.cal_var.png" ftype="png" compare="sim_size"/> | |
309 </test> | |
310 <test expect_num_outputs="3"> | |
311 <!-- pp.coverage_cells --> | |
312 <param name="adata" value="krumsiek11.h5ad" /> | |
313 <conditional name="method"> | |
314 <param name="method" value="pp.coverage_cells"/> | |
315 <param name="threshold" value="3.0" /> | |
316 <param name="binary" value="True" /> | |
317 </conditional> | |
318 <section name="advanced_common"> | |
319 <param name="show_log" value="true" /> | |
320 </section> | |
321 <output name="hidden_output"> | |
322 <assert_contents> | |
323 <has_text_matching expression="esc.pp.coverage_cells"/> | |
324 <has_text_matching expression="adata"/> | |
325 <has_text_matching expression="threshold=3.0"/> | |
326 </assert_contents> | |
327 </output> | |
328 <output name="anndata_out" file="krumsiek11.pp.coverage_cells.h5ad" ftype="h5ad" compare="sim_size"/> | |
329 <output name="out_png" file="krumsiek11.pp.coverage_cells.png" ftype="png" compare="sim_size"/> | |
330 </test> | |
331 <test expect_num_outputs="3"> | |
332 <!-- pp.coverage_features --> | |
333 <param name="adata" value="krumsiek11.h5ad" /> | |
334 <conditional name="method"> | |
335 <param name="method" value="pp.coverage_features"/> | |
336 <param name="threshold" value="100" /> | |
337 <param name="binary" value="True" /> | |
338 </conditional> | |
339 <section name="advanced_common"> | |
340 <param name="show_log" value="true" /> | |
341 </section> | |
342 <output name="hidden_output"> | |
343 <assert_contents> | |
344 <has_text_matching expression="esc.pp.coverage_features"/> | |
345 <has_text_matching expression="adata"/> | |
346 <has_text_matching expression="threshold=100"/> | |
347 </assert_contents> | |
348 </output> | |
349 <output name="anndata_out" file="krumsiek11.pp.coverage_features.h5ad" ftype="h5ad" compare="sim_size"/> | |
350 <output name="out_png" file="krumsiek11.pp.coverage_features.png" ftype="png" compare="sim_size"/> | |
351 </test> | |
352 <test expect_num_outputs="3"> | |
353 <!-- pp.variability_features --> | |
354 <param name="adata" value="krumsiek11.h5ad" /> | |
355 <conditional name="method"> | |
356 <param name="method" value="pp.variability_features"/> | |
357 <param name="min_score" value="0.75" /> | |
358 <param name="nb_features" value="8" /> | |
359 <param name="log_mode" value="log10" /> | |
360 </conditional> | |
361 <section name="advanced_common"> | |
362 <param name="show_log" value="true" /> | |
363 </section> | |
364 <output name="hidden_output"> | |
365 <assert_contents> | |
366 <has_text_matching expression="esc.pp.variability_features"/> | |
367 <has_text_matching expression="adata"/> | |
368 <has_text_matching expression="min_score=0.75"/> | |
369 <has_text_matching expression="nb_features=8"/> | |
370 </assert_contents> | |
371 </output> | |
372 <output name="anndata_out" file="krumsiek11.pp.variability_features.h5ad" ftype="h5ad" compare="sim_size"/> | |
373 <output name="out_png" file="krumsiek11.pp.variability_features.png" ftype="png" compare="sim_size"/> | |
374 </test> | |
375 </tests> | |
376 <help><![CDATA[ | |
377 | |
378 Convert the count matrix into a binary matrix (`pp.binarize`) | |
379 ============================================================================================ | |
380 Convert the count matrix into a binary matrix. | |
381 | |
382 More details on the `episcanpy documentation | |
383 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.binarize.html>`__ | |
384 | |
385 Filter cell outliers based on counts and numbers of features expressed (`pp.filter_cells`) | |
386 ============================================================================================ | |
387 For instance, only keep cells with at least *min_counts* counts or *min_features* genes expressed. | |
388 This is to filter measurement outliers, i.e. "unreliable" observations. | |
389 | |
390 Only provide one of the optional parameters *min_counts*, *min_features*, *max_counts*, *max_features* per call. | |
391 | |
392 More details on the `episcanpy documentation | |
393 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.filter_cells.html>`__ | |
394 | |
395 Filter features based on number of cells or counts (`pp.filter_features`) | |
396 ======================================================================================== | |
397 Keep features that have at least *min_counts* counts or are expressed in at least *min_cells* cells or | |
398 have at most *max_counts* counts or are expressed in at most *max_cells* cells. | |
399 | |
400 Only provide one of the optional parameters *min_counts*, *min_cells*, *max_counts*, *max_cells* per call. | |
401 | |
402 More details on the `episcanpy documentation | |
403 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.filter_features.html>`__ | |
404 | |
405 Histogram of the number of open features (`pp.coverage_cells`) | |
406 ======================================================================================== | |
407 Histogram of the number of open features (in the case of ATAC-seq data) per cell. | |
408 | |
409 More details on the `episcanpy documentation | |
410 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.coverage_cells.html>`__ | |
411 | |
412 Distribution of the feature commonness in cells (`pp.coverage_features`) | |
413 ======================================================================================== | |
414 Display how often a feature is measured as open (for ATAC-seq). Distribution of the feature commonness in cells. | |
415 | |
416 More details on the `episcanpy documentation | |
417 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.coverage_features.html>`__ | |
418 | |
419 Selects the most variable features according to either a specified number of features or minimum variance score (`pp.select_var_feature`) | |
420 ========================================================================================================================================= | |
421 | |
422 This function computes a variability score to rank the most variable features across all cells. Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score). | |
423 | |
424 More details on the `episcanpy documentation | |
425 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.select_var_feature.html>`__ | |
426 | |
427 Distribution of cells sharing features and variability score (`pp.cal_var`) | |
428 ============================================================================= | |
429 | |
430 Show distribution plots of cells sharing features and variability score. | |
431 | |
432 More details on the `episcanpy documentation | |
433 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.cal_var.html>`__ | |
434 | |
435 Compute a variability score to rank the most variable features across all cells (`pp.variability_features`) | |
436 ============================================================================================================ | |
437 | |
438 This function computes a variability score to rank the most variable features across all cells. Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score). | |
439 | |
440 More details on the `episcanpy documentation | |
441 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.variability_features.html>`__ | |
442 ]]></help> | |
<!-- Citation entries come from the "citations" macro (presumably defined in macros.xml). -->
443 <expand macro="citations"/> | |
444 </tool> |