comparison preprocess.xml @ 0:8ba672d0d8aa draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/episcanpy/ commit ce8ee43d7285503a24c7b0f55c09c513be8c66f5
author iuc
date Tue, 18 Apr 2023 13:18:50 +0000
parents
children 29f5f25b9935
comparison
equal deleted inserted replaced
-1:000000000000 0:8ba672d0d8aa
1 <tool id="episcanpy_preprocess" name="scATAC-seq Preprocessing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>with EpiScanpy</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/>
8 <expand macro="version_command"/>
9 <command detect_errors="exit_code"><![CDATA[
10 @CMD@
11 ]]></command>
12 <configfiles> <!-- Cheetah template of the analysis script: the #if chain emits the single branch matching $method.method; the coverage_cells, coverage_features, cal_var and variability_features branches save their figure as plot.png. NOTE(review): the @CMD_...@ tokens are macro tokens, presumably defined in macros.xml; confirm there. -->
13 <configfile name="script_file"><![CDATA[
14 @CMD_imports@
15 @CMD_read_inputs@
16
17 #if $method.method == 'pp.binarize'
18 esc.pp.binarize(
19 adata,
20 copy=False)
21
22 #else if $method.method == 'pp.filter_cells'
23 esc.pp.filter_cells(
24 adata,
25 #if $method.filter.filter == 'min_counts'
26 min_counts=$method.filter.min_counts,
27 #else if $method.filter.filter == 'max_counts'
28 max_counts=$method.filter.max_counts,
29 #else if $method.filter.filter == 'min_features'
30 min_features=$method.filter.min_features,
31 #else if $method.filter.filter == 'max_features'
32 max_features=$method.filter.max_features,
33 #end if
34 copy=False)
35
36 #else if $method.method == 'pp.filter_features'
37 esc.pp.filter_features(
38 adata,
39 #if $method.filter.filter == 'min_counts'
40 min_counts=$method.filter.min_counts,
41 #else if $method.filter.filter == 'max_counts'
42 max_counts=$method.filter.max_counts,
43 #else if $method.filter.filter == 'min_cells'
44 min_cells=$method.filter.min_cells,
45 #else if $method.filter.filter == 'max_cells'
46 max_cells=$method.filter.max_cells,
47 #end if
48 copy=False)
49
50 #else if $method.method == 'nb_feat_log'
51 adata.obs['log_nb_features'] = [np.log10(x) for x in adata.obs['nb_features']]
52
53 #else if $method.method == 'pp.coverage_cells'
54 esc.pp.coverage_cells(
55 adata,
56 binary=$method.binary,
57 log=$method.log,
58 #if $method.threshold
59 threshold=$method.threshold,
60 #end if
61 bins=$method.bins,
62 save='plot.png'
63 )
64 #else if $method.method == 'pp.coverage_features'
65 esc.pp.coverage_features(
66 adata,
67 binary=$method.binary,
68 log=$method.log,
69 #if $method.threshold
70 threshold=$method.threshold,
71 #end if
72 bins=$method.bins,
73 save='plot.png'
74 )
75
76 #else if $method.method == 'pp.select_var_feature'
77 esc.pp.select_var_feature(
78 adata,
79 min_score=$method.min_score,
80 show=False,
81 #if $method.nb_features
82 nb_features=$method.nb_features
83 #end if
84 )
85
86 #else if $method.method == 'pp.cal_var'
87 esc.pp.cal_var(
88 adata,
89 show=True,
90 color=['b', 'r'],
91 save='plot.png'
92 )
93
94 #else if $method.method == 'pp.variability_features'
95 esc.pp.variability_features(
96 adata,
97 min_score=$method.min_score,
98 nb_features=$method.nb_features,
99 #if $method.log_mode
100 log='$method.log_mode',
101 #end if
102 save='plot.png'
103 )
104
105 #end if
106 @CMD_anndata_write_outputs@
107 ]]></configfile>
108 </configfiles>
109 <inputs>
110 <expand macro="inputs_anndata"/>
111 <conditional name="method"> <!-- Each 'when' below declares the parameters read by the script branch of the same name; parameter names must stay in sync with the $method.* references in the configfile. -->
112 <param argument="method" type="select" label="Method used for filtering">
113 <option value="pp.binarize">Binarize count matrix, using 'pp.binarize'</option>
114 <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of features expressed, using 'pp.filter_cells'</option>
115 <option value="pp.filter_features">Filter features based on counts and numbers of cells expressed, using 'pp.filter_features'</option>
116 <option value="nb_feat_log">Compute log10 of nb_features</option>
117 <option value="pp.coverage_cells">Coverage cells: Histogram of the number of open features (in the case of ATAC-seq data) per cell, using 'pp.coverage_cells'</option>
118 <option value="pp.coverage_features">Coverage features: Distribution of the feature commonness in cells, using 'pp.coverage_features'</option>
119 <option value="pp.select_var_feature">Select the most variable features, using 'pp.select_var_feature'</option>
120 <option value="pp.cal_var">Show distribution plots of cells sharing features and variability score, using 'pp.cal_var'</option>
121 <option value="pp.variability_features">Computes variability score to rank the most variable features across all cells, using 'pp.variability_features'</option>
122 </param>
123 <when value="pp.binarize" />
124 <when value="pp.filter_cells">
125 <conditional name="filter">
126 <param argument="filter" type="select" label="Filter" help="Filter mode">
127 <option value="min_counts">Minimum number of counts</option>
128 <option value="max_counts">Maximum number of counts</option>
129 <option value="min_features">Minimum number of features expressed</option>
130 <option value="max_features">Maximum number of features expressed</option>
131 </param>
132 <when value="min_counts">
133 <param argument="min_counts" type="integer" min="0" value="" label="Minimum counts" help="Minimum number of counts required for a cell to pass filtering"/>
134 </when>
135 <when value="max_counts">
136 <param argument="max_counts" type="integer" min="0" value="" label="Maximum counts" help="Maximum number of counts allowed for a cell to pass filtering"/>
137 </when>
138 <when value="min_features">
139 <param argument="min_features" type="integer" min="0" value="" label="Minimum features" help="Minimum number of features expressed required for a cell to pass filtering"/>
140 </when>
141 <when value="max_features">
142 <param argument="max_features" type="integer" min="0" value="" label="Maximum features" help="Maximum number of features expressed allowed for a cell to pass filtering"/>
143 </when>
144 </conditional>
145 </when>
146 <when value="pp.filter_features">
147 <conditional name="filter">
148 <param argument="filter" type="select" label="Filter">
149 <option value="min_counts">Minimum number of counts</option>
150 <option value="max_counts">Maximum number of counts</option>
151 <option value="min_cells">Minimum number of cells expressed</option>
152 <option value="max_cells">Maximum number of cells expressed</option>
153 </param>
154 <when value="min_counts">
155 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a feature to pass filtering"/>
156 </when>
157 <when value="max_counts">
158 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts allowed for a feature to pass filtering"/>
159 </when>
160 <when value="min_cells">
161 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a feature to pass filtering"/>
162 </when>
163 <when value="max_cells">
164 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed allowed for a feature to pass filtering"/>
165 </when>
166 </conditional>
167 </when>
168 <when value="nb_feat_log" />
169 <when value="pp.coverage_cells">
170 <expand macro="coverage_params" />
171 </when>
172 <when value="pp.coverage_features">
173 <expand macro="coverage_params" />
174 </when>
175 <when value="pp.select_var_feature">
176 <param argument="min_score" type="float" min="0" max="1" value="0.5" label="Min score" help="Minimum threshold variability score to retain features" />
177 <param argument="nb_features" type="integer" min="0" value="" optional="True" label="Number of features" help="Default value is None. If specified, the top most
178 variable features are selected. If this parameter is larger than the total number of features, filtering is based on the min_score argument." />
179 </when>
180 <when value="pp.cal_var" />
181 <when value="pp.variability_features">
182 <param argument="min_score" type="float" min="0" max="1" value="0.5" label="Minimum score value"/>
183 <param argument="nb_features" type="integer" min="0" value="" label="Number of features"/>
184 <param name="log_mode" type="select" optional="True" label="Log" help="Log mode">
185 <option value="log2">Log2</option>
186 <option value="log10">Log10</option>
187 </param>
188 </when>
189 </conditional>
190 <expand macro="inputs_common_advanced"/>
191 </inputs>
192 <outputs> <!-- out_png is only declared for methods whose script branch saves plot.png (coverage_cells, coverage_features, cal_var, variability_features). -->
193 <expand macro="anndata_outputs"/>
194 <data name="out_png" format="png" from_work_dir="plot.png" label="${tool.name} (${method.method}) on ${on_string}">
195 <filter>method['method'] not in ('pp.binarize', 'pp.filter_cells', 'pp.filter_features', 'nb_feat_log', 'pp.select_var_feature')</filter>
196 </data>
197 </outputs>
198 <tests>
199 <test expect_num_outputs="1">
200 <!-- pp.binarize -->
201 <param name="adata" value="krumsiek11.h5ad" />
202 <conditional name="method">
203 <param name="method" value="pp.binarize"/>
204 </conditional>
205 <output name="anndata_out" file="krumsiek11.pp.binarize.h5ad" ftype="h5ad" compare="sim_size"/>
206 </test>
207 <test expect_num_outputs="2">
208 <!-- pp.filter_cells -->
209 <param name="adata" value="krumsiek11.h5ad" />
210 <conditional name="method">
211 <param name="method" value="pp.filter_cells"/>
212 <conditional name="filter">
213 <param name="filter" value="min_features"/>
214 <param name="min_features" value="10"/>
215 </conditional>
216 </conditional>
217 <section name="advanced_common">
218 <param name="show_log" value="true" />
219 </section>
220 <assert_stdout>
221 <has_text_matching expression="395 × 11"/>
222 </assert_stdout>
223 <output name="hidden_output">
224 <assert_contents>
225 <has_text_matching expression="esc.pp.filter_cells"/>
226 <has_text_matching expression="min_features=10"/>
227 </assert_contents>
228 </output>
229 <output name="anndata_out" file="krumsiek11.pp.filter_cells.min_features.h5ad" ftype="h5ad" compare="sim_size"/>
230 </test>
231 <test expect_num_outputs="2">
232 <!-- pp.filter_features -->
233 <param name="adata" value="krumsiek11.h5ad" />
234 <conditional name="method">
235 <param name="method" value="pp.filter_features"/>
236 <conditional name="filter">
237 <param name="filter" value="min_cells"/>
238 <param name="min_cells" value="600"/>
239 </conditional>
240 </conditional>
241 <section name="advanced_common">
242 <param name="show_log" value="true" />
243 </section>
244 <assert_stdout>
245 <has_text_matching expression="640 × 2"/>
246 </assert_stdout>
247 <output name="hidden_output">
248 <assert_contents>
249 <has_text_matching expression="esc.pp.filter_features"/>
250 <has_text_matching expression="min_cells=600"/>
251 </assert_contents>
252 </output>
253 <output name="anndata_out" file="krumsiek11.pp.filter_features.min_cells.h5ad" ftype="h5ad" compare="sim_size"/>
254 </test>
255 <test expect_num_outputs="1">
256 <!-- nb_feat_log -->
257 <param name="adata" value="krumsiek11.pp.filter_cells.min_features.h5ad" />
258 <conditional name="method">
259 <param name="method" value="nb_feat_log"/>
260 </conditional>
261 <assert_stdout>
262 <has_text_matching expression="log_nb_features"/>
263 <has_text_matching expression="nb_features"/>
264 </assert_stdout>
265 </test>
266 <test expect_num_outputs="2">
267 <!-- pp.select_var_feature: this branch saves no plot, so only anndata_out and hidden_output are expected -->
268 <param name="adata" value="krumsiek11.h5ad" />
269 <conditional name="method">
270 <param name="method" value="pp.select_var_feature"/>
271 <param name="min_score" value="0.6"/>
272 <param name="nb_features" value="10"/>
273 </conditional>
274 <section name="advanced_common">
275 <param name="show_log" value="true" />
276 </section>
277 <assert_stdout>
278 <has_text_matching expression="prop_shared_cells"/>
279 <has_text_matching expression="variability_score"/>
280 </assert_stdout>
281 <output name="hidden_output">
282 <assert_contents>
283 <has_text_matching expression="esc.pp.select_var_feature"/>
284 <has_text_matching expression="adata"/>
285 <has_text_matching expression="min_score=0.6"/>
286 <has_text_matching expression="nb_features=10"/>
287 </assert_contents>
288 </output>
289 <output name="anndata_out" file="krumsiek11.pp.select_var_feature.h5ad" ftype="h5ad" compare="sim_size"/>
290 </test>
291 <test expect_num_outputs="3">
292 <!-- pp.cal_var -->
293 <param name="adata" value="krumsiek11.h5ad" />
294 <conditional name="method">
295 <param name="method" value="pp.cal_var"/>
296 </conditional>
297 <section name="advanced_common">
298 <param name="show_log" value="true" />
299 </section>
300 <output name="hidden_output">
301 <assert_contents>
302 <has_text_matching expression="esc.pp.cal_var"/>
303 <has_text_matching expression="adata"/>
304 <has_text_matching expression="plot.png"/>
305 </assert_contents>
306 </output>
307 <output name="anndata_out" file="krumsiek11.pp.cal_var.h5ad" ftype="h5ad" compare="sim_size"/>
308 <output name="out_png" file="krumsiek11.pp.cal_var.png" ftype="png" compare="sim_size"/>
309 </test>
310 <test expect_num_outputs="3">
311 <!-- pp.coverage_cells -->
312 <param name="adata" value="krumsiek11.h5ad" />
313 <conditional name="method">
314 <param name="method" value="pp.coverage_cells"/>
315 <param name="threshold" value="3.0" />
316 <param name="binary" value="True" />
317 </conditional>
318 <section name="advanced_common">
319 <param name="show_log" value="true" />
320 </section>
321 <output name="hidden_output">
322 <assert_contents>
323 <has_text_matching expression="esc.pp.coverage_cells"/>
324 <has_text_matching expression="adata"/>
325 <has_text_matching expression="threshold=3.0"/>
326 </assert_contents>
327 </output>
328 <output name="anndata_out" file="krumsiek11.pp.coverage_cells.h5ad" ftype="h5ad" compare="sim_size"/>
329 <output name="out_png" file="krumsiek11.pp.coverage_cells.png" ftype="png" compare="sim_size"/>
330 </test>
331 <test expect_num_outputs="3">
332 <!-- pp.coverage_features -->
333 <param name="adata" value="krumsiek11.h5ad" />
334 <conditional name="method">
335 <param name="method" value="pp.coverage_features"/>
336 <param name="threshold" value="100" />
337 <param name="binary" value="True" />
338 </conditional>
339 <section name="advanced_common">
340 <param name="show_log" value="true" />
341 </section>
342 <output name="hidden_output">
343 <assert_contents>
344 <has_text_matching expression="esc.pp.coverage_features"/>
345 <has_text_matching expression="adata"/>
346 <has_text_matching expression="threshold=100"/>
347 </assert_contents>
348 </output>
349 <output name="anndata_out" file="krumsiek11.pp.coverage_features.h5ad" ftype="h5ad" compare="sim_size"/>
350 <output name="out_png" file="krumsiek11.pp.coverage_features.png" ftype="png" compare="sim_size"/>
351 </test>
352 <test expect_num_outputs="3">
353 <!-- pp.variability_features -->
354 <param name="adata" value="krumsiek11.h5ad" />
355 <conditional name="method">
356 <param name="method" value="pp.variability_features"/>
357 <param name="min_score" value="0.75" />
358 <param name="nb_features" value="8" />
359 <param name="log_mode" value="log10" />
360 </conditional>
361 <section name="advanced_common">
362 <param name="show_log" value="true" />
363 </section>
364 <output name="hidden_output">
365 <assert_contents>
366 <has_text_matching expression="esc.pp.variability_features"/>
367 <has_text_matching expression="adata"/>
368 <has_text_matching expression="min_score=0.75"/>
369 <has_text_matching expression="nb_features=8"/>
370 </assert_contents>
371 </output>
372 <output name="anndata_out" file="krumsiek11.pp.variability_features.h5ad" ftype="h5ad" compare="sim_size"/>
373 <output name="out_png" file="krumsiek11.pp.variability_features.png" ftype="png" compare="sim_size"/>
374 </test>
375 </tests>
376 <help><![CDATA[
377
378 Convert the count matrix into a binary matrix (`pp.binarize`)
379 ============================================================================================
380 Convert the count matrix into a binary matrix.
381
382 More details on the `episcanpy documentation
383 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.binarize.html>`__
384
385 Filter cell outliers based on counts and numbers of features expressed (`pp.filter_cells`)
386 ============================================================================================
387 For instance, only keep cells with at least *min_counts* counts or *min_features* features expressed.
388 This is to filter measurement outliers, i.e. "unreliable" observations.
389
390 Only provide one of the optional parameters *min_counts*, *min_features*, *max_counts*, *max_features* per call.
391
392 More details on the `episcanpy documentation
393 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.filter_cells.html>`__
394
395 Filter features based on number of cells or counts (`pp.filter_features`)
396 ========================================================================================
397 Keep features that have at least *min_counts* counts or are expressed in at least *min_cells* cells or
398 have at most *max_counts* counts or are expressed in at most *max_cells* cells.
399
400 Only provide one of the optional parameters *min_counts*, *min_cells*, *max_counts*, *max_cells* per call.
401
402 More details on the `episcanpy documentation
403 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.filter_features.html>`__
404
405 Histogram of the number of open features (`pp.coverage_cells`)
406 ========================================================================================
407 Histogram of the number of open features (in the case of ATAC-seq data) per cell.
408
409 More details on the `episcanpy documentation
410 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.coverage_cells.html>`__
411
412 Distribution of the feature commonness in cells (`pp.coverage_features`)
413 ========================================================================================
414 Display how often a feature is measured as open (for ATAC-seq). Distribution of the feature commonness in cells.
415
416 More details on the `episcanpy documentation
417 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.coverage_features.html>`__
418
419 Selects the most variable features according to either a specified number of features or minimum variance score (`pp.select_var_feature`)
420 =========================================================================================================================================
421
422 This function computes a variability score to rank the most variable features across all cells. Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score).
423
424 More details on the `episcanpy documentation
425 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.select_var_feature.html>`__
426
427 Distribution of cells sharing features and variability score (`pp.cal_var`)
428 =============================================================================
429
430 Show distribution plots of cells sharing features and variability score.
431
432 More details on the `episcanpy documentation
433 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.cal_var.html>`__
434
435 Compute a variability score to rank the most variable features across all cells (`pp.variability_features`)
436 ============================================================================================================
437
438 This function computes a variability score to rank the most variable features across all cells. Then it selects the most variable features according to either a specified number of features (nb_features) or a minimum variance score (min_score).
439
440 More details on the `episcanpy documentation
441 <https://colomemaria.github.io/episcanpy_doc/api/episcanpy.api.pp.variability_features.html>`__
442 ]]></help>
443 <expand macro="citations"/>
444 </tool>