comparison inspect.xml @ 1:a755eaa1cc32 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
author iuc
date Wed, 16 Oct 2019 06:31:52 -0400
parents 5d2e17328afe
children 7d22964a8639
comparison
equal deleted inserted replaced
0:5d2e17328afe 1:a755eaa1cc32
1 <tool id="scanpy_inspect" name="Inspect with scanpy" version="@galaxy_version@"> 1 <tool id="scanpy_inspect" name="Inspect and manipulate" version="@galaxy_version@">
2 <description></description> 2 <description> with scanpy</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 <xml name="score_genes_params">
6 <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/>
7 <param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/>
8 <expand macro="param_use_raw"/>
9 </xml>
10 <token name="@CMD_score_genes_inputs@"><![CDATA[
11 n_bins=$method.n_bins,
12 random_state=$method.random_state,
13 use_raw=$method.use_raw,
14 copy=False
15 ]]></token>
16 <xml name="corr_method">
17 <param argument="corr_method" type="select" label="P-value correction method">
18 <option value="benjamini-hochberg">Benjamini-Hochberg</option>
19 <option value="bonferroni">Bonferroni</option>
20 </param>
21 </xml>
22 <xml name="fit_intercept">
23 <param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true"
24 label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""/>
25 </xml>
26 <xml name="max_iter">
27 <param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge" help=""/>
28 </xml>
29 <xml name="multi_class">
30 <param argument="multi_class" type="select" label="Multi class" help="">
31 <option value="ovr">ovr: a binary problem is fit for each label</option>
32 <option value="multinomial">multinomial: the multinomial loss fit across the entire probability distribution, even when the data is binary</option>
33 <option value="auto">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option>
34 </param>
35 </xml>
36 <xml name="penalty">
37 <param argument="penalty" type="select" label="Norm used in the penalization" help="">
38 <option value="l1">l1</option>
39 <option value="l2">l2</option>
40 <option value="customized">customized</option>
41 </param>
42 </xml>
43 <xml name="custom_penalty">
44 <param argument="pen" type="text" value="" label="Norm used in the penalization" help=""/>
45 </xml>
46 <xml name="random_state">
47 <param argument="random_state" type="integer" value="" optional="true"
48 label="The seed of the pseudo random number generator to use when shuffling the data" help=""/>
49 </xml>
5 </macros> 50 </macros>
6 <expand macro="requirements"/> 51 <expand macro="requirements"/>
7 <expand macro="version_command"/> 52 <expand macro="version_command"/>
8 <command detect_errors="exit_code"><![CDATA[ 53 <command detect_errors="exit_code"><![CDATA[
9 @CMD@ 54 @CMD@
11 <configfiles> 56 <configfiles>
12 <configfile name="script_file"><![CDATA[ 57 <configfile name="script_file"><![CDATA[
13 @CMD_imports@ 58 @CMD_imports@
14 @CMD_read_inputs@ 59 @CMD_read_inputs@
15 60
16 #if $method.method == "tl.paga" 61 #if $method.method == "pp.calculate_qc_metrics"
17 sc.tl.paga( 62 sc.pp.calculate_qc_metrics(
18 adata=adata, 63 adata=adata,
19 groups='$method.groups', 64 expr_type='$method.expr_type',
20 use_rna_velocity =$method.use_rna_velocity, 65 var_type='$method.var_type',
21 model='$method.model', 66 #if str($method.qc_vars) != ''
67 #set $qc_vars = [str(x.strip()) for x in str($method.qc_vars).split(',')]
68 qc_vars=$qc_vars,
69 #end if
70 #if str($method.percent_top) != ''
71 #set $percent_top = [int(x.strip()) for x in str($method.percent_top).split(',')]
72 percent_top=$percent_top,
73 #end if
74 inplace=True)
75
76 #else if $method.method == "tl.score_genes"
77 sc.tl.score_genes(
78 adata=adata,
79 #set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')]
80 gene_list=$gene_list,
81 ctrl_size=$method.ctrl_size,
82 score_name='$method.score_name',
83 #if $method.gene_pool
84 #set $gene_pool = [str(x.strip()) for x in $method.gene_pool.split(',')]
85 gene_pool=$gene_pool,
86 #end if
87 @CMD_score_genes_inputs@)
88
89 #else if $method.method == "tl.score_genes_cell_cycle"
90 #if str($method.s_genes.format) == 'file'
91 with open('$method.s_genes.file', 'r') as s_genes_f:
92 s_genes = [str(x.strip()) for x in s_genes_f.readlines()]
93 print(s_genes)
94 #end if
95
96 #if str($method.g2m_genes.format) == 'file'
97 with open('$method.g2m_genes.file', 'r') as g2m_genes_f:
98 g2m_genes = [str(x.strip()) for x in g2m_genes_f.readlines()]
99 print(g2m_genes)
100 #end if
101
102 sc.tl.score_genes_cell_cycle(
103 adata=adata,
104 #if str($method.s_genes.format) == 'text'
105 #set $s_genes = [str(x.strip()) for x in $method.s_genes.text.split(',')]
106 s_genes=$s_genes,
107 #else if str($method.s_genes.format) == 'file'
108 s_genes=s_genes,
109 #end if
110 #if str($method.g2m_genes.format) == 'text'
111 #set $g2m_genes = [str(x.strip()) for x in $method.g2m_genes.text.split(',')]
112 g2m_genes=$g2m_genes,
113 #else if str($method.g2m_genes.format) == 'file'
114 g2m_genes=g2m_genes,
115 #end if
116 @CMD_score_genes_inputs@)
117
118 #else if $method.method == 'pp.neighbors'
119 sc.pp.neighbors(
120 adata=adata,
121 n_neighbors=$method.n_neighbors,
122 #if str($method.n_pcs) != ''
123 n_pcs=$method.n_pcs,
124 #end if
125 #if str($method.use_rep) != ''
126 use_rep='$method.use_rep',
127 #end if
128 knn=$method.knn,
129 random_state=$method.random_state,
130 method='$method.pp_neighbors_method',
131 metric='$method.metric',
22 copy=False) 132 copy=False)
23 #elif $method.method == "tl.dpt" 133
24 sc.tl.dpt( 134 #else if $method.method == 'tl.rank_genes_groups'
135 sc.tl.rank_genes_groups(
25 adata=adata, 136 adata=adata,
26 n_dcs=$method.n_dcs, 137 groupby='$method.groupby',
27 n_branchings=$method.n_branchings, 138 use_raw=$method.use_raw,
28 min_group_size=$method.min_group_size, 139 #if str($method.groups) != ''
29 allow_kendall_tau_shift=$method.allow_kendall_tau_shift, 140 #set $group=[x.strip() for x in str($method.groups).split(',')]
141 groups=$group,
142 #end if
143 #if $method.ref.rest == 'rest'
144 reference='$method.ref.rest',
145 #else
146 reference='$method.ref.reference',
147 #end if
148 n_genes=$method.n_genes,
149 method='$method.tl_rank_genes_groups_method.method',
150 #if $method.tl_rank_genes_groups_method.method != 'logreg'
151 corr_method='$method.tl_rank_genes_groups_method.corr_method',
152 #else
153 solver='$method.tl_rank_genes_groups_method.solver.solver',
154 #if $method.tl_rank_genes_groups_method.solver.solver == 'newton-cg'
155 penalty='l2',
156 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
157 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
158 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
159 #else if $method.tl_rank_genes_groups_method.solver.solver == 'lbfgs'
160 penalty='l2',
161 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
162 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
163 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
164 #else if $method.tl_rank_genes_groups_method.solver.solver == 'liblinear'
165 #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
166 penalty='l1',
167 #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
168 penalty='l2',
169 dual=$method.tl_rank_genes_groups_method.solver.penalty.dual,
170 #else
171 penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
172 #end if
173 fit_intercept=$method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept,
174 #if $method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept == 'True'
175 intercept_scaling=$method.tl_rank_genes_groups_method.solver.intercept_scaling.intercept_scaling,
176 #end if
177 #if $method.tl_rank_genes_groups_method.solver.random_state
178 random_state=$method.tl_rank_genes_groups_method.solver.random_state,
179 #end if
180 #else if $method.tl_rank_genes_groups_method.solver.solver == 'sag'
181 penalty='l2',
182 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
183 #if $method.tl_rank_genes_groups_method.solver.random_state
184 random_state=$method.tl_rank_genes_groups_method.solver.random_state,
185 #end if
186 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
187 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
188 #else if $method.tl_rank_genes_groups_method.solver.solver == 'saga'
189 #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
190 penalty='l1',
191 #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
192 penalty='l2',
193 #else
194 penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
195 #end if
196 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
197 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
198 #end if
199 tol=$method.tl_rank_genes_groups_method.tol,
200 C=$method.tl_rank_genes_groups_method.c,
201 #end if
202 only_positive=$method.only_positive)
203
204 #else if $method.method == "tl.marker_gene_overlap"
205 reference_markers = {}
206 #for $i, $s in enumerate($method.reference_markers)
207 #set $list=[x.strip() for x in str($s.values).split(',')]
208 reference_markers['$s.key'] = $list
209 #end for
210
211 sc.tl.marker_gene_overlap(
212 adata,
213 reference_markers,
214 #if str($method.key) != ''
215 key='$method.key',
216 #end if
217 method='$method.overlap.method',
218 #if $method.overlap.method == 'overlap_count' and str($method.overlap.normalize) != 'None'
219 normalize='$method.overlap.normalize',
220 #end if
221 #if str($method.top_n_markers) != ''
222 top_n_markers=$method.top_n_markers,
223 #end if
224 #if str($method.adj_pval_threshold) != ''
225 adj_pval_threshold=$method.adj_pval_threshold,
226 #end if
227 #if str($method.key_added) != ''
228 key_added='$method.key_added',
229 #end if
230 inplace=True)
231
232 #else if $method.method == "pp.log1p"
233 sc.pp.log1p(
234 data=adata,
30 copy=False) 235 copy=False)
31 adata.obs.to_csv('$obs', sep='\t') 236
237 #else if $method.method == "pp.scale"
238 sc.pp.scale(
239 data=adata,
240 zero_center=$method.zero_center,
241 #if $method.max_value
242 max_value=$method.max_value,
243 #end if
244 copy=False)
245
246 #else if $method.method == "pp.sqrt"
247 sc.pp.sqrt(
248 data=adata,
249 copy=False)
32 #end if 250 #end if
33 251
34 @CMD_anndata_write_outputs@ 252 @CMD_anndata_write_outputs@
35 ]]></configfile> 253 ]]></configfile>
36 </configfiles> 254 </configfiles>
37 <inputs> 255 <inputs>
38 <expand macro="inputs_anndata"/> 256 <expand macro="inputs_anndata"/>
39 <conditional name="method"> 257 <conditional name="method">
40 <param argument="method" type="select" label="Method used for plotting"> 258 <param argument="method" type="select" label="Method used for inspecting">
41 <!--<option value="tl.paga_compare_paths">, using `tl.paga_compare_paths`</option>!--> 259 <option value="pp.calculate_qc_metrics">Calculate quality control metrics, using `pp.calculate_qc_metrics`</option>
42 <!--<option value="tl.paga_degrees">, using `tl.paga_degrees`</option>!--> 260 <option value="pp.neighbors">Compute a neighborhood graph of observations, using `pp.neighbors`</option>
43 <!--<option value="tl.paga_expression_entropies">, using `tl.paga_expression_entropies`</option>!--> 261 <option value="tl.score_genes">Score a set of genes, using `tl.score_genes`</option>
44 <option value="tl.paga">Generate cellular maps of differentiation manifolds with complex topologies, using `tl.paga`</option> 262 <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using `tl.score_genes_cell_cycle`</option>
45 <option value="tl.dpt">Infer progression of cells through geodesic distance along the graph, using `tl.dpt`</option> 263 <option value="tl.rank_genes_groups">Rank genes for characterizing groups, using `tl.rank_genes_groups`</option>
264 <!--<option value="tl.marker_gene_overlap">Calculate an overlap score between data-derived marker genes and provided markers, using `tl.marker_gene_overlap`</option>-->
265 <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option>
266 <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option>
267 <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option>
46 </param> 268 </param>
47 <when value="tl.paga"> 269 <when value="pp.calculate_qc_metrics">
48 <param argument="groups" type="text" value="louvain" label="Key for categorical in the input" help="You can pass your predefined groups by choosing any categorical annotation of observations (`adata.obs`)."/> 270 <param argument="expr_type" type="text" value="counts" label="Name of kind of values in X"/>
49 <param argument="use_rna_velocity" type="boolean" truevalue="False" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that `adata.uns` contains a directed single-cell graph with key `['velocyto_transitions']`. This feature might be subject to change in the future."/> 271 <param argument="var_type" type="text" value="genes" label="The kind of thing the variables are"/>
50 <param argument="model" type="select" label="PAGA connectivity model" help=""> 272 <param argument="qc_vars" type="text" value="" label="Keys for boolean columns of `.var` which identify variables you could want to control for"
51 <option value="v1.2">v1.2</option> 273 help="Keys separated by a comma"/>
52 <option value="v1.0">v1.0</option> 274 <param argument="percent_top" type="text" value="" label="Proportions of top genes to cover"
275 help=" Values (integers) are considered 1-indexed, `50` finds cumulative proportion to the 50th most expressed genes. Values separated by a comma.
276 If empty don't calculate"/>
277 </when>
278 <when value="pp.neighbors">
279 <param argument="n_neighbors" type="integer" min="0" value="15" label="The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation" help="Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If `knn` is `True`, number of nearest neighbors to be searched. If `knn` is `False`, a Gaussian kernel width is set to the distance of the `n_neighbors` neighbor."/>
280 <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/>
281 <param argument="use_rep" type="text" value="" optional="true" label="Indicated representation to use" help="If not set, the representation is chosen automatically: for n_vars below 50, X is used, otherwise X_pca (uns) is used. If X_pca is not present, it's computed with default parameter"/>
282 <param argument="knn" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use a hard threshold to restrict the number of neighbors to n_neighbors?" help="If true, it considers a knn graph. Otherwise, it uses a Gaussian Kernel to assign low weights to neighbors more distant than the `n_neighbors` nearest neighbor."/>
283 <param argument="random_state" type="integer" value="0" label="Numpy random seed" help=""/>
284 <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities" help="">
285 <option value="umap">umap (McInnes et al, 2018)</option>
286 <option value="gauss">gauss: Gauss kernel following (Coifman et al 2005) with adaptive width (Haghverdi et al 2016)</option>
287 </param>
288 <param argument="metric" type="select" label="Distance metric" help="">
289 <expand macro="distance_metric_options"/>
53 </param> 290 </param>
54 </when> 291 </when>
55 <when value="tl.dpt"> 292 <when value="tl.score_genes">
56 <param argument="n_dcs" type="integer" min="0" value="10" label="Number of diffusion components to use" help=""/> 293 <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"/>
57 <param argument="n_branchings" type="integer" min="0" value="0" label="Number of branchings to detect" help=""/> 294 <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled"
58 <param argument="min_group_size" type="float" min="0" value="0.01" label="Min group size" help="During recursive splitting of branches ('dpt groups') for `n_branchings` &gt; 1, do not consider groups that contain less than `min_group_size` data points. If a float, `min_group_size` refers to a fraction of the total number of data points."/> 295 help="If `len(gene_list)` is not too low, you can set `ctrl_size=len(gene_list)`."/>
59 <param argument="allow_kendall_tau_shift" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Allow Kendal tau shift?" help="If a very small branch is detected upon splitting, shift away from maximum correlation in Kendall tau criterion of Haghverdi et al (2016) to stabilize the splitting."/> 296 <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set"
297 help="Default is all genes. Genes separated by a comma"/>
298 <expand macro="score_genes_params"/>
299 <param argument="score_name" type="text" value="score" label="Name of the field to be added in `.obs`" help=""/>
60 </when> 300 </when>
301 <when value="tl.score_genes_cell_cycle">
302 <conditional name='s_genes'>
303 <param name="format" type="select" label="Format for the list of genes associated with S phase">
304 <option value="file">File</option>
305 <option value="text" selected="true">Text</option>
306 </param>
307 <when value="text">
308 <param name="text" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma"/>
309 </when>
310 <when value="file">
311 <param name="file" type="data" format="txt" label="File with the list of genes associated with S phase" help="One gene per line"/>
312 </when>
313 </conditional>
314 <conditional name='g2m_genes'>
315 <param name="format" type="select" label="Format for the list of genes associated with G2M phase">
316 <option value="file">File</option>
317 <option value="text" selected="true">Text</option>
318 </param>
319 <when value="text">
320 <param name="text" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma"/>
321 </when>
322 <when value="file">
323 <param name="file" type="data" format="txt" label="File with the list of genes associated with G2M phase" help="One gene per line"/>
324 </when>
325 </conditional>
326 <expand macro="score_genes_params"/>
327 </when>
328 <when value="tl.rank_genes_groups">
329 <param argument="groupby" type="text" value="" label="The key of the observations grouping to consider" help=""/>
330 <expand macro="param_use_raw"/>
331 <param argument="groups" type="text" value="" label="Subset of groups to which comparison shall be restricted" help="Group names separated by a comma, e.g. g1, g2, g3. If not passed, a ranking will be generated for all groups."/>
332 <conditional name="ref">
333 <param name="rest" type="select" label="Comparison">
334 <option value="rest">Compare each group to the union of the rest of the group</option>
335 <option value="group_id">Compare with respect to a specific group</option>
336 </param>
337 <when value="rest"/>
338 <when value="group_id">
339 <param argument="reference" type="text" value="" label="Group identifier with respect to which compare"/>
340 </when>
341 </conditional>
342 <param argument="n_genes" type="integer" min="0" value="100" label="The number of genes that appear in the returned tables" help=""/>
343 <conditional name="tl_rank_genes_groups_method">
344 <param argument="method" type="select" label="Method">
345 <option value="t-test">t-test</option>
346 <option value="wilcoxon">Wilcoxon-Rank-Sum</option>
347 <option value="t-test_overestim_var" selected="true">t-test with overestimate of variance of each group</option>
348 <option value="logreg">Logistic regression</option>
349 </param>
350 <when value="t-test">
351 <expand macro="corr_method"/>
352 </when>
353 <when value="wilcoxon">
354 <expand macro="corr_method"/>
355 </when>
356 <when value="t-test_overestim_var">
357 <expand macro="corr_method"/>
358 </when>
359 <when value="logreg">
360 <conditional name="solver">
361 <param argument="solver" type="select" label="Algorithm to use in the optimization problem" help="For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and ‘saga’ are faster for large ones. For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes. ‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas ‘liblinear’ and ‘saga’ handle L1 penalty.">
362 <option value="newton-cg">newton-cg</option>
363 <option value="lbfgs">lbfgs</option>
364 <option value="liblinear">liblinear</option>
365 <option value="sag">sag</option>
366 <option value="saga">saga</option>
367 </param>
368 <when value="newton-cg">
369 <expand macro="fit_intercept"/>
370 <expand macro="max_iter"/>
371 <expand macro="multi_class"/>
372 </when>
373 <when value="lbfgs">
374 <expand macro="fit_intercept"/>
375 <expand macro="max_iter"/>
376 <expand macro="multi_class"/>
377 </when>
378 <when value="liblinear">
379 <conditional name="penalty">
380 <expand macro="penalty"/>
381 <when value="l1"/>
382 <when value="l2">
383 <param argument="dual" type="boolean" truevalue="True" falsevalue="False" checked="false"
384 label="Dual (not primal) formulation?" help="Prefer primal when n_samples > n_features"/>
385 </when>
386 <when value="customized">
387 <expand macro="custom_penalty"/>
388 </when>
389 </conditional>
390 <conditional name="intercept_scaling">
391 <param argument="fit_intercept" type="select"
392 label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help="">
393 <option value="True">Yes</option>
394 <option value="False">No</option>
395 </param>
396 <when value="True">
397 <param argument="intercept_scaling" type="float" value="1.0"
398 label="Intercept scaling"
399 help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/>
400 </when>
401 <when value="False"/>
402 </conditional>
403 <expand macro="random_state"/>
404 </when>
405 <when value="sag">
406 <expand macro="fit_intercept"/>
407 <expand macro="random_state"/>
408 <expand macro="max_iter"/>
409 <expand macro="multi_class"/>
410 </when>
411 <when value="saga">
412 <conditional name="penalty">
413 <expand macro="penalty"/>
414 <when value="l1"/>
415 <when value="l2"/>
416 <when value="customized">
417 <expand macro="custom_penalty"/>
418 </when>
419 </conditional>
420 <expand macro="fit_intercept"/>
421 <expand macro="multi_class"/>
422 </when>
423 </conditional>
424 <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria" help=""/>
425 <param argument="c" type="float" value="1.0" label="Inverse of regularization strength"
426 help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/>
427 </when>
428 </conditional>
429 <param argument="only_positive" type="boolean" truevalue="True" falsevalue="False" checked="true"
430 label="Only consider positive differences?" help=""/>
431 </when>
432 <!--<when value="tl.marker_gene_overlap">
433 <repeat name="reference_markers" title="Marker genes">
434 <param name="key" type="text" value="" label="Cell identity name" help=""/>
435 <param name="values" type="text" value="" label="List of genes" help="Comma-separated names from `var`"/>
436 </repeat>
437 <param argument="key" type="text" value="rank_genes_groups" label="Key in adata.uns where the rank_genes_groups output is stored"/>
438 <conditional name="overlap">
439 <param argument="method" type="select" label="Method to calculate marker gene overlap">
440 <option value="overlap_count">overlap_count: Intersection of the gene set</option>
441 <option value="overlap_coef">overlap_coef: Overlap coefficient</option>
442 <option value="jaccard">jaccard: Jaccard index</option>
443 </param>
444 <when value="overlap_count">
445 <param argument="normalize" type="select" label="Normalization option for the marker gene overlap output">
446 <option value="None">None</option>
447 <option value="reference">reference: Normalization of the data by the total number of marker genes given in the reference annotation per group</option>
448 <option value="data">data: Normalization of the data by the total number of marker genes used for each cluster</option>
449 </param>
450 </when>
451 <when value="overlap_coef"/>
452 <when value="jaccard"/>
453 </conditional>
454 <param argument="top_n_markers" type="integer" optional="true" label="Number of top data-derived marker genes to use" help="By default all calculated marker genes are used. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
455 <param argument="adj_pval_threshold" type="float" optional="true" label="Significance threshold on the adjusted p-values to select marker genes" help=" This can only be used when adjusted p-values are calculated by 'tl.rank_genes_groups'. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
456 <param argument="key_added" type="text" value="" optional="true" label="Key that will contain the marker overlap scores in 'uns'"/>
457 </when>-->
458 <when value="pp.log1p"/>
459 <when value="pp.scale">
460 <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
461 label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/>
462 <param argument="max_value" type="float" value="" optional="true" label="Maximum value"
463 help="Clip (truncate) to this value after scaling. If not set, it does not clip."/>
464 </when>
465 <when value="pp.sqrt"/>
61 </conditional> 466 </conditional>
62 <expand macro="anndata_output_format"/>
63 </inputs> 467 </inputs>
64 <outputs> 468 <outputs>
65 <expand macro="anndata_outputs"/> 469 <expand macro="anndata_outputs"/>
66 <data name="obs" format="tabular" label="${tool.name} on ${on_string}: Observations annotation">
67 <filter>method['method'] == 'tl.dpt'</filter>
68 </data>
69 </outputs> 470 </outputs>
70 <tests> 471 <tests>
71 <test> 472 <test>
72 <conditional name="input"> 473 <!-- test 1 -->
73 <param name="format" value="h5ad" /> 474 <param name="adata" value="sparce_csr_matrix.h5ad" />
74 <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" /> 475 <conditional name="method">
75 </conditional> 476 <param name="method" value="pp.calculate_qc_metrics"/>
76 <conditional name="method"> 477 <param name="expr_type" value="counts"/>
77 <param name="method" value="tl.paga"/> 478 <param name="var_type" value="genes"/>
78 <param name="groups" value="paul15_clusters"/> 479 <param name="qc_vars" value="mito,negative"/>
79 <param name="use_rna_velocity" value="False"/> 480 <param name="percent_top" value=""/>
80 <param name="model" value="v1.2"/> 481 </conditional>
81 </conditional> 482 <assert_stdout>
82 <param name="anndata_output_format" value="h5ad" /> 483 <has_text_matching expression="sc.pp.calculate_qc_metrics" />
83 <assert_stdout> 484 <has_text_matching expression="expr_type='counts'" />
84 <has_text_matching expression="sc.tl.paga"/> 485 <has_text_matching expression="var_type='genes'" />
85 <has_text_matching expression="groups='paul15_clusters'"/> 486 <has_text_matching expression="qc_vars=\['mito', 'negative'\]" />
86 <has_text_matching expression="use_rna_velocity =False"/> 487 </assert_stdout>
87 <has_text_matching expression="model='v1.2'"/> 488 <output name="anndata_out" file="pp.calculate_qc_metrics.sparce_csr_matrix.h5ad" ftype="h5ad" compare="sim_size"/>
88 </assert_stdout> 489 </test>
89 <output name="anndata_out_h5ad" file="tl.paga.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"> 490 <test>
491 <!-- test 2 -->
492 <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" />
493 <conditional name="method">
494 <param name="method" value="pp.neighbors"/>
495 <param name="n_neighbors" value="15"/>
496 <param name="knn" value="True"/>
497 <param name="random_state" value="0"/>
498 <param name="pp_neighbors_method" value="umap"/>
499 <param name="metric" value="euclidean"/>
500 </conditional>
501 <assert_stdout>
502 <has_text_matching expression="sc.pp.neighbors"/>
503 <has_text_matching expression="n_neighbors=15"/>
504 <has_text_matching expression="knn=True"/>
505 <has_text_matching expression="random_state=0"/>
506 <has_text_matching expression="method='umap'"/>
507 <has_text_matching expression="metric='euclidean'"/>
508 </assert_stdout>
509 <output name="anndata_out" file="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size">
90 <assert_contents> 510 <assert_contents>
91 <has_h5_keys keys="X, obs, obsm, uns, var" /> 511 <has_h5_keys keys="X, obs, obsm, uns, var" />
92 </assert_contents> 512 </assert_contents>
93 </output> 513 </output>
94 </test> 514 </test>
95 <test> 515 <test>
96 <conditional name="input"> 516 <!-- test 3 -->
97 <param name="format" value="h5ad" /> 517 <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" />
98 <param name="adata" value="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" /> 518 <conditional name="method">
99 </conditional> 519 <param name="method" value="pp.neighbors"/>
100 <conditional name="method"> 520 <param name="n_neighbors" value="15"/>
101 <param name="method" value="tl.dpt"/> 521 <param name="knn" value="True"/>
102 <param name="n_dcs" value="15"/> 522 <param name="pp_neighbors_method" value="gauss"/>
103 <param name="n_branchings" value="1"/> 523 <param name="metric" value="braycurtis"/>
104 <param name="min_group_size" value="0.01"/> 524 </conditional>
105 <param name="allow_kendall_tau_shift" value="True"/> 525 <assert_stdout>
106 </conditional> 526 <has_text_matching expression="sc.pp.neighbors"/>
107 <param name="anndata_output_format" value="h5ad" /> 527 <has_text_matching expression="n_neighbors=15"/>
108 <assert_stdout> 528 <has_text_matching expression="knn=True"/>
109 <has_text_matching expression="sc.tl.dpt"/> 529 <has_text_matching expression="random_state=0"/>
110 <has_text_matching expression="n_dcs=15"/> 530 <has_text_matching expression="method='gauss'"/>
111 <has_text_matching expression="n_branchings=1"/> 531 <has_text_matching expression="metric='braycurtis'"/>
112 <has_text_matching expression="min_group_size=0.01"/> 532 </assert_stdout>
113 <has_text_matching expression="allow_kendall_tau_shift=True"/> 533 <output name="anndata_out" file="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
114 </assert_stdout> 534 </test>
115 <output name="anndata_out_h5ad" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"> 535 <test>
536 <!-- test 4 -->
537 <param name="adata" value="krumsiek11.h5ad" />
538 <conditional name="method">
539 <param name="method" value="tl.score_genes"/>
540 <param name="gene_list" value="Gata2, Fog1"/>
541 <param name="ctrl_size" value="2"/>
542 <param name="n_bins" value="2"/>
543 <param name="random_state" value="2"/>
544 <param name="use_raw" value="False"/>
545 <param name="score_name" value="score"/>
546 </conditional>
547 <assert_stdout>
548 <has_text_matching expression="sc.tl.score_genes" />
549 <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" />
550 <has_text_matching expression="ctrl_size=2" />
551 <has_text_matching expression="score_name='score'" />
552 <has_text_matching expression="n_bins=2" />
553 <has_text_matching expression="random_state=2" />
554 <has_text_matching expression="use_raw=False" />
555 <has_text_matching expression="copy=False" />
556 </assert_stdout>
557 <output name="anndata_out" file="tl.score_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
558 </test>
559 <test>
560 <!-- test 5 -->
561 <param name="adata" value="krumsiek11.h5ad" />
562 <conditional name="method">
563 <param name="method" value="tl.score_genes_cell_cycle"/>
564 <conditional name='s_genes'>
565 <param name="format" value="text"/>
566 <param name="text" value="Gata2, Fog1, EgrNab"/>
567 </conditional>
568 <conditional name='g2m_genes'>
569 <param name="format" value="text"/>
570 <param name="text" value="Gata2, Fog1, EgrNab"/>
571 </conditional>
572 <param name="n_bins" value="2"/>
573 <param name="random_state" value="1"/>
574 <param name="use_raw" value="False"/>
575 </conditional>
576 <assert_stdout>
577 <has_text_matching expression="sc.tl.score_genes_cell_cycle"/>
578 <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
579 <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
580 <has_text_matching expression="n_bins=2"/>
581 <has_text_matching expression="random_state=1"/>
582 <has_text_matching expression="use_raw=False"/>
583 </assert_stdout>
584 <output name="anndata_out" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
585 </test>
586 <test>
587 <!-- test 6 -->
588 <param name="adata" value="krumsiek11.h5ad" />
589 <conditional name="method">
590 <param name="method" value="tl.rank_genes_groups"/>
591 <param name="groupby" value="cell_type"/>
592 <param name="use_raw" value="True"/>
593 <conditional name="ref">
594 <param name="rest" value="rest"/>
595 </conditional>
596 <param name="n_genes" value="100"/>
597 <conditional name="tl_rank_genes_groups_method">
598 <param name="method" value="t-test_overestim_var"/>
599 <param name="corr_method" value="benjamini-hochberg"/>
600 </conditional>
601 <param name="only_positive" value="true"/>
602 </conditional>
603 <assert_stdout>
604 <has_text_matching expression="sc.tl.rank_genes_groups"/>
605 <has_text_matching expression="groupby='cell_type'"/>
606 <has_text_matching expression="use_raw=True"/>
607 <has_text_matching expression="reference='rest'"/>
608 <has_text_matching expression="n_genes=100"/>
609 <has_text_matching expression="method='t-test_overestim_var'"/>
610 <has_text_matching expression="corr_method='benjamini-hochberg'"/>
611 <has_text_matching expression="only_positive=True"/>
612 </assert_stdout>
613 <output name="anndata_out" file="tl.rank_genes_groups.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
614 </test>
615 <test>
616 <!-- test 7 -->
617 <param name="adata" value="pbmc68k_reduced.h5ad" />
618 <conditional name="method">
619 <param name="method" value="tl.rank_genes_groups"/>
620 <param name="groupby" value="louvain"/>
621 <param name="use_raw" value="True"/>
622 <conditional name="ref">
623 <param name="rest" value="rest"/>
624 </conditional>
625 <param name="n_genes" value="100"/>
626 <conditional name="tl_rank_genes_groups_method">
627 <param name="method" value="logreg"/>
628 <conditional name="solver">
629 <param name="solver" value="newton-cg"/>
630 <param name="fit_intercept" value="True"/>
631 <param name="max_iter" value="100"/>
632 <param name="multi_class" value="auto"/>
633 </conditional>
634 <param name="tol" value="1e-4"/>
635 <param name="c" value="1.0"/>
636 </conditional>
637 <param name="only_positive" value="true"/>
638 </conditional>
639 <assert_stdout>
640 <has_text_matching expression="sc.tl.rank_genes_groups"/>
641 <has_text_matching expression="groupby='louvain'"/>
642 <has_text_matching expression="use_raw=True"/>
643 <has_text_matching expression="reference='rest'"/>
644 <has_text_matching expression="n_genes=100"/>
645 <has_text_matching expression="method='logreg'"/>
646 <has_text_matching expression="solver='newton-cg'"/>
647 <has_text_matching expression="penalty='l2'"/>
648 <has_text_matching expression="fit_intercept=True"/>
649 <has_text_matching expression="max_iter=100"/>
650 <has_text_matching expression="multi_class='auto'"/>
651 <has_text_matching expression="tol=0.0001"/>
652 <has_text_matching expression="C=1.0"/>
653 <has_text_matching expression="only_positive=True"/>
654 </assert_stdout>
655 <output name="anndata_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size">
116 <assert_contents> 656 <assert_contents>
117 <has_h5_keys keys="X, obs, obsm, uns, var" /> 657 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" />
118 </assert_contents> 658 </assert_contents>
119 </output> 659 </output>
120 <output name="obs" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.obs.tabular" compare="sim_size"/> 660 </test>
661 <test>
662 <!-- test 8 -->
663 <param name="adata" value="pbmc68k_reduced.h5ad" />
664 <conditional name="method">
665 <param name="method" value="tl.rank_genes_groups"/>
666 <param name="groupby" value="louvain"/>
667 <param name="use_raw" value="True"/>
668 <conditional name="ref">
669 <param name="rest" value="rest"/>
670 </conditional>
671 <param name="n_genes" value="100"/>
672 <conditional name="tl_rank_genes_groups_method">
673 <param name="method" value="logreg"/>
674 <conditional name="solver">
675 <param name="solver" value="liblinear"/>
676 <conditional name="penalty">
677 <param name="penalty" value="l2"/>
678 <param name="dual" value="False"/>
679 <conditional name="intercept_scaling">
680 <param name="fit_intercept" value="True"/>
681 <param name="intercept_scaling" value="1.0" />
682 </conditional>
683 <param name="random_state" value="1"/>
684 </conditional>
685 </conditional>
686 <param name="tol" value="1e-4"/>
687 <param name="c" value="1.0"/>
688 </conditional>
689 <param name="only_positive" value="true"/>
690 </conditional>
691 <assert_stdout>
692 <has_text_matching expression="sc.tl.rank_genes_groups"/>
693 <has_text_matching expression="groupby='louvain'"/>
694 <has_text_matching expression="use_raw=True"/>
695 <has_text_matching expression="reference='rest'"/>
696 <has_text_matching expression="n_genes=100"/>
697 <has_text_matching expression="method='logreg'"/>
698 <has_text_matching expression="solver='liblinear'"/>
699 <has_text_matching expression="penalty='l2'"/>
700 <has_text_matching expression="dual=False"/>
701 <has_text_matching expression="fit_intercept=True"/>
702 <has_text_matching expression="intercept_scaling=1.0"/>
703 <has_text_matching expression="tol=0.0001"/>
704 <has_text_matching expression="C=1.0"/>
705 <has_text_matching expression="only_positive=True"/>
706 </assert_stdout>
707 <output name="anndata_out" file="tl.rank_genes_groups.liblinear.krumsiek11.h5ad" ftype="h5ad" compare="sim_size">
708 <assert_contents>
709 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" />
710 </assert_contents>
711 </output>
712 </test>
713 <!--<test>
714 < test 9 >
715 <param name="adata" value="tl.rank_genes_groups.louvain.neighbors.pca.pbmc68k_reduced.h5ad" />
716 <conditional name="method">
717 <param name="method" value="tl.marker_gene_overlap"/>
718 <repeat name="reference_markers">
719 <param name="key" value="CD4 T cells"/>
720 <param name="value" value="IL7R"/>
721 </repeat>
722 <repeat name="reference_markers">
723 <param name="key" value="CD14+ Monocytes"/>
724 <param name="value" value="CD14,LYZ"/>
725 </repeat>
726 <repeat name="reference_markers">
727 <param name="key" value="B cells"/>
728 <param name="value" value="MS4A1"/>
729 </repeat>
730 <conditional name="overlap">
731 <param argument="method" value="overlap_count"/>
732 <param argument="normalize" value="None"/>
733 </conditional>
734 </conditional>
735 <assert_stdout>
736 <has_text_matching expression="tl.marker_gene_overlap"/>
737 <has_text_matching expression="key='rank_genes_groups'"/>
738 <has_text_matching expression="method='overlap_count'"/>
739 </assert_stdout>
740 <output name="anndata_out" file="pp.log1p.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
741 </test>-->
742 <test>
743 <!-- test 9 -->
744 <param name="adata" value="krumsiek11.h5ad" />
745 <conditional name="method">
746 <param name="method" value="pp.log1p"/>
747 </conditional>
748 <assert_stdout>
749 <has_text_matching expression="sc.pp.log1p"/>
750 </assert_stdout>
751 <output name="anndata_out" file="pp.log1p.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
752 </test>
753 <test>
754 <!-- test 10 -->
755 <param name="adata" value="krumsiek11.h5ad" />
756 <conditional name="method">
757 <param name="method" value="pp.scale"/>
758 <param name="zero_center" value="true"/>
759 </conditional>
760 <assert_stdout>
761 <has_text_matching expression="sc.pp.scale"/>
762 <has_text_matching expression="zero_center=True"/>
763 </assert_stdout>
764 <output name="anndata_out" file="pp.scale.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
765 </test>
766 <test>
767 <!-- test 11 -->
768 <param name="adata" value="krumsiek11.h5ad" />
769 <conditional name="method">
770 <param name="method" value="pp.scale"/>
771 <param name="zero_center" value="true"/>
772 <param name="max_value" value="10"/>
773 </conditional>
774 <assert_stdout>
775 <has_text_matching expression="sc.pp.scale"/>
776 <has_text_matching expression="zero_center=True"/>
777 <has_text_matching expression="max_value=10.0"/>
778 </assert_stdout>
779 <output name="anndata_out" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
780 </test>
781 <test>
782 <!-- test 12 -->
783 <param name="adata" value="krumsiek11.h5ad" />
784 <conditional name="method">
785 <param name="method" value="pp.sqrt"/>
786 </conditional>
787 <assert_stdout>
788 <has_text_matching expression="sc.pp.sqrt"/>
789 </assert_stdout>
790 <output name="anndata_out" file="pp.sqrt.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
121 </test> 791 </test>
122 </tests> 792 </tests>
123 <help><![CDATA[ 793 <help><![CDATA[
124 Generate cellular maps of differentiation manifolds with complex topologies (`tl.paga`) 794 Calculate quality control metrics, using `pp.calculate_qc_metrics`
125 ======================================================================================= 795 ===================================================================
126 796
127 By quantifying the connectivity of partitions (groups, clusters) of the 797 Calculates a number of qc metrics for an AnnData object, largely based on calculateQCMetrics from scater.
128 single-cell graph, partition-based graph abstraction (PAGA) generates a much 798 Currently is most efficient on a sparse CSR or dense matrix.
129 simpler abstracted graph (*PAGA graph*) of partitions, in which edge weights 799
130 represent confidence in the presence of connections. By tresholding this 800 It updates the observation level metrics:
131 confidence in `paga`, a much simpler representation of data 801
132 can be obtained. 802 - total_{var_type}_by_{expr_type} (e.g. "total_genes_by_counts", number of genes with positive counts in a cell)
133 803 - total_{expr_type} (e.g. "total_counts", total number of counts for a cell)
134 The confidence can be interpreted as the ratio of the actual versus the 804 - pct_{expr_type}_in_top_{n}_{var_type} (e.g. "pct_counts_in_top_50_genes", cumulative percentage of counts for 50 most expressed genes in a cell)
135 expected value of connetions under the null model of randomly connecting 805 - total_{expr_type}_{qc_var} (e.g. "total_counts_mito", total number of counts for variables in qc_vars)
136 partitions. We do not provide a p-value as this null model does not 806 - pct_{expr_type}_{qc_var} (e.g. "pct_counts_mito", proportion of total counts for a cell which are mitochondrial)
137 precisely capture what one would consider "connected" in real data, hence it 807
138 strongly overestimates the expected value. See an extensive discussion of 808 And also the variable level metrics:
139 this in Wolf et al (2017). 809
140 810 - total_{expr_type} (e.g. "total_counts", sum of counts for a gene)
141 Together with a random walk-based distance measure, this generates a partial 811 - mean_{expr_type} (e.g. "mean_counts", mean expression over all cells)
142 coordinatization of data useful for exploring and explaining its variation. 812 - n_cells_by_{expr_type} (e.g. "n_cells_by_counts", number of cells this expression is measured in)
143 813 - pct_dropout_by_{expr_type} (e.g. "pct_dropout_by_counts", percentage of cells this feature does not appear in)
144 More details on the `tl.paga scanpy documentation 814
145 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.paga.html#scanpy.api.tl.paga>`_ 815 More details on the `scanpy documentation
146 816 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.calculate_qc_metrics.html>`__
147 817
148 Infer progression of cells through geodesic distance along the graph (`tl.dpt`) 818 Compute a neighborhood graph of observations, using `pp.neighbors`
149 =============================================================================== 819 ==================================================================
150 820
151 Reconstruct the progression of a biological process from snapshot 821 The neighbor search efficiency of this heavily relies on UMAP (McInnes et al, 2018),
152 data. `Diffusion Pseudotime` has been introduced by Haghverdi et al (2016) and 822 which also provides a method for estimating connectivities of data points -
153 implemented within Scanpy (Wolf et al, 2017). Here, we use a further developed 823 the connectivity of the manifold (`method=='umap'`). If `method=='diffmap'`,
154 version, which is able to deal with disconnected graphs (Wolf et al, 2017) and can 824 connectivities are computed according to Coifman et al (2005), in the adaption of
155 be run in a `hierarchical` mode by setting the parameter 825 Haghverdi et al (2016).
156 `n_branchings>1`. We recommend, however, to only use 826
157 `tl.dpt` for computing pseudotime (`n_branchings=0`) and 827 The returned AnnData object contains:
158 to detect branchings via `paga`. For pseudotime, you need 828
159 to annotate your data with a root cell. 829 - Weighted adjacency matrix of the neighborhood graph of data points (connectivities). Weights should be interpreted as connectivities.
160 830 - Distances for each pair of neighbors (distances)
161 This requires to run `pp.neighbors`, first. In order to 831
162 reproduce the original implementation of DPT, use `method=='gauss'` in 832 These data are stored in the unstructured annotation (uns) and can be accessed using the inspect tool for AnnData objects
163 this. Using the default `method=='umap'` only leads to minor quantitative 833
164 differences, though. 834 More details on the `scanpy documentation
165 835 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.neighbors.html>`__
166 836
167 If `n_branchings==0`, no field `dpt_groups` will be written. 837 Score a set of genes, using `tl.score_genes`
168 838 ============================================
169 - dpt_pseudotime : Array of dim (number of samples) that stores the pseudotime of each cell, that is, the DPT distance with respect to the root cell. 839
170 - dpt_groups : Array of dim (number of samples) that stores the subgroup id ('0','1', ...) for each cell. The groups typically correspond to 'progenitor cells', 'undecided cells' or 'branches' of a process. 840 The score is the average expression of a set of genes subtracted with the
171 841 average expression of a reference set of genes. The reference set is
172 The tool is similar to the R package `destiny` of Angerer et al (2016). 842 randomly sampled from the `gene_pool` for each binned expression value.
173 843
174 More details on the `tl.dpt scanpy documentation 844 This reproduces the approach in Seurat (Satija et al, 2015) and has been implemented
175 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.dpt.html#scanpy.api.tl.dpt>`_ 845 for Scanpy by Davide Cittaro.
176 846
847 More details on the `scanpy documentation
848 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.score_genes.html>`__
849
850 Score cell cycle genes, using `tl.score_genes_cell_cycle`
851 =========================================================
852
853 Given two lists of genes associated to S phase and G2M phase, calculates
854 scores and assigns a cell cycle phase (G1, S or G2M). See
855 `score_genes` for more explanation.
856
857 More details on the `scanpy documentation
858 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.score_genes_cell_cycle.html>`__
859
860 Rank genes for characterizing groups, using `tl.rank_genes_groups`
861 ==================================================================
862
863 The returned AnnData object contains:
864
865 - Gene names, ordered according to scores
866 - Z-score underlying the computation of a p-value for each gene for each group, ordered according to scores
867 - Log2 fold change for each gene for each group, ordered according to scores. It is only provided if method is ‘t-test’ like. This is an approximation calculated from mean-log values.
868 - P-values
869 - Adjusted p-values
870
871 These data are stored in the unstructured annotation (uns) and can be accessed using the inspect tool for AnnData objects
872
873 More details on the `scanpy documentation
874 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.rank_genes_groups.html>`__
875
876
877 Calculate an overlap score between data-derived marker genes and provided markers (`tl.marker_gene_overlap`)
878 ============================================================================================================
879
880 Marker gene overlap scores can be quoted as overlap counts, overlap coefficients, or jaccard indices. The method returns a pandas dataframe which can be used to annotate clusters based on marker gene overlaps.
881
882
883 Logarithmize the data matrix (`pp.log1p`)
884 =========================================
885
886 More details on the `scanpy documentation
887 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.log1p.html>`__
888
889 Scale data to unit variance and zero mean (`pp.scale`)
890 ======================================================
891
892 More details on the `scanpy documentation
893 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.scale.html>`__
894
895 Compute the square root of the data matrix (`pp.sqrt`)
896 ======================================================
897
898 `X = sqrt(X)`
177 ]]></help> 899 ]]></help>
178 <expand macro="citations"/> 900 <expand macro="citations"/>
179 </tool> 901 </tool>