Mercurial > repos > iuc > scanpy_inspect
comparison inspect.xml @ 1:a755eaa1cc32 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
author | iuc |
---|---|
date | Wed, 16 Oct 2019 06:31:52 -0400 |
parents | 5d2e17328afe |
children | 7d22964a8639 |
comparison
equal
deleted
inserted
replaced
0:5d2e17328afe | 1:a755eaa1cc32 |
---|---|
1 <tool id="scanpy_inspect" name="Inspect with scanpy" version="@galaxy_version@"> | 1 <tool id="scanpy_inspect" name="Inspect and manipulate" version="@galaxy_version@"> |
2 <description></description> | 2 <description> with scanpy</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 <xml name="score_genes_params"> | |
6 <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/> | |
7 <param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/> | |
8 <expand macro="param_use_raw"/> | |
9 </xml> | |
10 <token name="@CMD_score_genes_inputs@"><![CDATA[ | |
11 n_bins=$method.n_bins, | |
12 random_state=$method.random_state, | |
13 use_raw=$method.use_raw, | |
14 copy=False | |
15 ]]></token> | |
16 <xml name="corr_method"> | |
17 <param argument="corr_method" type="select" label="P-value correction method"> | |
18 <option value="benjamini-hochberg">Benjamini-Hochberg</option> | |
19 <option value="bonferroni">Bonferroni</option> | |
20 </param> | |
21 </xml> | |
22 <xml name="fit_intercept"> | |
23 <param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true" | |
24 label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""/> | |
25 </xml> | |
26 <xml name="max_iter"> | |
27 <param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge" help=""/> | |
28 </xml> | |
29 <xml name="multi_class"> | |
30 <param argument="multi_class" type="select" label="Multi class" help=""> | |
31 <option value="ovr">ovr: a binary problem is fit for each label</option> | |
32 <option value="multinomial">multinomial: the multinomial loss fit across the entire probability distribution, even when the data is binary</option> | |
33 <option value="auto">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option> | |
34 </param> | |
35 </xml> | |
36 <xml name="penalty"> | |
37 <param argument="penalty" type="select" label="Norm used in the penalization" help=""> | |
38 <option value="l1">l1</option> | |
39 <option value="l2">l2</option> | |
40 <option value="customized">customized</option> | |
41 </param> | |
42 </xml> | |
43 <xml name="custom_penalty"> | |
44 <param argument="pen" type="text" value="" label="Norm used in the penalization" help=""/> | |
45 </xml> | |
46 <xml name="random_state"> | |
47 <param argument="random_state" type="integer" value="" optional="true" | |
48 label="The seed of the pseudo random number generator to use when shuffling the data" help=""/> | |
49 </xml> | |
5 </macros> | 50 </macros> |
6 <expand macro="requirements"/> | 51 <expand macro="requirements"/> |
7 <expand macro="version_command"/> | 52 <expand macro="version_command"/> |
8 <command detect_errors="exit_code"><![CDATA[ | 53 <command detect_errors="exit_code"><![CDATA[ |
9 @CMD@ | 54 @CMD@ |
11 <configfiles> | 56 <configfiles> |
12 <configfile name="script_file"><![CDATA[ | 57 <configfile name="script_file"><![CDATA[ |
13 @CMD_imports@ | 58 @CMD_imports@ |
14 @CMD_read_inputs@ | 59 @CMD_read_inputs@ |
15 | 60 |
16 #if $method.method == "tl.paga" | 61 #if $method.method == "pp.calculate_qc_metrics" |
17 sc.tl.paga( | 62 sc.pp.calculate_qc_metrics( |
18 adata=adata, | 63 adata=adata, |
19 groups='$method.groups', | 64 expr_type='$method.expr_type', |
20 use_rna_velocity =$method.use_rna_velocity, | 65 var_type='$method.var_type', |
21 model='$method.model', | 66 #if str($method.qc_vars) != '' |
67 #set $qc_vars = [str(x.strip()) for x in str($method.qc_vars).split(',')] | |
68 qc_vars=$qc_vars, | |
69 #end if | |
70 #if str($method.percent_top) != '' | |
71 #set $percent_top = [int(x.strip()) for x in str($method.percent_top).split(',')] | |
72 percent_top=$percent_top, | |
73 #end if | |
74 inplace=True) | |
75 | |
76 #else if $method.method == "tl.score_genes" | |
77 sc.tl.score_genes( | |
78 adata=adata, | |
79 #set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')] | |
80 gene_list=$gene_list, | |
81 ctrl_size=$method.ctrl_size, | |
82 score_name='$method.score_name', | |
83 #if str($method.gene_pool) != '' | |
84 #set $gene_pool = [str(x.strip()) for x in str($method.gene_pool).split(',')] | |
85 gene_pool=$gene_pool, | |
86 #end if | |
87 @CMD_score_genes_inputs@) | |
88 | |
89 #else if $method.method == "tl.score_genes_cell_cycle" | |
90 #if str($method.s_genes.format) == 'file' | |
91 with open('$method.s_genes.file', 'r') as s_genes_f: | |
92 s_genes = [str(x.strip()) for x in s_genes_f.readlines()] | |
93 print(s_genes) | |
94 #end if | |
95 | |
96 #if str($method.g2m_genes.format) == 'file' | |
97 with open('$method.g2m_genes.file', 'r') as g2m_genes_f: | |
98 g2m_genes = [str(x.strip()) for x in g2m_genes_f.readlines()] | |
99 print(g2m_genes) | |
100 #end if | |
101 | |
102 sc.tl.score_genes_cell_cycle( | |
103 adata=adata, | |
104 #if str($method.s_genes.format) == 'text' | |
105 #set $s_genes = [str(x.strip()) for x in $method.s_genes.text.split(',')] | |
106 s_genes=$s_genes, | |
107 #else if str($method.s_genes.format) == 'file' | |
108 s_genes=s_genes, | |
109 #end if | |
110 #if str($method.g2m_genes.format) == 'text' | |
111 #set $g2m_genes = [str(x.strip()) for x in $method.g2m_genes.text.split(',')] | |
112 g2m_genes=$g2m_genes, | |
113 #else if str($method.g2m_genes.format) == 'file' | |
114 g2m_genes=g2m_genes, | |
115 #end if | |
116 @CMD_score_genes_inputs@) | |
117 | |
118 #else if $method.method == 'pp.neighbors' | |
119 sc.pp.neighbors( | |
120 adata=adata, | |
121 n_neighbors=$method.n_neighbors, | |
122 #if str($method.n_pcs) != '' | |
123 n_pcs=$method.n_pcs, | |
124 #end if | |
125 #if str($method.use_rep) != '' | |
126 use_rep='$method.use_rep', | |
127 #end if | |
128 knn=$method.knn, | |
129 random_state=$method.random_state, | |
130 method='$method.pp_neighbors_method', | |
131 metric='$method.metric', | |
22 copy=False) | 132 copy=False) |
23 #elif $method.method == "tl.dpt" | 133 |
24 sc.tl.dpt( | 134 #else if $method.method == 'tl.rank_genes_groups' |
135 sc.tl.rank_genes_groups( | |
25 adata=adata, | 136 adata=adata, |
26 n_dcs=$method.n_dcs, | 137 groupby='$method.groupby', |
27 n_branchings=$method.n_branchings, | 138 use_raw=$method.use_raw, |
28 min_group_size=$method.min_group_size, | 139 #if str($method.groups) != '' |
29 allow_kendall_tau_shift=$method.allow_kendall_tau_shift, | 140 #set $group=[x.strip() for x in str($method.groups).split(',')] |
141 groups=$group, | |
142 #end if | |
143 #if $method.ref.rest == 'rest' | |
144 reference='$method.ref.rest', | |
145 #else | |
146 reference='$method.ref.reference', | |
147 #end if | |
148 n_genes=$method.n_genes, | |
149 method='$method.tl_rank_genes_groups_method.method', | |
150 #if $method.tl_rank_genes_groups_method.method != 'logreg' | |
151 corr_method='$method.tl_rank_genes_groups_method.corr_method', | |
152 #else | |
153 solver='$method.tl_rank_genes_groups_method.solver.solver', | |
154 #if $method.tl_rank_genes_groups_method.solver.solver == 'newton-cg' | |
155 penalty='l2', | |
156 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, | |
157 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter, | |
158 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', | |
159 #else if $method.tl_rank_genes_groups_method.solver.solver == 'lbfgs' | |
160 penalty='l2', | |
161 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, | |
162 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter, | |
163 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', | |
164 #else if $method.tl_rank_genes_groups_method.solver.solver == 'liblinear' | |
165 #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1' | |
166 penalty='l1', | |
167 #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2' | |
168 penalty='l2', | |
169 dual=$method.tl_rank_genes_groups_method.solver.penalty.dual, | |
170 #else | |
171 penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen', | |
172 #end if | |
173 fit_intercept=$method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept, | |
174 #if $method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept == 'True' | |
175 intercept_scaling=$method.tl_rank_genes_groups_method.solver.intercept_scaling.intercept_scaling, | |
176 #end if | |
177 #if str($method.tl_rank_genes_groups_method.solver.random_state) != '' | |
178 random_state=$method.tl_rank_genes_groups_method.solver.random_state, | |
179 #end if | |
180 #else if $method.tl_rank_genes_groups_method.solver.solver == 'sag' | |
181 penalty='l2', | |
182 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, | |
183 #if str($method.tl_rank_genes_groups_method.solver.random_state) != '' | |
184 random_state=$method.tl_rank_genes_groups_method.solver.random_state, | |
185 #end if | |
186 max_iter=$method.tl_rank_genes_groups_method.solver.max_iter, | |
187 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', | |
188 #else if $method.tl_rank_genes_groups_method.solver.solver == 'saga' | |
189 #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1' | |
190 penalty='l1', | |
191 #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2' | |
192 penalty='l2', | |
193 #else | |
194 penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen', | |
195 #end if | |
196 fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, | |
197 multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', | |
198 #end if | |
199 tol=$method.tl_rank_genes_groups_method.tol, | |
200 C=$method.tl_rank_genes_groups_method.c, | |
201 #end if | |
202 only_positive=$method.only_positive) | |
203 | |
204 #else if $method.method == "tl.marker_gene_overlap" | |
205 reference_markers = {} | |
206 #for $i, $s in enumerate($method.reference_markers) | |
207 #set $list=[x.strip() for x in str($s.values).split(',')] | |
208 reference_markers['$s.key'] = $list | |
209 #end for | |
210 | |
211 sc.tl.marker_gene_overlap( | |
212 adata, | |
213 reference_markers, | |
214 #if str($method.key) != '' | |
215 key='$method.key', | |
216 #end if | |
217 method='$method.overlap.method', | |
218 #if $method.overlap.method == 'overlap_count' and str($method.overlap.normalize) != 'None' | |
219 normalize='$method.overlap.normalize', | |
220 #end if | |
221 #if str($method.top_n_markers) != '' | |
222 top_n_markers=$method.top_n_markers, | |
223 #end if | |
224 #if str($method.adj_pval_threshold) != '' | |
225 adj_pval_threshold=$method.adj_pval_threshold, | |
226 #end if | |
227 #if str($method.key_added) != '' | |
228 key_added='$method.key_added', | |
229 #end if | |
230 inplace=True) | |
231 | |
232 #else if $method.method == "pp.log1p" | |
233 sc.pp.log1p( | |
234 data=adata, | |
30 copy=False) | 235 copy=False) |
31 adata.obs.to_csv('$obs', sep='\t') | 236 |
237 #else if $method.method == "pp.scale" | |
238 sc.pp.scale( | |
239 data=adata, | |
240 zero_center=$method.zero_center, | |
241 #if str($method.max_value) != '' | |
242 max_value=$method.max_value, | |
243 #end if | |
244 copy=False) | |
245 | |
246 #else if $method.method == "pp.sqrt" | |
247 sc.pp.sqrt( | |
248 data=adata, | |
249 copy=False) | |
32 #end if | 250 #end if |
33 | 251 |
34 @CMD_anndata_write_outputs@ | 252 @CMD_anndata_write_outputs@ |
35 ]]></configfile> | 253 ]]></configfile> |
36 </configfiles> | 254 </configfiles> |
37 <inputs> | 255 <inputs> |
38 <expand macro="inputs_anndata"/> | 256 <expand macro="inputs_anndata"/> |
39 <conditional name="method"> | 257 <conditional name="method"> |
40 <param argument="method" type="select" label="Method used for plotting"> | 258 <param argument="method" type="select" label="Method used for inspecting"> |
41 <!--<option value="tl.paga_compare_paths">, using `tl.paga_compare_paths`</option>!--> | 259 <option value="pp.calculate_qc_metrics">Calculate quality control metrics, using `pp.calculate_qc_metrics`</option> |
42 <!--<option value="tl.paga_degrees">, using `tl.paga_degrees`</option>!--> | 260 <option value="pp.neighbors">Compute a neighborhood graph of observations, using `pp.neighbors`</option> |
43 <!--<option value="tl.paga_expression_entropies">, using `tl.paga_expression_entropies`</option>!--> | 261 <option value="tl.score_genes">Score a set of genes, using `tl.score_genes`</option> |
44 <option value="tl.paga">Generate cellular maps of differentiation manifolds with complex topologies, using `tl.paga`</option> | 262 <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using `tl.score_genes_cell_cycle`</option> |
45 <option value="tl.dpt">Infer progression of cells through geodesic distance along the graph, using `tl.dpt`</option> | 263 <option value="tl.rank_genes_groups">Rank genes for characterizing groups, using `tl.rank_genes_groups`</option> |
264 <!--<option value="tl.marker_gene_overlap">Calculate an overlap score between data-derived marker genes and provided markers, using `tl.marker_gene_overlap`</option>--> | |
265 <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option> | |
266 <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option> | |
267 <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option> | |
46 </param> | 268 </param> |
47 <when value="tl.paga"> | 269 <when value="pp.calculate_qc_metrics"> |
48 <param argument="groups" type="text" value="louvain" label="Key for categorical in the input" help="You can pass your predefined groups by choosing any categorical annotation of observations (`adata.obs`)."/> | 270 <param argument="expr_type" type="text" value="counts" label="Name of kind of values in X"/> |
49 <param argument="use_rna_velocity" type="boolean" truevalue="False" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that `adata.uns` contains a directed single-cell graph with key `['velocyto_transitions']`. This feature might be subject to change in the future."/> | 271 <param argument="var_type" type="text" value="genes" label="The kind of thing the variables are"/> |
50 <param argument="model" type="select" label="PAGA connectivity model" help=""> | 272 <param argument="qc_vars" type="text" value="" label="Keys for boolean columns of `.var` which identify variables you could want to control for" |
51 <option value="v1.2">v1.2</option> | 273 help="Keys separated by a comma"/> |
52 <option value="v1.0">v1.0</option> | 274 <param argument="percent_top" type="text" value="" label="Proportions of top genes to cover" |
275 help=" Values (integers) are considered 1-indexed, `50` finds cumulative proportion to the 50th most expressed genes. Values separated by a comma. | |
276 If empty don't calculate"/> | |
277 </when> | |
278 <when value="pp.neighbors"> | |
279 <param argument="n_neighbors" type="integer" min="0" value="15" label="The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation" help="Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If `knn` is `True`, number of nearest neighbors to be searched. If `knn` is `False`, a Gaussian kernel width is set to the distance of the `n_neighbors` neighbor."/> | |
280 <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/> | |
281 <param argument="use_rep" type="text" value="" optional="true" label="Indicated representation to use" help="If not set, the representation is chosen automatically: for n_vars below 50, X is used, otherwise X_pca (uns) is used. If X_pca is not present, it's computed with default parameter"/> | |
282 <param argument="knn" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use a hard threshold to restrict the number of neighbors to n_neighbors?" help="If true, it considers a knn graph. Otherwise, it uses a Gaussian Kernel to assign low weights to neighbors more distant than the `n_neighbors` nearest neighbor."/> | |
283 <param argument="random_state" type="integer" value="0" label="Numpy random seed" help=""/> | |
284 <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities" help=""> | |
285 <option value="umap">umap (McInnes et al, 2018)</option> | |
286 <option value="gauss">gauss: Gauss kernel following (Coifman et al 2005) with adaptive width (Haghverdi et al 2016)</option> | |
287 </param> | |
288 <param argument="metric" type="select" label="Distance metric" help=""> | |
289 <expand macro="distance_metric_options"/> | |
53 </param> | 290 </param> |
54 </when> | 291 </when> |
55 <when value="tl.dpt"> | 292 <when value="tl.score_genes"> |
56 <param argument="n_dcs" type="integer" min="0" value="10" label="Number of diffusion components to use" help=""/> | 293 <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"/> |
57 <param argument="n_branchings" type="integer" min="0" value="0" label="Number of branchings to detect" help=""/> | 294 <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled" |
58 <param argument="min_group_size" type="float" min="0" value="0.01" label="Min group size" help="During recursive splitting of branches ('dpt groups') for `n_branchings` > 1, do not consider groups that contain less than `min_group_size` data points. If a float, `min_group_size` refers to a fraction of the total number of data points."/> | 295 help="If `len(gene_list)` is not too low, you can set `ctrl_size=len(gene_list)`."/> |
59 <param argument="allow_kendall_tau_shift" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Allow Kendal tau shift?" help="If a very small branch is detected upon splitting, shift away from maximum correlation in Kendall tau criterion of Haghverdi et al (2016) to stabilize the splitting."/> | 296 <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set" |
297 help="Default is all genes. Genes separated by a comma"/> | |
298 <expand macro="score_genes_params"/> | |
299 <param argument="score_name" type="text" value="score" label="Name of the field to be added in `.obs`" help=""/> | |
60 </when> | 300 </when> |
301 <when value="tl.score_genes_cell_cycle"> | |
302 <conditional name='s_genes'> | |
303 <param name="format" type="select" label="Format for the list of genes associated with S phase"> | |
304 <option value="file">File</option> | |
305 <option value="text" selected="true">Text</option> | |
306 </param> | |
307 <when value="text"> | |
308 <param name="text" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma"/> | |
309 </when> | |
310 <when value="file"> | |
311 <param name="file" type="data" format="txt" label="File with the list of genes associated with S phase" help="One gene per line"/> | |
312 </when> | |
313 </conditional> | |
314 <conditional name='g2m_genes'> | |
315 <param name="format" type="select" label="Format for the list of genes associated with G2M phase"> | |
316 <option value="file">File</option> | |
317 <option value="text" selected="true">Text</option> | |
318 </param> | |
319 <when value="text"> | |
320 <param name="text" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma"/> | |
321 </when> | |
322 <when value="file"> | |
323 <param name="file" type="data" format="txt" label="File with the list of genes associated with G2M phase" help="One gene per line"/> | |
324 </when> | |
325 </conditional> | |
326 <expand macro="score_genes_params"/> | |
327 </when> | |
328 <when value="tl.rank_genes_groups"> | |
329 <param argument="groupby" type="text" value="" label="The key of the observations grouping to consider" help=""/> | |
330 <expand macro="param_use_raw"/> | |
331 <param argument="groups" type="text" value="" label="Subset of groups to which comparison shall be restricted" help="Group names separated by a comma, e.g. g1,g2,g3. If not passed, a ranking will be generated for all groups."/> | |
332 <conditional name="ref"> | |
333 <param name="rest" type="select" label="Comparison"> | |
334 <option value="rest">Compare each group to the union of the rest of the group</option> | |
335 <option value="group_id">Compare with respect to a specific group</option> | |
336 </param> | |
337 <when value="rest"/> | |
338 <when value="group_id"> | |
339 <param argument="reference" type="text" value="" label="Group identifier with respect to which compare"/> | |
340 </when> | |
341 </conditional> | |
342 <param argument="n_genes" type="integer" min="0" value="100" label="The number of genes that appear in the returned tables" help=""/> | |
343 <conditional name="tl_rank_genes_groups_method"> | |
344 <param argument="method" type="select" label="Method"> | |
345 <option value="t-test">t-test</option> | |
346 <option value="wilcoxon">Wilcoxon-Rank-Sum</option> | |
347 <option value="t-test_overestim_var" selected="true">t-test with overestimate of variance of each group</option> | |
348 <option value="logreg">Logistic regression</option> | |
349 </param> | |
350 <when value="t-test"> | |
351 <expand macro="corr_method"/> | |
352 </when> | |
353 <when value="wilcoxon"> | |
354 <expand macro="corr_method"/> | |
355 </when> | |
356 <when value="t-test_overestim_var"> | |
357 <expand macro="corr_method"/> | |
358 </when> | |
359 <when value="logreg"> | |
360 <conditional name="solver"> | |
361 <param argument="solver" type="select" label="Algorithm to use in the optimization problem" help="For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and ‘saga’ are faster for large ones. For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes. ‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas ‘liblinear’ and ‘saga’ handle L1 penalty."> | |
362 <option value="newton-cg">newton-cg</option> | |
363 <option value="lbfgs">lbfgs</option> | |
364 <option value="liblinear">liblinear</option> | |
365 <option value="sag">sag</option> | |
366 <option value="saga">saga</option> | |
367 </param> | |
368 <when value="newton-cg"> | |
369 <expand macro="fit_intercept"/> | |
370 <expand macro="max_iter"/> | |
371 <expand macro="multi_class"/> | |
372 </when> | |
373 <when value="lbfgs"> | |
374 <expand macro="fit_intercept"/> | |
375 <expand macro="max_iter"/> | |
376 <expand macro="multi_class"/> | |
377 </when> | |
378 <when value="liblinear"> | |
379 <conditional name="penalty"> | |
380 <expand macro="penalty"/> | |
381 <when value="l1"/> | |
382 <when value="l2"> | |
383 <param argument="dual" type="boolean" truevalue="True" falsevalue="False" checked="false" | |
384 label="Dual (not primal) formulation?" help="Prefer primal when n_samples > n_features"/> | |
385 </when> | |
386 <when value="customized"> | |
387 <expand macro="custom_penalty"/> | |
388 </when> | |
389 </conditional> | |
390 <conditional name="intercept_scaling"> | |
391 <param argument="fit_intercept" type="select" | |
392 label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""> | |
393 <option value="True">Yes</option> | |
394 <option value="False">No</option> | |
395 </param> | |
396 <when value="True"> | |
397 <param argument="intercept_scaling" type="float" value="1.0" | |
398 label="Intercept scaling" | |
399 help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/> | |
400 </when> | |
401 <when value="False"/> | |
402 </conditional> | |
403 <expand macro="random_state"/> | |
404 </when> | |
405 <when value="sag"> | |
406 <expand macro="fit_intercept"/> | |
407 <expand macro="random_state"/> | |
408 <expand macro="max_iter"/> | |
409 <expand macro="multi_class"/> | |
410 </when> | |
411 <when value="saga"> | |
412 <conditional name="penalty"> | |
413 <expand macro="penalty"/> | |
414 <when value="l1"/> | |
415 <when value="l2"/> | |
416 <when value="customized"> | |
417 <expand macro="custom_penalty"/> | |
418 </when> | |
419 </conditional> | |
420 <expand macro="fit_intercept"/> | |
421 <expand macro="multi_class"/> | |
422 </when> | |
423 </conditional> | |
424 <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria" help=""/> | |
425 <param argument="c" type="float" value="1.0" label="Inverse of regularization strength" | |
426 help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/> | |
427 </when> | |
428 </conditional> | |
429 <param argument="only_positive" type="boolean" truevalue="True" falsevalue="False" checked="true" | |
430 label="Only consider positive differences?" help=""/> | |
431 </when> | |
432 <!--<when value="tl.marker_gene_overlap"> | |
433 <repeat name="reference_markers" title="Marker genes"> | |
434 <param name="key" type="text" value="" label="Cell identity name" help=""/> | |
435 <param name="values" type="text" value="" label="List of genes" help="Comma-separated names from `var`"/> | |
436 </repeat> | |
437 <param argument="key" type="text" value="rank_genes_groups" label="Key in adata.uns where the rank_genes_groups output is stored"/> | |
438 <conditional name="overlap"> | |
439 <param argument="method" type="select" label="Method to calculate marker gene overlap"> | |
440 <option value="overlap_count">overlap_count: Intersection of the gene set</option> | |
441 <option value="overlap_coef">overlap_coef: Overlap coefficient</option> | |
442 <option value="jaccard">jaccard: Jaccard index</option> | |
443 </param> | |
444 <when value="overlap_count"> | |
445 <param argument="normalize" type="select" label="Normalization option for the marker gene overlap output"> | |
446 <option value="None">None</option> | |
447 <option value="reference">reference: Normalization of the data by the total number of marker genes given in the reference annotation per group</option> | |
448 <option value="data">data: Normalization of the data by the total number of marker genes used for each cluster</option> | |
449 </param> | |
450 </when> | |
451 <when value="overlap_coef"/> | |
452 <when value="jaccard"/> | |
453 </conditional> | |
454 <param argument="top_n_markers" type="integer" optional="true" label="Number of top data-derived marker genes to use" help="By default all calculated marker genes are used. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/> | |
455 <param argument="adj_pval_threshold" type="float" optional="true" label="Significance threshold on the adjusted p-values to select marker genes" help=" This can only be used when adjusted p-values are calculated by 'tl.rank_genes_groups'. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/> | |
456 <param argument="key_added" type="text" value="" optional="true" label="Key that will contain the marker overlap scores in 'uns'"/> | |
457 </when>--> | |
458 <when value="pp.log1p"/> | |
459 <when value="pp.scale"> | |
460 <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" | |
461 label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/> | |
462 <param argument="max_value" type="float" value="" optional="true" label="Maximum value" | |
463 help="Clip (truncate) to this value after scaling. If not set, it does not clip."/> | |
464 </when> | |
465 <when value="pp.sqrt"/> | |
61 </conditional> | 466 </conditional> |
62 <expand macro="anndata_output_format"/> | |
63 </inputs> | 467 </inputs> |
64 <outputs> | 468 <outputs> |
65 <expand macro="anndata_outputs"/> | 469 <expand macro="anndata_outputs"/> |
66 <data name="obs" format="tabular" label="${tool.name} on ${on_string}: Observations annotation"> | |
67 <filter>method['method'] == 'tl.dpt'</filter> | |
68 </data> | |
69 </outputs> | 470 </outputs> |
70 <tests> | 471 <tests> |
71 <test> | 472 <test> |
72 <conditional name="input"> | 473 <!-- test 1 --> |
73 <param name="format" value="h5ad" /> | 474 <param name="adata" value="sparce_csr_matrix.h5ad" /> |
74 <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" /> | 475 <conditional name="method"> |
75 </conditional> | 476 <param name="method" value="pp.calculate_qc_metrics"/> |
76 <conditional name="method"> | 477 <param name="expr_type" value="counts"/> |
77 <param name="method" value="tl.paga"/> | 478 <param name="var_type" value="genes"/> |
78 <param name="groups" value="paul15_clusters"/> | 479 <param name="qc_vars" value="mito,negative"/> |
79 <param name="use_rna_velocity" value="False"/> | 480 <param name="percent_top" value=""/> |
80 <param name="model" value="v1.2"/> | 481 </conditional> |
81 </conditional> | 482 <assert_stdout> |
82 <param name="anndata_output_format" value="h5ad" /> | 483 <has_text_matching expression="sc.pp.calculate_qc_metrics" /> |
83 <assert_stdout> | 484 <has_text_matching expression="expr_type='counts'" /> |
84 <has_text_matching expression="sc.tl.paga"/> | 485 <has_text_matching expression="var_type='genes'" /> |
85 <has_text_matching expression="groups='paul15_clusters'"/> | 486 <has_text_matching expression="qc_vars=\['mito', 'negative'\]" /> |
86 <has_text_matching expression="use_rna_velocity =False"/> | 487 </assert_stdout> |
87 <has_text_matching expression="model='v1.2'"/> | 488 <output name="anndata_out" file="pp.calculate_qc_metrics.sparce_csr_matrix.h5ad" ftype="h5ad" compare="sim_size"/> |
88 </assert_stdout> | 489 </test> |
89 <output name="anndata_out_h5ad" file="tl.paga.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"> | 490 <test> |
491 <!-- test 2 --> | |
492 <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" /> | |
493 <conditional name="method"> | |
494 <param name="method" value="pp.neighbors"/> | |
495 <param name="n_neighbors" value="15"/> | |
496 <param name="knn" value="True"/> | |
497 <param name="random_state" value="0"/> | |
498 <param name="pp_neighbors_method" value="umap"/> | |
499 <param name="metric" value="euclidean"/> | |
500 </conditional> | |
501 <assert_stdout> | |
502 <has_text_matching expression="sc.pp.neighbors"/> | |
503 <has_text_matching expression="n_neighbors=15"/> | |
504 <has_text_matching expression="knn=True"/> | |
505 <has_text_matching expression="random_state=0"/> | |
506 <has_text_matching expression="method='umap'"/> | |
507 <has_text_matching expression="metric='euclidean'"/> | |
508 </assert_stdout> | |
509 <output name="anndata_out" file="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"> | |
90 <assert_contents> | 510 <assert_contents> |
91 <has_h5_keys keys="X, obs, obsm, uns, var" /> | 511 <has_h5_keys keys="X, obs, obsm, uns, var" /> |
92 </assert_contents> | 512 </assert_contents> |
93 </output> | 513 </output> |
94 </test> | 514 </test> |
95 <test> | 515 <test> |
96 <conditional name="input"> | 516 <!-- test 3 --> |
97 <param name="format" value="h5ad" /> | 517 <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" /> |
98 <param name="adata" value="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" /> | 518 <conditional name="method"> |
99 </conditional> | 519 <param name="method" value="pp.neighbors"/> |
100 <conditional name="method"> | 520 <param name="n_neighbors" value="15"/> |
101 <param name="method" value="tl.dpt"/> | 521 <param name="knn" value="True"/> |
102 <param name="n_dcs" value="15"/> | 522 <param name="pp_neighbors_method" value="gauss"/> |
103 <param name="n_branchings" value="1"/> | 523 <param name="metric" value="braycurtis"/> |
104 <param name="min_group_size" value="0.01"/> | 524 </conditional> |
105 <param name="allow_kendall_tau_shift" value="True"/> | 525 <assert_stdout> |
106 </conditional> | 526 <has_text_matching expression="sc.pp.neighbors"/> |
107 <param name="anndata_output_format" value="h5ad" /> | 527 <has_text_matching expression="n_neighbors=15"/> |
108 <assert_stdout> | 528 <has_text_matching expression="knn=True"/> |
109 <has_text_matching expression="sc.tl.dpt"/> | 529 <has_text_matching expression="random_state=0"/> |
110 <has_text_matching expression="n_dcs=15"/> | 530 <has_text_matching expression="method='gauss'"/> |
111 <has_text_matching expression="n_branchings=1"/> | 531 <has_text_matching expression="metric='braycurtis'"/> |
112 <has_text_matching expression="min_group_size=0.01"/> | 532 </assert_stdout> |
113 <has_text_matching expression="allow_kendall_tau_shift=True"/> | 533 <output name="anndata_out" file="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/> |
114 </assert_stdout> | 534 </test> |
115 <output name="anndata_out_h5ad" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"> | 535 <test> |
536 <!-- test 4 --> | |
537 <param name="adata" value="krumsiek11.h5ad" /> | |
538 <conditional name="method"> | |
539 <param name="method" value="tl.score_genes"/> | |
540 <param name="gene_list" value="Gata2, Fog1"/> | |
541 <param name="ctrl_size" value="2"/> | |
542 <param name="n_bins" value="2"/> | |
543 <param name="random_state" value="2"/> | |
544 <param name="use_raw" value="False"/> | |
545 <param name="score_name" value="score"/> | |
546 </conditional> | |
547 <assert_stdout> | |
548 <has_text_matching expression="sc.tl.score_genes" /> | |
549 <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" /> | |
550 <has_text_matching expression="ctrl_size=2" /> | |
551 <has_text_matching expression="score_name='score'" /> | |
552 <has_text_matching expression="n_bins=2" /> | |
553 <has_text_matching expression="random_state=2" /> | |
554 <has_text_matching expression="use_raw=False" /> | |
555 <has_text_matching expression="copy=False" /> | |
556 </assert_stdout> | |
557 <output name="anndata_out" file="tl.score_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
558 </test> | |
559 <test> | |
560 <!-- test 5 --> | |
561 <param name="adata" value="krumsiek11.h5ad" /> | |
562 <conditional name="method"> | |
563 <param name="method" value="tl.score_genes_cell_cycle"/> | |
564 <conditional name='s_genes'> | |
565 <param name="format" value="text"/> | |
566 <param name="text" value="Gata2, Fog1, EgrNab"/> | |
567 </conditional> | |
568 <conditional name='g2m_genes'> | |
569 <param name="format" value="text"/> | |
570 <param name="text" value="Gata2, Fog1, EgrNab"/> | |
571 </conditional> | |
572 <param name="n_bins" value="2"/> | |
573 <param name="random_state" value="1"/> | |
574 <param name="use_raw" value="False"/> | |
575 </conditional> | |
576 <assert_stdout> | |
577 <has_text_matching expression="sc.tl.score_genes_cell_cycle"/> | |
578 <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/> | |
579 <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/> | |
580 <has_text_matching expression="n_bins=2"/> | |
581 <has_text_matching expression="random_state=1"/> | |
582 <has_text_matching expression="use_raw=False"/> | |
583 </assert_stdout> | |
584 <output name="anndata_out" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
585 </test> | |
586 <test> | |
587 <!-- test 6 --> | |
588 <param name="adata" value="krumsiek11.h5ad" /> | |
589 <conditional name="method"> | |
590 <param name="method" value="tl.rank_genes_groups"/> | |
591 <param name="groupby" value="cell_type"/> | |
592 <param name="use_raw" value="True"/> | |
593 <conditional name="ref"> | |
594 <param name="rest" value="rest"/> | |
595 </conditional> | |
596 <param name="n_genes" value="100"/> | |
597 <conditional name="tl_rank_genes_groups_method"> | |
598 <param name="method" value="t-test_overestim_var"/> | |
599 <param name="corr_method" value="benjamini-hochberg"/> | |
600 </conditional> | |
601 <param name="only_positive" value="true"/> | |
602 </conditional> | |
603 <assert_stdout> | |
604 <has_text_matching expression="sc.tl.rank_genes_groups"/> | |
605 <has_text_matching expression="groupby='cell_type'"/> | |
606 <has_text_matching expression="use_raw=True"/> | |
607 <has_text_matching expression="reference='rest'"/> | |
608 <has_text_matching expression="n_genes=100"/> | |
609 <has_text_matching expression="method='t-test_overestim_var'"/> | |
610 <has_text_matching expression="corr_method='benjamini-hochberg'"/> | |
611 <has_text_matching expression="only_positive=True"/> | |
612 </assert_stdout> | |
613 <output name="anndata_out" file="tl.rank_genes_groups.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
614 </test> | |
615 <test> | |
616 <!-- test 7 --> | |
617 <param name="adata" value="pbmc68k_reduced.h5ad" /> | |
618 <conditional name="method"> | |
619 <param name="method" value="tl.rank_genes_groups"/> | |
620 <param name="groupby" value="louvain"/> | |
621 <param name="use_raw" value="True"/> | |
622 <conditional name="ref"> | |
623 <param name="rest" value="rest"/> | |
624 </conditional> | |
625 <param name="n_genes" value="100"/> | |
626 <conditional name="tl_rank_genes_groups_method"> | |
627 <param name="method" value="logreg"/> | |
628 <conditional name="solver"> | |
629 <param name="solver" value="newton-cg"/> | |
630 <param name="fit_intercept" value="True"/> | |
631 <param name="max_iter" value="100"/> | |
632 <param name="multi_class" value="auto"/> | |
633 </conditional> | |
634 <param name="tol" value="1e-4"/> | |
635 <param name="c" value="1.0"/> | |
636 </conditional> | |
637 <param name="only_positive" value="true"/> | |
638 </conditional> | |
639 <assert_stdout> | |
640 <has_text_matching expression="sc.tl.rank_genes_groups"/> | |
641 <has_text_matching expression="groupby='louvain'"/> | |
642 <has_text_matching expression="use_raw=True"/> | |
643 <has_text_matching expression="reference='rest'"/> | |
644 <has_text_matching expression="n_genes=100"/> | |
645 <has_text_matching expression="method='logreg'"/> | |
646 <has_text_matching expression="solver='newton-cg'"/> | |
647 <has_text_matching expression="penalty='l2'"/> | |
648 <has_text_matching expression="fit_intercept=True"/> | |
649 <has_text_matching expression="max_iter=100"/> | |
650 <has_text_matching expression="multi_class='auto'"/> | |
651 <has_text_matching expression="tol=0.0001"/> | |
652 <has_text_matching expression="C=1.0"/> | |
653 <has_text_matching expression="only_positive=True"/> | |
654 </assert_stdout> | |
655 <output name="anndata_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size"> | |
116 <assert_contents> | 656 <assert_contents> |
117 <has_h5_keys keys="X, obs, obsm, uns, var" /> | 657 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" /> |
118 </assert_contents> | 658 </assert_contents> |
119 </output> | 659 </output> |
120 <output name="obs" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.obs.tabular" compare="sim_size"/> | 660 </test> |
661 <test> | |
662 <!-- test 8 --> | |
663 <param name="adata" value="pbmc68k_reduced.h5ad" /> | |
664 <conditional name="method"> | |
665 <param name="method" value="tl.rank_genes_groups"/> | |
666 <param name="groupby" value="louvain"/> | |
667 <param name="use_raw" value="True"/> | |
668 <conditional name="ref"> | |
669 <param name="rest" value="rest"/> | |
670 </conditional> | |
671 <param name="n_genes" value="100"/> | |
672 <conditional name="tl_rank_genes_groups_method"> | |
673 <param name="method" value="logreg"/> | |
674 <conditional name="solver"> | |
675 <param name="solver" value="liblinear"/> | |
676 <conditional name="penalty"> | |
677 <param name="penalty" value="l2"/> | |
678 <param name="dual" value="False"/> | |
679 <conditional name="intercept_scaling"> | |
680 <param name="fit_intercept" value="True"/> | |
681 <param name="intercept_scaling" value="1.0" /> | |
682 </conditional> | |
683 <param name="random_state" value="1"/> | |
684 </conditional> | |
685 </conditional> | |
686 <param name="tol" value="1e-4"/> | |
687 <param name="c" value="1.0"/> | |
688 </conditional> | |
689 <param name="only_positive" value="true"/> | |
690 </conditional> | |
691 <assert_stdout> | |
692 <has_text_matching expression="sc.tl.rank_genes_groups"/> | |
693 <has_text_matching expression="groupby='louvain'"/> | |
694 <has_text_matching expression="use_raw=True"/> | |
695 <has_text_matching expression="reference='rest'"/> | |
696 <has_text_matching expression="n_genes=100"/> | |
697 <has_text_matching expression="method='logreg'"/> | |
698 <has_text_matching expression="solver='liblinear'"/> | |
699 <has_text_matching expression="penalty='l2'"/> | |
700 <has_text_matching expression="dual=False"/> | |
701 <has_text_matching expression="fit_intercept=True"/> | |
702 <has_text_matching expression="intercept_scaling=1.0"/> | |
703 <has_text_matching expression="tol=0.0001"/> | |
704 <has_text_matching expression="C=1.0"/> | |
705 <has_text_matching expression="only_positive=True"/> | |
706 </assert_stdout> | |
707 <output name="anndata_out" file="tl.rank_genes_groups.liblinear.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"> | |
708 <assert_contents> | |
709 <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" /> | |
710 </assert_contents> | |
711 </output> | |
712 </test> | |
713 <!--<test> | |
714 < test 9 > | |
715 <param name="adata" value="tl.rank_genes_groups.louvain.neighbors.pca.pbmc68k_reduced.h5ad" /> | |
716 <conditional name="method"> | |
717 <param name="method" value="tl.marker_gene_overlap"/> | |
718 <repeat name="reference_markers"> | |
719 <param name="key" value="CD4 T cells"/> | |
720 <param name="value" value="IL7R"/> | |
721 </repeat> | |
722 <repeat name="reference_markers"> | |
723 <param name="key" value="CD14+ Monocytes"/> | |
724 <param name="value" value="CD14,LYZ"/> | |
725 </repeat> | |
726 <repeat name="reference_markers"> | |
727 <param name="key" value="B cells"/> | |
728 <param name="value" value="MS4A1"/> | |
729 </repeat> | |
730 <conditional name="overlap"> | |
731 <param argument="method" value="overlap_count"/> | |
732 <param argument="normalize" value="None"/> | |
733 </conditional> | |
734 </conditional> | |
735 <assert_stdout> | |
736 <has_text_matching expression="tl.marker_gene_overlap"/> | |
737 <has_text_matching expression="key='rank_genes_groups'"/> | |
738 <has_text_matching expression="method='overlap_count'"/> | |
739 </assert_stdout> | |
740 <output name="anndata_out" file="pp.log1p.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
741 </test>--> | |
742 <test> | |
743 <!-- test 9 --> | |
744 <param name="adata" value="krumsiek11.h5ad" /> | |
745 <conditional name="method"> | |
746 <param name="method" value="pp.log1p"/> | |
747 </conditional> | |
748 <assert_stdout> | |
749 <has_text_matching expression="sc.pp.log1p"/> | |
750 </assert_stdout> | |
751 <output name="anndata_out" file="pp.log1p.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
752 </test> | |
753 <test> | |
754 <!-- test 10 --> | |
755 <param name="adata" value="krumsiek11.h5ad" /> | |
756 <conditional name="method"> | |
757 <param name="method" value="pp.scale"/> | |
758 <param name="zero_center" value="true"/> | |
759 </conditional> | |
760 <assert_stdout> | |
761 <has_text_matching expression="sc.pp.scale"/> | |
762 <has_text_matching expression="zero_center=True"/> | |
763 </assert_stdout> | |
764 <output name="anndata_out" file="pp.scale.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
765 </test> | |
766 <test> | |
767 <!-- test 11 --> | |
768 <param name="adata" value="krumsiek11.h5ad" /> | |
769 <conditional name="method"> | |
770 <param name="method" value="pp.scale"/> | |
771 <param name="zero_center" value="true"/> | |
772 <param name="max_value" value="10"/> | |
773 </conditional> | |
774 <assert_stdout> | |
775 <has_text_matching expression="sc.pp.scale"/> | |
776 <has_text_matching expression="zero_center=True"/> | |
777 <has_text_matching expression="max_value=10.0"/> | |
778 </assert_stdout> | |
779 <output name="anndata_out" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
780 </test> | |
781 <test> | |
782 <!-- test 12 --> | |
783 <param name="adata" value="krumsiek11.h5ad" /> | |
784 <conditional name="method"> | |
785 <param name="method" value="pp.sqrt"/> | |
786 </conditional> | |
787 <assert_stdout> | |
788 <has_text_matching expression="sc.pp.sqrt"/> | |
789 </assert_stdout> | |
790 <output name="anndata_out" file="pp.sqrt.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> | |
121 </test> | 791 </test> |
122 </tests> | 792 </tests> |
123 <help><![CDATA[ | 793 <help><![CDATA[ |
124 Generate cellular maps of differentiation manifolds with complex topologies (`tl.paga`) | 794 Calculate quality control metrics, using `pp.calculate_qc_metrics` |
125 ======================================================================================= | 795 =================================================================== |
126 | 796 |
127 By quantifying the connectivity of partitions (groups, clusters) of the | 797 Calculates a number of qc metrics for an AnnData object, largely based on calculateQCMetrics from scater. |
128 single-cell graph, partition-based graph abstraction (PAGA) generates a much | 798 Currently is most efficient on a sparse CSR or dense matrix. |
129 simpler abstracted graph (*PAGA graph*) of partitions, in which edge weights | 799 |
130 represent confidence in the presence of connections. By thresholding this | 800 It updates the observation level metrics: |
131 confidence in `paga`, a much simpler representation of data | 801 |
132 can be obtained. | 802 - total_{var_type}_by_{expr_type} (e.g. "total_genes_by_counts", number of genes with positive counts in a cell) |
133 | 803 - total_{expr_type} (e.g. "total_counts", total number of counts for a cell) |
134 The confidence can be interpreted as the ratio of the actual versus the | 804 - pct_{expr_type}_in_top_{n}_{var_type} (e.g. "pct_counts_in_top_50_genes", cumulative percentage of counts for 50 most expressed genes in a cell) |
135 expected value of connections under the null model of randomly connecting | 805 - total_{expr_type}_{qc_var} (e.g. "total_counts_mito", total number of counts for variables in qc_vars) |
136 partitions. We do not provide a p-value as this null model does not | 806 - pct_{expr_type}_{qc_var} (e.g. "pct_counts_mito", proportion of total counts for a cell which are mitochondrial) |
137 precisely capture what one would consider "connected" in real data, hence it | 807 |
138 strongly overestimates the expected value. See an extensive discussion of | 808 And also the variable level metrics: |
139 this in Wolf et al (2017). | 809 |
140 | 810 - total_{expr_type} (e.g. "total_counts", sum of counts for a gene) |
141 Together with a random walk-based distance measure, this generates a partial | 811 - mean_{expr_type} (e.g. "mean_counts", mean expression over all cells) |
142 coordinatization of data useful for exploring and explaining its variation. | 812 - n_cells_by_{expr_type} (e.g. "n_cells_by_counts", number of cells this expression is measured in) |
143 | 813 - pct_dropout_by_{expr_type} (e.g. "pct_dropout_by_counts", percentage of cells this feature does not appear in) |
144 More details on the `tl.paga scanpy documentation | 814 |
145 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.paga.html#scanpy.api.tl.paga>`_ | 815 More details on the `scanpy documentation |
146 | 816 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.calculate_qc_metrics.html>`__ |
147 | 817 |
148 Infer progression of cells through geodesic distance along the graph (`tl.dpt`) | 818 Compute a neighborhood graph of observations, using `pp.neighbors` |
149 =============================================================================== | 819 ================================================================== |
150 | 820 |
151 Reconstruct the progression of a biological process from snapshot | 821 The neighbor search efficiency of this heavily relies on UMAP (McInnes et al, 2018), |
152 data. `Diffusion Pseudotime` has been introduced by Haghverdi et al (2016) and | 822 which also provides a method for estimating connectivities of data points - |
153 implemented within Scanpy (Wolf et al, 2017). Here, we use a further developed | 823 the connectivity of the manifold (`method=='umap'`). If `method=='diffmap'`, |
154 version, which is able to deal with disconnected graphs (Wolf et al, 2017) and can | 824 connectivities are computed according to Coifman et al (2005), in the adaption of |
155 be run in a `hierarchical` mode by setting the parameter | 825 Haghverdi et al (2016). |
156 `n_branchings>1`. We recommend, however, to only use | 826 |
157 `tl.dpt` for computing pseudotime (`n_branchings=0`) and | 827 The returned AnnData object contains: |
158 to detect branchings via `paga`. For pseudotime, you need | 828 |
159 to annotate your data with a root cell. | 829 - Weighted adjacency matrix of the neighborhood graph of data points (connectivities). Weights should be interpreted as connectivities. |
160 | 830 - Distances for each pair of neighbors (distances) |
161 This requires to run `pp.neighbors`, first. In order to | 831 |
162 reproduce the original implementation of DPT, use `method=='gauss'` in | 832 These data are stored in the unstructured annotation (uns) and can be accessed using the inspect tool for AnnData objects |
163 this. Using the default `method=='umap'` only leads to minor quantitative | 833 |
164 differences, though. | 834 More details on the `scanpy documentation |
165 | 835 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.neighbors.html>`__ |
166 | 836 |
167 If `n_branchings==0`, no field `dpt_groups` will be written. | 837 Score a set of genes, using `tl.score_genes` |
168 | 838 ============================================ |
169 - dpt_pseudotime : Array of dim (number of samples) that stores the pseudotime of each cell, that is, the DPT distance with respect to the root cell. | 839 |
170 - dpt_groups : Array of dim (number of samples) that stores the subgroup id ('0','1', ...) for each cell. The groups typically correspond to 'progenitor cells', 'undecided cells' or 'branches' of a process. | 840 The score is the average expression of a set of genes subtracted with the |
171 | 841 average expression of a reference set of genes. The reference set is |
172 The tool is similar to the R package `destiny` of Angerer et al (2016). | 842 randomly sampled from the `gene_pool` for each binned expression value. |
173 | 843 |
174 More details on the `tl.dpt scanpy documentation | 844 This reproduces the approach in Seurat (Satija et al, 2015) and has been implemented |
175 <https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.dpt.html#scanpy.api.tl.dpt>`_ | 845 for Scanpy by Davide Cittaro. |
176 | 846 |
847 More details on the `scanpy documentation | |
848 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.score_genes.html>`__ | |
849 | |
850 Score cell cycle genes, using `tl.score_genes_cell_cycle` | |
851 ========================================================= | |
852 | |
853 Given two lists of genes associated to S phase and G2M phase, calculates | |
854 scores and assigns a cell cycle phase (G1, S or G2M). See | |
855 `score_genes` for more explanation. | |
856 | |
857 More details on the `scanpy documentation | |
858 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.score_genes_cell_cycle.html>`__ | |
859 | |
860 Rank genes for characterizing groups, using `tl.rank_genes_groups` | |
861 ================================================================== | |
862 | |
863 The returned AnnData object contains: | |
864 | |
865 - Gene names, ordered according to scores | |
866 - Z-score underlying the computation of a p-value for each gene for each group, ordered according to scores | |
867 - Log2 fold change for each gene for each group, ordered according to scores. It is only provided if method is ‘t-test’ like. This is an approximation calculated from mean-log values. | |
868 - P-values | |
869 - Adjusted p-values | |
870 | |
871 These data are stored in the unstructured annotation (uns) and can be accessed using the inspect tool for AnnData objects | |
872 | |
873 More details on the `scanpy documentation | |
874 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.rank_genes_groups.html>`__ | |
875 | |
876 | |
877 Calculate an overlap score between data-derived marker genes and provided markers (`tl.marker_gene_overlap`) | |
878 ============================================================================================================ | |
879 | |
880 Marker gene overlap scores can be quoted as overlap counts, overlap coefficients, or jaccard indices. The method returns a pandas dataframe which can be used to annotate clusters based on marker gene overlaps. | |
881 | |
882 | |
883 Logarithmize the data matrix (`pp.log1p`) | |
884 ========================================= | |
885 | |
886 More details on the `scanpy documentation | |
887 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.log1p.html>`__ | |
888 | |
889 Scale data to unit variance and zero mean (`pp.scale`) | |
890 ====================================================== | |
891 | |
892 More details on the `scanpy documentation | |
893 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.scale.html>`__ | |
894 | |
895 Computes the square root of the data matrix (`pp.sqrt`) | |
896 ======================================================= | |
897 | |
898 `X = sqrt(X)` | |
177 ]]></help> | 899 ]]></help> |
178 <expand macro="citations"/> | 900 <expand macro="citations"/> |
179 </tool> | 901 </tool> |