diff inspect.xml @ 1:a755eaa1cc32 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 8ef5f7c6f8728608a3f05bb51e11b642b84a05f5"
author iuc
date Wed, 16 Oct 2019 06:31:52 -0400
parents 5d2e17328afe
children 7d22964a8639
line wrap: on
line diff
--- a/inspect.xml	Mon Mar 04 10:15:38 2019 -0500
+++ b/inspect.xml	Wed Oct 16 06:31:52 2019 -0400
@@ -1,7 +1,52 @@
-<tool id="scanpy_inspect" name="Inspect with scanpy" version="@galaxy_version@">
-    <description></description>
+<tool id="scanpy_inspect" name="Inspect and manipulate" version="@galaxy_version@">
+    <description> with scanpy</description>
     <macros>
         <import>macros.xml</import>
+        <xml name="score_genes_params">
+            <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/>
+            <param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/>
+            <expand macro="param_use_raw"/>
+        </xml> <!-- inputs expanded in both the tl.score_genes and tl.score_genes_cell_cycle sections -->
+        <token name="@CMD_score_genes_inputs@"><![CDATA[
+    n_bins=$method.n_bins,
+    random_state=$method.random_state,
+    use_raw=$method.use_raw,
+    copy=False
+        ]]></token> <!-- trailing keyword arguments inserted into the sc.tl.score_genes and sc.tl.score_genes_cell_cycle calls -->
+        <xml name="corr_method">
+            <param argument="corr_method" type="select" label="P-value correction method">
+                <option value="benjamini-hochberg">Benjamini-Hochberg</option>
+                <option value="bonferroni">Bonferroni</option>
+            </param>
+        </xml> <!-- expanded for the t-test, wilcoxon and t-test_overestim_var methods of tl.rank_genes_groups -->
+        <xml name="fit_intercept">
+            <param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true"
+                label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""/>
+        </xml> <!-- boolean form, expanded for the newton-cg, lbfgs, sag and saga solvers; liblinear declares its own select instead -->
+        <xml name="max_iter">
+            <param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge" help=""/>
+        </xml> <!-- expanded for the newton-cg, lbfgs and sag solvers -->
+        <xml name="multi_class">
+            <param argument="multi_class" type="select" label="Multi class" help="">
+                <option value="ovr">ovr: a binary problem is fit for each label</option>
+                <option value="multinomial">multinomial: the multinomial loss fit across the entire probability distribution, even when the data is binary</option>
+                <option value="auto">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option>
+            </param>
+        </xml> <!-- expanded for the newton-cg, lbfgs, sag and saga solvers -->
+        <xml name="penalty">
+            <param argument="penalty" type="select" label="Norm used in the penalization" help="">
+                <option value="l1">l1</option>
+                <option value="l2">l2</option>
+                <option value="customized">customized</option>
+            </param>
+        </xml> <!-- test parameter of the `penalty` conditional used by the liblinear and saga solvers -->
+        <xml name="custom_penalty">
+            <param argument="pen" type="text" value="" label="Norm used in the penalization" help=""/>
+        </xml> <!-- free-text `pen` value, expanded when the penalty select is set to `customized` -->
+        <xml name="random_state">
+            <param argument="random_state" type="integer" value="" optional="true"
+                label="The seed of the pseudo random number generator to use when shuffling the data" help=""/>
+        </xml> <!-- optional seed, expanded for the liblinear and sag solvers -->
     </macros>
     <expand macro="requirements"/>
     <expand macro="version_command"/>
@@ -13,22 +58,195 @@
 @CMD_imports@
 @CMD_read_inputs@
 
-#if $method.method == "tl.paga"
-sc.tl.paga(
+#if $method.method == "pp.calculate_qc_metrics"
+sc.pp.calculate_qc_metrics(
+    adata=adata,
+    expr_type='$method.expr_type',
+    var_type='$method.var_type',
+    #if str($method.qc_vars) != ''
+        #set $qc_vars = [str(x.strip()) for x in str($method.qc_vars).split(',')]
+    qc_vars=$qc_vars,
+    #end if
+    #if str($method.percent_top) != ''
+        #set $percent_top = [int(x.strip()) for x in str($method.percent_top).split(',')]
+    percent_top=$percent_top,
+    #end if
+    inplace=True)
+
+#else if $method.method == "tl.score_genes"
+sc.tl.score_genes(
     adata=adata,
-    groups='$method.groups',
-    use_rna_velocity =$method.use_rna_velocity,
-    model='$method.model',
+    #set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')]
+    gene_list=$gene_list,
+    ctrl_size=$method.ctrl_size,
+    score_name='$method.score_name',
+    #if $method.gene_pool
+        #set $gene_pool = [str(x.strip()) for x in $method.gene_pool.split(',')]
+    gene_pool=$gene_pool,
+    #end if
+    @CMD_score_genes_inputs@)
+
+#else if $method.method == "tl.score_genes_cell_cycle"
+    #if str($method.s_genes.format) == 'file'
+with open('$method.s_genes.file', 'r') as s_genes_f:
+    s_genes = [str(x.strip()) for x in s_genes_f.readlines()]
+print(s_genes)
+    #end if
+
+    #if str($method.g2m_genes.format) == 'file'
+with open('$method.g2m_genes.file', 'r') as g2m_genes_f:
+    g2m_genes = [str(x.strip()) for x in g2m_genes_f.readlines()]
+print(g2m_genes)
+    #end if
+
+sc.tl.score_genes_cell_cycle(
+    adata=adata,
+    #if str($method.s_genes.format) == 'text'
+        #set $s_genes = [str(x.strip()) for x in $method.s_genes.text.split(',')]
+    s_genes=$s_genes,
+    #else if str($method.s_genes.format) == 'file'
+    s_genes=s_genes,
+    #end if
+    #if str($method.g2m_genes.format) == 'text'
+        #set $g2m_genes = [str(x.strip()) for x in $method.g2m_genes.text.split(',')]
+    g2m_genes=$g2m_genes,
+    #else if str($method.g2m_genes.format) == 'file'
+    g2m_genes=g2m_genes,
+    #end if
+    @CMD_score_genes_inputs@)
+
+#else if $method.method == 'pp.neighbors'
+sc.pp.neighbors(
+    adata=adata,
+    n_neighbors=$method.n_neighbors,
+    #if str($method.n_pcs) != ''
+    n_pcs=$method.n_pcs,
+    #end if
+    #if str($method.use_rep) != ''
+    use_rep='$method.use_rep',
+    #end if
+    knn=$method.knn,
+    random_state=$method.random_state,
+    method='$method.pp_neighbors_method',
+    metric='$method.metric',
     copy=False)
-#elif $method.method == "tl.dpt"
-sc.tl.dpt(
+
+#else if $method.method == 'tl.rank_genes_groups'
+sc.tl.rank_genes_groups(
     adata=adata,
-    n_dcs=$method.n_dcs,
-    n_branchings=$method.n_branchings,
-    min_group_size=$method.min_group_size,
-    allow_kendall_tau_shift=$method.allow_kendall_tau_shift,
+    groupby='$method.groupby',
+    use_raw=$method.use_raw,
+    #if str($method.groups) != ''
+    #set $group=[x.strip() for x in str($method.groups).split(',')]
+    groups=$group,
+    #end if
+    #if $method.ref.rest == 'rest'
+    reference='$method.ref.rest',
+    #else
+    reference='$method.ref.reference',
+    #end if
+    n_genes=$method.n_genes,
+    method='$method.tl_rank_genes_groups_method.method',
+    #if $method.tl_rank_genes_groups_method.method != 'logreg'
+    corr_method='$method.tl_rank_genes_groups_method.corr_method',
+    #else
+    solver='$method.tl_rank_genes_groups_method.solver.solver',
+        #if $method.tl_rank_genes_groups_method.solver.solver == 'newton-cg'
+    penalty='l2',
+    fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
+    max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
+    multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
+        #else if $method.tl_rank_genes_groups_method.solver.solver == 'lbfgs'
+    penalty='l2',
+    fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
+    max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
+    multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
+        #else if $method.tl_rank_genes_groups_method.solver.solver == 'liblinear'
+            #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
+    penalty='l1',
+            #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
+    penalty='l2',
+    dual=$method.tl_rank_genes_groups_method.solver.penalty.dual,
+            #else
+    penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
+            #end if
+    fit_intercept=$method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept,
+            #if $method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept == 'True'
+    intercept_scaling=$method.tl_rank_genes_groups_method.solver.intercept_scaling.intercept_scaling,
+            #end if
+            #if str($method.tl_rank_genes_groups_method.solver.random_state) != ''
+    random_state=$method.tl_rank_genes_groups_method.solver.random_state,
+            #end if
+        #else if $method.tl_rank_genes_groups_method.solver.solver == 'sag'
+    penalty='l2',
+    fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
+            #if str($method.tl_rank_genes_groups_method.solver.random_state) != ''
+    random_state=$method.tl_rank_genes_groups_method.solver.random_state,
+            #end if
+    max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
+    multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
+        #else if $method.tl_rank_genes_groups_method.solver.solver == 'saga'
+            #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
+    penalty='l1',
+            #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
+    penalty='l2',
+            #else
+    penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
+            #end if
+    fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
+    multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
+        #end if
+    tol=$method.tl_rank_genes_groups_method.tol,
+    C=$method.tl_rank_genes_groups_method.c,
+    #end if
+    only_positive=$method.only_positive)
+
+#else if $method.method == "tl.marker_gene_overlap"
+reference_markers = {}
+#for $i, $s in enumerate($method.reference_markers)
+    #set $list=[x.strip() for x in str($s.values).split(',')]
+reference_markers['$s.key'] = $list
+#end for
+
+sc.tl.marker_gene_overlap(
+    adata,
+    reference_markers,
+    #if str($method.key) != ''
+    key='$method.key',
+    #end if
+    method='$method.overlap.method',
+    #if $method.overlap.method == 'overlap_count' and str($method.overlap.normalize) != 'None'
+    normalize='$method.overlap.normalize',
+    #end if
+    #if str($method.top_n_markers) != ''
+    top_n_markers=$method.top_n_markers,
+    #end if
+    #if str($method.adj_pval_threshold) != ''
+    adj_pval_threshold=$method.adj_pval_threshold,
+    #end if
+    #if str($method.key_added) != ''
+    key_added='$method.key_added',
+    #end if
+    inplace=True)
+
+#else if $method.method == "pp.log1p"
+sc.pp.log1p(
+    data=adata,
     copy=False)
-adata.obs.to_csv('$obs', sep='\t')
+
+#else if $method.method == "pp.scale"
+sc.pp.scale(
+    data=adata,
+    zero_center=$method.zero_center,
+    #if str($method.max_value) != ''
+    max_value=$method.max_value,
+    #end if
+    copy=False)
+
+#else if $method.method == "pp.sqrt"
+sc.pp.sqrt(
+    data=adata,
+    copy=False)
 #end if
 
 @CMD_anndata_write_outputs@
@@ -37,143 +255,647 @@
     <inputs>
         <expand macro="inputs_anndata"/>
         <conditional name="method">
-            <param argument="method" type="select" label="Method used for plotting">
-                <!--<option value="tl.paga_compare_paths">, using `tl.paga_compare_paths`</option>!-->
-                <!--<option value="tl.paga_degrees">, using `tl.paga_degrees`</option>!-->
-                <!--<option value="tl.paga_expression_entropies">, using `tl.paga_expression_entropies`</option>!-->
-                <option value="tl.paga">Generate cellular maps of differentiation manifolds with complex topologies, using `tl.paga`</option>
-                <option value="tl.dpt">Infer progression of cells through geodesic distance along the graph, using `tl.dpt`</option>
+            <param argument="method" type="select" label="Method used for inspecting">
+                <option value="pp.calculate_qc_metrics">Calculate quality control metrics, using `pp.calculate_qc_metrics`</option>
+                <option value="pp.neighbors">Compute a neighborhood graph of observations, using `pp.neighbors`</option>
+                <option value="tl.score_genes">Score a set of genes, using `tl.score_genes`</option>
+                <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using `tl.score_genes_cell_cycle`</option>
+                <option value="tl.rank_genes_groups">Rank genes for characterizing groups, using `tl.rank_genes_groups`</option>
+                <!--<option value="tl.marker_gene_overlap">Calculate an overlap score between data-derived marker genes and provided markers, using `tl.marker_gene_overlap`</option>-->
+                <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option>
+                <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option>
+                <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option>
             </param>
-            <when value="tl.paga">
-                <param argument="groups" type="text" value="louvain" label="Key for categorical in the input" help="You can pass your predefined groups by choosing any categorical annotation of observations (`adata.obs`)."/>
-                <param argument="use_rna_velocity" type="boolean" truevalue="False" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that `adata.uns` contains a directed single-cell graph with key `['velocyto_transitions']`. This feature might be subject to change in the future."/>
-                <param argument="model" type="select" label="PAGA connectivity model" help="">
-                    <option value="v1.2">v1.2</option>
-                    <option value="v1.0">v1.0</option>
+            <when value="pp.calculate_qc_metrics">
+                <param argument="expr_type" type="text" value="counts" label="Name of kind of values in X"/>
+                <param argument="var_type" type="text" value="genes" label="The kind of thing the variables are"/>
+                <param argument="qc_vars" type="text" value="" label="Keys for boolean columns of `.var` which identify variables you could want to control for" 
+                    help="Keys separated by a comma"/>
+                <param argument="percent_top" type="text" value="" label="Proportions of top genes to cover" 
+                    help=" Values (integers) are considered 1-indexed, `50` finds cumulative proportion to the 50th most expressed genes. Values separated by a comma. 
+                    If empty don't calculate"/>
+            </when>
+            <when value="pp.neighbors">
+                <param argument="n_neighbors" type="integer" min="0" value="15" label="The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation" help="Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If `knn` is `True`, number of nearest neighbors to be searched. If `knn` is `False`, a Gaussian kernel width is set to the distance of the `n_neighbors` neighbor."/>
+                <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/>
+                <param argument="use_rep" type="text" value="" optional="true" label="Indicated representation to use" help="If not set, the representation is chosen automatically: for n_vars below 50, X is used, otherwise X_pca (uns) is used. If X_pca is not present, it's computed with default parameter"/>
+                <param argument="knn" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use a hard threshold to restrict the number of neighbors to n_neighbors?" help="If true, it considers a knn graph. Otherwise, it uses a Gaussian Kernel to assign low weights to neighbors more distant than the `n_neighbors` nearest neighbor."/>
+                <param argument="random_state" type="integer" value="0" label="Numpy random seed" help=""/>
+                <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities" help="">
+                    <option value="umap">umap (McInnes et al, 2018)</option>
+                    <option value="gauss">gauss: Gauss kernel following (Coifman et al 2005) with adaptive width (Haghverdi et al 2016)</option>
+                </param>
+                <param argument="metric" type="select" label="Distance metric" help="">
+                    <expand macro="distance_metric_options"/>
                 </param>
             </when>
-            <when value="tl.dpt">
-                <param argument="n_dcs" type="integer" min="0" value="10" label="Number of diffusion components to use" help=""/>
-                <param argument="n_branchings" type="integer" min="0" value="0" label="Number of branchings to detect" help=""/>
-                <param argument="min_group_size" type="float" min="0" value="0.01" label="Min group size" help="During recursive splitting of branches ('dpt groups') for `n_branchings` &gt; 1, do not consider groups that contain less than `min_group_size` data points. If a float, `min_group_size` refers to a fraction of the total number of data points."/>
-                <param argument="allow_kendall_tau_shift" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Allow Kendal tau shift?" help="If a very small branch is detected upon splitting, shift away from maximum correlation in Kendall tau criterion of Haghverdi et al (2016) to stabilize the splitting."/>
+            <when value="tl.score_genes">
+                <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"/>
+                <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled"
+                    help="If `len(gene_list)` is not too low, you can set `ctrl_size=len(gene_list)`."/>
+                <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set"
+                    help="Default is all genes. Genes separated by a comma"/>
+                <expand macro="score_genes_params"/>
+                <param argument="score_name" type="text" value="score" label="Name of the field to be added in `.obs`" help=""/>
+            </when>
+            <when value="tl.score_genes_cell_cycle">
+                <conditional name='s_genes'>
+                    <param name="format" type="select" label="Format for the list of genes associated with S phase">
+                        <option value="file">File</option>
+                        <option value="text" selected="true">Text</option>
+                    </param>
+                    <when value="text">
+                        <param name="text" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma"/>
+                    </when>
+                    <when value="file">
+                        <param name="file" type="data" format="txt" label="File with the list of genes associated with S phase" help="One gene per line"/>
+                    </when>
+                </conditional>
+                <conditional name='g2m_genes'>
+                    <param name="format" type="select" label="Format for the list of genes associated with G2M phase">
+                        <option value="file">File</option>
+                        <option value="text" selected="true">Text</option>
+                    </param>
+                    <when value="text">
+                        <param name="text" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma"/>
+                    </when>
+                    <when value="file">
+                        <param name="file" type="data" format="txt" label="File with the list of genes associated with G2M phase" help="One gene per line"/>
+                    </when>
+                </conditional>
+                <expand macro="score_genes_params"/>
             </when>
+            <when value="tl.rank_genes_groups">
+                <param argument="groupby" type="text" value="" label="The key of the observations grouping to consider" help=""/>
+                <expand macro="param_use_raw"/>
+                <param argument="groups" type="text" value="" label="Subset of groups to which comparison shall be restricted" help="Group names separated by a comma, e.g. g1,g2,g3. If not passed, a ranking will be generated for all groups."/>
+                <conditional name="ref">
+                    <param name="rest" type="select" label="Comparison">
+                        <option value="rest">Compare each group to the union of the rest of the group</option>
+                        <option value="group_id">Compare with respect to a specific group</option>
+                    </param>
+                    <when value="rest"/>
+                    <when value="group_id">
+                        <param argument="reference" type="text" value="" label="Group identifier with respect to which compare"/>
+                    </when>
+                </conditional>
+                <param argument="n_genes" type="integer" min="0" value="100" label="The number of genes that appear in the returned tables" help=""/>
+                <conditional name="tl_rank_genes_groups_method">
+                    <param argument="method" type="select" label="Method">
+                        <option value="t-test">t-test</option>
+                        <option value="wilcoxon">Wilcoxon-Rank-Sum</option>
+                        <option value="t-test_overestim_var" selected="true">t-test with overestimate of variance of each group</option>
+                        <option value="logreg">Logistic regression</option>
+                    </param>
+                    <when value="t-test">
+                        <expand macro="corr_method"/>
+                    </when>
+                    <when value="wilcoxon">
+                        <expand macro="corr_method"/>
+                    </when>
+                    <when value="t-test_overestim_var">
+                        <expand macro="corr_method"/>
+                    </when>
+                    <when value="logreg">
+                        <conditional name="solver">
+                            <param argument="solver" type="select" label="Algorithm to use in the optimization problem" help="For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and ‘saga’ are faster for large ones. For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes. ‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas ‘liblinear’ and ‘saga’ handle L1 penalty.">
+                                <option value="newton-cg">newton-cg</option>
+                                <option value="lbfgs">lbfgs</option>
+                                <option value="liblinear">liblinear</option>
+                                <option value="sag">sag</option>
+                                <option value="saga">saga</option>
+                            </param>
+                            <when value="newton-cg">
+                                <expand macro="fit_intercept"/>
+                                <expand macro="max_iter"/>
+                                <expand macro="multi_class"/>
+                            </when>
+                            <when value="lbfgs">
+                                <expand macro="fit_intercept"/>
+                                <expand macro="max_iter"/>
+                                <expand macro="multi_class"/>
+                            </when>
+                            <when value="liblinear">
+                                <conditional name="penalty">
+                                    <expand macro="penalty"/>
+                                    <when value="l1"/>
+                                    <when value="l2">
+                                        <param argument="dual" type="boolean" truevalue="True" falsevalue="False" checked="false"
+                                            label="Dual (not primal) formulation?" help="Prefer primal when n_samples > n_features"/>
+                                    </when>
+                                    <when value="customized">
+                                        <expand macro="custom_penalty"/>
+                                    </when>
+                                </conditional>
+                                <conditional name="intercept_scaling">
+                                    <param argument="fit_intercept" type="select"
+                                        label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help="">
+                                        <option value="True">Yes</option>
+                                        <option value="False">No</option>
+                                    </param>
+                                    <when value="True">
+                                        <param argument="intercept_scaling" type="float" value="1.0"
+                                            label="Intercept scaling"
+                                            help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/>
+                                    </when>
+                                    <when value="False"/>
+                                </conditional>
+                                <expand macro="random_state"/>
+                            </when>
+                            <when value="sag">
+                                <expand macro="fit_intercept"/>
+                                <expand macro="random_state"/>
+                                <expand macro="max_iter"/>
+                                <expand macro="multi_class"/>
+                            </when>
+                            <when value="saga">
+                                <conditional name="penalty">
+                                    <expand macro="penalty"/>
+                                    <when value="l1"/>
+                                    <when value="l2"/>
+                                    <when value="customized">
+                                        <expand macro="custom_penalty"/>
+                                    </when>
+                                </conditional>
+                                <expand macro="fit_intercept"/>
+                                <expand macro="multi_class"/>
+                            </when>
+                        </conditional>
+                        <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria" help=""/>
+                        <param argument="c" type="float" value="1.0" label="Inverse of regularization strength"
+                            help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/>
+                    </when>
+                </conditional>
+                <param argument="only_positive" type="boolean" truevalue="True" falsevalue="False" checked="true"
+                    label="Only consider positive differences?" help=""/>
+            </when>
+            <!--<when value="tl.marker_gene_overlap">
+                <repeat name="reference_markers" title="Marker genes">
+                    <param name="key" type="text" value="" label="Cell identity name" help=""/>
+                    <param name="values" type="text" value="" label="List of genes" help="Comma-separated names from `var`"/>
+                </repeat>
+                <param argument="key" type="text" value="rank_genes_groups" label="Key in adata.uns where the rank_genes_groups output is stored"/>
+                <conditional name="overlap">
+                    <param argument="method" type="select" label="Method to calculate marker gene overlap">
+                        <option value="overlap_count">overlap_count: Intersection of the gene set</option>
+                        <option value="overlap_coef">overlap_coef: Overlap coefficient</option>
+                        <option value="jaccard">jaccard: Jaccard index</option>
+                    </param>
+                    <when value="overlap_count">
+                        <param argument="normalize" type="select" label="Normalization option for the marker gene overlap output">
+                            <option value="None">None</option>
+                            <option value="reference">reference: Normalization of the data by the total number of marker genes given in the reference annotation per group</option>
+                            <option value="data">data: Normalization of the data by the total number of marker genes used for each cluster</option>
+                        </param>
+                    </when>
+                    <when value="overlap_coef"/>
+                    <when value="jaccard"/>
+                </conditional>
+                <param argument="top_n_markers" type="integer" optional="true" label="Number of top data-derived marker genes to use" help="By default all calculated marker genes are used. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
+                <param argument="adj_pval_threshold" type="float" optional="true" label="Significance threshold on the adjusted p-values to select marker genes" help=" This can only be used when adjusted p-values are calculated by 'tl.rank_genes_groups'. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
+                <param argument="key_added" type="text" value="" optional="true" label="Key that will contain the marker overlap scores in 'uns'"/>
+            </when>-->
+            <when value="pp.log1p"/>
+            <when value="pp.scale">
+                <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
+                    label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/>
+                <param argument="max_value" type="float" value="" optional="true" label="Maximum value"
+                    help="Clip (truncate) to this value after scaling. If not set, it does not clip."/>
+            </when>
+            <when value="pp.sqrt"/>
         </conditional>
-        <expand macro="anndata_output_format"/>
     </inputs>
     <outputs>
         <expand macro="anndata_outputs"/>
-        <data name="obs" format="tabular" label="${tool.name} on ${on_string}: Observations annotation">
-            <filter>method['method'] == 'tl.dpt'</filter>
-        </data>
     </outputs>
     <tests>
         <test>
-            <conditional name="input">
-                <param name="format" value="h5ad" />
-                <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
-            </conditional>
+            <!-- test 1 -->
+            <param name="adata" value="sparce_csr_matrix.h5ad" />
             <conditional name="method">
-                <param name="method" value="tl.paga"/>
-                <param name="groups" value="paul15_clusters"/>
-                <param name="use_rna_velocity" value="False"/>
-                <param name="model" value="v1.2"/>
+                <param name="method" value="pp.calculate_qc_metrics"/>
+                <param name="expr_type" value="counts"/>
+                <param name="var_type" value="genes"/>
+                <param name="qc_vars" value="mito,negative"/>
+                <param name="percent_top" value=""/>
             </conditional>
-            <param name="anndata_output_format" value="h5ad" />
             <assert_stdout>
-                <has_text_matching expression="sc.tl.paga"/>
-                <has_text_matching expression="groups='paul15_clusters'"/>
-                <has_text_matching expression="use_rna_velocity =False"/>
-                <has_text_matching expression="model='v1.2'"/>
+                <has_text_matching expression="sc.pp.calculate_qc_metrics" />
+                <has_text_matching expression="expr_type='counts'" />
+                <has_text_matching expression="var_type='genes'" />
+                <has_text_matching expression="qc_vars=\['mito', 'negative'\]" />
             </assert_stdout>
-            <output name="anndata_out_h5ad" file="tl.paga.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size">
+            <output name="anndata_out" file="pp.calculate_qc_metrics.sparce_csr_matrix.h5ad" ftype="h5ad" compare="sim_size"/>
+        </test>
+        <test>
+            <!-- test 2: pp.neighbors on pp.recipe_weinreb17.paul15_subsample.h5ad using the umap method with the euclidean metric; stdout must show the generated call arguments and the output must contain the listed h5 keys -->
+            <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" />
+            <conditional name="method">
+                <param name="method" value="pp.neighbors"/>
+                <param name="n_neighbors" value="15"/>
+                <param name="knn" value="True"/>
+                <param name="random_state" value="0"/>
+                <param name="pp_neighbors_method" value="umap"/>
+                <param name="metric" value="euclidean"/>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.neighbors"/>
+                <has_text_matching expression="n_neighbors=15"/>
+                <has_text_matching expression="knn=True"/>
+                <has_text_matching expression="random_state=0"/>
+                <has_text_matching expression="method='umap'"/>
+                <has_text_matching expression="metric='euclidean'"/>
+            </assert_stdout>
+            <output name="anndata_out" file="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size">
                 <assert_contents>
                     <has_h5_keys keys="X, obs, obsm, uns, var" />
                 </assert_contents>
             </output>
         </test>
         <test>
-            <conditional name="input">
-                <param name="format" value="h5ad" />
-                <param name="adata" value="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
+            <!-- test 3 -->
+            <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" />
+            <conditional name="method">
+                <param name="method" value="pp.neighbors"/>
+                <param name="n_neighbors" value="15"/>
+                <param name="knn" value="True"/>
+                <param name="pp_neighbors_method" value="gauss"/>
+                <param name="metric" value="braycurtis"/>
             </conditional>
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.neighbors"/>
+                <has_text_matching expression="n_neighbors=15"/>
+                <has_text_matching expression="knn=True"/>
+                <has_text_matching expression="random_state=0"/>
+                <has_text_matching expression="method='gauss'"/>
+                <has_text_matching expression="metric='braycurtis'"/>
+            </assert_stdout>
+            <output name="anndata_out" file="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
+        </test>
+        <test>
+            <!-- test 4 -->
+            <param name="adata" value="krumsiek11.h5ad" />
             <conditional name="method">
-                <param name="method" value="tl.dpt"/>
-                <param name="n_dcs" value="15"/>
-                <param name="n_branchings" value="1"/>
-                <param name="min_group_size" value="0.01"/>
-                <param name="allow_kendall_tau_shift" value="True"/>
+                <param name="method" value="tl.score_genes"/>
+                <param name="gene_list" value="Gata2, Fog1"/>
+                <param name="ctrl_size" value="2"/>
+                <param name="n_bins" value="2"/>
+                <param name="random_state" value="2"/>
+                <param name="use_raw" value="False"/>
+                <param name="score_name" value="score"/>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="sc.tl.score_genes" />
+                <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" />
+                <has_text_matching expression="ctrl_size=2" />
+                <has_text_matching expression="score_name='score'" />
+                <has_text_matching expression="n_bins=2" />
+                <has_text_matching expression="random_state=2" />
+                <has_text_matching expression="use_raw=False" />
+                <has_text_matching expression="copy=False" />
+            </assert_stdout>
+            <output name="anndata_out" file="tl.score_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+        </test>
+        <test>
+            <!-- test 5: tl.score_genes_cell_cycle on krumsiek11.h5ad; the S-phase and G2M-phase gene lists are both supplied as text and must appear as Python lists in stdout -->
+            <param name="adata" value="krumsiek11.h5ad" />
+            <conditional name="method">
+                <param name="method" value="tl.score_genes_cell_cycle"/>
+                <conditional name='s_genes'>
+                    <param name="format" value="text"/>
+                    <param name="text" value="Gata2, Fog1, EgrNab"/>
+                </conditional>
+                <conditional name='g2m_genes'>
+                    <param name="format" value="text"/>
+                    <param name="text" value="Gata2, Fog1, EgrNab"/>
+                </conditional>
+                <param name="n_bins" value="2"/>
+                <param name="random_state" value="1"/>
+                <param name="use_raw" value="False"/>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="sc.tl.score_genes_cell_cycle"/>
+                <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
+                <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
+                <has_text_matching expression="n_bins=2"/>
+                <has_text_matching expression="random_state=1"/>
+                <has_text_matching expression="use_raw=False"/>
+            </assert_stdout>
+            <output name="anndata_out" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+        </test>
+        <test>
+            <!-- test 6 -->
+            <param name="adata" value="krumsiek11.h5ad" />
+            <conditional name="method">
+                <param name="method" value="tl.rank_genes_groups"/>
+                <param name="groupby" value="cell_type"/>
+                <param name="use_raw" value="True"/>
+                <conditional name="ref">
+                    <param name="rest" value="rest"/>
+                </conditional>
+                <param name="n_genes" value="100"/>
+                <conditional name="tl_rank_genes_groups_method">
+                    <param name="method" value="t-test_overestim_var"/>
+                    <param name="corr_method" value="benjamini-hochberg"/>
+                </conditional>
+                <param name="only_positive" value="true"/>
+            </conditional>
             <assert_stdout>
-                <has_text_matching expression="sc.tl.dpt"/>
-                <has_text_matching expression="n_dcs=15"/>
-                <has_text_matching expression="n_branchings=1"/>
-                <has_text_matching expression="min_group_size=0.01"/>
-                <has_text_matching expression="allow_kendall_tau_shift=True"/>
+                <has_text_matching expression="sc.tl.rank_genes_groups"/>
+                <has_text_matching expression="groupby='cell_type'"/>
+                <has_text_matching expression="use_raw=True"/>
+                <has_text_matching expression="reference='rest'"/>
+                <has_text_matching expression="n_genes=100"/>
+                <has_text_matching expression="method='t-test_overestim_var'"/>
+                <has_text_matching expression="corr_method='benjamini-hochberg'"/>
+                <has_text_matching expression="only_positive=True"/>
             </assert_stdout>
-            <output name="anndata_out_h5ad" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size">
+            <output name="anndata_out" file="tl.rank_genes_groups.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+        </test>
+        <test>
+            <!-- test 7 -->
+            <param name="adata" value="pbmc68k_reduced.h5ad" />
+            <conditional name="method">
+                <param name="method" value="tl.rank_genes_groups"/>
+                <param name="groupby" value="louvain"/>
+                <param name="use_raw" value="True"/>
+                <conditional name="ref">
+                    <param name="rest" value="rest"/>
+                </conditional>
+                <param name="n_genes" value="100"/>
+                <conditional name="tl_rank_genes_groups_method">
+                    <param name="method" value="logreg"/>
+                    <conditional name="solver">
+                        <param name="solver" value="newton-cg"/>
+                        <param name="fit_intercept" value="True"/>
+                        <param name="max_iter" value="100"/>
+                        <param name="multi_class" value="auto"/>
+                    </conditional>
+                    <param name="tol" value="1e-4"/>
+                    <param name="c" value="1.0"/>
+                </conditional>
+                <param name="only_positive" value="true"/>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="sc.tl.rank_genes_groups"/>
+                <has_text_matching expression="groupby='louvain'"/>
+                <has_text_matching expression="use_raw=True"/>
+                <has_text_matching expression="reference='rest'"/>
+                <has_text_matching expression="n_genes=100"/>
+                <has_text_matching expression="method='logreg'"/>
+                <has_text_matching expression="solver='newton-cg'"/>
+                <has_text_matching expression="penalty='l2'"/>
+                <has_text_matching expression="fit_intercept=True"/>
+                <has_text_matching expression="max_iter=100"/>
+                <has_text_matching expression="multi_class='auto'"/>
+                <has_text_matching expression="tol=0.0001"/>
+                <has_text_matching expression="C=1.0"/>
+                <has_text_matching expression="only_positive=True"/>
+            </assert_stdout>
+            <output name="anndata_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size">
                 <assert_contents>
-                    <has_h5_keys keys="X, obs, obsm, uns, var" />
+                    <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" />
                 </assert_contents>
             </output>
-            <output name="obs" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.obs.tabular" compare="sim_size"/>
+        </test>
+        <test>
+            <!-- test 8: tl.rank_genes_groups with the logreg method and the liblinear solver (l2 penalty) on pbmc68k_reduced.h5ad. NOTE(review): the expected output file name mentions krumsiek11 although the input is pbmc68k_reduced; confirm against the test-data directory -->
+            <param name="adata" value="pbmc68k_reduced.h5ad" />
+            <conditional name="method">
+                <param name="method" value="tl.rank_genes_groups"/>
+                <param name="groupby" value="louvain"/>
+                <param name="use_raw" value="True"/>
+                <conditional name="ref">
+                    <param name="rest" value="rest"/>
+                </conditional>
+                <param name="n_genes" value="100"/>
+                <conditional name="tl_rank_genes_groups_method">
+                    <param name="method" value="logreg"/>
+                    <conditional name="solver">
+                        <param name="solver" value="liblinear"/>
+                        <conditional name="penalty">
+                            <param name="penalty" value="l2"/>
+                            <param name="dual" value="False"/>
+                            <conditional name="intercept_scaling">
+                                <param name="fit_intercept" value="True"/>
+                                <param name="intercept_scaling" value="1.0" />
+                            </conditional>
+                            <param name="random_state" value="1"/>
+                        </conditional>
+                    </conditional>
+                    <param name="tol" value="1e-4"/>
+                    <param name="c" value="1.0"/>
+                </conditional>
+                <param name="only_positive" value="true"/>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="sc.tl.rank_genes_groups"/>
+                <has_text_matching expression="groupby='louvain'"/>
+                <has_text_matching expression="use_raw=True"/>
+                <has_text_matching expression="reference='rest'"/>
+                <has_text_matching expression="n_genes=100"/>
+                <has_text_matching expression="method='logreg'"/>
+                <has_text_matching expression="solver='liblinear'"/>
+                <has_text_matching expression="penalty='l2'"/>
+                <has_text_matching expression="dual=False"/>
+                <has_text_matching expression="fit_intercept=True"/>
+                <has_text_matching expression="intercept_scaling=1.0"/>
+                <has_text_matching expression="tol=0.0001"/>
+                <has_text_matching expression="C=1.0"/>
+                <has_text_matching expression="only_positive=True"/>
+            </assert_stdout>
+            <output name="anndata_out" file="tl.rank_genes_groups.liblinear.krumsiek11.h5ad" ftype="h5ad" compare="sim_size">
+                <assert_contents>
+                    <has_h5_keys keys="X, obs, obsm, raw.X, raw.var, uns, var" />
+                </assert_contents>
+            </output>
+        </test>
+        <!--<test>
+            < test 9 >
+            <param name="adata" value="tl.rank_genes_groups.louvain.neighbors.pca.pbmc68k_reduced.h5ad" />
+            <conditional name="method">
+                <param name="method" value="tl.marker_gene_overlap"/>
+                <repeat name="reference_markers">
+                    <param name="key" value="CD4 T cells"/>
+                    <param name="value" value="IL7R"/>
+                </repeat>
+                <repeat name="reference_markers">
+                    <param name="key" value="CD14+ Monocytes"/>
+                    <param name="value" value="CD14,LYZ"/>
+                </repeat>
+                <repeat name="reference_markers">
+                    <param name="key" value="B cells"/>
+                    <param name="value" value="MS4A1"/>
+                </repeat>
+                <conditional name="overlap">
+                    <param argument="method" value="overlap_count"/>
+                    <param argument="normalize" value="None"/>
+                </conditional>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="tl.marker_gene_overlap"/>
+                <has_text_matching expression="key='rank_genes_groups'"/>
+                <has_text_matching expression="method='overlap_count'"/>
+            </assert_stdout>
+            <output name="anndata_out" file="pp.log1p.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+        </test>-->
+        <test>
+            <!-- test 9: pp.log1p on krumsiek11.h5ad; the method takes no extra parameters, so only the sc.pp.log1p call is checked in stdout -->
+            <param name="adata" value="krumsiek11.h5ad" />
+            <conditional name="method">
+                <param name="method" value="pp.log1p"/>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.log1p"/>
+            </assert_stdout>
+            <output name="anndata_out" file="pp.log1p.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+        </test>
+        <test>
+            <!-- test 10: pp.scale on krumsiek11.h5ad with zero centering enabled and max_value left unset -->
+            <param name="adata" value="krumsiek11.h5ad" />
+            <conditional name="method">
+                <param name="method" value="pp.scale"/>
+                <param name="zero_center" value="true"/>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.scale"/>
+                <has_text_matching expression="zero_center=True"/>
+            </assert_stdout>
+            <output name="anndata_out" file="pp.scale.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+        </test>
+        <test>
+            <!-- test 11: pp.scale on krumsiek11.h5ad with zero centering enabled and max_value set to 10; stdout is expected to show it as the float 10.0 -->
+            <param name="adata" value="krumsiek11.h5ad" />
+            <conditional name="method">
+                <param name="method" value="pp.scale"/>
+                <param name="zero_center" value="true"/>
+                <param name="max_value" value="10"/>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.scale"/>
+                <has_text_matching expression="zero_center=True"/>
+                <has_text_matching expression="max_value=10.0"/>
+            </assert_stdout>
+            <output name="anndata_out" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+        </test>
+        <test>
+            <!-- test 12: pp.sqrt on krumsiek11.h5ad; the method takes no extra parameters, so only the sc.pp.sqrt call is checked in stdout -->
+            <param name="adata" value="krumsiek11.h5ad" />
+            <conditional name="method">
+                <param name="method" value="pp.sqrt"/>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.sqrt"/>
+            </assert_stdout>
+            <output name="anndata_out" file="pp.sqrt.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
     </tests>
     <help><![CDATA[
-Generate cellular maps of differentiation manifolds with complex topologies (`tl.paga`)
-=======================================================================================
+Calculate quality control metrics, using `pp.calculate_qc_metrics`
+===================================================================
+
+Calculates a number of qc metrics for an AnnData object, largely based on calculateQCMetrics from scater. 
+Currently is most efficient on a sparse CSR or dense matrix.
+
+It updates the observation level metrics:
+
+- total_{var_type}_by_{expr_type} (e.g. "total_genes_by_counts", number of genes with positive counts in a cell)
+- total_{expr_type} (e.g. "total_counts", total number of counts for a cell)
+- pct_{expr_type}_in_top_{n}_{var_type} (e.g. "pct_counts_in_top_50_genes", cumulative percentage of counts for 50 most expressed genes in a cell)
+- total_{expr_type}_{qc_var} (e.g. "total_counts_mito", total number of counts for variables in qc_vars)
+- pct_{expr_type}_{qc_var} (e.g. "pct_counts_mito", proportion of total counts for a cell which are mitochondrial)
+
+And also the variable level metrics:
 
-By quantifying the connectivity of partitions (groups, clusters) of the
-single-cell graph, partition-based graph abstraction (PAGA) generates a much
-simpler abstracted graph (*PAGA graph*) of partitions, in which edge weights
-represent confidence in the presence of connections. By tresholding this
-confidence in `paga`, a much simpler representation of data
-can be obtained.
+- total_{expr_type} (e.g. "total_counts", sum of counts for a gene)
+- mean_{expr_type} (e.g. "mean_counts", mean expression over all cells)
+- n_cells_by_{expr_type} (e.g. "n_cells_by_counts", number of cells this expression is measured in)
+- pct_dropout_by_{expr_type} (e.g. "pct_dropout_by_counts", percentage of cells this feature does not appear in)
+
+More details on the `scanpy documentation
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.calculate_qc_metrics.html>`__
+
+Compute a neighborhood graph of observations, using `pp.neighbors`
+==================================================================
+
+The efficiency of the neighbor search relies heavily on UMAP (McInnes et al, 2018),
+which also provides a method for estimating connectivities of data points -
+the connectivity of the manifold (`method=='umap'`). If `method=='gauss'`,
+connectivities are computed according to Coifman et al (2005), in the adaption of
+Haghverdi et al (2016).
+
+The returned AnnData object contains:
+
+- Weighted adjacency matrix of the neighborhood graph of data points (connectivities). Weights should be interpreted as connectivities.
+- Distances for each pair of neighbors (distances)
+
+These data are stored in the unstructured annotation (uns) and can be accessed using the inspect tool for AnnData objects.
 
-The confidence can be interpreted as the ratio of the actual versus the
-expected value of connetions under the null model of randomly connecting
-partitions. We do not provide a p-value as this null model does not
-precisely capture what one would consider "connected" in real data, hence it
-strongly overestimates the expected value. See an extensive discussion of
-this in Wolf et al (2017).
+More details on the `scanpy documentation
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.neighbors.html>`__
+
+Score a set of genes, using `tl.score_genes`
+============================================
+
+The score is the average expression of a set of genes minus the
+average expression of a reference set of genes. The reference set is
+randomly sampled from the `gene_pool` for each binned expression value.
+
+This reproduces the approach in Seurat (Satija et al, 2015) and has been implemented
+for Scanpy by Davide Cittaro.
+
+More details on the `scanpy documentation
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.score_genes.html>`__
+
+Score cell cycle genes, using `tl.score_genes_cell_cycle`
+=========================================================
 
-Together with a random walk-based distance measure, this generates a partial
-coordinatization of data useful for exploring and explaining its variation.
+Given two lists of genes associated to S phase and G2M phase, calculates
+scores and assigns a cell cycle phase (G1, S or G2M). See
+`score_genes` for more explanation.
+
+More details on the `scanpy documentation
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.score_genes_cell_cycle.html>`__
+
+Rank genes for characterizing groups, using `tl.rank_genes_groups`
+==================================================================
 
-More details on the `tl.paga scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.paga.html#scanpy.api.tl.paga>`_
+The returned AnnData object contains:
+
+- Gene names, ordered according to scores
+- Z-score underlying the computation of a p-value for each gene for each group, ordered according to scores
+- Log2 fold change for each gene for each group, ordered according to scores. It is only provided if method is ‘t-test’ like. This is an approximation calculated from mean-log values.
+- P-values
+- Adjusted p-values
+
+These data are stored in the unstructured annotation (uns) and can be accessed using the inspect tool for AnnData objects.
+
+More details on the `scanpy documentation
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.tl.rank_genes_groups.html>`__
 
 
-Infer progression of cells through geodesic distance along the graph (`tl.dpt`)
-===============================================================================
+Calculate an overlap score between data-derived marker genes and provided markers (`tl.marker_gene_overlap`)
+============================================================================================================
 
-Reconstruct the progression of a biological process from snapshot
-data. `Diffusion Pseudotime` has been introduced by Haghverdi et al (2016) and
-implemented within Scanpy (Wolf et al, 2017). Here, we use a further developed
-version, which is able to deal with disconnected graphs (Wolf et al, 2017) and can
-be run in a `hierarchical` mode by setting the parameter
-`n_branchings>1`. We recommend, however, to only use
-`tl.dpt` for computing pseudotime (`n_branchings=0`) and
-to detect branchings via `paga`. For pseudotime, you need
-to annotate your data with a root cell. 
-
-This requires to run `pp.neighbors`, first. In order to
-reproduce the original implementation of DPT, use `method=='gauss'` in
-this. Using the default `method=='umap'` only leads to minor quantitative
-differences, though.
+Marker gene overlap scores can be quoted as overlap counts, overlap coefficients, or jaccard indices. The method returns a pandas dataframe which can be used to annotate clusters based on marker gene overlaps.
 
 
-If `n_branchings==0`, no field `dpt_groups` will be written.
+Logarithmize the data matrix (`pp.log1p`)
+=========================================
 
-- dpt_pseudotime : Array of dim (number of samples) that stores the pseudotime of each cell, that is, the DPT distance with respect to the root cell.
-- dpt_groups : Array of dim (number of samples) that stores the subgroup id ('0','1', ...) for each cell. The groups  typically correspond to 'progenitor cells', 'undecided cells' or 'branches' of a process.
+More details on the `scanpy documentation
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.log1p.html>`__
 
-The tool is similar to the R package `destiny` of Angerer et al (2016).
+Scale data to unit variance and zero mean (`pp.scale`)
+======================================================
 
-More details on the `tl.dpt scanpy documentation
-<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.dpt.html#scanpy.api.tl.dpt>`_
+More details on the `scanpy documentation
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.scale.html>`__
 
+Compute the square root of the data matrix (`pp.sqrt`)
+======================================================
+
+`X = sqrt(X)`
     ]]></help>
     <expand macro="citations"/>
 </tool>
\ No newline at end of file