Mercurial > repos > iuc > scanpy_inspect

diff inspect.xml @ 17:18262103fa61 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 91121b1e72696f17478dae383badaa71e9f96dbb
author: iuc
date: Sat, 14 Sep 2024 12:45:03 +0000
parents: ca086f24422f
children: d1cd8c147809
--- a/inspect.xml	Tue Aug 20 09:50:59 2024 +0000
+++ b/inspect.xml	Sat Sep 14 12:45:03 2024 +0000
@@ -1,102 +1,121 @@
-<tool id="scanpy_inspect" name="Inspect and manipulate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
-    <description> with scanpy</description>
+<tool id="scanpy_inspect" name="Scanpy Inspect and manipulate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <macros>
         <import>macros.xml</import>
-        <xml name="score_genes_params">
-            <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/>
-            <param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/>
+        <xml name="params_score_genes">
+            <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling"/>
+            <param argument="random_state" type="integer" value="0" label="Random seed for sampling"/>
             <expand macro="param_use_raw"/>
         </xml>
-        <token name="@CMD_score_genes_inputs@"><![CDATA[
+        <token name="@CMD_PARAMS_SCORE_GENES@"><![CDATA[
     n_bins=$method.n_bins,
     random_state=$method.random_state,
     use_raw=$method.use_raw,
     copy=False
-        ]]></token>
+        ]]>
+        </token>
         <xml name="corr_method">
             <param argument="corr_method" type="select" label="P-value correction method">
-                <option value="benjamini-hochberg">Benjamini-Hochberg</option>
+                <option value="benjamini-hochberg" selected="true">Benjamini-Hochberg</option>
                 <option value="bonferroni">Bonferroni</option>
             </param>
         </xml>
         <xml name="fit_intercept">
-            <param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true"
-                label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""/>
+            <param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should a constant (a.k.a. bias or intercept) be added to the decision function?"/>
         </xml>
         <xml name="max_iter">
-            <param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge" help=""/>
+            <param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge"/>
         </xml>
         <xml name="multi_class">
-            <param argument="multi_class" type="select" label="Multi class" help="">
+            <param argument="multi_class" type="select" label="Multi class">
+                <option value="auto" selected="true">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option>
                 <option value="ovr">ovr: a binary problem is fit for each label</option>
                 <option value="multinomial">multinomial: the multinomial loss fit across the entire probability distribution, even when the data is binary</option>
-                <option value="auto">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option>
             </param>
         </xml>
         <xml name="penalty">
-            <param argument="penalty" type="select" label="Norm used in the penalization" help="">
+            <param argument="penalty" type="select" label="Norm used in the penalization">
+                <option value="l2" selected="true">l2</option>
                 <option value="l1">l1</option>
-                <option value="l2">l2</option>
-                <option value="customized">customized</option>
-            </param>
-        </xml>
-        <xml name="custom_penalty">
-            <param argument="pen" type="text" value="" label="Norm used in the penalization" help="">
-                <expand macro="sanitize_query" />
+                <yield/>
             </param>
         </xml>
         <xml name="random_state">
-            <param argument="random_state" type="integer" value="" optional="true"
-                label="The seed of the pseudo random number generator to use when shuffling the data" help=""/>
+            <param argument="random_state" type="integer" value="" optional="true" label="The seed of the pseudo random number generator to use when shuffling the data"/>
         </xml>
     </macros>
     <expand macro="bio_tools"/>
-    <expand macro="requirements"/>
+    <expand macro="requirements">
+        <requirement type="package" version="1.5.1">scikit-learn</requirement>
+    </expand>
     <expand macro="version_command"/>
     <command detect_errors="exit_code"><![CDATA[
 @CMD@
       ]]></command>
     <configfiles>
         <configfile name="script_file"><![CDATA[
-@CMD_imports@
-@CMD_read_inputs@
+@CMD_IMPORTS@
+@CMD_READ_INPUTS@
 
-#if $method.method == "pp.calculate_qc_metrics"
+#if str($method.method) == 'pp.calculate_qc_metrics':
 sc.pp.calculate_qc_metrics(
     adata=adata,
     expr_type='$method.expr_type',
     var_type='$method.var_type',
-    #if $method.qc_vars
+    #if str($method.qc_vars) != '':
         #set $qc_vars = [str(x.strip()) for x in str($method.qc_vars).split(',')]
     qc_vars=$qc_vars,
     #end if
-    #if $method.percent_top
+    #if str($method.percent_top) != '':
         #set $percent_top = [int(x.strip()) for x in str($method.percent_top).split(',')]
-        percent_top=$percent_top,
+    percent_top=$percent_top,
     #end if
+    #if str($method.layer) != '':
+    layer='$method.layer',
+    #end if
+    use_raw=$method.use_raw,
+    log1p=$method.log1p,
     inplace=True)
 
-#else if $method.method == "tl.score_genes"
+#else if str($method.method) == 'pp.neighbors':
+sc.pp.neighbors(
+    adata=adata,
+    n_neighbors=$method.n_neighbors,
+    #if str($method.n_pcs) != '':
+    n_pcs=$method.n_pcs,
+    #end if
+    #if str($method.use_rep) != '':
+    use_rep='$method.use_rep',
+    #end if
+    knn=$method.knn,
+    method='$method.pp_neighbors_method',
+    metric='$method.metric',
+    random_state=$method.random_state,
+    #if str($method.key_added) != '':
+    key_added='$method.key_added',
+    #end if
+    copy=False)
+
+#else if str($method.method) == 'tl.score_genes':
 sc.tl.score_genes(
     adata=adata,
     #set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')]
     gene_list=$gene_list,
     ctrl_size=$method.ctrl_size,
-    score_name='$method.score_name',
-    #if $method.gene_pool
-        #set $gene_pool = [str(x.strip()) for x in $method.gene_pool.split(',')]
+    #if str($method.gene_pool) != '':
+        #set $gene_pool = [str(x.strip()) for x in str($method.gene_pool).split(',')]
     gene_pool=$gene_pool,
     #end if
-    @CMD_score_genes_inputs@)
+    score_name='$method.score_name',
+    @CMD_PARAMS_SCORE_GENES@)
 
-#else if $method.method == "tl.score_genes_cell_cycle"
-    #if str($method.s_genes.format) == 'file'
+#else if str($method.method) == 'tl.score_genes_cell_cycle':
+    #if str($method.s_genes.format) == 'file':
 with open('$method.s_genes.file', 'r') as s_genes_f:
     s_genes = [str(x.strip()) for x in s_genes_f.readlines()]
 print(s_genes)
     #end if
 
-    #if str($method.g2m_genes.format) == 'file'
+    #if str($method.g2m_genes.format) == 'file':
 with open('$method.g2m_genes.file', 'r') as g2m_genes_f:
     g2m_genes = [str(x.strip()) for x in g2m_genes_f.readlines()]
 print(g2m_genes)
@@ -104,155 +123,182 @@
 
 sc.tl.score_genes_cell_cycle(
     adata=adata,
-    #if str($method.s_genes.format) == 'text'
-        #set $s_genes = [str(x.strip()) for x in $method.s_genes.text.split(',')]
+    #if str($method.s_genes.format) == 'text':
+        #set $s_genes = [str(x.strip()) for x in str($method.s_genes.text).split(',')]
     s_genes=$s_genes,
-    #else if str($method.s_genes.format) == 'file'
+    #else if str($method.s_genes.format) == 'file':
     s_genes=s_genes,
     #end if
-    #if str($method.g2m_genes.format) == 'text'
-        #set $g2m_genes = [str(x.strip()) for x in $method.g2m_genes.text.split(',')]
+    #if str($method.g2m_genes.format) == 'text':
+        #set $g2m_genes = [str(x.strip()) for x in str($method.g2m_genes.text).split(',')]
     g2m_genes=$g2m_genes,
-    #else if str($method.g2m_genes.format) == 'file'
+    #else if str($method.g2m_genes.format) == 'file':
     g2m_genes=g2m_genes,
     #end if
-    @CMD_score_genes_inputs@)
+    @CMD_PARAMS_SCORE_GENES@)
 
-#else if $method.method == 'pp.neighbors'
-sc.pp.neighbors(
-    adata=adata,
-    n_neighbors=$method.n_neighbors,
-    #if str($method.n_pcs) != ''
-    n_pcs=$method.n_pcs,
-    #end if
-    #if $method.use_rep
-    use_rep='$method.use_rep',
-    #end if
-    knn=$method.knn,
-    random_state=$method.random_state,
-    method='$method.pp_neighbors_method',
-    metric='$method.metric',
-    copy=False)
-
-#else if $method.method == 'tl.rank_genes_groups'
+#else if str($method.method) == 'tl.rank_genes_groups':
 sc.tl.rank_genes_groups(
     adata=adata,
+    #if str($method.groupby) != '':
     groupby='$method.groupby',
-    #if $method.groups
+    #end if
+    use_raw=$method.use_raw,
+    #if str($method.groups) != '':
     #set $group=[x.strip() for x in str($method.groups).split(',')]
-    groups=$group,
+    groups=$group,
     #end if
-    #if $method.ref.rest == 'rest'
+    #if str($method.layer) != '':
+    layer='$method.layer',
+    #end if
+    #if str($method.ref.rest) == 'rest':
     reference='$method.ref.rest',
     #else
     reference='$method.ref.reference',
     #end if
+    #if str($method.n_genes) != '':
     n_genes=$method.n_genes,
+    #end if
     method='$method.tl_rank_genes_groups_method.method',
-    #if $method.tl_rank_genes_groups_method.method != 'logreg'
+    #if str($method.tl_rank_genes_groups_method.method) != 'logreg':
     corr_method='$method.tl_rank_genes_groups_method.corr_method',
-    #else
+    #end if
+    #if str($method.tl_rank_genes_groups_method.method) == 'wilcoxon':
+    tie_correct=$method.tl_rank_genes_groups_method.tie_correct,
+    #end if
+    #if str($method.tl_rank_genes_groups_method.method) == 'logreg':
     solver='$method.tl_rank_genes_groups_method.solver.solver',
-        #if $method.tl_rank_genes_groups_method.solver.solver == 'newton-cg'
-    penalty='l2',
+        #if str($method.tl_rank_genes_groups_method.solver.solver) == 'lbfgs':
+    penalty='$method.tl_rank_genes_groups_method.solver.penalty',
     fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
     max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
     multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
-        #else if $method.tl_rank_genes_groups_method.solver.solver == 'lbfgs'
-    penalty='l2',
+        #else if str($method.tl_rank_genes_groups_method.solver.solver) == 'newton-cg':
+    penalty='$method.tl_rank_genes_groups_method.solver.penalty',
     fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
     max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
     multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
-        #else if $method.tl_rank_genes_groups_method.solver.solver == 'liblinear'
-            #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
+        #else if str($method.tl_rank_genes_groups_method.solver.solver) == 'liblinear':
+            #if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'l1':
     penalty='l1',
-            #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
+            #else:
     penalty='l2',
     dual=$method.tl_rank_genes_groups_method.solver.penalty.dual,
-            #else
-    penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
             #end if
     fit_intercept=$method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept,
-            #if $method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept == 'True'
+            #if str($method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept) == 'True':
     intercept_scaling=$method.tl_rank_genes_groups_method.solver.intercept_scaling.intercept_scaling,
             #end if
-            #if str($method.tl_rank_genes_groups_method.solver.random_state) != ''
+            #if str($method.tl_rank_genes_groups_method.solver.random_state) != '':
     random_state=$method.tl_rank_genes_groups_method.solver.random_state,
             #end if
-        #else if $method.tl_rank_genes_groups_method.solver.solver == 'sag'
-    penalty='l2',
+        #else if str($method.tl_rank_genes_groups_method.solver.solver) == 'sag':
+    penalty='$method.tl_rank_genes_groups_method.solver.penalty.penalty',
     fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
-            #if str($method.tl_rank_genes_groups_method.solver.random_state) != ''
+            #if str($method.tl_rank_genes_groups_method.solver.random_state) != '':
     random_state=$method.tl_rank_genes_groups_method.solver.random_state,
             #end if
     max_iter=$method.tl_rank_genes_groups_method.solver.max_iter,
-    multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
-        #else if $method.tl_rank_genes_groups_method.solver.solver == 'saga'
-            #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1'
+    multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
+        #else if str($method.tl_rank_genes_groups_method.solver.solver) == 'saga':
+            #if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'l1':
     penalty='l1',
-            #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2'
+            #else if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'l2':
     penalty='l2',
-            #else
-    penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen',
+            #else if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'elasticnet':
+    penalty='elasticnet',
+            #else:
+    penalty=None,
             #end if
     fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept,
-    multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
+    multi_class='$method.tl_rank_genes_groups_method.solver.multi_class',
         #end if
     tol=$method.tl_rank_genes_groups_method.tol,
     C=$method.tl_rank_genes_groups_method.c,
     #end if
-    use_raw=$method.use_raw)
+    #if str($method.key_added) != '':
+    key_added='$method.key_added',
+    #end if
+    copy=False)
 
-#else if $method.method == "tl.marker_gene_overlap"
+#else if str($method.method) == "tl.marker_gene_overlap":
 reference_markers = {}
 #for $i, $s in enumerate($method.reference_markers)
     #set $list=[x.strip() for x in str($s.values).split(',')]
 reference_markers['$s.key'] = $list
 #end for
 
-sc.tl.marker_gene_overlap(
-    adata,
-    reference_markers,
-    #if $method.key
-    key='$method.key',
-    #end if
-    method='$method.overlap.method',
-    #if $method.overlap.method == 'overlap_count' and str($method.overlap.normalize) != 'None'
-    normalize='$method.overlap.normalize',
-    #end if
-    #if str($method.top_n_markers) != ''
-    top_n_markers=$method.top_n_markers,
-    #end if
-    #if str($method.adj_pval_threshold) != ''
-    adj_pval_threshold=$method.adj_pval_threshold,
-    #end if
-    #if $method.key_added
-    key_added='$method.key_added',
-    #end if
-    inplace=True)
+# Temporary fix for the issue with "inplace=True" for Pandas dataframes.
+# see here: https://github.com/scverse/scanpy/blob/b6193502e11b84fc1b4a011ee9cf08a19da22ebf/src/scanpy/tools/_marker_gene_overlap.py#L167
+marker_overlap_result = sc.tl.marker_gene_overlap(
+                            adata,
+                            reference_markers,
+                            #if str($method.key) != '':
+                            key='$method.key',
+                            #end if
+                            method='$method.overlap.method',
+                            #if str($method.overlap.method) == 'overlap_count' and str($method.overlap.normalize) != 'None':
+                            normalize='$method.overlap.normalize',
+                            #end if
+                            #if str($method.top_n_markers) != '':
+                            top_n_markers=$method.top_n_markers,
+                            #end if
+                            #if str($method.adj_pval_threshold) != '':
+                            adj_pval_threshold=$method.adj_pval_threshold,
+                            #end if
+                            #if $method.key_added:
+                            key_added='$method.key_added',
+                            #end if
+                            inplace=False)
 
-#else if $method.method == "pp.log1p"
+adata.uns['marker_gene_overlap'] = marker_overlap_result
+
+#else if str($method.method) == "pp.log1p":
 sc.pp.log1p(
     adata,
+    #if str($method.base) != '':
+    base=$method.base,
+    #end if
+    #if str($method.layer) != '':
+    layer='$method.layer',
+    #end if
+    #if str($method.obsm) != '':
+    obsm='$method.obsm',
+    #end if
     copy=False)
 
-#else if $method.method == "pp.scale"
+#else if str($method.method) == "pp.scale":
 sc.pp.scale(
     adata,
     zero_center=$method.zero_center,
-    #if str($method.max_value) != ''
+    #if str($method.max_value) != '':
     max_value=$method.max_value,
     #end if
+    #if str($method.layer) != '':
+    layer='$method.layer',
+    #end if
+    #if str($method.obsm) != '':
+    obsm='$method.obsm',
+    #end if
+    #if str($method.mask_obs) != '':
+    mask_obs='$method.mask_obs',
+    #end if
     copy=False)
 
-#else if $method.method == "pp.sqrt"
+#else if str($method.method) == "pp.sqrt":
+
+print("stats before sqrt:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean())
+
 sc.pp.sqrt(
     adata,
     copy=False)
 #end if
 
-@CMD_anndata_write_outputs@
-]]></configfile>
+print("stats after sqrt:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean())
+
+@CMD_ANNDATA_WRITE_OUTPUTS@
+        ]]>
+        </configfile>
     </configfiles>
     <inputs>
         <expand macro="inputs_anndata"/>
@@ -263,66 +309,66 @@
                 <option value="tl.score_genes">Score a set of genes, using 'tl.score_genes'</option>
                 <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using 'tl.score_genes_cell_cycle'</option>
                 <option value="tl.rank_genes_groups">Rank genes for characterizing groups, using 'tl.rank_genes_groups'</option>
-                <!--<option value="tl.marker_gene_overlap">Calculate an overlap score between data-deriven marker genes and provided markers, using 'tl.marker_gene_overlap'</option>-->
+                <option value="tl.marker_gene_overlap">Calculate an overlap score between data-derived marker genes and provided markers, using 'tl.marker_gene_overlap'</option>
                 <option value="pp.log1p">Logarithmize the data matrix, using 'pp.log1p'</option>
                 <option value="pp.scale">Scale data to unit variance and zero mean, using 'pp.scale'</option>
                 <option value="pp.sqrt">Square root the data matrix, using 'pp.sqrt'</option>
             </param>
             <when value="pp.calculate_qc_metrics">
                 <param argument="expr_type" type="text" value="counts" label="Name of kind of values in X">
-                    <expand macro="sanitize_query" />
+                    <expand macro="sanitize_query"/>
                 </param>
                 <param argument="var_type" type="text" value="genes" label="The kind of thing the variables are">
-                    <expand macro="sanitize_query" />
+                    <expand macro="sanitize_query"/>
                 </param>
-                <param argument="qc_vars" type="text" value="" label="Keys for boolean columns of '.var' which identify variables you could want to control for" help="Keys separated by a comma">
-                    <expand macro="sanitize_query" />
+                <param argument="qc_vars" type="text" optional="true" value="" label="Keys for boolean columns of '.var' which identify variables you could want to control for" help="Keys separated by a comma">
+                    <expand macro="sanitize_query"/>
                 </param>
-                <param argument="percent_top" type="text" value="" label="Proportions of top genes to cover" 
+                <param argument="percent_top" type="text" value="" optional="true" label="Proportions of top genes to cover"
                     help=" Values (integers) are considered 1-indexed, '50' finds cumulative proportion to the 50th most expressed genes. Values separated by a comma. If empty don't calculate">
-                    <expand macro="sanitize_vectors" />
+                    <expand macro="sanitize_vectors"/>
                 </param>
+                <expand macro="param_layer"/>
+                <expand macro="param_use_raw"/>
+                <param argument="log1p" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Compute log1p transformed annotations"/>
             </when>
             <when value="pp.neighbors">
                 <param argument="n_neighbors" type="integer" min="0" value="15" label="The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation" help="Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If 'knn' is 'True', number of nearest neighbors to be searched. If 'knn' is 'False', a Gaussian kernel width is set to the distance of the 'n_neighbors' neighbor."/>
-                <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/>
-                <param argument="use_rep" type="text" value="" optional="true" label="Indicated representation to use" help="If not set, the representation is chosen automatically: for n_vars below 50, X is used, otherwise X_pca (uns) is used. If X_pca is not present, it's computed with default parameter">
-                    <expand macro="sanitize_query" />
-                </param>
+                <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use"/>
+                <expand macro="param_use_rep"/>
                 <param argument="knn" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use a hard threshold to restrict the number of neighbors to n_neighbors?" help="If true, it considers a knn graph. Otherwise, it uses a Gaussian Kernel to assign low weights to neighbors more distant than the 'n_neighbors' nearest neighbor."/>
-                <param argument="random_state" type="integer" value="0" label="Numpy random seed" help=""/>
-                <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities" help="">
-                    <option value="umap">umap (McInnes et al, 2018)</option>
+                <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities">
+                    <option value="umap" selected="true">umap (McInnes et al, 2018)</option>
                     <option value="gauss">gauss: Gauss kernel following (Coifman et al 2005) with adaptive width (Haghverdi et al 2016)</option>
                 </param>
-                <param argument="metric" type="select" label="Distance metric" help="">
+                <param argument="metric" type="select" label="Distance metric">
                     <expand macro="distance_metric_options"/>
                 </param>
+                <param argument="random_state" type="integer" value="0" label="Numpy random seed"/>
+                <param argument="key_added" type="text" value="" optional="true" label="Key to store neighbors, distances and connectivities" help="If specified, the neighbors data is added to .uns[key_added], distances are stored in .obsp[key_added+'_distances'] and connectivities in .obsp[key_added+'_connectivities']"/>
             </when>
             <when value="tl.score_genes">
-                <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma">
-                    <expand macro="sanitize_query" />
+                <param argument="gene_list" type="text" value="" optional="false" label="The list of gene names used for score calculation" help="Genes separated by a comma">
+                    <expand macro="sanitize_query"/>
                 </param>
-                <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled"
-                    help="If 'len(gene_list)' is not too low, you can set 'ctrl_size=len(gene_list)'."/>
-                <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set"
-                       help="Default is all genes. Genes separated by a comma">
-                    <expand macro="sanitize_query" />
+                <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled" help="If 'len(gene_list)' is not too low, you can set 'ctrl_size=len(gene_list)'."/>
+                <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set" help="Default is all genes. Genes separated by a comma">
+                    <expand macro="sanitize_query"/>
                 </param>
-                <expand macro="score_genes_params"/>
-                <param argument="score_name" type="text" value="score" label="Name of the field to be added in '.obs'" help="">
-                    <expand macro="sanitize_query" />
+                <param argument="score_name" type="text" value="score" label="Name of the field to be added in '.obs'">
+                    <expand macro="sanitize_query"/>
                 </param>
+                <expand macro="params_score_genes"/>
             </when>
             <when value="tl.score_genes_cell_cycle">
                 <conditional name='s_genes'>
                     <param name="format" type="select" label="Format for the list of genes associated with S phase">
+                        <option value="text" selected="true">Text</option>
                         <option value="file">File</option>
-                        <option value="text" selected="true">Text</option>
                     </param>
                     <when value="text">
                         <param name="text" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma">
-                            <expand macro="sanitize_query" />
+                            <expand macro="sanitize_query"/>
                         </param>
                     </when>
                     <when value="file">
@@ -331,41 +377,44 @@
                 </conditional>
                 <conditional name='g2m_genes'>
                     <param name="format" type="select" label="Format for the list of genes associated with G2M phase">
+                        <option value="text" selected="true">Text</option>
                         <option value="file">File</option>
-                        <option value="text" selected="true">Text</option>
                     </param>
                     <when value="text">
                         <param name="text" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma">
-                            <expand macro="sanitize_query" />
+                            <expand macro="sanitize_query"/>
                         </param>
                     </when>
                     <when value="file">
                         <param name="file" type="data" format="txt" label="File with the list of genes associated with G2M phase" help="One gene per line"/>
                     </when>
                 </conditional>
-                <expand macro="score_genes_params"/>
+                <expand macro="params_score_genes"/>
             </when>
             <when value="tl.rank_genes_groups">
-                <param argument="groupby" type="text" value="" label="The key of the observations grouping to consider" help="">
-                    <expand macro="sanitize_query" />
+                <param argument="groupby" type="text" value="" label="The key of the observations grouping to consider">
+                    <expand macro="sanitize_query"/>
                 </param>
                 <expand macro="param_use_raw"/>
                 <param argument="groups" type="text" value="" label="Subset of groups to which comparison shall be restricted" help="e.g. ['g1', 'g2', 'g3']. If not passed, a ranking will be generated for all groups.">
-                    <expand macro="sanitize_query" />
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param argument="layer" type="text" value="" label="Key from adata.layers whose value will be used to perform tests on">
+                    <expand macro="sanitize_query"/>
                 </param>
                 <conditional name="ref">
                     <param name="rest" type="select" label="Comparison">
-                        <option value="rest">Compare each group to the union of the rest of the group</option>
+                        <option value="rest" selected="true">Compare each group to the union of the rest of the group</option>
                         <option value="group_id">Compare with respect to a specific group</option>
                     </param>
                     <when value="rest"/>
                     <when value="group_id">
                         <param argument="reference" type="text" value="" label="Group identifier with respect to which compare">
-                            <expand macro="sanitize_query" />
+                            <expand macro="sanitize_query"/>
                         </param>
                     </when>
                 </conditional>
-                <param argument="n_genes" type="integer" min="0" value="100" label="The number of genes that appear in the returned tables" help=""/>
+                <param argument="n_genes" type="integer" min="0" value="" optional="true" label="The number of genes that appear in the returned tables" help="Defaults to all genes"/>
                 <conditional name="tl_rank_genes_groups_method">
                     <param argument="method" type="select" label="Method">
                         <option value="t-test" selected="true">t-test</option>
@@ -378,6 +427,7 @@
                     </when>
                     <when value="wilcoxon">
                         <expand macro="corr_method"/>
+                        <param argument="tie_correct" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use tie correction for 'wilcoxon' scores"/>
                     </when>
                     <when value="t-test_overestim_var">
                         <expand macro="corr_method"/>
@@ -385,18 +435,20 @@
                     <when value="logreg">
                         <conditional name="solver">
                             <param argument="solver" type="select" label="Algorithm to use in the optimization problem" help="For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and ‘saga’ are faster for large ones. For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes. ‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas ‘liblinear’ and ‘saga’ handle L1 penalty.">
+                                <option value="lbfgs" selected="true">lbfgs</option>
                                 <option value="newton-cg">newton-cg</option>
-                                <option value="lbfgs">lbfgs</option>
                                 <option value="liblinear">liblinear</option>
                                 <option value="sag">sag</option>
                                 <option value="saga">saga</option>
                             </param>
-                            <when value="newton-cg">
+                            <when value="lbfgs"><!-- lbfgs solver: a boolean toggles the penalty between l2 and None; the remaining inputs are expanded from macros -->
+                                <param name="penalty" type="boolean" truevalue="l2" falsevalue="None" checked="true" label="use l2 penalty?"/>
                                 <expand macro="fit_intercept"/>
                                 <expand macro="max_iter"/>
                                 <expand macro="multi_class"/>
-                            </when>
-                            <when value="lbfgs">
+                            </when>
+                            <when value="newton-cg">
+                                <param name="penalty" type="boolean" truevalue="l2" falsevalue="None" checked="true" label="use l2 penalty?"/>
                                 <expand macro="fit_intercept"/>
                                 <expand macro="max_iter"/>
                                 <expand macro="multi_class"/>
@@ -409,26 +461,21 @@
                                         <param argument="dual" type="boolean" truevalue="True" falsevalue="False" checked="false"
                                             label="Dual (not primal) formulation?" help="Prefer primal when n_samples > n_features"/>
                                     </when>
-                                    <when value="customized">
-                                        <expand macro="custom_penalty"/>
-                                    </when>
                                 </conditional>
                                 <conditional name="intercept_scaling">
-                                    <param argument="fit_intercept" type="select"
-                                        label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help="">
-                                        <option value="True">Yes</option>
+                                    <param argument="fit_intercept" type="select" label="Should a constant (a.k.a. bias or intercept) be added to the decision function?">
+                                        <option value="True" selected="true">Yes</option>
                                         <option value="False">No</option>
                                     </param>
                                     <when value="True">
-                                        <param argument="intercept_scaling" type="float" value="1.0"
-                                            label="Intercept scaling"
-                                            help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/>
+                                        <param argument="intercept_scaling" type="float" value="1.0" label="Intercept scaling" help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/>
                                     </when>
                                     <when value="False"/>
                                 </conditional>
                                 <expand macro="random_state"/>
                             </when>
                             <when value="sag">
+                                <param name="penalty" type="boolean" truevalue="l2" falsevalue="None" checked="true" label="use l2 penalty?"/>
                                 <expand macro="fit_intercept"/>
                                 <expand macro="random_state"/>
                                 <expand macro="max_iter"/>
@@ -436,39 +483,42 @@
                             </when>
                             <when value="saga">
                                 <conditional name="penalty">
-                                    <expand macro="penalty"/>
+                                    <expand macro="penalty">
+                                        <option value="elasticnet">elasticnet</option>
+                                        <option value="None">None</option>
+                                    </expand>
                                     <when value="l1"/>
                                     <when value="l2"/>
-                                    <when value="customized">
-                                        <expand macro="custom_penalty"/>
-                                    </when>
+                                    <when value="elasticnet"/><!-- NOTE(review): presumably forwarded to scikit-learn's LogisticRegression, which needs l1_ratio when penalty='elasticnet'; no such input is declared here, confirm the script supplies one -->
+                                    <when value="None"/>
                                 </conditional>
                                 <expand macro="fit_intercept"/>
                                 <expand macro="multi_class"/>
                             </when>
                         </conditional>
-                        <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria" help=""/>
-                        <param argument="c" type="float" value="1.0" label="Inverse of regularization strength"
-                            help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/>
+                        <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria"/>
+                        <param argument="c" type="float" value="1.0" label="Inverse of regularization strength" help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/>
                     </when>
                 </conditional>
+                <param argument="key_added" type="text" value="" optional="true" label="The key in adata.uns information is saved to"/>
             </when>
             <!-- With inplace=True, NotImplementedError: Writing Pandas dataframes to h5ad is currently under development. Please use `inplace=False`. -->
-            <!-- <when value="tl.marker_gene_overlap">
+            <!-- The issue described above is fixed in the script, so tl.marker_gene_overlap is enabled again -->
+            <when value="tl.marker_gene_overlap">
                 <repeat name="reference_markers" title="Marker genes">
-                    <param name="key" type="text" value="" label="Cell identity name" help=""/>
+                    <param name="key" type="text" value="" label="Cell identity name"/>
                     <param name="values" type="text" value="" label="List of genes" help="Comma-separated names from 'var'"/>
                 </repeat>
                 <param argument="key" type="text" value="rank_genes_groups" label="Key in adata.uns where the rank_genes_groups output is stored"/>
                 <conditional name="overlap">
                     <param argument="method" type="select" label="Method to calculate marker gene overlap">
-                        <option value="overlap_count">overlap_count: Intersection of the gene set</option>
+                        <option value="overlap_count" selected="true">overlap_count: Intersection of the gene set</option>
                         <option value="overlap_coef">overlap_coef: Overlap coefficient</option>
                         <option value="jaccard">jaccard: Jaccard index</option>
                     </param>
                     <when value="overlap_count">
                         <param argument="normalize" type="select" label="Normalization option for the marker gene overlap output">
-                            <option value="None">None</option>
+                            <option value="None" selected="true">None</option>
                             <option value="reference">reference: Normalization of the data by the total number of marker genes given in the reference annotation per group</option>
                             <option value="data">data: Normalization of the data by the total number of marker genes used for each cluster</option>
                         </param>
@@ -476,16 +526,31 @@
                     <when value="overlap_coef"/>
                     <when value="jaccard"/>
                 </conditional>
-                <param argument="top_n_markers" type="integer" optional="true" label="Number of top data-derived marker genes to use" help="By default all calculated marker genes are used. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
-                <param argument="adj_pval_threshold" type="float" optional="true" label="Significance threshold on the adjusted p-values to select marker genes" help=" This can only be used when adjusted p-values are calculated by 'tl.rank_genes_groups'. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
-                <param argument="key_added" type="text" value="marker_gene_overlap" optional="true" label="Key that will contain the marker overlap scores in 'uns'"/>
-            </when>-->
-            <when value="pp.log1p"/>
+                <param argument="top_n_markers" type="integer" optional="true" value="" label="Number of top data-derived marker genes to use" help="By default all calculated marker genes are used. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
+                <param argument="adj_pval_threshold" type="float" optional="true" value="" label="Significance threshold on the adjusted p-values to select marker genes" help=" This can only be used when adjusted p-values are calculated by 'tl.rank_genes_groups'. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/>
+                <param argument="key_added" type="text" optional="true" value="" label="Key that will contain the marker overlap scores in 'uns'"/>
+            </when>
+            <when value="pp.log1p"><!-- Inputs for pp.log1p; base, layer and obsm are all optional and empty by default -->
+                <param argument="base" type="integer" value="" optional="true" label="Base of the logarithm." help="Natural logarithm is used by default."/>
+                <param argument="layer" type="text" value="" optional="true" label="Entry of layers to transform">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param argument="obsm" type="text" value="" optional="true" label="Entry of obsm to transform">
+                    <expand macro="sanitize_query"/>
+                </param>
+            </when>
             <when value="pp.scale">
-                <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
-                    label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/>
-                <param argument="max_value" type="float" value="" optional="true" label="Maximum value"
-                    help="Clip (truncate) to this value after scaling. If not set, it does not clip."/>
+                <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/>
+                <param argument="max_value" type="float" value="" optional="true" label="Maximum value" help="Clip (truncate) to this value after scaling. If not set, it does not clip."/>
+                <param argument="layer" type="text" value="" label="Which element of layers to scale"><!-- NOTE(review): layer, obsm and mask_obs do not set optional="true", unlike the layer/obsm inputs of pp.log1p; confirm this is intended -->
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param argument="obsm" type="text" value="" label="Which element of obsm to scale">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param argument="mask_obs" type="text" value="" label="Restrict both the derivation of scaling parameters and the scaling itself to a certain set of observations.">
+                    <expand macro="sanitize_query"/>
+                </param>
             </when>
             <when value="pp.sqrt"/>
         </conditional>
@@ -495,387 +560,38 @@
         <expand macro="anndata_outputs"/>
     </outputs>
     <tests>
+
+        <!-- test 1 -->
         <test expect_num_outputs="2">
-            <!-- test 1 -->
-            <param name="adata" value="sparce_csr_matrix.h5ad" />
+            <param name="adata" value="sparce_csr_matrix.h5ad"/>
             <conditional name="method">
                 <param name="method" value="pp.calculate_qc_metrics"/>
-                <param name="expr_type" value="counts"/>
-                <param name="var_type" value="genes"/>
                 <param name="qc_vars" value="mito,negative"/>
-                <param name="percent_top" value=""/>
-            </conditional>
-            <section name="advanced_common">
-                <param name="show_log" value="true" />
-            </section>
-            <output name="hidden_output">
-                <assert_contents>
-                    <has_text_matching expression="sc.pp.calculate_qc_metrics" />
-                    <has_text_matching expression="expr_type='counts'" />
-                    <has_text_matching expression="var_type='genes'" />
-                    <has_text_matching expression="qc_vars=\['mito', 'negative'\]" />
-                </assert_contents>
-            </output>
-            <output name="anndata_out" file="pp.calculate_qc_metrics.sparce_csr_matrix.h5ad" ftype="h5ad" compare="sim_size"/>
-        </test>
-        <test expect_num_outputs="2">
-            <!-- test 2 -->
-            <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" />
-            <conditional name="method">
-                <param name="method" value="pp.neighbors"/>
-                <param name="n_neighbors" value="15"/>
-                <param name="knn" value="True"/>
-                <param name="random_state" value="0"/>
-                <param name="pp_neighbors_method" value="umap"/>
-                <param name="metric" value="euclidean"/>
-            </conditional>
-            <section name="advanced_common">
-                <param name="show_log" value="true" />
-            </section>
-            <output name="hidden_output">
-                <assert_contents>
-                    <has_text_matching expression="sc.pp.neighbors"/>
-                    <has_text_matching expression="n_neighbors=15"/>
-                    <has_text_matching expression="knn=True"/>
-                    <has_text_matching expression="random_state=0"/>
-                    <has_text_matching expression="method='umap'"/>
-                    <has_text_matching expression="metric='euclidean'"/>
-                </assert_contents>
-            </output>
-            <output name="anndata_out" file="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size">
-                <assert_contents>
-                    <has_h5_keys keys="X, obs, obsm, uns, var" />
-                </assert_contents>
-            </output>
-        </test>
-        <test expect_num_outputs="2">
-            <!-- test 3 -->
-            <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" />
-            <conditional name="method">
-                <param name="method" value="pp.neighbors"/>
-                <param name="n_neighbors" value="15"/>
-                <param name="knn" value="True"/>
-                <param name="pp_neighbors_method" value="gauss"/>
-                <param name="metric" value="braycurtis"/>
             </conditional>
             <section name="advanced_common">
-                <param name="show_log" value="true" />
-            </section>
-            <output name="hidden_output">
-                <assert_contents>
-                    <has_text_matching expression="sc.pp.neighbors"/>
-                    <has_text_matching expression="n_neighbors=15"/>
-                    <has_text_matching expression="knn=True"/>
-                    <has_text_matching expression="random_state=0"/>
-                    <has_text_matching expression="method='gauss'"/>
-                    <has_text_matching expression="metric='braycurtis'"/>
-                </assert_contents>
-            </output>
-            <output name="anndata_out" file="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
-        </test>
-        <test expect_num_outputs="2">
-            <!-- test 4 -->
-            <param name="adata" value="krumsiek11.h5ad" />
-            <conditional name="method">
-                <param name="method" value="tl.score_genes"/>
-                <param name="gene_list" value="Gata2, Fog1"/>
-                <param name="ctrl_size" value="2"/>
-                <param name="n_bins" value="2"/>
-                <param name="random_state" value="2"/>
-                <param name="use_raw" value="False"/>
-                <param name="score_name" value="score"/>
-            </conditional>
-            <section name="advanced_common">
-                <param name="show_log" value="true" />
-            </section>
-            <output name="hidden_output">
-                <assert_contents>
-                    <has_text_matching expression="sc.tl.score_genes" />
-                    <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" />
-                    <has_text_matching expression="ctrl_size=2" />
-                    <has_text_matching expression="score_name='score'" />
-                    <has_text_matching expression="n_bins=2" />
-                    <has_text_matching expression="random_state=2" />
-                    <has_text_matching expression="use_raw=False" />
-                    <has_text_matching expression="copy=False" />
-                </assert_contents>
-            </output>
-            <output name="anndata_out" file="tl.score_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
-        </test>
-        <test expect_num_outputs="2">
-            <!-- test 5 -->
-            <param name="adata" value="krumsiek11.h5ad" />
-            <conditional name="method">
-                <param name="method" value="tl.score_genes_cell_cycle"/>
-                <conditional name='s_genes'>
-                    <param name="format" value="text"/>
-                    <param name="text" value="Gata2, Fog1, EgrNab"/>
-                </conditional>
-                <conditional name='g2m_genes'>
-                    <param name="format" value="text"/>
-                    <param name="text" value="Gata2, Fog1, EgrNab"/>
-                </conditional>
-                <param name="n_bins" value="2"/>
-                <param name="random_state" value="1"/>
-                <param name="use_raw" value="False"/>
-            </conditional>
-            <section name="advanced_common">
-                <param name="show_log" value="true" />
-            </section>
-            <output name="hidden_output">
-                <assert_contents>
-                    <has_text_matching expression="sc.tl.score_genes_cell_cycle"/>
-                    <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
-                    <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
-                    <has_text_matching expression="n_bins=2"/>
-                    <has_text_matching expression="random_state=1"/>
-                    <has_text_matching expression="use_raw=False"/>
-                </assert_contents>
-            </output>
-            <output name="anndata_out" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
-        </test>
-        <test expect_num_outputs="2">
-            <!-- test 6 -->
-            <param name="adata" value="krumsiek11.h5ad" />
-            <conditional name="method">
-                <param name="method" value="tl.rank_genes_groups"/>
-                <param name="groupby" value="cell_type"/>
-                <param name="use_raw" value="False"/>
-                <conditional name="ref">
-                    <param name="rest" value="rest"/>
-                </conditional>
-                <param name="n_genes" value="100"/>
-                <conditional name="tl_rank_genes_groups_method">
-                    <param name="method" value="t-test_overestim_var"/>
-                    <param name="corr_method" value="benjamini-hochberg"/>
-                </conditional>
-            </conditional>
-            <section name="advanced_common">
-                <param name="show_log" value="true" />
+                <param name="show_log" value="true"/>
             </section>
             <output name="hidden_output">
                 <assert_contents>
-                    <has_text_matching expression="sc.tl.rank_genes_groups"/>
-                    <has_text_matching expression="groupby='cell_type'"/>
-                    <has_text_matching expression="use_raw=False"/>
-                    <has_text_matching expression="reference='rest'"/>
-                    <has_text_matching expression="n_genes=100"/>
-                    <has_text_matching expression="method='t-test_overestim_var'"/>
-                    <has_text_matching expression="corr_method='benjamini-hochberg'"/>
+                    <has_text_matching expression="sc.pp.calculate_qc_metrics"/>
+                    <has_text_matching expression="expr_type='counts'"/>
+                    <has_text_matching expression="var_type='genes'"/>
+                    <has_text_matching expression="qc_vars=\['mito', 'negative'\]"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" file="tl.rank_genes_groups.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
-        </test>
-        <test expect_num_outputs="2">
-            <!-- test 7 -->
-            <param name="adata" value="pbmc68k_reduced.h5ad" />
-            <conditional name="method">
-                <param name="method" value="tl.rank_genes_groups"/>
-                <param name="groupby" value="louvain"/>
-                <param name="use_raw" value="True"/>
-                <conditional name="ref">
-                    <param name="rest" value="rest"/>
-                </conditional>
-                <param name="n_genes" value="100"/>
-                <conditional name="tl_rank_genes_groups_method">
-                    <param name="method" value="logreg"/>
-                    <conditional name="solver">
-                        <param name="solver" value="newton-cg"/>
-                        <param name="fit_intercept" value="True"/>
-                        <param name="max_iter" value="100"/>
-                        <param name="multi_class" value="auto"/>
-                    </conditional>
-                    <param name="tol" value="1e-4"/>
-                    <param name="c" value="1.0"/>
-                </conditional>
-            </conditional>
-            <section name="advanced_common">
-                <param name="show_log" value="true" />
-            </section>
-            <output name="hidden_output">
+            <output name="anndata_out" ftype="h5ad">
                 <assert_contents>
-                    <has_text_matching expression="sc.tl.rank_genes_groups"/>
-                    <has_text_matching expression="groupby='louvain'"/>
-                    <has_text_matching expression="use_raw=True"/>
-                    <has_text_matching expression="reference='rest'"/>
-                    <has_text_matching expression="n_genes=100"/>
-                    <has_text_matching expression="method='logreg'"/>
-                    <has_text_matching expression="solver='newton-cg'"/>
-                    <has_text_matching expression="penalty='l2'"/>
-                    <has_text_matching expression="fit_intercept=True"/>
-                    <has_text_matching expression="max_iter=100"/>
-                    <has_text_matching expression="multi_class='auto'"/>
-                    <has_text_matching expression="tol=0.0001"/>
-                    <has_text_matching expression="C=1.0"/>
-                </assert_contents>
-            </output>
-            <output name="anndata_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15">
-                <assert_contents>
-                    <has_h5_keys keys="X, obs, obsm, raw/X, raw/var, uns, var" />
+                    <has_h5_keys keys="obs/n_genes_by_counts,obs/log1p_n_genes_by_counts,obs/total_counts,obs/log1p_total_counts,obs/pct_counts_in_top_50_genes,obs/pct_counts_in_top_100_genes,obs/pct_counts_in_top_200_genes,obs/pct_counts_in_top_500_genes,obs/total_counts_mito,obs/log1p_total_counts_mito,obs/pct_counts_mito,obs/total_counts_negative,obs/log1p_total_counts_negative,obs/pct_counts_negative"/>
+                    <has_h5_keys keys="var/mito,var/negative,var/n_cells_by_counts,var/mean_counts,var/log1p_mean_counts,var/pct_dropout_by_counts,var/total_counts,var/log1p_total_counts"/><!-- var/mito and var/negative are the qc_vars columns; test 2 asserts them for the same input file and qc_vars -->
                 </assert_contents>
             </output>
         </test>
+
+        <!-- test 2 -->
         <test expect_num_outputs="2">
-            <!-- test 8 -->
-            <param name="adata" value="pbmc68k_reduced.h5ad" />
-            <conditional name="method">
-                <param name="method" value="tl.rank_genes_groups"/>
-                <param name="groupby" value="louvain"/>
-                <param name="use_raw" value="True"/>
-                <conditional name="ref">
-                    <param name="rest" value="rest"/>
-                </conditional>
-                <param name="n_genes" value="100"/>
-                <conditional name="tl_rank_genes_groups_method">
-                    <param name="method" value="logreg"/>
-                    <conditional name="solver">
-                        <param name="solver" value="liblinear"/>
-                        <conditional name="penalty">
-                            <param name="penalty" value="l2"/>
-                            <param name="dual" value="False"/>
-                            <conditional name="intercept_scaling">
-                                <param name="fit_intercept" value="True"/>
-                                <param name="intercept_scaling" value="1.0" />
-                            </conditional>
-                            <param name="random_state" value="1"/>
-                        </conditional>
-                    </conditional>
-                    <param name="tol" value="1e-4"/>
-                    <param name="c" value="1.0"/>
-                </conditional>
-            </conditional>
-            <section name="advanced_common">
-                <param name="show_log" value="true" />
-            </section>
-            <output name="hidden_output">
-                <assert_contents>
-                    <has_text_matching expression="sc.tl.rank_genes_groups"/>
-                    <has_text_matching expression="groupby='louvain'"/>
-                    <has_text_matching expression="use_raw=True"/>
-                    <has_text_matching expression="reference='rest'"/>
-                    <has_text_matching expression="n_genes=100"/>
-                    <has_text_matching expression="method='logreg'"/>
-                    <has_text_matching expression="solver='liblinear'"/>
-                    <has_text_matching expression="penalty='l2'"/>
-                    <has_text_matching expression="dual=False"/>
-                    <has_text_matching expression="fit_intercept=True"/>
-                    <has_text_matching expression="intercept_scaling=1.0"/>
-                    <has_text_matching expression="tol=0.0001"/>
-                    <has_text_matching expression="C=1.0"/>
-                </assert_contents>
-            </output>
-            <output name="anndata_out" file="tl.rank_genes_groups.liblinear.krumsiek11.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15">
-                <assert_contents>
-                    <has_h5_keys keys="X, obs, obsm, raw/X, raw/var, uns, var" />
-                </assert_contents>
-            </output>
-        </test>
-        <!-- test expect_num_outputs="2">
-            < test 9  tl.marker_gene_overlap function was commented because inpace=True does not work>
-            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" />
-            <conditional name="method">
-                <param name="method" value="tl.marker_gene_overlap"/>
-                <repeat name="reference_markers">
-                    <param name="key" value="CD4 T cells"/>
-                    <param name="value" value="IL7R"/>
-                </repeat>
-                <repeat name="reference_markers">
-                    <param name="key" value="CD14+ Monocytes"/>
-                    <param name="value" value="CD14,LYZ"/>
-                </repeat>
-                <repeat name="reference_markers">
-                    <param name="key" value="B cells"/>
-                    <param name="value" value="MS4A1"/>
-                </repeat>
-                <conditional name="overlap">
-                    <param name="method" value="overlap_count"/>
-                    <param name="normalize" value="None"/>
-                </conditional>
-            </conditional>
-            <assert_stdout>
-                <has_text_matching expression="tl.marker_gene_overlap"/>
-                <has_text_matching expression="key='rank_genes_groups'"/>
-                <has_text_matching expression="method='overlap_count'"/>
-            </assert_stdout>
-            <output name="anndata_out" file="tl.marker_gene_overlap.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size"/>
-        </test> -->
-        <test expect_num_outputs="2">
-            <!-- test 10 -->
-            <param name="adata" value="krumsiek11.h5ad" />
-            <conditional name="method">
-                <param name="method" value="pp.log1p"/>
-            </conditional>
-            <section name="advanced_common">
-                <param name="show_log" value="true" />
-            </section>
-            <output name="hidden_output">
-                <assert_contents>
-                    <has_text_matching expression="sc.pp.log1p"/>
-                </assert_contents>
-            </output>
-            <output name="anndata_out" file="pp.log1p.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
-        </test>
-        <test expect_num_outputs="2">
-            <!-- test 11 -->
-            <param name="adata" value="krumsiek11.h5ad" />
-            <conditional name="method">
-                <param name="method" value="pp.scale"/>
-                <param name="zero_center" value="true"/>
-            </conditional>
-            <section name="advanced_common">
-                <param name="show_log" value="true" />
-            </section>
-            <output name="hidden_output">
-                <assert_contents>
-                    <has_text_matching expression="sc.pp.scale"/>
-                    <has_text_matching expression="zero_center=True"/>
-                </assert_contents>
-            </output>
-            <output name="anndata_out" file="pp.scale.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
-        </test>
-        <test expect_num_outputs="2">
-            <!-- test 12 -->
-            <param name="adata" value="krumsiek11.h5ad" />
-            <conditional name="method">
-                <param name="method" value="pp.scale"/>
-                <param name="zero_center" value="true"/>
-                <param name="max_value" value="10"/>
-            </conditional>
-            <section name="advanced_common">
-                <param name="show_log" value="true" />
-            </section>
-            <output name="hidden_output">
-                <assert_contents>
-                    <has_text_matching expression="sc.pp.scale"/>
-                    <has_text_matching expression="zero_center=True"/>
-                    <has_text_matching expression="max_value=10.0"/>
-                </assert_contents>
-            </output>
-            <output name="anndata_out" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
-        </test>
-        <test expect_num_outputs="2">
-            <!-- test 13 -->
-            <param name="adata" value="krumsiek11.h5ad" />
-            <conditional name="method">
-                <param name="method" value="pp.sqrt"/>
-            </conditional>
-            <section name="advanced_common">
-                <param name="show_log" value="true" />
-            </section>
-            <output name="hidden_output">
-                <assert_contents>
-                    <has_text_matching expression="sc.pp.sqrt"/>
-                </assert_contents>
-            </output>
-            <output name="anndata_out" file="pp.sqrt.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
-        </test>
-        <test expect_num_outputs="2">
-            <!-- test 13 -->
-            <param name="adata" value="sparce_csr_matrix.h5ad" />
+            <param name="adata" value="sparce_csr_matrix.h5ad"/>
             <conditional name="method">
                 <param name="method" value="pp.calculate_qc_metrics"/>
-                <param name="expr_type" value="counts"/>
-                <param name="var_type" value="genes"/>
                 <param name="qc_vars" value="mito,negative"/>
                 <param name="percent_top" value="50,100,200,300"/>
             </conditional>
@@ -891,14 +607,382 @@
                     <has_text_matching expression="percent_top=\[50, 100, 200, 300\]" />
                 </assert_contents>
             </output>
-            <output name="anndata_out" file="pp.calculate_qc_metrics.sparce_csr_matrix.h5ad" ftype="h5ad" compare="sim_size"/>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/n_genes_by_counts,obs/log1p_n_genes_by_counts,obs/total_counts,obs/log1p_total_counts,obs/pct_counts_in_top_50_genes,obs/pct_counts_in_top_100_genes,obs/pct_counts_in_top_200_genes,obs/pct_counts_in_top_300_genes,obs/total_counts_mito,obs/log1p_total_counts_mito,obs/pct_counts_mito,obs/total_counts_negative,obs/log1p_total_counts_negative,obs/pct_counts_negative"/>
+                    <has_h5_keys keys="var/mito,var/negative,var/n_cells_by_counts,var/mean_counts,var/log1p_mean_counts,var/pct_dropout_by_counts,var/total_counts,var/log1p_total_counts"/>
+                </assert_contents>
+            </output>
+        </test>
+        
+        <!-- test 3: pp.neighbors with no method parameters overridden; checks the logged call arguments and the neighbors keys (uns, obsp) in the output -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="pp.neighbors"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.pp.neighbors"/>
+                    <has_text_matching expression="n_neighbors=15"/>
+                    <has_text_matching expression="knn=True"/>
+                    <has_text_matching expression="random_state=0"/>
+                    <has_text_matching expression="method='umap'"/>
+                    <has_text_matching expression="metric='euclidean'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="uns/neighbors"/>
+                    <has_h5_keys keys="obsp/connectivities,obsp/distances"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- test 4: pp.neighbors with pp_neighbors_method set to gauss and metric set to braycurtis; checks that both choices appear in the logged call -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="pp.neighbors"/>
+                <param name="pp_neighbors_method" value="gauss"/>
+                <param name="metric" value="braycurtis"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.pp.neighbors"/>
+                    <has_text_matching expression="n_neighbors=15"/>
+                    <has_text_matching expression="knn=True"/>
+                    <has_text_matching expression="random_state=0"/>
+                    <has_text_matching expression="method='gauss'"/>
+                    <has_text_matching expression="metric='braycurtis'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obsp/connectivities,obsp/distances"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- test 5: tl.score_genes for the gene list Gata2, Fog1 with ctrl_size, n_bins and random_state all set to 2; expects an obs/score column in the output -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="tl.score_genes"/>
+                <param name="gene_list" value="Gata2, Fog1"/>
+                <param name="ctrl_size" value="2"/>
+                <param name="n_bins" value="2"/>
+                <param name="random_state" value="2"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.tl.score_genes"/>
+                    <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]"/>
+                    <has_text_matching expression="ctrl_size=2"/>
+                    <has_text_matching expression="score_name='score'"/>
+                    <has_text_matching expression="n_bins=2"/>
+                    <has_text_matching expression="random_state=2"/>
+                    <has_text_matching expression="use_raw=False"/>
+                    <has_text_matching expression="copy=False"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/score"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- test 6: tl.score_genes_cell_cycle with s_genes and g2m_genes both supplied as text lists; expects obs/S_score, obs/G2M_score and obs/phase in the output -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="tl.score_genes_cell_cycle"/>
+                <conditional name='s_genes'>
+                    <param name="format" value="text"/>
+                    <param name="text" value="Gata2, Fog1, EgrNab"/>
+                </conditional>
+                <conditional name='g2m_genes'>
+                    <param name="format" value="text"/>
+                    <param name="text" value="Gata2, Fog1, EgrNab"/>
+                </conditional>
+                <param name="n_bins" value="2"/>
+                <param name="random_state" value="1"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.tl.score_genes_cell_cycle"/>
+                    <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
+                    <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
+                    <has_text_matching expression="n_bins=2"/>
+                    <has_text_matching expression="random_state=1"/>
+                    <has_text_matching expression="use_raw=False"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/S_score,obs/G2M_score,obs/phase"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- test 7: tl.rank_genes_groups grouped by cell_type with n_genes 100 and method t-test_overestim_var; expects uns/rank_genes_groups in the output -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="tl.rank_genes_groups"/>
+                <param name="groupby" value="cell_type"/>
+                <param name="n_genes" value="100"/>
+                <conditional name="tl_rank_genes_groups_method">
+                    <param name="method" value="t-test_overestim_var"/>
+                </conditional>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.tl.rank_genes_groups"/>
+                    <has_text_matching expression="groupby='cell_type'"/>
+                    <has_text_matching expression="use_raw=False"/>
+                    <has_text_matching expression="reference='rest'"/>
+                    <has_text_matching expression="n_genes=100"/>
+                    <has_text_matching expression="method='t-test_overestim_var'"/>
+                    <has_text_matching expression="corr_method='benjamini-hochberg'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="uns/rank_genes_groups"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- test 8: tl.rank_genes_groups with method logreg and no solver options overridden; checks the logistic regression arguments (lbfgs solver, l2 penalty, ...) in the logged call -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="tl.rank_genes_groups"/>
+                <param name="groupby" value="cell_type"/>
+                <param name="n_genes" value="100"/>
+                <conditional name="tl_rank_genes_groups_method">
+                    <param name="method" value="logreg"/>
+                </conditional>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.tl.rank_genes_groups"/>
+                    <has_text_matching expression="groupby='cell_type'"/>
+                    <has_text_matching expression="use_raw=False"/>
+                    <has_text_matching expression="reference='rest'"/>
+                    <has_text_matching expression="n_genes=100"/>
+                    <has_text_matching expression="method='logreg'"/>
+                    <has_text_matching expression="solver='lbfgs'"/>
+                    <has_text_matching expression="penalty='l2'"/>
+                    <has_text_matching expression="fit_intercept=True"/>
+                    <has_text_matching expression="max_iter=100"/>
+                    <has_text_matching expression="multi_class='auto'"/>
+                    <has_text_matching expression="tol=0.0001"/>
+                    <has_text_matching expression="C=1.0"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="uns/rank_genes_groups"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- test 9: tl.rank_genes_groups with method logreg, solver liblinear, penalty l2 and random_state 1; checks the liblinear arguments (dual, intercept_scaling, ...) in the logged call -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="tl.rank_genes_groups"/>
+                <param name="groupby" value="cell_type"/>
+                <param name="n_genes" value="100"/>
+                <conditional name="tl_rank_genes_groups_method">
+                    <param name="method" value="logreg"/>
+                    <conditional name="solver">
+                        <param name="solver" value="liblinear"/>
+                        <conditional name="penalty">
+                            <param name="penalty" value="l2"/>
+                            <param name="random_state" value="1"/>
+                        </conditional>
+                    </conditional>
+                </conditional>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.tl.rank_genes_groups"/>
+                    <has_text_matching expression="groupby='cell_type'"/>
+                    <has_text_matching expression="use_raw=False"/>
+                    <has_text_matching expression="reference='rest'"/>
+                    <has_text_matching expression="n_genes=100"/>
+                    <has_text_matching expression="method='logreg'"/>
+                    <has_text_matching expression="solver='liblinear'"/>
+                    <has_text_matching expression="penalty='l2'"/>
+                    <has_text_matching expression="dual=False"/>
+                    <has_text_matching expression="fit_intercept=True"/>
+                    <has_text_matching expression="intercept_scaling=1.0"/>
+                    <has_text_matching expression="tol=0.0001"/>
+                    <has_text_matching expression="C=1.0"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="uns/rank_genes_groups"/>
+                </assert_contents>
+            </output>
+        </test> 
+
+        <!-- test 10: tl.marker_gene_overlap with three reference_markers entries (CD4 T cells, CD14+ Monocytes, B cells); checks stdout, the logged call and that uns/rank_genes_groups is present in the output -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="tl.marker_gene_overlap"/>
+                <repeat name="reference_markers">
+                    <param name="key" value="CD4 T cells"/>
+                    <param name="values" value="IL7R"/>
+                </repeat>
+                <repeat name="reference_markers">
+                    <param name="key" value="CD14+ Monocytes"/>
+                    <param name="values" value="CD14,LYZ"/>
+                </repeat>
+                <repeat name="reference_markers">
+                    <param name="key" value="B cells"/>
+                    <param name="values" value="MS4A1"/>
+                </repeat>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <assert_stdout>
+                <has_text_matching expression="marker_gene_overlap"/>
+            </assert_stdout>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.tl.marker_gene_overlap"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="uns/rank_genes_groups"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- test 11: pp.log1p on krumsiek11.h5ad; checks the logged call and that uns/log1p is present in the output -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="pp.log1p"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.pp.log1p"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="uns/log1p"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- test 12: pp.scale with no method parameters overridden; checks zero_center=True in the logged call and var/mean, var/std in the output -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="pp.scale"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.pp.scale"/>
+                    <has_text_matching expression="zero_center=True"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="var/mean,var/std"/>
+                </assert_contents>
+            </output>        </test>
+
+        <!-- test 13: pp.scale with max_value set to 10; checks that max_value=10.0 appears in the logged call and var/mean, var/std in the output -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="krumsiek11.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="pp.scale"/>
+                <param name="max_value" value="10"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.pp.scale"/>
+                    <has_text_matching expression="zero_center=True"/>
+                    <has_text_matching expression="max_value=10.0"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="var/mean,var/std"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- test 14: pp.sqrt on random-randint.h5ad; the before/after statistics are fixed strings, so they are checked literally with has_text instead of has_text_matching, where every '.' would act as a regex wildcard -->
+        <test expect_num_outputs="2">
+            <param name="adata" value="random-randint.h5ad"/>
+            <conditional name="method">
+                <param name="method" value="pp.sqrt"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.pp.sqrt"/>
+                    <has_text text="stats before sqrt: min= 0.0 max= 999.0 mean= 499.83777"/>
+                    <has_text text="stats after sqrt: min= 0.0 max= 31.606962 mean= 21.079018"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/index"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
 Calculate quality control metrics, using `pp.calculate_qc_metrics`
 ===================================================================
 
-Calculates a number of qc metrics for an AnnData object, largely based on calculateQCMetrics from scater. 
+Calculates a number of qc metrics for an AnnData object, largely based on calculateQCMetrics from scater.
 Currently is most efficient on a sparse CSR or dense matrix.
 
 It updates the observation level metrics with
@@ -983,6 +1067,8 @@
 
 Marker gene overlap scores can be quoted as overlap counts, overlap coefficients, or jaccard indices. The method returns a pandas dataframe which can be used to annotate clusters based on marker gene overlaps.
 
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.marker_gene_overlap.html>`__
 
 Logarithmize the data matrix (`pp.log1p`)
 =========================================
author	iuc
date	Sat, 14 Sep 2024 12:45:03 +0000
parents	ca086f24422f
children	d1cd8c147809