Mercurial > repos > iuc > scanpy_inspect
diff inspect.xml @ 17:18262103fa61 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 91121b1e72696f17478dae383badaa71e9f96dbb
author | iuc |
---|---|
date | Sat, 14 Sep 2024 12:45:03 +0000 |
parents | ca086f24422f |
children | d1cd8c147809 |
line wrap: on
line diff
--- a/inspect.xml Tue Aug 20 09:50:59 2024 +0000 +++ b/inspect.xml Sat Sep 14 12:45:03 2024 +0000 @@ -1,102 +1,121 @@ -<tool id="scanpy_inspect" name="Inspect and manipulate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> - <description> with scanpy</description> +<tool id="scanpy_inspect" name="Scanpy Inspect and manipulate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <macros> <import>macros.xml</import> - <xml name="score_genes_params"> - <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/> - <param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/> + <xml name="params_score_genes"> + <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling"/> + <param argument="random_state" type="integer" value="0" label="Random seed for sampling"/> <expand macro="param_use_raw"/> </xml> - <token name="@CMD_score_genes_inputs@"><![CDATA[ + <token name="@CMD_PARAMS_SCORE_GENES@"><![CDATA[ n_bins=$method.n_bins, random_state=$method.random_state, use_raw=$method.use_raw, copy=False - ]]></token> + ]]> + </token> <xml name="corr_method"> <param argument="corr_method" type="select" label="P-value correction method"> - <option value="benjamini-hochberg">Benjamini-Hochberg</option> + <option value="benjamini-hochberg" selected="true">Benjamini-Hochberg</option> <option value="bonferroni">Bonferroni</option> </param> </xml> <xml name="fit_intercept"> - <param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true" - label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""/> + <param argument="fit_intercept" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should a constant (a.k.a. bias or intercept) be added to the decision function?"/> </xml> <xml name="max_iter"> - <param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge" help=""/> + <param argument="max_iter" type="integer" min="0" value="100" label="Maximum number of iterations taken for the solvers to converge"/> </xml> <xml name="multi_class"> - <param argument="multi_class" type="select" label="Multi class" help=""> + <param argument="multi_class" type="select" label="Multi class"> + <option value="auto" selected="true">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option> <option value="ovr">ovr: a binary problem is fit for each label</option> <option value="multinomial">multinomial: the multinomial loss fit across the entire probability distribution, even when the data is binary</option> - <option value="auto">auto: selects ‘ovr’ if the data is binary and otherwise selects ‘multinomial’</option> </param> </xml> <xml name="penalty"> - <param argument="penalty" type="select" label="Norm used in the penalization" help=""> + <param argument="penalty" type="select" label="Norm used in the penalization"> + <option value="l2" selected="true">l2</option> <option value="l1">l1</option> - <option value="l2">l2</option> - <option value="customized">customized</option> - </param> - </xml> - <xml name="custom_penalty"> - <param argument="pen" type="text" value="" label="Norm used in the penalization" help=""> - <expand macro="sanitize_query" /> + <yield/> </param> </xml> <xml name="random_state"> - <param argument="random_state" type="integer" value="" optional="true" - label="The seed of the pseudo random number generator to use when shuffling the data" help=""/> + <param argument="random_state" type="integer" value="" optional="true" label="The seed of the pseudo random number generator to use when shuffling the data"/> </xml> </macros> <expand macro="bio_tools"/> - <expand macro="requirements"/> + <expand macro="requirements"> + <requirement type="package" version="1.5.1">scikit-learn</requirement> + </expand> <expand macro="version_command"/> <command detect_errors="exit_code"><![CDATA[ @CMD@ ]]></command> <configfiles> <configfile name="script_file"><![CDATA[ -@CMD_imports@ -@CMD_read_inputs@ +@CMD_IMPORTS@ +@CMD_READ_INPUTS@ -#if $method.method == "pp.calculate_qc_metrics" +#if str($method.method) == 'pp.calculate_qc_metrics': sc.pp.calculate_qc_metrics( adata=adata, expr_type='$method.expr_type', var_type='$method.var_type', - #if $method.qc_vars + #if str($method.qc_vars) != '': #set $qc_vars = [str(x.strip()) for x in str($method.qc_vars).split(',')] qc_vars=$qc_vars, #end if - #if $method.percent_top + #if str($method.percent_top) != '': #set $percent_top = [int(x.strip()) for x in str($method.percent_top).split(',')] - percent_top=$percent_top, + percent_top=$percent_top, #end if + #if str($method.layer) != '': + layer='$method.layer', + #end if + use_raw=$method.use_raw, + log1p=$method.log1p, inplace=True) -#else if $method.method == "tl.score_genes" +#else if str($method.method) == 'pp.neighbors': +sc.pp.neighbors( + adata=adata, + n_neighbors=$method.n_neighbors, + #if str($method.n_pcs) != '': + n_pcs=$method.n_pcs, + #end if + #if str($method.use_rep) != '': + use_rep='$method.use_rep', + #end if + knn=$method.knn, + method='$method.pp_neighbors_method', + metric='$method.metric', + random_state=$method.random_state, + #if str($method.key_added) != '': + key_added='$method.key_added', + #end if + copy=False) + +#else if str($method.method) == 'tl.score_genes': sc.tl.score_genes( adata=adata, #set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')] gene_list=$gene_list, ctrl_size=$method.ctrl_size, - score_name='$method.score_name', - #if $method.gene_pool - #set $gene_pool = [str(x.strip()) for x in $method.gene_pool.split(',')] + #if str($method.gene_pool) != '': + #set $gene_pool = [str(x.strip()) for x in str($method.gene_pool).split(',')] gene_pool=$gene_pool, #end if - @CMD_score_genes_inputs@) + score_name='$method.score_name', + @CMD_PARAMS_SCORE_GENES@) -#else if $method.method == "tl.score_genes_cell_cycle" - #if str($method.s_genes.format) == 'file' +#else if str($method.method) == 'tl.score_genes_cell_cycle': + #if str($method.s_genes.format) == 'file': with open('$method.s_genes.file', 'r') as s_genes_f: s_genes = [str(x.strip()) for x in s_genes_f.readlines()] print(s_genes) #end if - #if str($method.g2m_genes.format) == 'file' + #if str($method.g2m_genes.format) == 'file': with open('$method.g2m_genes.file', 'r') as g2m_genes_f: g2m_genes = [str(x.strip()) for x in g2m_genes_f.readlines()] print(g2m_genes) @@ -104,155 +123,182 @@ sc.tl.score_genes_cell_cycle( adata=adata, - #if str($method.s_genes.format) == 'text' - #set $s_genes = [str(x.strip()) for x in $method.s_genes.text.split(',')] + #if str($method.s_genes.format) == 'text': + #set $s_genes = [str(x.strip()) for x in str($method.s_genes.text).split(',')] s_genes=$s_genes, - #else if str($method.s_genes.format) == 'file' + #else if str($method.s_genes.format) == 'file': s_genes=s_genes, #end if - #if str($method.g2m_genes.format) == 'text' - #set $g2m_genes = [str(x.strip()) for x in $method.g2m_genes.text.split(',')] + #if str($method.g2m_genes.format) == 'text': + #set $g2m_genes = [str(x.strip()) for x in str($method.g2m_genes.text).split(',')] g2m_genes=$g2m_genes, - #else if str($method.g2m_genes.format) == 'file' + #else if str($method.g2m_genes.format) == 'file': g2m_genes=g2m_genes, #end if - @CMD_score_genes_inputs@) + @CMD_PARAMS_SCORE_GENES@) -#else if $method.method == 'pp.neighbors' -sc.pp.neighbors( - adata=adata, - n_neighbors=$method.n_neighbors, - #if str($method.n_pcs) != '' - n_pcs=$method.n_pcs, - #end if - #if $method.use_rep - use_rep='$method.use_rep', - #end if - knn=$method.knn, - random_state=$method.random_state, - method='$method.pp_neighbors_method', - metric='$method.metric', - copy=False) - -#else if $method.method == 'tl.rank_genes_groups' +#else if str($method.method) == 'tl.rank_genes_groups': sc.tl.rank_genes_groups( adata=adata, + #if str($method.groupby) != '': groupby='$method.groupby', - #if $method.groups + #end if + use_raw=$method.use_raw, + #if str($method.groups) != '': #set $group=[x.strip() for x in str($method.groups).split(',')] - groups=$group, + groups='$group', #end if - #if $method.ref.rest == 'rest' + #if str($method.layer) != '': + layer='$method.layer', + #end if + #if str($method.ref.rest) == 'rest': reference='$method.ref.rest', #else reference='$method.ref.reference', #end if + #if str($method.n_genes) != '': n_genes=$method.n_genes, + #end if method='$method.tl_rank_genes_groups_method.method', - #if $method.tl_rank_genes_groups_method.method != 'logreg' + #if str($method.tl_rank_genes_groups_method.method) != 'logreg': corr_method='$method.tl_rank_genes_groups_method.corr_method', - #else + #end if + #if str($method.tl_rank_genes_groups_method.method) == 'wilcoxon': + tie_correct=$method.tl_rank_genes_groups_method.tie_correct, + #end if + #if str($method.tl_rank_genes_groups_method.method) == 'logreg': solver='$method.tl_rank_genes_groups_method.solver.solver', - #if $method.tl_rank_genes_groups_method.solver.solver == 'newton-cg' - penalty='l2', + #if str($method.tl_rank_genes_groups_method.solver.solver) == 'lbfgs': + penalty='$method.tl_rank_genes_groups_method.solver.penalty', fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, max_iter=$method.tl_rank_genes_groups_method.solver.max_iter, multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', - #else if $method.tl_rank_genes_groups_method.solver.solver == 'lbfgs' - penalty='l2', + #else if str($method.tl_rank_genes_groups_method.solver.solver) == 'newton-cg': + penalty='$method.tl_rank_genes_groups_method.solver.penalty', fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, max_iter=$method.tl_rank_genes_groups_method.solver.max_iter, multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', - #else if $method.tl_rank_genes_groups_method.solver.solver == 'liblinear' - #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1' + #else if str($method.tl_rank_genes_groups_method.solver.solver) == 'liblinear': + #if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'l1': penalty='l1', - #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2' + #else: penalty='l2', dual=$method.tl_rank_genes_groups_method.solver.penalty.dual, - #else - penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen', #end if fit_intercept=$method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept, - #if $method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept == 'True' + #if str($method.tl_rank_genes_groups_method.solver.intercept_scaling.fit_intercept) == 'True': intercept_scaling=$method.tl_rank_genes_groups_method.solver.intercept_scaling.intercept_scaling, #end if - #if str($method.tl_rank_genes_groups_method.solver.random_state) != '' + #if str($method.tl_rank_genes_groups_method.solver.random_state) != '': random_state=$method.tl_rank_genes_groups_method.solver.random_state, #end if - #else if $method.tl_rank_genes_groups_method.solver.solver == 'sag' - penalty='l2', + #else if str($method.tl_rank_genes_groups_method.solver.solver) == 'sag': + penalty='$method.tl_rank_genes_groups_method.solver.penalty.penalty', fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, - #if str($method.tl_rank_genes_groups_method.solver.random_state) != '' + #if str($method.tl_rank_genes_groups_method.solver.random_state) != '': random_state=$method.tl_rank_genes_groups_method.solver.random_state, #end if max_iter=$method.tl_rank_genes_groups_method.solver.max_iter, - multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', - #else if $method.tl_rank_genes_groups_method.solver.solver == 'saga' - #if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l1' + multi_class=$method.tl_rank_genes_groups_method.solver.multi_class, + #else if str($method.tl_rank_genes_groups_method.solver.solver) == 'saga': + #if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'l1': penalty='l1', - #else if $method.tl_rank_genes_groups_method.solver.penalty.penalty == 'l2' + #else if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'l2': penalty='l2', - #else - penalty='$method.tl_rank_genes_groups_method.solver.penalty.pen', + #else if str($method.tl_rank_genes_groups_method.solver.penalty.penalty) == 'elasticnet': + penalty='elasticnet', + #else: + penalty='None', #end if fit_intercept=$method.tl_rank_genes_groups_method.solver.fit_intercept, - multi_class='$method.tl_rank_genes_groups_method.solver.multi_class', + multi_class=$method.tl_rank_genes_groups_method.solver.multi_class, #end if tol=$method.tl_rank_genes_groups_method.tol, C=$method.tl_rank_genes_groups_method.c, #end if - use_raw=$method.use_raw) + #if str($method.key_added) != '': + key_added='$method.key_added', + #end if + copy=False) -#else if $method.method == "tl.marker_gene_overlap" +#else if str($method.method) == "tl.marker_gene_overlap": reference_markers = {} #for $i, $s in enumerate($method.reference_markers) #set $list=[x.strip() for x in str($s.values).split(',')] reference_markers['$s.key'] = $list #end for -sc.tl.marker_gene_overlap( - adata, - reference_markers, - #if $method.key - key='$method.key', - #end if - method='$method.overlap.method', - #if $method.overlap.method == 'overlap_count' and str($method.overlap.normalize) != 'None' - normalize='$method.overlap.normalize', - #end if - #if str($method.top_n_markers) != '' - top_n_markers=$method.top_n_markers, - #end if - #if str($method.adj_pval_threshold) != '' - adj_pval_threshold=$method.adj_pval_threshold, - #end if - #if $method.key_added - key_added='$method.key_added', - #end if - inplace=True) +# Temporary fix for the issue with "inplace=True" for Pandas dataframes. +# see here: https://github.com/scverse/scanpy/blob/b6193502e11b84fc1b4a011ee9cf08a19da22ebf/src/scanpy/tools/_marker_gene_overlap.py#L167 +marker_overlap_result = sc.tl.marker_gene_overlap( + adata, + reference_markers, + #if str($method.key) != '': + key='$method.key', + #end if + method='$method.overlap.method', + #if str($method.overlap.method) == 'overlap_count' and str($method.overlap.normalize) != 'None': + normalize='$method.overlap.normalize', + #end if + #if str($method.top_n_markers) != '': + top_n_markers=$method.top_n_markers, + #end if + #if str($method.adj_pval_threshold) != '': + adj_pval_threshold=$method.adj_pval_threshold, + #end if + #if $method.key_added: + key_added='$method.key_added', + #end if + inplace=False) -#else if $method.method == "pp.log1p" +adata.uns['marker_gene_overlap'] = marker_overlap_result + +#else if str($method.method) == "pp.log1p": sc.pp.log1p( adata, + #if str($method.base) != '': + base=$method.base, + #end if + #if str($method.layer) != '': + layer='$method.layer', + #end if + #if str($method.obsm) != '': + obsm='$method.obsm', + #end if copy=False) -#else if $method.method == "pp.scale" +#else if str($method.method) == "pp.scale": sc.pp.scale( adata, zero_center=$method.zero_center, - #if str($method.max_value) != '' + #if str($method.max_value) != '': max_value=$method.max_value, #end if + #if str($method.layer) != '': + layer='$method.layer', + #end if + #if str($method.obsm) != '': + obsm='$method.obsm', + #end if + #if str($method.mask_obs) != '': + mask_obs='$method.mask_obs', + #end if copy=False) -#else if $method.method == "pp.sqrt" +#else if str($method.method) == "pp.sqrt": + +print("stats before sqrt:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean()) + sc.pp.sqrt( adata, copy=False) #end if -@CMD_anndata_write_outputs@ -]]></configfile> +print("stats after sqrt:", "min=", adata.X.min(), "max=", adata.X.max(), "mean=", adata.X.mean()) + +@CMD_ANNDATA_WRITE_OUTPUTS@ + ]]> + </configfile> </configfiles> <inputs> <expand macro="inputs_anndata"/> @@ -263,66 +309,66 @@ <option value="tl.score_genes">Score a set of genes, using 'tl.score_genes'</option> <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using 'tl.score_genes_cell_cycle'</option> <option value="tl.rank_genes_groups">Rank genes for characterizing groups, using 'tl.rank_genes_groups'</option> - <!--<option value="tl.marker_gene_overlap">Calculate an overlap score between data-deriven marker genes and provided markers, using 'tl.marker_gene_overlap'</option>--> + <option value="tl.marker_gene_overlap">Calculate an overlap score between data-deriven marker genes and provided markers, using 'tl.marker_gene_overlap'</option> <option value="pp.log1p">Logarithmize the data matrix, using 'pp.log1p'</option> <option value="pp.scale">Scale data to unit variance and zero mean, using 'pp.scale'</option> <option value="pp.sqrt">Square root the data matrix, using 'pp.sqrt'</option> </param> <when value="pp.calculate_qc_metrics"> <param argument="expr_type" type="text" value="counts" label="Name of kind of values in X"> - <expand macro="sanitize_query" /> + <expand macro="sanitize_query"/> </param> <param argument="var_type" type="text" value="genes" label="The kind of thing the variables are"> - <expand macro="sanitize_query" /> + <expand macro="sanitize_query"/> </param> - <param argument="qc_vars" type="text" value="" label="Keys for boolean columns of '.var' which identify variables you could want to control for" help="Keys separated by a comma"> - <expand macro="sanitize_query" /> + <param argument="qc_vars" type="text" optional="true" value="" label="Keys for boolean columns of '.var' which identify variables you could want to control for" help="Keys separated by a comma"> + <expand macro="sanitize_query"/> </param> - <param argument="percent_top" type="text" value="" label="Proportions of top genes to cover" + <param argument="percent_top" type="text" value="" optional="true" label="Proportions of top genes to cover" help=" Values (integers) are considered 1-indexed, '50' finds cumulative proportion to the 50th most expressed genes. Values separated by a comma. If empty don't calculate"> - <expand macro="sanitize_vectors" /> + <expand macro="sanitize_vectors"/> </param> + <expand macro="param_layer"/> + <expand macro="param_use_raw"/> + <param argument="log1p" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Compute log1p transformed annotations"/> </when> <when value="pp.neighbors"> <param argument="n_neighbors" type="integer" min="0" value="15" label="The size of local neighborhood (in terms of number of neighboring data points) used for manifold approximation" help="Larger values result in more global views of the manifold, while smaller values result in more local data being preserved. In general values should be in the range 2 to 100. If 'knn' is 'True', number of nearest neighbors to be searched. If 'knn' is 'False', a Gaussian kernel width is set to the distance of the 'n_neighbors' neighbor."/> - <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/> - <param argument="use_rep" type="text" value="" optional="true" label="Indicated representation to use" help="If not set, the representation is chosen automatically: for n_vars below 50, X is used, otherwise X_pca (uns) is used. If X_pca is not present, it's computed with default parameter"> - <expand macro="sanitize_query" /> - </param> + <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use"/> + <expand macro="param_use_rep"/> <param argument="knn" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use a hard threshold to restrict the number of neighbors to n_neighbors?" help="If true, it considers a knn graph. Otherwise, it uses a Gaussian Kernel to assign low weights to neighbors more distant than the 'n_neighbors' nearest neighbor."/> - <param argument="random_state" type="integer" value="0" label="Numpy random seed" help=""/> - <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities" help=""> - <option value="umap">umap (McInnes et al, 2018)</option> + <param name="pp_neighbors_method" argument="method" type="select" label="Method for computing connectivities"> + <option value="umap" selected="true">umap (McInnes et al, 2018)</option> <option value="gauss">gauss: Gauss kernel following (Coifman et al 2005) with adaptive width (Haghverdi et al 2016)</option> </param> - <param argument="metric" type="select" label="Distance metric" help=""> + <param argument="metric" type="select" label="Distance metric"> <expand macro="distance_metric_options"/> </param> + <param argument="random_state" type="integer" value="0" label="Numpy random seed"/> + <param argument="key_added" type="text" value="" optional="true" label="Key to store neighbors, distances and connectivities" help="If specified, the neighbors data is added to .uns[key_added], distances are stored in .obsp[key_added+'_distances'] and connectivities in .obsp[key_added+'_connectivities']"/> </when> <when value="tl.score_genes"> - <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"> - <expand macro="sanitize_query" /> + <param argument="gene_list" type="text" value="" optional="false" label="The list of gene names used for score calculation" help="Genes separated by a comma"> + <expand macro="sanitize_query"/> </param> - <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled" - help="If 'len(gene_list)' is not too low, you can set 'ctrl_size=len(gene_list)'."/> - <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set" - help="Default is all genes. Genes separated by a comma"> - <expand macro="sanitize_query" /> + <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled" help="If 'len(gene_list)' is not too low, you can set 'ctrl_size=len(gene_list)'."/> + <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set" help="Default is all genes. Genes separated by a comma"> + <expand macro="sanitize_query"/> </param> - <expand macro="score_genes_params"/> - <param argument="score_name" type="text" value="score" label="Name of the field to be added in '.obs'" help=""> - <expand macro="sanitize_query" /> + <param argument="score_name" type="text" value="score" label="Name of the field to be added in '.obs'"> + <expand macro="sanitize_query"/> </param> + <expand macro="params_score_genes"/> </when> <when value="tl.score_genes_cell_cycle"> <conditional name='s_genes'> <param name="format" type="select" label="Format for the list of genes associated with S phase"> + <option value="text" selected="true">Text</option> <option value="file">File</option> - <option value="text" selected="true">Text</option> </param> <when value="text"> <param name="text" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma"> - <expand macro="sanitize_query" /> + <expand macro="sanitize_query"/> </param> </when> <when value="file"> @@ -331,41 +377,44 @@ </conditional> <conditional name='g2m_genes'> <param name="format" type="select" label="Format for the list of genes associated with G2M phase"> + <option value="text" selected="true">Text</option> <option value="file">File</option> - <option value="text" selected="true">Text</option> </param> <when value="text"> <param name="text" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma"> - <expand macro="sanitize_query" /> + <expand macro="sanitize_query"/> </param> </when> <when value="file"> <param name="file" type="data" format="txt" label="File with the list of genes associated with G2M phase" help="One gene per line"/> </when> </conditional> - <expand macro="score_genes_params"/> + <expand macro="params_score_genes"/> </when> <when value="tl.rank_genes_groups"> - <param argument="groupby" type="text" value="" label="The key of the observations grouping to consider" help=""> - <expand macro="sanitize_query" /> + <param argument="groupby" type="text" value="" label="The key of the observations grouping to consider"> + <expand macro="sanitize_query"/> </param> <expand macro="param_use_raw"/> <param argument="groups" type="text" value="" label="Subset of groups to which comparison shall be restricted" help="e.g. ['g1', 'g2', 'g3']. If not passed, a ranking will be generated for all groups."> - <expand macro="sanitize_query" /> + <expand macro="sanitize_query"/> + </param> + <param argument="layer" type="text" value="" label="Key from adata.layers whose value will be used to perform tests on"> + <expand macro="sanitize_query"/> </param> <conditional name="ref"> <param name="rest" type="select" label="Comparison"> - <option value="rest">Compare each group to the union of the rest of the group</option> + <option value="rest" selected="true">Compare each group to the union of the rest of the group</option> <option value="group_id">Compare with respect to a specific group</option> </param> <when value="rest"/> <when value="group_id"> <param argument="reference" type="text" value="" label="Group identifier with respect to which compare"> - <expand macro="sanitize_query" /> + <expand macro="sanitize_query"/> </param> </when> </conditional> - <param argument="n_genes" type="integer" min="0" value="100" label="The number of genes that appear in the returned tables" help=""/> + <param argument="n_genes" type="integer" min="0" value="" optional="true" label="The number of genes that appear in the returned tables" help="Defaults to all genes"/> <conditional name="tl_rank_genes_groups_method"> <param argument="method" type="select" label="Method"> <option value="t-test" selected="true">t-test</option> @@ -378,6 +427,7 @@ </when> <when value="wilcoxon"> <expand macro="corr_method"/> + <param argument="tie_correct" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use tie correction for 'wilcoxon' scores"/> </when> <when value="t-test_overestim_var"> <expand macro="corr_method"/> @@ -385,18 +435,20 @@ <when value="logreg"> <conditional name="solver"> <param argument="solver" type="select" label="Algorithm to use in the optimization problem" help="For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and ‘saga’ are faster for large ones. For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes. ‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas ‘liblinear’ and ‘saga’ handle L1 penalty."> + <option value="lbfgs" selected="true">lbfgs</option> <option value="newton-cg">newton-cg</option> - <option value="lbfgs">lbfgs</option> <option value="liblinear">liblinear</option> <option value="sag">sag</option> <option value="saga">saga</option> </param> - <when value="newton-cg"> + <when value="lbfgs"> + <param name="penalty" type="boolean" truevalue="l2" falsevalue="None" checked="true" label="use l2 penalty?"/> <expand macro="fit_intercept"/> <expand macro="max_iter"/> <expand macro="multi_class"/> - </when> - <when value="lbfgs"> + </when> + <when value="newton-cg"> + <param name="penalty" type="boolean" truevalue="l2" falsevalue="None" checked="true" label="use l2 penalty?"/> <expand macro="fit_intercept"/> <expand macro="max_iter"/> <expand macro="multi_class"/> @@ -409,26 +461,21 @@ <param argument="dual" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Dual (not primal) formulation?" help="Prefer primal when n_samples > n_features"/> </when> - <when value="customized"> - <expand macro="custom_penalty"/> - </when> </conditional> <conditional name="intercept_scaling"> - <param argument="fit_intercept" type="select" - label="Should a constant (a.k.a. bias or intercept) be added to the decision function?" help=""> - <option value="True">Yes</option> + <param argument="fit_intercept" type="select" label="Should a constant (a.k.a. bias or intercept) be added to the decision function?"> + <option value="True" selected="true">Yes</option> <option value="False">No</option> </param> <when value="True"> - <param argument="intercept_scaling" type="float" value="1.0" - label="Intercept scaling" - help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/> + <param argument="intercept_scaling" type="float" value="1.0" label="Intercept scaling" help="x becomes [x, self.intercept_scaling], i.e. a 'synthetic' feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight."/> </when> <when value="False"/> </conditional> <expand macro="random_state"/> </when> <when value="sag"> + <param name="penalty" type="boolean" truevalue="l2" falsevalue="None" checked="true" label="use l2 penalty?"/> <expand macro="fit_intercept"/> <expand macro="random_state"/> <expand macro="max_iter"/> @@ -436,39 +483,42 @@ </when> <when value="saga"> <conditional name="penalty"> - <expand macro="penalty"/> + <expand macro="penalty"> + <option value="elasticnet">elasticnet</option> + <option value="None">None</option> + </expand> <when value="l1"/> <when value="l2"/> - <when value="customized"> - <expand macro="custom_penalty"/> - </when> + <when value="elasticnet"/> + <when value="None"/> </conditional> <expand macro="fit_intercept"/> <expand macro="multi_class"/> </when> </conditional> - <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria" help=""/> - <param argument="c" type="float" value="1.0" label="Inverse of regularization strength" - help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/> + <param argument="tol" type="float" value="1e-4" label="Tolerance for stopping criteria"/> + <param argument="c" type="float" value="1.0" label="Inverse of regularization strength" help="It must be a positive float. Like in support vector machines, smaller values specify stronger regularization."/> </when> </conditional> + <param argument="key_added" type="text" value="" optional="true" label="The key in adata.uns information is saved to"/> </when> <!-- With inplace=True, NotImplementedError: Writing Pandas dataframes to h5ad is currently under development. Please use `inplace=False`. --> - <!-- <when value="tl.marker_gene_overlap"> + <!-- Issue is fixed in the script --> + <when value="tl.marker_gene_overlap"> <repeat name="reference_markers" title="Marker genes"> - <param name="key" type="text" value="" label="Cell identity name" help=""/> + <param name="key" type="text" value="" label="Cell identity name"/> <param name="values" type="text" value="" label="List of genes" help="Comma-separated names from 'var'"/> </repeat> <param argument="key" type="text" value="rank_genes_groups" label="Key in adata.uns where the rank_genes_groups output is stored"/> <conditional name="overlap"> <param argument="method" type="select" label="Method to calculate marker gene overlap"> - <option value="overlap_count">overlap_count: Intersection of the gene set</option> + <option value="overlap_count" selected="true">overlap_count: Intersection of the gene set</option> <option value="overlap_coef">overlap_coef: Overlap coefficient</option> <option value="jaccard">jaccard: Jaccard index</option> </param> <when value="overlap_count"> <param argument="normalize" type="select" label="Normalization option for the marker gene overlap output"> - <option value="None">None</option> + <option value="None" selected="true">None</option> <option value="reference">reference: Normalization of the data by the total number of marker genes given in the reference annotation per group</option> <option value="data">data: Normalization of the data by the total number of marker genes used for each cluster</option> </param> @@ -476,16 +526,31 @@ <when value="overlap_coef"/> <when value="jaccard"/> </conditional> - <param argument="top_n_markers" type="integer" optional="true" label="Number of top data-derived marker genes to use" help="By default all calculated marker genes are used. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/> - <param argument="adj_pval_threshold" type="float" optional="true" label="Significance threshold on the adjusted p-values to select marker genes" help=" This can only be used when adjusted p-values are calculated by 'tl.rank_genes_groups'. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/> - <param argument="key_added" type="text" value="marker_gene_overlap" optional="true" label="Key that will contain the marker overlap scores in 'uns'"/> - </when>--> - <when value="pp.log1p"/> + <param argument="top_n_markers" type="integer" optional="true" value="" label="Number of top data-derived marker genes to use" help="By default all calculated marker genes are used. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/> + <param argument="adj_pval_threshold" type="float" optional="true" value="" label="Significance threshold on the adjusted p-values to select marker genes" help=" This can only be used when adjusted p-values are calculated by 'tl.rank_genes_groups'. If adj_pval_threshold is set along with top_n_markers, then adj_pval_threshold is ignored."/> + <param argument="key_added" type="text" optional="true" value="" label="Key that will contain the marker overlap scores in 'uns'"/> + </when> + <when value="pp.log1p"> + <param argument="base" type="integer" value="" optional="true" label="Base of the logarithm." help="Natural logarithm is used by default."/> + <param argument="layer" type="text" value="" optional="true" label="Entry of layers to transform"> + <expand macro="sanitize_query"/> + </param> + <param argument="obsm" type="text" value="" optional="true" label="Entry of obsm to transform"> + <expand macro="sanitize_query"/> + </param> + </when> <when value="pp.scale"> - <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" - label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/> - <param argument="max_value" type="float" value="" optional="true" label="Maximum value" - help="Clip (truncate) to this value after scaling. If not set, it does not clip."/> + <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/> + <param argument="max_value" type="float" value="" optional="true" label="Maximum value" help="Clip (truncate) to this value after scaling. If not set, it does not clip."/> + <param argument="layer" type="text" value="" label="Which element of layers to scale"> + <expand macro="sanitize_query"/> + </param> + <param argument="obsm" type="text" value="" label="Which element of obsm to scale"> + <expand macro="sanitize_query"/> + </param> + <param argument="mask_obs" type="text" value="" label="Restrict both the derivation of scaling parameters and the scaling itself to a certain set of observations."> + <expand macro="sanitize_query"/> + </param> </when> <when value="pp.sqrt"/> </conditional> @@ -495,387 +560,38 @@ <expand macro="anndata_outputs"/> </outputs> <tests> + + <!-- test 1 --> <test expect_num_outputs="2"> - <!-- test 1 --> - <param name="adata" value="sparce_csr_matrix.h5ad" /> + <param name="adata" value="sparce_csr_matrix.h5ad"/> <conditional name="method"> <param name="method" value="pp.calculate_qc_metrics"/> - <param name="expr_type" value="counts"/> - <param name="var_type" value="genes"/> <param name="qc_vars" value="mito,negative"/> - <param name="percent_top" value=""/> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> - <assert_contents> - <has_text_matching expression="sc.pp.calculate_qc_metrics" /> - <has_text_matching expression="expr_type='counts'" /> - <has_text_matching expression="var_type='genes'" /> - <has_text_matching expression="qc_vars=\['mito', 'negative'\]" /> - </assert_contents> - </output> - <output name="anndata_out" file="pp.calculate_qc_metrics.sparce_csr_matrix.h5ad" ftype="h5ad" compare="sim_size"/> - </test> - <test expect_num_outputs="2"> - <!-- test 2 --> - <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" /> - <conditional name="method"> - <param name="method" value="pp.neighbors"/> - <param name="n_neighbors" value="15"/> - <param name="knn" value="True"/> - <param name="random_state" value="0"/> - <param name="pp_neighbors_method" value="umap"/> - <param name="metric" value="euclidean"/> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> - <assert_contents> - <has_text_matching expression="sc.pp.neighbors"/> - <has_text_matching expression="n_neighbors=15"/> - <has_text_matching expression="knn=True"/> - <has_text_matching expression="random_state=0"/> - <has_text_matching expression="method='umap'"/> - <has_text_matching expression="metric='euclidean'"/> - </assert_contents> - </output> - <output name="anndata_out" file="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"> - <assert_contents> - <has_h5_keys keys="X, obs, obsm, uns, var" /> - </assert_contents> - </output> - </test> - <test expect_num_outputs="2"> - <!-- test 3 --> - <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad" /> - <conditional name="method"> - <param name="method" value="pp.neighbors"/> - <param name="n_neighbors" value="15"/> - <param name="knn" value="True"/> - <param name="pp_neighbors_method" value="gauss"/> - <param name="metric" value="braycurtis"/> </conditional> <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> - <assert_contents> - <has_text_matching expression="sc.pp.neighbors"/> - <has_text_matching expression="n_neighbors=15"/> - <has_text_matching expression="knn=True"/> - <has_text_matching expression="random_state=0"/> - <has_text_matching expression="method='gauss'"/> - <has_text_matching expression="metric='braycurtis'"/> - </assert_contents> - </output> - <output name="anndata_out" file="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/> - </test> - <test expect_num_outputs="2"> - <!-- test 4 --> - <param name="adata" value="krumsiek11.h5ad" /> - <conditional name="method"> - <param name="method" value="tl.score_genes"/> - <param name="gene_list" value="Gata2, Fog1"/> - <param name="ctrl_size" value="2"/> - <param name="n_bins" value="2"/> - <param name="random_state" value="2"/> - <param name="use_raw" value="False"/> - <param name="score_name" value="score"/> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> - <assert_contents> - <has_text_matching expression="sc.tl.score_genes" /> - <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" /> - <has_text_matching expression="ctrl_size=2" /> - <has_text_matching expression="score_name='score'" /> - <has_text_matching expression="n_bins=2" /> - <has_text_matching expression="random_state=2" /> - <has_text_matching expression="use_raw=False" /> - <has_text_matching expression="copy=False" /> - </assert_contents> - </output> - <output name="anndata_out" file="tl.score_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> - </test> - <test expect_num_outputs="2"> - <!-- test 5 --> - <param name="adata" value="krumsiek11.h5ad" /> - <conditional name="method"> - <param name="method" value="tl.score_genes_cell_cycle"/> - <conditional name='s_genes'> - <param name="format" value="text"/> - <param name="text" value="Gata2, Fog1, EgrNab"/> - </conditional> - <conditional name='g2m_genes'> - <param name="format" value="text"/> - <param name="text" value="Gata2, Fog1, EgrNab"/> - </conditional> - <param name="n_bins" value="2"/> - <param name="random_state" value="1"/> - <param name="use_raw" value="False"/> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> - <assert_contents> - <has_text_matching expression="sc.tl.score_genes_cell_cycle"/> - <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/> - <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/> - <has_text_matching expression="n_bins=2"/> - <has_text_matching expression="random_state=1"/> - <has_text_matching expression="use_raw=False"/> - </assert_contents> - </output> - <output name="anndata_out" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> - </test> - <test expect_num_outputs="2"> - <!-- test 6 --> - <param name="adata" value="krumsiek11.h5ad" /> - <conditional name="method"> - <param name="method" value="tl.rank_genes_groups"/> - <param name="groupby" value="cell_type"/> - <param name="use_raw" value="False"/> - <conditional name="ref"> - <param name="rest" value="rest"/> - </conditional> - <param name="n_genes" value="100"/> - <conditional name="tl_rank_genes_groups_method"> - <param name="method" value="t-test_overestim_var"/> - <param name="corr_method" value="benjamini-hochberg"/> - </conditional> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> + <param name="show_log" value="true"/> </section> <output name="hidden_output"> <assert_contents> - <has_text_matching expression="sc.tl.rank_genes_groups"/> - <has_text_matching expression="groupby='cell_type'"/> - <has_text_matching expression="use_raw=False"/> - <has_text_matching expression="reference='rest'"/> - <has_text_matching expression="n_genes=100"/> - <has_text_matching expression="method='t-test_overestim_var'"/> - <has_text_matching expression="corr_method='benjamini-hochberg'"/> + <has_text_matching expression="sc.pp.calculate_qc_metrics"/> + <has_text_matching expression="expr_type='counts'"/> + <has_text_matching expression="var_type='genes'"/> + <has_text_matching expression="qc_vars=\['mito', 'negative'\]"/> </assert_contents> </output> - <output name="anndata_out" file="tl.rank_genes_groups.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> - </test> - <test expect_num_outputs="2"> - <!-- test 7 --> - <param name="adata" value="pbmc68k_reduced.h5ad" /> - <conditional name="method"> - <param name="method" value="tl.rank_genes_groups"/> - <param name="groupby" value="louvain"/> - <param name="use_raw" value="True"/> - <conditional name="ref"> - <param name="rest" value="rest"/> - </conditional> - <param name="n_genes" value="100"/> - <conditional name="tl_rank_genes_groups_method"> - <param name="method" value="logreg"/> - <conditional name="solver"> - <param name="solver" value="newton-cg"/> - <param name="fit_intercept" value="True"/> - <param name="max_iter" value="100"/> - <param name="multi_class" value="auto"/> - </conditional> - <param name="tol" value="1e-4"/> - <param name="c" value="1.0"/> - </conditional> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> + <output name="anndata_out" ftype="h5ad"> <assert_contents> - <has_text_matching expression="sc.tl.rank_genes_groups"/> - <has_text_matching expression="groupby='louvain'"/> - <has_text_matching expression="use_raw=True"/> - <has_text_matching expression="reference='rest'"/> - <has_text_matching expression="n_genes=100"/> - <has_text_matching expression="method='logreg'"/> - <has_text_matching expression="solver='newton-cg'"/> - <has_text_matching expression="penalty='l2'"/> - <has_text_matching expression="fit_intercept=True"/> - <has_text_matching expression="max_iter=100"/> - <has_text_matching expression="multi_class='auto'"/> - <has_text_matching expression="tol=0.0001"/> - <has_text_matching expression="C=1.0"/> - </assert_contents> - </output> - <output name="anndata_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"> - <assert_contents> - <has_h5_keys keys="X, obs, obsm, raw/X, raw/var, uns, var" /> + <has_h5_keys keys="obs/n_genes_by_counts,obs/log1p_n_genes_by_counts,obs/total_counts,obs/log1p_total_counts,obs/pct_counts_in_top_50_genes,obs/pct_counts_in_top_100_genes,obs/pct_counts_in_top_200_genes,obs/pct_counts_in_top_500_genes,obs/total_counts_mito,obs/log1p_total_counts_mito,obs/pct_counts_mito,obs/total_counts_negative,obs/log1p_total_counts_negative,obs/pct_counts_negative"/> + <has_h5_keys keys="var/n_cells_by_counts,var/mean_counts,var/log1p_mean_counts,var/pct_dropout_by_counts,var/total_counts,var/log1p_total_counts"/> </assert_contents> </output> </test> + + <!-- test 2 --> <test expect_num_outputs="2"> - <!-- test 8 --> - <param name="adata" value="pbmc68k_reduced.h5ad" /> - <conditional name="method"> - <param name="method" value="tl.rank_genes_groups"/> - <param name="groupby" value="louvain"/> - <param name="use_raw" value="True"/> - <conditional name="ref"> - <param name="rest" value="rest"/> - </conditional> - <param name="n_genes" value="100"/> - <conditional name="tl_rank_genes_groups_method"> - <param name="method" value="logreg"/> - <conditional name="solver"> - <param name="solver" value="liblinear"/> - <conditional name="penalty"> - <param name="penalty" value="l2"/> - <param name="dual" value="False"/> - <conditional name="intercept_scaling"> - <param name="fit_intercept" value="True"/> - <param name="intercept_scaling" value="1.0" /> - </conditional> - <param name="random_state" value="1"/> - </conditional> - </conditional> - <param name="tol" value="1e-4"/> - <param name="c" value="1.0"/> - </conditional> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> - <assert_contents> - <has_text_matching expression="sc.tl.rank_genes_groups"/> - <has_text_matching expression="groupby='louvain'"/> - <has_text_matching expression="use_raw=True"/> - <has_text_matching expression="reference='rest'"/> - <has_text_matching expression="n_genes=100"/> - <has_text_matching expression="method='logreg'"/> - <has_text_matching expression="solver='liblinear'"/> - <has_text_matching expression="penalty='l2'"/> - <has_text_matching expression="dual=False"/> - <has_text_matching expression="fit_intercept=True"/> - <has_text_matching expression="intercept_scaling=1.0"/> - <has_text_matching expression="tol=0.0001"/> - <has_text_matching expression="C=1.0"/> - </assert_contents> - </output> - <output name="anndata_out" file="tl.rank_genes_groups.liblinear.krumsiek11.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"> - <assert_contents> - <has_h5_keys keys="X, obs, obsm, raw/X, raw/var, uns, var" /> - </assert_contents> - </output> - </test> - <!-- test expect_num_outputs="2"> - < test 9 tl.marker_gene_overlap function was commented because inpace=True does not work> - <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad" /> - <conditional name="method"> - <param name="method" value="tl.marker_gene_overlap"/> - <repeat name="reference_markers"> - <param name="key" value="CD4 T cells"/> - <param name="value" value="IL7R"/> - </repeat> - <repeat name="reference_markers"> - <param name="key" value="CD14+ Monocytes"/> - <param name="value" value="CD14,LYZ"/> - </repeat> - <repeat name="reference_markers"> - <param name="key" value="B cells"/> - <param name="value" value="MS4A1"/> - </repeat> - <conditional name="overlap"> - <param name="method" value="overlap_count"/> - <param name="normalize" value="None"/> - </conditional> - </conditional> - <assert_stdout> - <has_text_matching expression="tl.marker_gene_overlap"/> - <has_text_matching expression="key='rank_genes_groups'"/> - <has_text_matching expression="method='overlap_count'"/> - </assert_stdout> - <output name="anndata_out" file="tl.marker_gene_overlap.pbmc68k_reduced.h5ad" ftype="h5ad" compare="sim_size"/> - </test> --> - <test expect_num_outputs="2"> - <!-- test 10 --> - <param name="adata" value="krumsiek11.h5ad" /> - <conditional name="method"> - <param name="method" value="pp.log1p"/> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> - <assert_contents> - <has_text_matching expression="sc.pp.log1p"/> - </assert_contents> - </output> - <output name="anndata_out" file="pp.log1p.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> - </test> - <test expect_num_outputs="2"> - <!-- test 11 --> - <param name="adata" value="krumsiek11.h5ad" /> - <conditional name="method"> - <param name="method" value="pp.scale"/> - <param name="zero_center" value="true"/> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> - <assert_contents> - <has_text_matching expression="sc.pp.scale"/> - <has_text_matching expression="zero_center=True"/> - </assert_contents> - </output> - <output name="anndata_out" file="pp.scale.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> - </test> - <test expect_num_outputs="2"> - <!-- test 12 --> - <param name="adata" value="krumsiek11.h5ad" /> - <conditional name="method"> - <param name="method" value="pp.scale"/> - <param name="zero_center" value="true"/> - <param name="max_value" value="10"/> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> - <assert_contents> - <has_text_matching expression="sc.pp.scale"/> - <has_text_matching expression="zero_center=True"/> - <has_text_matching expression="max_value=10.0"/> - </assert_contents> - </output> - <output name="anndata_out" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> - </test> - <test expect_num_outputs="2"> - <!-- test 13 --> - <param name="adata" value="krumsiek11.h5ad" /> - <conditional name="method"> - <param name="method" value="pp.sqrt"/> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> - <assert_contents> - <has_text_matching expression="sc.pp.sqrt"/> - </assert_contents> - </output> - <output name="anndata_out" file="pp.sqrt.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> - </test> - <test expect_num_outputs="2"> - <!-- test 13 --> - <param name="adata" value="sparce_csr_matrix.h5ad" /> + <param name="adata" value="sparce_csr_matrix.h5ad"/> <conditional name="method"> <param name="method" value="pp.calculate_qc_metrics"/> - <param name="expr_type" value="counts"/> - <param name="var_type" value="genes"/> <param name="qc_vars" value="mito,negative"/> <param name="percent_top" value="50,100,200,300"/> </conditional> @@ -891,14 +607,382 @@ <has_text_matching expression="percent_top=\[50, 100, 200, 300\]" /> </assert_contents> </output> - <output name="anndata_out" file="pp.calculate_qc_metrics.sparce_csr_matrix.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/n_genes_by_counts,obs/log1p_n_genes_by_counts,obs/total_counts,obs/log1p_total_counts,obs/pct_counts_in_top_50_genes,obs/pct_counts_in_top_100_genes,obs/pct_counts_in_top_200_genes,obs/pct_counts_in_top_300_genes,obs/total_counts_mito,obs/log1p_total_counts_mito,obs/pct_counts_mito,obs/total_counts_negative,obs/log1p_total_counts_negative,obs/pct_counts_negative"/> + <has_h5_keys keys="var/mito,var/negative,var/n_cells_by_counts,var/mean_counts,var/log1p_mean_counts,var/pct_dropout_by_counts,var/total_counts,var/log1p_total_counts"/> + </assert_contents> + </output> + </test> + + <!-- test 3 --> + <test expect_num_outputs="2"> + <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad"/> + <conditional name="method"> + <param name="method" value="pp.neighbors"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.pp.neighbors"/> + <has_text_matching expression="n_neighbors=15"/> + <has_text_matching expression="knn=True"/> + <has_text_matching expression="random_state=0"/> + <has_text_matching expression="method='umap'"/> + <has_text_matching expression="metric='euclidean'"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="uns/neighbors"/> + <has_h5_keys keys="obsp/connectivities,obsp/distances"/> + </assert_contents> + </output> + </test> + + <!-- test 4 --> + <test expect_num_outputs="2"> + <param name="adata" value="pp.recipe_weinreb17.paul15_subsample.h5ad"/> + <conditional name="method"> + <param name="method" value="pp.neighbors"/> + <param name="pp_neighbors_method" value="gauss"/> + <param name="metric" value="braycurtis"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.pp.neighbors"/> + <has_text_matching expression="n_neighbors=15"/> + <has_text_matching expression="knn=True"/> + <has_text_matching expression="random_state=0"/> + <has_text_matching expression="method='gauss'"/> + <has_text_matching expression="metric='braycurtis'"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obsp/connectivities,obsp/distances"/> + </assert_contents> + </output> + </test> + + <!-- test 5 --> + <test expect_num_outputs="2"> + <param name="adata" value="krumsiek11.h5ad"/> + <conditional name="method"> + <param name="method" value="tl.score_genes"/> + <param name="gene_list" value="Gata2, Fog1"/> + <param name="ctrl_size" value="2"/> + <param name="n_bins" value="2"/> + <param name="random_state" value="2"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.tl.score_genes"/> + <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]"/> + <has_text_matching expression="ctrl_size=2"/> + <has_text_matching expression="score_name='score'"/> + <has_text_matching expression="n_bins=2"/> + <has_text_matching expression="random_state=2"/> + <has_text_matching expression="use_raw=False"/> + <has_text_matching expression="copy=False"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/score"/> + </assert_contents> + </output> + </test> + + <!-- test 6 --> + <test expect_num_outputs="2"> + <param name="adata" value="krumsiek11.h5ad"/> + <conditional name="method"> + <param name="method" value="tl.score_genes_cell_cycle"/> + <conditional name='s_genes'> + <param name="format" value="text"/> + <param name="text" value="Gata2, Fog1, EgrNab"/> + </conditional> + <conditional name='g2m_genes'> + <param name="format" value="text"/> + <param name="text" value="Gata2, Fog1, EgrNab"/> + </conditional> + <param name="n_bins" value="2"/> + <param name="random_state" value="1"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.tl.score_genes_cell_cycle"/> + <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/> + <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/> + <has_text_matching expression="n_bins=2"/> + <has_text_matching expression="random_state=1"/> + <has_text_matching expression="use_raw=False"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/S_score,obs/G2M_score,obs/phase"/> + </assert_contents> + </output> + </test> + + <!-- test 7 --> + <test expect_num_outputs="2"> + <param name="adata" value="krumsiek11.h5ad"/> + <conditional name="method"> + <param name="method" value="tl.rank_genes_groups"/> + <param name="groupby" value="cell_type"/> + <param name="n_genes" value="100"/> + <conditional name="tl_rank_genes_groups_method"> + <param name="method" value="t-test_overestim_var"/> + </conditional> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.tl.rank_genes_groups"/> + <has_text_matching expression="groupby='cell_type'"/> + <has_text_matching expression="use_raw=False"/> + <has_text_matching expression="reference='rest'"/> + <has_text_matching expression="n_genes=100"/> + <has_text_matching expression="method='t-test_overestim_var'"/> + <has_text_matching expression="corr_method='benjamini-hochberg'"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="uns/rank_genes_groups"/> + </assert_contents> + </output> + </test> + + <!-- test 8 --> + <test expect_num_outputs="2"> + <param name="adata" value="krumsiek11.h5ad"/> + <conditional name="method"> + <param name="method" value="tl.rank_genes_groups"/> + <param name="groupby" value="cell_type"/> + <param name="n_genes" value="100"/> + <conditional name="tl_rank_genes_groups_method"> + <param name="method" value="logreg"/> + </conditional> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.tl.rank_genes_groups"/> + <has_text_matching expression="groupby='cell_type'"/> + <has_text_matching expression="use_raw=False"/> + <has_text_matching expression="reference='rest'"/> + <has_text_matching expression="n_genes=100"/> + <has_text_matching expression="method='logreg'"/> + <has_text_matching expression="solver='lbfgs'"/> + <has_text_matching expression="penalty='l2'"/> + <has_text_matching expression="fit_intercept=True"/> + <has_text_matching expression="max_iter=100"/> + <has_text_matching expression="multi_class='auto'"/> + <has_text_matching expression="tol=0.0001"/> + <has_text_matching expression="C=1.0"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="uns/rank_genes_groups"/> + </assert_contents> + </output> + </test> + + <!-- test 9 --> + <test expect_num_outputs="2"> + <param name="adata" value="krumsiek11.h5ad"/> + <conditional name="method"> + <param name="method" value="tl.rank_genes_groups"/> + <param name="groupby" value="cell_type"/> + <param name="n_genes" value="100"/> + <conditional name="tl_rank_genes_groups_method"> + <param name="method" value="logreg"/> + <conditional name="solver"> + <param name="solver" value="liblinear"/> + <conditional name="penalty"> + <param name="penalty" value="l2"/> + <param name="random_state" value="1"/> + </conditional> + </conditional> + </conditional> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.tl.rank_genes_groups"/> + <has_text_matching expression="groupby='cell_type'"/> + <has_text_matching expression="use_raw=False"/> + <has_text_matching expression="reference='rest'"/> + <has_text_matching expression="n_genes=100"/> + <has_text_matching expression="method='logreg'"/> + <has_text_matching expression="solver='liblinear'"/> + <has_text_matching expression="penalty='l2'"/> + <has_text_matching expression="dual=False"/> + <has_text_matching expression="fit_intercept=True"/> + <has_text_matching expression="intercept_scaling=1.0"/> + <has_text_matching expression="tol=0.0001"/> + <has_text_matching expression="C=1.0"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="uns/rank_genes_groups"/> + </assert_contents> + </output> + </test> + + <!-- test 10 --> + <test expect_num_outputs="2"> + <param name="adata" value="tl.rank_genes_groups.newton-cg.pbmc68k_reduced.h5ad"/> + <conditional name="method"> + <param name="method" value="tl.marker_gene_overlap"/> + <repeat name="reference_markers"> + <param name="key" value="CD4 T cells"/> + <param name="values" value="IL7R"/> + </repeat> + <repeat name="reference_markers"> + <param name="key" value="CD14+ Monocytes"/> + <param name="values" value="CD14,LYZ"/> + </repeat> + <repeat name="reference_markers"> + <param name="key" value="B cells"/> + <param name="values" value="MS4A1"/> + </repeat> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <assert_stdout> + <has_text_matching expression="marker_gene_overlap"/> + </assert_stdout> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.tl.marker_gene_overlap"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="uns/rank_genes_groups"/> + </assert_contents> + </output> + </test> + + <!-- test 11 --> + <test expect_num_outputs="2"> + <param name="adata" value="krumsiek11.h5ad"/> + <conditional name="method"> + <param name="method" value="pp.log1p"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.pp.log1p"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="uns/log1p"/> + </assert_contents> + </output> + </test> + + <!-- test 12 --> + <test expect_num_outputs="2"> + <param name="adata" value="krumsiek11.h5ad"/> + <conditional name="method"> + <param name="method" value="pp.scale"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.pp.scale"/> + <has_text_matching expression="zero_center=True"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="var/mean,var/std"/> + </assert_contents> + </output> </test> + + <!-- test 13 --> + <test expect_num_outputs="2"> + <param name="adata" value="krumsiek11.h5ad"/> + <conditional name="method"> + <param name="method" value="pp.scale"/> + <param name="max_value" value="10"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.pp.scale"/> + <has_text_matching expression="zero_center=True"/> + <has_text_matching expression="max_value=10.0"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="var/mean,var/std"/> + </assert_contents> + </output> + </test> + + <!-- test 14 --> + <test expect_num_outputs="2"> + <param name="adata" value="random-randint.h5ad"/> + <conditional name="method"> + <param name="method" value="pp.sqrt"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.pp.sqrt"/> + <has_text_matching expression="stats before sqrt: min= 0.0 max= 999.0 mean= 499.83777"/> + <has_text_matching expression="stats after sqrt: min= 0.0 max= 31.606962 mean= 21.079018"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/index"/> + </assert_contents> + </output> </test> </tests> <help><![CDATA[ Calculate quality control metrics., using `pp.calculate_qc_metrics` =================================================================== -Calculates a number of qc metrics for an AnnData object, largely based on calculateQCMetrics from scater. +Calculates a number of qc metrics for an AnnData object, largely based on calculateQCMetrics from scater. Currently is most efficient on a sparse CSR or dense matrix. It updates the observation level metrics with @@ -983,6 +1067,8 @@ Marker gene overlap scores can be quoted as overlap counts, overlap coefficients, or jaccard indices. The method returns a pandas dataframe which can be used to annotate clusters based on marker gene overlaps. +More details on the `scanpy documentation +<https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.marker_gene_overlap.html>`__ Logarithmize the data matrix (`pp.log1p`) =========================================