Mercurial > repos > iuc > scanpy_cluster_reduce_dimension
diff cluster_reduce_dimension.xml @ 17:178242b82297 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 91121b1e72696f17478dae383badaa71e9f96dbb
author | iuc |
---|---|
date | Sat, 14 Sep 2024 12:45:46 +0000 |
parents | 4d8f983cd751 |
children | cb43c5d3acd3 |
line wrap: on
line diff
--- a/cluster_reduce_dimension.xml Tue Aug 20 09:50:17 2024 +0000 +++ b/cluster_reduce_dimension.xml Sat Sep 14 12:45:46 2024 +0000 @@ -1,37 +1,44 @@ -<tool id="scanpy_cluster_reduce_dimension" name="Cluster, infer trajectories and embed" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> - <description>with scanpy</description> +<tool id="scanpy_cluster_reduce_dimension" name="Scanpy cluster, embed" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>and infer trajectories</description> <macros> <import>macros.xml</import> <xml name="pca_inputs"> <param argument="n_comps" type="integer" min="0" value="50" label="Number of principal components to compute" help="If the value is larger than the number of observations the number of observations is used instead"/> - <param argument="dtype" type="text" value="float32" label="Numpy data type string to which to convert the result" help=""> - <expand macro="sanitize_query" /> + <param argument="layer" type="text" value="" optional="true" label="If provided, which element of layers to use for PCA"> + <expand macro="sanitize_query"/> + </param> + <param argument="dtype" type="select" label="Numpy data type string to which to convert the result"> + <option value="float32" selected="true">float32</option> + <option value="int32">int32</option> + <option value="int64">int64</option> + <option value="uint32">uint32</option> + <option value="uint64">uint64</option> + <option value="float16">float16</option> + <option value="float64">float64</option> </param> <conditional name="pca"> <param argument="chunked" type="select" label="Type of PCA?"> + <option value="False" selected="true">Full PCA</option> <option value="True">Incremental PCA on segments (incremental PCA automatically zero centers and ignores settings of 'random_seed' and 'svd_solver')</option> - <option value="False" selected="true">Full PCA</option> </param> <when value="True"> <param argument="chunk_size" type="integer" min="0" value="" label="chunk_size" help="Number of observations to include in each chunk"/> </when> <when value="False"> - <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" - label="Compute standard PCA from covariance matrix?" - help="If not, it omits zero-centering variables (uses *TruncatedSVD* from scikit-learn), which allows to handle sparse input efficiently."/> + <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Compute standard PCA from covariance matrix?" help="If not, it omits zero-centering variables (uses *TruncatedSVD* from scikit-learn), which allows to handle sparse input efficiently."/> <expand macro="svd_solver"/> - <param argument="random_state" type="integer" value="0" label="Initial states for the optimization" help=""/> + <param argument="random_state" type="integer" value="0" label="Change to use different initial states for the optimization"/> </when> </conditional> - <param argument="use_highly_variable" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use highly variable genes only?" help="They should be use if they have been determined beforehand."/> + <param argument="mask_var" type="text" value="" optional="true" label="To run only on a certain set of genes given by a string referring to an array in" help="By default, uses .var['highly_variable'] if available, else everything"/> </xml> <xml name="param_random_state"> <param argument="random_state" type="integer" value="0" label="Random state" help="Change the initialization of the optimization."/> </xml> - <xml name="param_use_weights"> - <param argument="use_weights" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use weights from knn graph?"/> + <xml name="param_use_weights" token_checked=""> + <param argument="use_weights" type="boolean" truevalue="True" falsevalue="False" checked="@CHECKED@" label="Use weights from knn graph?"/> </xml> - <token name="@CMD_pca_help@"><![CDATA[ + <token name="@CMD_PCA_HELP@"><![CDATA[ The PCA is computed using the implementation of *scikit-learn*. The returned AnnData object contains: @@ -43,34 +50,25 @@ - Explained variance, equivalent to the eigenvalues of the covariance matrix This data is accessible using the inspect tool for AnnData -]]></token> - <token name="@CMD_pca_params@"><![CDATA[ - data=adata, - n_comps=min($method.n_comps, adata.n_vars), - dtype='$method.dtype', - copy=False, - chunked=$method.pca.chunked, -#if $method.pca.chunked == 'True' - chunk_size=$method.pca.chunk_size, -#else - zero_center=$method.pca.zero_center, - svd_solver='$method.pca.svd_solver', - random_state=$method.pca.random_state, -#end if - use_highly_variable=$method.use_highly_variable -]]></token> + ]]> + </token> </macros> <expand macro="bio_tools"/> <expand macro="requirements"> + <requirement type="package" version="0.5.6">umap-learn</requirement> + <requirement type="package" version="0.10.2">leidenalg</requirement> + <requirement type="package" version="0.8.2">louvain</requirement> + <requirement type="package" version="1.5.1">scikit-learn</requirement> </expand> <expand macro="version_command"/> <command detect_errors="exit_code"><![CDATA[ @CMD@ - ]]></command> + ]]> + </command> <configfiles> <configfile name="script_file"><![CDATA[ -@CMD_imports@ -@CMD_read_inputs@ +@CMD_IMPORTS@ +@CMD_READ_INPUTS@ #if $method.method == 'tl.louvain' sc.tl.louvain( @@ -83,6 +81,7 @@ key_added='$method.key_added', directed=$method.directed, use_weights=$method.use_weights, + @CMD_PARAM_NEIGHBORS_KEY@ copy=False) #else if $method.method == 'tl.leiden' @@ -91,33 +90,60 @@ resolution=$method.resolution, random_state=$method.random_state, key_added='$method.key_added', + directed=$method.directed, use_weights=$method.use_weights, n_iterations=$method.n_iterations, + @CMD_PARAM_NEIGHBORS_KEY@ + flavor='$method.flavor', copy=False) #else if $method.method == 'pp.pca' -sc.pp.pca(@CMD_pca_params@) - -#else if $method.method == 'tl.pca' -sc.tl.pca(@CMD_pca_params@) +sc.pp.pca( + data=adata, + n_comps=min($method.n_comps, adata.n_vars-1), + #if $method.layer != '' + layer='$method.layer', + #end if + dtype='$method.dtype', + chunked=$method.pca.chunked, + #if $method.pca.chunked == 'True' + chunk_size=$method.pca.chunk_size, + #else + zero_center=$method.pca.zero_center, + #if $method.pca.svd_solver != '' + svd_solver='$method.pca.svd_solver', + #end if + random_state=$method.pca.random_state, + #end if + #if $method.mask_var != '' + mask_var='$method.mask_var', + #end if + copy=False) #else if $method.method == 'tl.diffmap' sc.tl.diffmap( adata=adata, - n_comps=min($method.n_comps, adata.n_vars), + n_comps=min($method.n_comps, adata.n_vars-1), + random_state=$method.random_state, + @CMD_PARAM_NEIGHBORS_KEY@ copy =False) #else if $method.method == 'tl.tsne' +import os sc.tl.tsne( adata=adata, #if str($method.n_pcs) != '' n_pcs=$method.n_pcs, #end if + #if $method.use_rep != '' + use_rep='$method.use_rep', + #end if perplexity=$method.perplexity, + metric='$method.metric', early_exaggeration=$method.early_exaggeration, learning_rate=$method.learning_rate, random_state=$method.random_state, - use_fast_tsne=$method.use_fast_tsne, + n_jobs = int(os.getenv("GALAXY_SLOTS", 4)), copy=False) #else if $method.method == 'tl.umap' @@ -134,6 +160,7 @@ negative_sample_rate=$method.negative_sample_rate, init_pos='$method.init_pos', random_state=$method.random_state, + @CMD_PARAM_NEIGHBORS_KEY@ copy=False) #else if $method.method == 'tl.draw_graph' @@ -145,28 +172,20 @@ sc.tl.draw_graph( adata=adata, layout='$method.layout', -#if $method.root - #set $root=([int(x.strip()) for x in str($method.root).split(',')]) - root=$root, -#end if + #if str($method.root) + root=$method.root, + #end if random_state=$method.random_state, - #if $method.init_pos - init_pos='$method.init_pos', - #end if #if str($method.adjacency) != 'None' adjacency=adjacency, #end if - #if $method.key_ext - key_ext='$method.key_ext', + #if $method.key_added_ext != '' + key_added_ext='$method.key_added_ext', #end if - copy=False) - -#else if $method.method == "tl.paga" -sc.tl.paga( - adata=adata, - groups='$method.groups', - use_rna_velocity=$method.use_rna_velocity, - model='$method.model', + #if $method.init_pos != '' + init_pos='$method.init_pos', + #end if + @CMD_PARAM_NEIGHBORS_KEY@ copy=False) #else if $method.method == "tl.dpt" @@ -176,24 +195,37 @@ n_branchings=$method.n_branchings, min_group_size=$method.min_group_size, allow_kendall_tau_shift=$method.allow_kendall_tau_shift, + @CMD_PARAM_NEIGHBORS_KEY@ + copy=False) + +#else if $method.method == "tl.paga" +sc.tl.paga( + adata=adata, + #if $method.groups != '': + groups='$method.groups', + #end if + use_rna_velocity=$method.use_rna_velocity, + model='$method.model', + @CMD_PARAM_NEIGHBORS_KEY@ copy=False) #else if $method.method == "tl.embedding_density" sc.tl.embedding_density( adata=adata, basis='$method.basis', -#if $method.groupby + #if $method.groupby != '' groupby='$method.groupby', -#end if -#if $method.key_added + #end if + #if $method.key_added != '' key_added='$method.key_added', -#end if + #end if ) #end if -@CMD_anndata_write_outputs@ -]]></configfile> +@CMD_ANNDATA_WRITE_OUTPUTS@ + ]]> + </configfile> </configfiles> <inputs> <expand macro="inputs_anndata"/> @@ -202,113 +234,123 @@ <option value="tl.louvain">Cluster cells into subgroups, using 'tl.louvain'</option> <option value="tl.leiden">Cluster cells into subgroups, using 'tl.leiden'</option> <option value="pp.pca">Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using 'pp.pca'</option> - <option value="tl.pca">Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using 'tl.pca'</option> <option value="tl.diffmap">Diffusion Maps, using 'tl.diffmap'</option> <option value="tl.tsne">t-distributed stochastic neighborhood embedding (tSNE), using 'tl.tsne'</option> <option value="tl.umap">Embed the neighborhood graph using UMAP, using 'tl.umap'</option> <option value="tl.draw_graph">Force-directed graph drawing, using 'tl.draw_graph'</option> <option value="tl.dpt">Infer progression of cells through geodesic distance along the graph, using 'tl.dpt'</option> <option value="tl.paga">Generate cellular maps of differentiation manifolds with complex topologies, using 'tl.paga'</option> - <option value="tl.embedding_density">Calculate the density of cells in an embedding (per condition)</option> + <option value="tl.embedding_density">Calculate the density of cells in an embedding (per condition), using 'tl.embedding_density'</option> </param> <when value="tl.louvain"> <conditional name="flavor"> - <param argument="flavor" type="select" label="Flavor for the clustering" help=""> - <option value="vtraag">vtraag (much more powerful)</option> - <option value="igraph">igraph</option> + <param argument="flavor" type="select" label="Flavor for the clustering"> + <option value="vtraag" selected="true">vtraag (much more powerful than igraph)</option> + <option value="igraph">Built in igraph method</option> </param> <when value="vtraag"> - <param argument="resolution" type="float" value="1.0" - label="Resolution" - help="Higher resolution means finding more and smaller clusters, which defaults to 1.0. See “Time as a resolution parameter” in Lambiotte et al, 2009"/> + <param argument="resolution" type="float" value="1.0" label="Resolution" help="Higher resolution means finding more and smaller clusters, which defaults to 1.0. See “Time as a resolution parameter” in Lambiotte et al, 2014"/> </when> <when value="igraph"/> </conditional> <expand macro="param_random_state"/> - <param argument="key_added" type="text" value="louvain" optional="true" label="Key under which to add the cluster labels" help=""> - <expand macro="sanitize_query" /> + <param argument="key_added" type="text" value="louvain" optional="true" label="Key under which to add the cluster labels"> + <expand macro="sanitize_query"/> </param> <param argument="directed" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Interpret the adjacency matrix as directed graph?"/> - <expand macro="param_use_weights"/> + <expand macro="param_use_weights" checked="false"/> + <expand macro="param_neighbors_key"/> </when> <when value="tl.leiden"> <param argument="resolution" type="float" value="1" label="Coarseness of the clusterin" help="Higher values lead to more clusters"/> <expand macro="param_random_state"/> - <param argument="key_added" type="text" value="leiden" label="Key under which to add the cluster labels" help=""> - <expand macro="sanitize_query" /> + <param argument="key_added" type="text" value="leiden" label="Key under which to add the cluster labels"> + <expand macro="sanitize_query"/> </param> - <expand macro="param_use_weights"/> + <param argument="directed" type="boolean" truevalue="True" falsevalue="None" checked="false" label="Treat the graph as directed or undirected?"/> + <expand macro="param_use_weights" checked="true"/> <param argument="n_iterations" type="integer" value="-1" label="How many iterations of the Leiden clustering algorithm to perform." help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/> + <expand macro="param_neighbors_key"/> + <param argument="flavor" type="select" label="Flavor for the clustering"> + <option value="leidenalg" selected="true">leidenalg</option> + <option value="igraph">Built in igraph method</option> + </param> </when> <when value="pp.pca"> <expand macro="pca_inputs"/> </when> - <when value="tl.pca"> - <expand macro="pca_inputs"/> - </when> <when value="tl.diffmap"> - <param argument="n_comps" type="integer" min="0" value="15" optional="true" label="Number of dimensions of the representation" help=""/> + <param argument="n_comps" type="integer" min="0" value="15" optional="true" label="Number of dimensions of the representation"/> + <param argument="random_state" type="integer" value="0" label="Seed used by the random number generator"/> + <expand macro="param_neighbors_key"/> </when> <when value="tl.tsne"> - <param name="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use" help=""/> - <param name="perplexity" type="float" value="30" label="Perplexity" help="The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger perplexity. Consider selecting a value between 5 and 50. The choice is not extremely critical since t-SNE is quite insensitive to this parameter."/> - <param name="early_exaggeration" type="float" value="12.0" label="Early exaggeration" help="Controls how tight natural clusters in the original space are in the embedded space and how much space will be between them. For larger values, the space between natural clusters will be larger in the embedded space. Again, the choice of this parameter is not very critical. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high."/> - <param name="learning_rate" type="float" value="1000" label="Learning rate" help="The learning rate can be a critical parameter. It should be between 100 and 1000. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high. If the cost function gets stuck in a bad local minimum increasing the learning rate helps sometimes."/> - <param name="random_state" type="integer" value="0" label="Random state" help="Change this to use different intial states for the optimization"/> - <param argument="use_fast_tsne" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Use the MulticoreTSNE package if possible?"/> + <param argument="n_pcs" type="integer" min="0" value="" optional="true" label="Number of PCs to use"/> + <expand macro="param_use_rep"/> + <param argument="perplexity" type="float" value="30" label="Perplexity" help="The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger perplexity. Consider selecting a value between 5 and 50. The choice is not extremely critical since t-SNE is quite insensitive to this parameter."/> + <param argument="metric" type="select"> + <expand macro="distance_metric_options"/> + </param> + <param argument="early_exaggeration" type="float" value="12.0" label="Early exaggeration" help="Controls how tight natural clusters in the original space are in the embedded space and how much space will be between them. For larger values, the space between natural clusters will be larger in the embedded space. Again, the choice of this parameter is not very critical. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high."/> + <param argument="learning_rate" type="float" value="1000" label="Learning rate" help="The learning rate can be a critical parameter. It should be between 100 and 1000. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high. If the cost function gets stuck in a bad local minimum increasing the learning rate helps sometimes."/> + <param argument="random_state" type="integer" value="0" label="Random state" help="Change this to use different intial states for the optimization"/> </when> <when value="tl.umap"> <param argument="min_dist" type="float" value="0.5" label="Effective minimum distance between embedded points" help="Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result on a more even dispersal of points. The value should be set relative to the 'spread' value, which determines the scale at which embedded points will be spread out. The default of in the 'umap-learn' package is 0.1."/> <param argument="spread" type="float" value="1.0" label="Effective scale of embedded points" help="In combination with 'min_dist' this determines how clustered/clumped the embedded points are."/> - <param argument="n_components" type="integer" min="0" value="2" label="Number of dimensions of the embedding" help=""/> + <param argument="n_components" type="integer" min="0" value="2" label="Number of dimensions of the embedding"/> <param argument="maxiter" type="integer" min="0" value="" optional="true" label="Number of iterations (epochs) of the optimization" help="Called 'n_epochs' in the original UMAP."/> - <param argument="alpha" type="float" value="1.0" label="Initial learning rate for the embedding optimization" help=""/> + <param argument="alpha" type="float" value="1.0" label="Initial learning rate for the embedding optimization"/> <param argument="gamma" type="float" value="1.0" label="Weighting applied to negative samples in low dimensional embedding optimization" help="Values higher than one will result in greater weight being given to negative samples."/> - <param argument="negative_sample_rate" type="integer" min="0" value="5" label="The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding" help=""/> + <param argument="negative_sample_rate" type="integer" min="0" value="5" label="The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding"/> <param argument="init_pos" type="select" label="How to initialize the low dimensional embedding" help="Called 'init' in the original UMAP"> + <option value="spectral" selected="true">Spectral embedding of the graph</option> <option value="paga">Position from paga</option> - <option value="spectral" selected="true">Spectral embedding of the graph</option> <option value="random">Initial embedding positions at random</option> </param> - <expand macro="param_random_state"/> + <param argument="random_state" type="integer" value="0" label="Seed used by the random number generator"/> + <expand macro="param_neighbors_key"/> </when> <when value="tl.draw_graph"> <expand macro="param_layout"/> - <expand macro="param_root"/> - <expand macro="param_random_state"/> - <param argument="init_pos" type="text" optional="true" value="" label="Precomputed coordinates for initialization" help="It should be a valid 2d observation (e.g. paga)"> - <expand macro="sanitize_query" /> + <param argument="root" type="integer" value="" optional="true" label="Root for tree layouts"/> + <param argument="random_state" type="integer" value="0" optional="true" label="Random state" help="For layouts with random initialization like 'fr', change this to use different intial states for the optimization."/> + <param argument="adjacency" type="data" format="mtx" optional="true" label="Sparse adjacency matrix of the graph" help="If not set, it uses the unstructured annotation (uns) / neighbors / connectivities"/> + <param argument="key_added_ext" type="text" optional="true" value="" label="External key" help="If not set, it appends 'layout'"> + <expand macro="sanitize_query"/> </param> - <param argument="adjacency" type="data" format="mtx" optional="true" label="Sparse adjacency matrix of the graph" help="If not set, it uses the unstructured annotation (uns) / neighbors / connectivities"/> - <param argument="key_ext" type="text" optional="true" value="" label="External key" help="If not set, it appends 'layout'"> - <expand macro="sanitize_query" /> + <param argument="init_pos" type="text" optional="true" value="" label="Precomputed coordinates for initialization" help="It should be a valid 2d observation (e.g. paga)"> + <expand macro="sanitize_query"/> </param> + <expand macro="param_neighbors_key"/> </when> <when value="tl.dpt"> - <param argument="n_dcs" type="integer" min="0" value="10" label="Number of diffusion components to use" help=""/> - <param argument="n_branchings" type="integer" min="0" value="0" label="Number of branchings to detect" help=""/> + <param argument="n_dcs" type="integer" min="0" value="10" label="Number of diffusion components to use"/> + <param argument="n_branchings" type="integer" min="0" value="0" label="Number of branchings to detect"/> <param argument="min_group_size" type="float" min="0" value="0.01" label="Min group size" help="During recursive splitting of branches ('dpt groups') for 'n_branchings' > 1, do not consider groups that contain less than 'min_group_size' data points. If a float, 'min_group_size' refers to a fraction of the total number of data points."/> <param argument="allow_kendall_tau_shift" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Allow Kendal tau shift?" help="If a very small branch is detected upon splitting, shift away from maximum correlation in Kendall tau criterion of Haghverdi et al (2016) to stabilize the splitting."/> + <expand macro="param_neighbors_key"/> </when> <when value="tl.paga"> - <param argument="groups" type="text" value="louvain" label="Key for categorical in the input" help="You can pass your predefined groups by choosing any categorical annotation of observations ('adata.obs')."> - <expand macro="sanitize_query" /> + <param argument="groups" type="text" optional="true" value="" label="Key for categorical in the input" help="You can pass your predefined groups by choosing any categorical annotation of observations ('adata.obs')."> + <expand macro="sanitize_query"/> </param> <param argument="use_rna_velocity" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that 'adata.uns' contains a directed single-cell graph with key '['velocyto_transitions']'. This feature might be subject to change in the future."/> - <param argument="model" type="select" label="PAGA connectivity model" help=""> - <option value="v1.2">v1.2</option> + <param argument="model" type="select" label="PAGA connectivity model"> + <option value="v1.2" selected="true">v1.2</option> <option value="v1.0">v1.0</option> </param> + <expand macro="param_neighbors_key"/> </when> <when value="tl.embedding_density"> <param argument="basis" type="text" value="umap" label="The embedding over which the density will be calculated." help="This embedded representation should be found in adata.obsm['X_[basis]']"> - <expand macro="sanitize_query" /> + <expand macro="sanitize_query"/> </param> <param argument="groupby" type="text" optional="true" value="" label="Key for categorical observation/cell annotation for which densities are calculated per category." > - <expand macro="sanitize_query" /> + <expand macro="sanitize_query"/> </param> <param argument="key_added" type="text" optional="true" value="" label="Name of the .obs covariate that will be added with the density estimates."> - <expand macro="sanitize_query" /> + <expand macro="sanitize_query"/> </param> </when> </conditional> @@ -318,22 +360,16 @@ <expand macro="anndata_outputs"/> </outputs> <tests> + + <!-- test 1 --> <test expect_num_outputs="2"> - <!-- test 1 --> - <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" /> + <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad"/> <conditional name="method"> <param name="method" value="tl.louvain"/> - <conditional name="flavor"> - <param name="flavor" value="vtraag"/> - <param name="resolution" value="1.0"/> - </conditional> <param name="random_state" value="10"/> - <param name="key_added" value="louvain"/> - <param name="directed" value="true"/> - <param name="use_weights" value="false"/> </conditional> <section name="advanced_common"> - <param name="show_log" value="true" /> + <param name="show_log" value="true"/> </section> <output name="hidden_output"> <assert_contents> @@ -347,21 +383,23 @@ <has_text_matching expression="use_weights=False"/> </assert_contents> </output> - <output name="anndata_out" file="tl.louvain.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/louvain"/> + <has_h5_keys keys="uns/louvain"/> + </assert_contents> + </output> </test> + + <!-- test 2 --> <test expect_num_outputs="2"> - <!-- test 2 --> - <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" /> + <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad"/> <conditional name="method"> <param name="method" value="tl.leiden"/> - <param name="random_state" value="1"/> <param name="random_state" value="10"/> - <param name="key_added" value="leiden"/> - <param name="use_weights" value="false"/> - <param name="n_iterations" value="-1"/> </conditional> <section name="advanced_common"> - <param name="show_log" value="true" /> + <param name="show_log" value="true"/> </section> <output name="hidden_output"> <assert_contents> @@ -369,29 +407,26 @@ <has_text_matching expression="resolution=1"/> <has_text_matching expression="random_state=10"/> <has_text_matching expression="key_added='leiden'"/> - <has_text_matching expression="use_weights=False"/> + <has_text_matching expression="use_weights=True"/> <has_text_matching expression="n_iterations=-1"/> </assert_contents> </output> - <output name="anndata_out" file="tl.leiden.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/leiden"/> + <has_h5_keys keys="uns/leiden"/> + </assert_contents> + </output> </test> + + <!-- test 3 --> <test expect_num_outputs="2"> - <!-- test 3 --> - <param name="adata" value="krumsiek11.h5ad" /> + <param name="adata" value="krumsiek11.h5ad"/> <conditional name="method"> <param name="method" value="pp.pca"/> - <param name="n_comps" value="50"/> - <param name="dtype" value="float32"/> - <conditional name="pca"> - <param name="chunked" value="False"/> - <param name="zero_center" value="true"/> - <param name="svd_solver" value="auto"/> - <param name="random_state" value="0"/> - </conditional> - <param name="use_highly_variable" value="false"/> </conditional> <section name="advanced_common"> - <param name="show_log" value="true" /> + <param name="show_log" value="true"/> </section> <output name="hidden_output"> <assert_contents> @@ -400,28 +435,31 @@ <has_text_matching expression="copy=False"/> <has_text_matching expression="chunked=False"/> <has_text_matching expression="zero_center=True"/> - <has_text_matching expression="svd_solver='auto'"/> <has_text_matching expression="random_state=0"/> - <has_text_matching expression="use_highly_variable=False"/> </assert_contents> </output> - <output name="anndata_out" file="pp.pca.krumsiek11.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.15"/> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="uns/pca"/> + <has_h5_keys keys="obsm/X_pca"/> + <has_h5_keys keys="varm/PCs"/> + </assert_contents> + </output> </test> + + <!-- test 4 --> <test expect_num_outputs="2"> - <!-- test 4 --> - <param name="adata" value="krumsiek11.h5ad" /> + <param name="adata" value="krumsiek11.h5ad"/> <conditional name="method"> <param name="method" value="pp.pca"/> <param name="n_comps" value="20"/> - <param name="dtype" value="float32"/> <conditional name="pca"> <param name="chunked" value="True"/> <param name="chunk_size" value="50"/> </conditional> - <param name="use_highly_variable" value="false"/> </conditional> <section name="advanced_common"> - <param name="show_log" value="true" /> + <param name="show_log" value="true"/> </section> <output name="hidden_output"> <assert_contents> @@ -431,73 +469,48 @@ <has_text_matching expression="copy=False"/> <has_text_matching expression="chunked=True"/> <has_text_matching expression="chunk_size=50"/> - <has_text_matching expression="use_highly_variable=False"/> </assert_contents> </output> - <output name="anndata_out" file="pp.pca.krumsiek11_chunk.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="uns/pca"/> + <has_h5_keys keys="obsm/X_pca"/> + <has_h5_keys keys="varm/PCs"/> + </assert_contents> + </output> </test> + + <!-- test 5 --> <test expect_num_outputs="2"> - <!-- test 5 --> - <param name="adata" value="krumsiek11.h5ad" /> + <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad"/> <conditional name="method"> - <param name="method" value="tl.pca"/> - <param name="n_comps" value="50"/> - <param name="dtype" value="float32"/> - <conditional name="pca"> - <param name="chunked" value="False"/> - <param name="zero_center" value="True"/> - <param name="svd_solver" value="auto"/> - <param name="random_state" value="0"/> - </conditional> - <param name="use_highly_variable" value="false"/> + <param name="method" value="tl.diffmap"/> </conditional> <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> - <assert_contents> - <has_text_matching expression="sc.tl.pca"/> - <has_text_matching expression="dtype='float32'"/> - <has_text_matching expression="copy=False"/> - <has_text_matching expression="chunked=False"/> - <has_text_matching expression="zero_center=True"/> - <has_text_matching expression="svd_solver='auto'"/> - <has_text_matching expression="use_highly_variable=False"/> - </assert_contents> - </output> - <output name="anndata_out" file="tl.pca.krumsiek11.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.15"/> - </test> - <test expect_num_outputs="2"> - <!-- test 6 --> - <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" /> - <conditional name="method"> - <param name="method" value="tl.diffmap"/> - <param name="n_comps" value="15"/> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> + <param name="show_log" value="true"/> </section> <output name="hidden_output"> <assert_contents> <has_text_matching expression="sc.tl.diffmap"/> </assert_contents> </output> - <output name="anndata_out" file="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obsm/X_diffmap"/> + <has_h5_keys keys="uns/diffmap_evals"/> + </assert_contents> + </output> </test> + + <!-- test 6 --> <test expect_num_outputs="2"> - <!-- test 7 --> - <param name="adata" value="krumsiek11.h5ad" /> + <param name="adata" value="krumsiek11.h5ad"/> <conditional name="method"> <param name="method" value="tl.tsne"/> <param name="n_pcs" value="10"/> - <param name="perplexity" value="30"/> - <param name="early_exaggeration" value="12.0"/> - <param name="learning_rate" value="1000"/> - <param name="random_state" value="0"/> - <param name="use_fast_tsne" value="true"/> </conditional> <section name="advanced_common"> - <param name="show_log" value="true" /> + <param name="show_log" value="true"/> </section> <output name="hidden_output"> <assert_contents> @@ -507,28 +520,25 @@ <has_text_matching expression="early_exaggeration=12.0"/> <has_text_matching expression="learning_rate=1000.0"/> <has_text_matching expression="random_state=0"/> - <has_text_matching expression="use_fast_tsne=True"/> </assert_contents> </output> - <output name="anndata_out" file="tl.tsne.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="uns/tsne"/> + <has_h5_keys keys="obsm/X_tsne"/> + </assert_contents> + </output> </test> + + <!-- test 7 --> <test expect_num_outputs="2"> - <!-- test 8 --> - <param name="adata" value="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" /> + <param name="adata" value="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad"/> <conditional name="method"> <param name="method" value="tl.umap"/> - <param name="min_dist" value="0.5"/> - <param name="spread" value="1.0"/> - <param name="n_components" value="2"/> <param name="maxiter" value="2"/> - <param name="alpha" value="1.0"/> - <param name="gamma" value="1.0"/> - <param name="negative_sample_rate" value="5"/> - <param name="init_pos" value="spectral"/> - <param name="random_state" value="0"/> </conditional> <section name="advanced_common"> - <param name="show_log" value="true" /> + <param name="show_log" value="true"/> </section> <output name="hidden_output"> <assert_contents> @@ -544,22 +554,22 @@ <has_text_matching expression="random_state=0"/> </assert_contents> </output> - <output name="anndata_out" file="tl.umap.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"> + <output name="anndata_out" ftype="h5ad"> <assert_contents> - <has_h5_keys keys="X, obs, obsm, uns, var" /> + <has_h5_keys keys="uns/umap"/> + <has_h5_keys keys="obsm/X_umap"/> </assert_contents> </output> </test> + + <!-- test 8 --> <test expect_num_outputs="2"> - <!-- test 9 --> <param name="adata" value="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad"/> <conditional name="method"> <param name="method" value="tl.draw_graph"/> - <param name="layout" value="fa"/> - <param name="random_state" value="0"/> </conditional> <section name="advanced_common"> - <param name="show_log" value="true" /> + <param name="show_log" value="true"/> </section> <output name="hidden_output"> <assert_contents> @@ -568,42 +578,24 @@ <has_text_matching expression="random_state=0"/> </assert_contents> </output> - <output name="anndata_out" file="tl.draw_graph.pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/> - </test> - <test expect_num_outputs="2"> - <!-- test 10 --> - <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad"/> - <conditional name="method"> - <param name="method" value="tl.paga"/> - <param name="groups" value="paul15_clusters"/> - <param name="use_rna_velocity" value="False"/> - <param name="model" value="v1.2"/> - </conditional> - <section name="advanced_common"> - <param name="show_log" value="true" /> - </section> - <output name="hidden_output"> + <output name="anndata_out" ftype="h5ad"> <assert_contents> - <has_text_matching expression="sc.tl.paga"/> - <has_text_matching expression="groups='paul15_clusters'"/> - <has_text_matching expression="use_rna_velocity=False"/> - <has_text_matching expression="model='v1.2'"/> + <has_h5_keys keys="uns/draw_graph"/> + <has_h5_keys keys="obsm/X_draw_graph_fr"/> </assert_contents> </output> - <output name="anndata_out" file="tl.paga.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/> </test> + + <!-- test 9 --> <test expect_num_outputs="2"> - <!-- test 11 --> - <param name="adata" value="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" /> + <param name="adata" value="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad"/> <conditional name="method"> <param name="method" value="tl.dpt"/> <param name="n_dcs" value="15"/> <param name="n_branchings" value="1"/> - <param name="min_group_size" value="0.01"/> - <param name="allow_kendall_tau_shift" value="True"/> </conditional> <section name="advanced_common"> - <param name="show_log" value="true" /> + <param name="show_log" value="true"/> </section> <output name="hidden_output"> <assert_contents> @@ -614,18 +606,48 @@ <has_text_matching expression="allow_kendall_tau_shift=True"/> </assert_contents> </output> - <output name="anndata_out" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/dpt_groups,obs/dpt_order,obs/dpt_order_indices"/> + <has_h5_keys keys="uns/dpt_changepoints,uns/dpt_grouptips"/> + </assert_contents> + </output> </test> + + <!-- test 10 --> + <test expect_num_outputs="2"> + <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad"/> + <conditional name="method"> + <param name="method" value="tl.paga"/> + <param name="groups" value="paul15_clusters"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sc.tl.paga"/> + <has_text_matching expression="groups='paul15_clusters'"/> + <has_text_matching expression="use_rna_velocity=False"/> + <has_text_matching expression="model='v1.2'"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="uns/paga,uns/paul15_clusters_sizes"/> + </assert_contents> + </output> + </test> + + <!-- test 11 --> <test expect_num_outputs="2"> - <!-- test 12 --> - <param name="adata" value="tl.umap.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" /> + <param name="adata" value="tl.umap.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad"/> <conditional name="method"> <param name="method" value="tl.embedding_density"/> - <param name="basis" value="umap"/> <param name="key_added" value="umap_density"/> </conditional> <section name="advanced_common"> - <param name="show_log" value="true" /> + <param name="show_log" value="true"/> </section> <output name="hidden_output"> <assert_contents> @@ -634,10 +656,16 @@ <has_text_matching expression="key_added='umap_density'"/> </assert_contents> </output> - <output name="anndata_out" file="tl.embedding_density.umap.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/> + <output name="anndata_out" ftype="h5ad"> + <assert_contents> + <has_h5_keys keys="obs/umap_density"/> + <has_h5_keys keys="uns/umap_density_params"/> + </assert_contents> + </output> </test> </tests> <help><![CDATA[ + Cluster cells into subgroups (`tl.louvain`) =========================================== @@ -650,6 +678,7 @@ More details on the `tl.louvain scanpy documentation <https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.louvain.html>`_ + Cluster cells into subgroups (`tl.leiden`) ========================================== @@ -660,21 +689,13 @@ More details on the `tl.leiden scanpy documentation <https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.leiden.html>`_ + Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `pp.pca` ============================================================================================================ -@CMD_pca_outputs@ - More details on the `pp.pca scanpy documentation <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.pca.html>`__ -Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `tl.pca` -============================================================================================================ - -@CMD_pca_outputs@ - -More details on the `tl.pca scanpy documentation -<https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.pca.html>`__ Diffusion Maps, using `tl.diffmap` ================================== @@ -698,6 +719,7 @@ More details on the `tl.diffmap scanpy documentation <https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.diffmap.html>`__ + t-distributed stochastic neighborhood embedding (tSNE), using `tl.tsne` ======================================================================= @@ -710,6 +732,7 @@ More details on the `tl.tsne scanpy documentation <https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.tsne.html>`__ + Embed the neighborhood graph using UMAP, using `tl.umap` ======================================================== @@ -721,8 +744,8 @@ nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of `umap-learn <https://github.com/lmcinnes/umap>`__ -(McInnes et al, 2018). For a few comparisons of UMAP with tSNE, see this `preprint -<https://doi.org/10.1101/298430>`__. +(McInnes et al, 2018). For a few comparisons of UMAP with tSNE, see this `paper +<https://www.nature.com/articles/nbt.4314>`__. The UMAP coordinates of data are added to the return AnnData in the multi-dimensional observations annotation (obsm). This data is accessible using the inspect tool for AnnData @@ -730,6 +753,7 @@ More details on the `tl.umap scanpy documentation <https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.umap.html>`__ + Force-directed graph drawing, using `tl.draw_graph` =================================================== @@ -749,6 +773,7 @@ More details on the `tl.draw_graph scanpy documentation <https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.draw_graph.html>`__ + Infer progression of cells through geodesic distance along the graph (`tl.dpt`) =============================================================================== @@ -808,6 +833,18 @@ More details on the `tl.paga scanpy documentation <https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.paga.html>`_ - ]]></help> + + +Calculates the density of cells in an embedding (per condition). (`tl.embedding_density`) +========================================================================================= + +Gaussian kernel density estimation is used to calculate the density of cells in an embedded space. This can be performed per category over a categorical cell annotation. + +Note that density values are scaled to be between 0 and 1. Thus, the density value at each cell is only comparable to densities in the same category. + +More details on the `tl.embedding_density scanpy documentation +<https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.embedding_density.html>`_ + ]]> + </help> <expand macro="citations"/> </tool>