Mercurial > repos > iuc > snapatac2_clustering
view dimension_reduction_clustering.xml @ 6:d258720d9a42 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/snapatac2 commit e0f59fae19e57f54ae0c351a16dd1805d12aba1d
| author | iuc |
|---|---|
| date | Tue, 25 Nov 2025 16:40:54 +0000 |
| parents | d3ea0ba3d066 |
| children |
line wrap: on
line source
<tool id="snapatac2_clustering" name="SnapATAC2 Clustering" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper for the SnapATAC2 dimension reduction and clustering functions.
        The "method" conditional selects one snap.tl / snap.pp function; the configfile below
        renders the matching Python call, and the command section runs it.
        Tokens written as @NAME@ and all <expand macro="..."/> elements are defined in macros.xml.
    -->
    <description>and dimension reduction</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <requirements>
        <expand macro="requirements"/>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
## numba cache directory: TEMP if set, otherwise /tmp
export NUMBA_CACHE_DIR="\${TEMP:-/tmp}";
#if $method.method == 'tl.multi_spectral'
    ## multi_spectral takes several inputs: stage each one as adata_<index>.h5ad
    ## so that the script can collect them with a glob.
    #for $i in range(len($method.adata))
        cp '$method.adata[$i]' 'adata_${i}.h5ad' &&
    #end for
#else
@CMD_PREP_ADATA@
#end if
@CMD@
    ]]></command>
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CONF_IMPORTS@

#if $method.method == 'tl.multi_spectral'
## read the adata_<index>.h5ad copies staged in the working directory by the command section
import glob
files = sorted(glob.glob('adata_*.h5ad'))
adata_list = []
for fn in files:
    ad = snap.read(fn, backed=None)
    adata_list.append(ad)
#else
@CONF_READ_INPUTS@
#end if

#if $method.method == 'tl.spectral'
    #if $method.features
## one True/False word per line, parsed case-insensitively into a boolean mask
with open('$method.features') as f:
    features_mask = [x.lower().capitalize() == "True" for x in f.read().splitlines()]
    #end if

## Somewhere in the SnapATAC2 code, a pandas Series is being passed where a numpy array is expected.
## This is a workaround: add the nonzero method back to pandas Series.
import pandas as pd

def series_nonzero(self):
    return (self != 0).values.nonzero()

pd.Series.nonzero = series_nonzero

snap.tl.spectral(
    adata,
    n_comps = $method.n_comps,
    #if $method.features
    features = features_mask,
    #end if
    random_state = $method.random_state,
    #if $method.sample_size
    sample_size = $method.sample_size,
    #end if
    chunk_size = $method.chunk_size,
    distance_metric = '$method.distance_metric',
    weighted_by_sd = $method.weighted_by_sd,
    inplace = True
)

#else if $method.method == 'tl.multi_spectral'
    #if $method.features
## one True/False word per line, parsed case-insensitively into a boolean mask
with open('$method.features') as f:
    features_mask = [x.lower().capitalize() == "True" for x in f.read().splitlines()]
    #end if

embedding = snap.tl.multi_spectral(
    adatas = adata_list,
    n_comps = $method.n_comps,
    #if $method.features
    features = features_mask,
    #else
    features = None,
    #end if
    weights = None, # Will enable if requested by users
    random_state = $method.random_state,
    weighted_by_sd = $method.weighted_by_sd,
)
## the joint embedding is stored in a copy of the first AnnData of the list
adata = adata_list[0].copy()
adata.uns['spectral_eigenvalue_joint'] = embedding[0]
adata.obsm['X_joint'] = embedding[1]

#else if $method.method == 'tl.umap'
snap.tl.umap(
    adata,
    n_comps = $method.n_comps,
    #if $method.use_dims != ''
    ## dimensions must be rendered as integers (not quoted strings); empty items from stray commas are ignored
    #set $dims = ([int(x.strip()) for x in str($method.use_dims).split(',') if x.strip()])
    use_dims=$dims,
    #end if
    use_rep = '$method.use_rep',
    key_added = '$method.key_added',
    random_state = $method.random_state,
    inplace = True
)

#else if $method.method == 'pp.knn'
snap.pp.knn(
    adata,
    n_neighbors = $method.n_neighbors,
    #if $method.use_dims != ''
    ## dimensions must be rendered as integers (not quoted strings); empty items from stray commas are ignored
    #set $dims = ([int(x.strip()) for x in str($method.use_dims).split(',') if x.strip()])
    use_dims=$dims,
    #end if
    use_rep = '$method.use_rep',
    method = '$method.algorithm',
    inplace = True,
    random_state = $method.random_state
)

#else if $method.method == 'tl.leiden'
snap.tl.leiden(
    adata,
    resolution = $method.resolution,
    objective_function = '$method.objective_function',
    min_cluster_size = $method.min_cluster_size,
    n_iterations = $method.n_iterations,
    random_state = $method.random_state,
    key_added = '$method.key_added',
    use_leidenalg = $method.use_leidenalg,
    weighted = $method.weighted,
    inplace = True
)

#else if $method.method == 'tl.kmeans'
snap.tl.kmeans(
    adata,
    n_clusters = $method.n_clusters,
    n_iterations = $method.n_iterations,
    random_state = $method.random_state,
    use_rep = '$method.use_rep',
    key_added = '$method.key_added'
)

#else if $method.method == 'tl.dbscan'
snap.tl.dbscan(
    adata,
    eps = $method.eps,
    min_samples = $method.min_samples,
    leaf_size = $method.leaf_size,
    use_rep = '$method.use_rep',
    key_added = '$method.key_added',
    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
)

#else if $method.method == 'tl.hdbscan'
snap.tl.hdbscan(
    adata,
    min_cluster_size = $method.min_cluster_size,
    #if $method.min_samples
    min_samples = $method.min_samples,
    #end if
    cluster_selection_epsilon = $method.cluster_selection_epsilon,
    alpha = $method.alpha,
    cluster_selection_method = '$method.cluster_selection_method',
    random_state = $method.random_state,
    use_rep = '$method.use_rep',
    key_added = '$method.key_added'
)

## tl.aggregate_X is disabled: it is implemented in the select_feature function, and it is problematic
## if the user does not select a groupby (it will return an array). It can be skipped unless needed.
## #else if $method.method == 'tl.aggregate_X'
## snap.tl.aggregate_X(
##     adata,
##     #if $method.groupby != ''
##     groupby = '$method.groupby',
##     #end if
##     normalize = '$method.normalize'
## )

#else if $method.method == 'tl.aggregate_cells'
snap.tl.aggregate_cells(
    adata,
    use_rep = '$method.use_rep',
    #if $method.target_num_cells
    target_num_cells = $method.target_num_cells,
    #end if
    min_cluster_size = $method.min_cluster_size,
    random_state = $method.random_state,
    key_added = '$method.key_added',
    inplace = True
)
#end if

@CONF_ANNDATA_WRITE_OUTPUTS@
        ]]></configfile>
    </configfiles>
    <inputs>
        <!-- One "when" branch per SnapATAC2 function; parameter names mirror the Python keyword arguments. -->
        <conditional name="method">
            <param name="method" type="select" label="Dimension reduction and Clustering">
                <option value="tl.spectral">Perform dimension reduction using Laplacian Eigenmap, using 'tl.spectral'</option>
                <option value="tl.multi_spectral">Compute Laplacian Eigenmaps simultaneously on multiple modalities, using 'tl.multi_spectral'</option>
                <option value="tl.umap">Compute Umap, using 'tl.umap'</option>
                <option value="pp.knn">Compute a neighborhood graph of observations, using 'pp.knn'</option>
                <option value="tl.leiden">Cluster cells into subgroups, using 'tl.leiden'</option>
                <option value="tl.kmeans">Cluster cells into subgroups using the K-means algorithm, using 'tl.kmeans'</option>
                <option value="tl.dbscan">Cluster cells into subgroups using the DBSCAN algorithm, using 'tl.dbscan'</option>
                <option value="tl.hdbscan">Cluster cells into subgroups using the HDBSCAN algorithm, using 'tl.hdbscan'</option>
                <!-- tl.aggregate_X is implemented in the select_feature function of the preprocessing.xml tool (in upstream code, not in the xml). It is problematic if the user does not select a groupby (it will return an array). It can be skipped unless needed. -->
                <!-- <option value="tl.aggregate_X">Aggregate values in adata.X in a row-wise fashion, using 'tl.aggregate_X'</option> -->
                <option value="tl.aggregate_cells">Aggregate cells into pseudo-cells, using 'tl.aggregate_cells'</option>
            </param>
            <when value="tl.spectral">
                <expand macro="param_inputs_anndata"/>
                <expand macro="param_n_comps"/>
                <param argument="features" type="data" format="txt,tabular" optional="true" label="Text file indicating features to keep. Each line contains only word (True/False)." help="True means that the feature is kept. False means the feature is removed"/>
                <expand macro="param_random_state"/>
                <param argument="sample_size" type="float" min="0" max="1" optional="true" label="Approximate the embedding using the Nystrom algorithm by selecting a subset of cells" help="Using this only when the number of cells is too large, e.g. > 10,000,000, or the `distance_metric` is “jaccard”"/>
                <param argument="chunk_size" type="integer" value="20000" label="chunk size"/>
                <param argument="distance_metric" type="select" label="distance metric: “jaccard”, “cosine“">
                    <option value="jaccard">jaccard</option>
                    <option value="cosine" selected="true">cosine</option>
                </param>
                <param argument="weighted_by_sd" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to weight the result eigenvectors by the square root of eigenvalues"/>
            </when>
            <when value="tl.multi_spectral">
                <expand macro="param_inputs_anndata" multiple="true" label="list of Anndatas to use for multi_spectral" help="Please note that the embedding will be saved in the first Anndata"/>
                <!-- Will enable if requested by users -->
                <!-- <param name="weights" type="data" format="tabular" optional="true" label="Weights" help="Text file indicating weights for each modality. Each line contains only floats.If not provided, all modalities are weighted equally" /> -->
                <expand macro="param_n_comps"/>
                <param argument="features" type="data" format="txt,tabular" optional="true" label="Text file indicating features to keep. Each line contains only word (True/False)." help="True means that the feature is kept. False means the feature is removed"/>
                <expand macro="param_random_state"/>
                <param argument="weighted_by_sd" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to weight the result eigenvectors by the square root of eigenvalues"/>
            </when>
            <when value="tl.umap">
                <expand macro="param_inputs_anndata"/>
                <expand macro="param_n_comps" value="2" label="Number of components" help=""/>
                <param argument="use_dims" type="text" optional="true" label="Use these dimensions in `use_rep`" help="comma separated list of dimensions">
                    <expand macro="sanitize_query"/>
                </param>
                <expand macro="param_use_rep"/>
                <expand macro="param_key_added" key_added="umap"/>
                <expand macro="param_random_state"/>
            </when>
            <when value="pp.knn">
                <expand macro="param_inputs_anndata"/>
                <param argument="n_neighbors" type="integer" value="50" label="The number of nearest neighbors to be searched"/>
                <param argument="use_dims" type="text" value="" optional="true" label="The dimensions used for computation">
                    <expand macro="sanitize_query"/>
                </param>
                <expand macro="param_use_rep" label="The key for the matrix"/>
                <!-- The parameter is called "algorithm" here but is passed to snap.pp.knn as its "method" keyword. -->
                <param argument="algorithm" type="select" label="Choose method">
                    <option value="kdtree" selected="true">'kdtree': use the kdtree algorithm to find the nearest neighbors</option>
                    <option value="hora">'hora': use the HNSW algorithm to find the approximate nearest neighbors</option>
                    <option value="pynndescent">'pynndescent': use the pynndescent algorithm to find the approximate nearest neighbors</option>
                </param>
                <expand macro="param_random_state" label="Random seed for approximate nearest neighbor search" help="Note that this is only used when method='pynndescent'. Currently hora does not support random seed, so the result of hora is not reproducible."/>
            </when>
            <when value="tl.leiden">
                <expand macro="param_inputs_anndata"/>
                <param argument="resolution" type="float" value="1" label="Parameter value controlling the coarseness of the clustering" help="Higher values lead to more clusters"/>
                <param argument="objective_function" type="select" label="Whether to use the Constant Potts Model (CPM) or modularity">
                    <option value="CPM">CPM</option>
                    <option value="modularity" selected="true">modularity</option>
                    <option value="RBConfiguration">RBConfiguration</option>
                </param>
                <param argument="min_cluster_size" type="integer" value="5" label="The minimum size of clusters"/>
                <expand macro="param_n_iterations"/>
                <expand macro="param_random_state"/>
                <expand macro="param_key_added" key_added="leiden"/>
                <param argument="use_leidenalg" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Whether to use the leidenalg package for Leiden clustering"/>
                <param argument="weighted" type="boolean" truevalue="True" falsevalue="False" label="Whether to use the edge weights in the graph"/>
            </when>
            <when value="tl.kmeans">
                <expand macro="param_inputs_anndata"/>
                <param argument="n_clusters" type="integer" value="5" label="Number of clusters to return"/>
                <expand macro="param_n_iterations"/>
                <expand macro="param_random_state"/>
                <expand macro="param_use_rep"/>
                <expand macro="param_key_added" key_added="kmeans"/>
            </when>
            <when value="tl.dbscan">
                <expand macro="param_inputs_anndata"/>
                <param argument="eps" type="float" value="0.5" label=" The maximum distance between two samples for one to be considered as in the neighborhood of the other" help="This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function."/>
                <param argument="min_samples" type="integer" value="5" label="The number of samples (or total weight) in a neighborhood for a point to be considered as a core point."/>
                <param argument="leaf_size" type="integer" value="30" label="Leaf size passed to BallTree or cKDTree" help="This can affect the speed of the construction and query, as well as the memory required to store the tree."/>
                <expand macro="param_use_rep"/>
                <expand macro="param_key_added" key_added="dbscan"/>
            </when>
            <when value="tl.hdbscan">
                <expand macro="param_inputs_anndata"/>
                <param argument="min_cluster_size" type="integer" value="5" label="The minimum size of clusters"/>
                <param argument="min_samples" type="integer" value="" optional="true" label="The number of samples in a neighborhood for a point to be considered a core point"/>
                <param argument="cluster_selection_epsilon" type="float" value="0.0" label="A distance threshold. Clusters below this value will be merged"/>
                <param argument="alpha" type="float" value="1.0" label="A distance scaling parameter as used in robust single linkage"/>
                <param argument="cluster_selection_method" type="select" label="The method used to select clusters from the condensed tree">
                    <option value="eom">Excess of Mass algorithm to find the most persistent clusters</option>
                    <option value="leaf">Select the clusters at the leaves of the tree - this provides the most fine grained and homogeneous clusters</option>
                </param>
                <expand macro="param_random_state"/>
                <expand macro="param_use_rep"/>
                <expand macro="param_key_added" key_added="hdbscan"/>
            </when>
            <!--
            <when value="tl.aggregate_X">
                <expand macro="param_inputs_anndata"/>
                <expand macro="param_groupby"/>
                <param argument="normalize" type="select" optional="true" label="normalization method">
                    <option value="RPM">RPM</option>
                    <option value="RPKM">RPKM</option>
                </param>
            </when>
            -->
            <when value="tl.aggregate_cells">
                <expand macro="param_inputs_anndata"/>
                <expand macro="param_use_rep"/>
                <param argument="target_num_cells" type="integer" value="" optional="true" label="target_num_cells" help="If None, `target_num_cells = num_cells / min_cluster_size`"/>
                <param argument="min_cluster_size" type="integer" value="50" label="The minimum size of clusters"/>
                <expand macro="param_random_state"/>
                <expand macro="param_key_added" key_added="pseudo_cell"/>
            </when>
        </conditional>
        <expand macro="param_common_advanced"/>
    </inputs>
    <outputs>
        <data name="anndata_out" format="h5ad" from_work_dir="anndata.h5ad.gz" label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix"/>
        <!-- The log output is only created when the user enables "show_log" in the advanced section. -->
        <data name="hidden_output" format="txt" label="Log file">
            <filter>advanced_common['show_log']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Each test enables the log and checks both the rendered call (log text) and the keys written to the h5ad output. -->
        <test expect_num_outputs="2">
            <!-- tl.spectral -->
            <conditional name="method">
                <param name="method" value="tl.spectral"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/pp.select_features.pbmc_500_chr21.h5ad"/>
                <param name="n_comps" value="30"/>
                <param name="random_state" value="0"/>
                <param name="chunk_size" value="20000"/>
                <param name="distance_metric" value="jaccard"/>
                <param name="weighted_by_sd" value="True"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.spectral"/>
                    <has_text_matching expression="random_state = 0"/>
                    <has_text_matching expression="n_comps = 30"/>
                    <has_text_matching expression="chunk_size = 20000"/>
                    <has_text_matching expression="distance_metric = 'jaccard'"/>
                    <has_text_matching expression="weighted_by_sd = True"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="uns/spectral_eigenvalue"/>
                    <has_h5_keys keys="obsm/X_spectral"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.multi_spectral -->
            <conditional name="method">
                <param name="method" value="tl.multi_spectral"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_RNA.h5ad.gz,https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_ATAC.h5ad.gz"/>
                <param name="n_comps" value="30"/>
                <param name="random_state" value="0"/>
                <param name="weighted_by_sd" value="True"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.multi_spectral"/>
                    <has_text_matching expression="random_state = 0"/>
                    <has_text_matching expression="n_comps = 30"/>
                    <has_text_matching expression="weighted_by_sd = True"/>
                    <has_text_matching expression="features = None"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="uns/spectral_eigenvalue_joint"/>
                    <has_h5_keys keys="obsm/X_joint"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.umap -->
            <conditional name="method">
                <param name="method" value="tl.umap"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                <param name="n_comps" value="2"/>
                <param name="use_rep" value="X_spectral"/>
                <param name="key_added" value="umap"/>
                <param name="random_state" value="0"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.umap"/>
                    <has_text_matching expression="n_comps = 2"/>
                    <has_text_matching expression="use_rep = 'X_spectral'"/>
                    <has_text_matching expression="key_added = 'umap'"/>
                    <has_text_matching expression="random_state = 0"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obsm/X_umap"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.umap with use_dims: the comma separated text must be rendered as a list of integers -->
            <conditional name="method">
                <param name="method" value="tl.umap"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                <param name="n_comps" value="2"/>
                <param name="use_dims" value="0,1,2"/>
                <param name="use_rep" value="X_spectral"/>
                <param name="key_added" value="umap"/>
                <param name="random_state" value="0"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.umap"/>
                    <has_text_matching expression="use_dims=\[0, 1, 2\]"/>
                    <has_text_matching expression="use_rep = 'X_spectral'"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obsm/X_umap"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- pp.knn -->
            <conditional name="method">
                <param name="method" value="pp.knn"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.umap.pbmc_500_chr21.h5ad"/>
                <param name="n_neighbors" value="50"/>
                <param name="use_rep" value="X_spectral"/>
                <param name="algorithm" value="kdtree"/>
                <param name="random_state" value="0"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.pp.knn"/>
                    <has_text_matching expression="n_neighbors = 50"/>
                    <has_text_matching expression="use_rep = 'X_spectral'"/>
                    <has_text_matching expression="method = 'kdtree'"/>
                    <has_text_matching expression="inplace = True"/>
                    <has_text_matching expression="random_state = 0"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obsp/distances"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.leiden (modularity) -->
            <conditional name="method">
                <param name="method" value="tl.leiden"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/pp.knn.pbmc_500_chr21.h5ad"/>
                <param name="resolution" value="2"/>
                <param name="objective_function" value="modularity"/>
                <param name="min_cluster_size" value="3"/>
                <param name="n_iterations" value="-1"/>
                <param name="random_state" value="0"/>
                <param name="key_added" value="leiden"/>
                <param name="weighted" value="False"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.leiden"/>
                    <has_text_matching expression="resolution = 2"/>
                    <has_text_matching expression="objective_function = 'modularity'"/>
                    <has_text_matching expression="min_cluster_size = 3"/>
                    <has_text_matching expression="n_iterations = -1"/>
                    <has_text_matching expression="random_state = 0"/>
                    <has_text_matching expression="key_added = 'leiden'"/>
                    <has_text_matching expression="weighted = False"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/leiden"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.leiden (RBConfiguration) -->
            <conditional name="method">
                <param name="method" value="tl.leiden"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/pp.knn.pbmc_500_chr21.h5ad"/>
                <param name="resolution" value="2"/>
                <param name="objective_function" value="RBConfiguration"/>
                <param name="min_cluster_size" value="3"/>
                <param name="n_iterations" value="-1"/>
                <param name="random_state" value="0"/>
                <param name="key_added" value="leiden"/>
                <param name="weighted" value="False"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.leiden"/>
                    <has_text_matching expression="resolution = 2"/>
                    <has_text_matching expression="objective_function = 'RBConfiguration'"/>
                    <has_text_matching expression="min_cluster_size = 3"/>
                    <has_text_matching expression="n_iterations = -1"/>
                    <has_text_matching expression="random_state = 0"/>
                    <has_text_matching expression="key_added = 'leiden'"/>
                    <has_text_matching expression="weighted = False"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/leiden"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.kmeans -->
            <conditional name="method">
                <param name="method" value="tl.kmeans"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                <param name="n_iterations" value="-1"/>
                <param name="random_state" value="0"/>
                <param name="use_rep" value="X_spectral"/>
                <param name="key_added" value="kmeans"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.kmeans"/>
                    <has_text_matching expression="n_iterations = -1"/>
                    <has_text_matching expression="random_state = 0"/>
                    <has_text_matching expression="use_rep = 'X_spectral'"/>
                    <has_text_matching expression="key_added = 'kmeans'"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/kmeans"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.dbscan -->
            <conditional name="method">
                <param name="method" value="tl.dbscan"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                <param name="eps" value="0.5"/>
                <param name="min_samples" value="3"/>
                <param name="leaf_size" value="5"/>
                <param name="use_rep" value="X_spectral"/>
                <param name="key_added" value="dbscan"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.dbscan"/>
                    <has_text_matching expression="eps = 0.5"/>
                    <has_text_matching expression="min_samples = 3"/>
                    <has_text_matching expression="leaf_size = 5"/>
                    <has_text_matching expression="use_rep = 'X_spectral'"/>
                    <has_text_matching expression="key_added = 'dbscan'"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/dbscan"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- tl.hdbscan -->
            <conditional name="method">
                <param name="method" value="tl.hdbscan"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                <param name="min_cluster_size" value="3"/>
                <param name="min_samples" value="3"/>
                <param name="cluster_selection_method" value="eom"/>
                <param name="random_state" value="0"/>
                <param name="use_rep" value="X_spectral"/>
                <param name="key_added" value="hdbscan"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.hdbscan"/>
                    <has_text_matching expression="min_cluster_size = 3"/>
                    <has_text_matching expression="min_samples = 3"/>
                    <has_text_matching expression="cluster_selection_method = 'eom'"/>
                    <has_text_matching expression="random_state = 0"/>
                    <has_text_matching expression="use_rep = 'X_spectral'"/>
                    <has_text_matching expression="key_added = 'hdbscan'"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/hdbscan"/>
                </assert_contents>
            </output>
        </test>
        <!--
        <test expect_num_outputs="2">
            tl.aggregate_X
            <conditional name="method">
                <param name="method" value="tl.aggregate_X"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                <param name="normalize" value="RPKM"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.aggregate_X"/>
                    <has_text_matching expression="normalize = 'RPKM'"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/n_fragment"/>
                </assert_contents>
            </output>
        </test>
        -->
        <test expect_num_outputs="2">
            <!-- tl.aggregate_cells -->
            <conditional name="method">
                <param name="method" value="tl.aggregate_cells"/>
                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                <param name="use_rep" value="X_spectral"/>
                <param name="target_num_cells" value="5"/>
                <param name="min_cluster_size" value="3"/>
                <param name="random_state" value="0"/>
                <param name="key_added" value="pseudo_cell"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true"/>
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="snap.tl.aggregate_cells"/>
                    <has_text_matching expression="use_rep = 'X_spectral'"/>
                    <has_text_matching expression="target_num_cells = 5"/>
                    <has_text_matching expression="min_cluster_size = 3"/>
                    <has_text_matching expression="random_state = 0"/>
                    <has_text_matching expression="key_added = 'pseudo_cell'"/>
                </assert_contents>
            </output>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/pseudo_cell"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Perform dimension reduction using Laplacian Eigenmap, using `tl.spectral`
=========================================================================

Perform dimension reduction using Laplacian Eigenmaps.

Convert the cell-by-feature count matrix into lower dimensional representations using the spectrum of the normalized graph Laplacian defined by pairwise similarity between cells. This function utilizes the matrix-free spectral embedding algorithm to compute the embedding when `distance_metric` is “cosine”, which scales linearly with the number of cells. For other types of similarity metrics, the time and space complexity scale quadratically with the number of cells.

More details on the `SnapATAC2 documentation <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.spectral.html>`__

Compute Laplacian Eigenmaps simultaneously on multiple modalities, with linear space and time complexity, using `tl.multi_spectral`
===================================================================================================================================

This is similar to `spectral`, but it can work on multiple modalities.

More details on the `SnapATAC2 documentation <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.multi_spectral.html>`__

Compute Umap, using `tl.umap`
=============================

Compute Umap

More details on the `SnapATAC2 documentation <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.umap.html>`__

Compute a neighborhood graph of observations, using `pp.knn`
============================================================

Compute a neighborhood graph of observations.

Computes a neighborhood graph of observations stored in adata using the method specified by method. The distance metric used is Euclidean.

More details on the `SnapATAC2 documentation <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.knn.html>`__

Cluster cells into subgroups, using `tl.leiden`
===============================================

Cluster cells into subgroups.

Cluster cells using the Leiden algorithm, an improved version of the Louvain algorithm. It has been proposed for single-cell analysis by Levine et al. (2015). This requires having run `knn`.

More details on the `SnapATAC2 documentation <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.leiden.html>`__

Cluster cells into subgroups using the K-means algorithm, using `tl.kmeans`
===========================================================================

Cluster cells into subgroups using the K-means algorithm, a classical algorithm in data mining.

More details on the `SnapATAC2 documentation <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.kmeans.html>`__

Cluster cells into subgroups using the DBSCAN algorithm, using `tl.dbscan`
==========================================================================

Cluster cells into subgroups using the DBSCAN algorithm.

More details on the `SnapATAC2 documentation <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.dbscan.html>`__

Cluster cells into subgroups using the HDBSCAN algorithm, using `tl.hdbscan`
============================================================================

Cluster cells into subgroups using the HDBSCAN algorithm.

More details on the `SnapATAC2 documentation <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.hdbscan.html>`__

.. Aggregate values in adata.X in a row-wise fashion, using `tl.aggregate_X`
.. =========================================================================
.. Aggregate values in adata.X in a row-wise fashion.
.. Aggregate values in adata.X in a row-wise fashion. This is used to compute RPKM or RPM values stratified by user-provided groupings.
.. More details on the `SnapATAC2 documentation
.. <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_X.html>`__

Aggregate cells into pseudo-cells, using `tl.aggregate_cells`
=============================================================

Aggregate cells into pseudo-cells.

Aggregate cells into pseudo-cells by iterative clustering.

More details on the `SnapATAC2 documentation <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_cells.html>`__
    ]]></help>
    <expand macro="citations"/>
</tool>
