Mercurial > repos > iuc > snapatac2_clustering
changeset 0:af821711b356 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snapatac2 commit be132b56781bede5dc6e020aa80ca315546666cd
author | iuc |
---|---|
date | Thu, 16 May 2024 13:15:57 +0000 |
parents | |
children | 8f8bef61fd0b |
files | dimension_reduction_clustering.xml macros.xml |
diffstat | 2 files changed, 766 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dimension_reduction_clustering.xml Thu May 16 13:15:57 2024 +0000
@@ -0,0 +1,586 @@
+<tool id="snapatac2_clustering" name="SnapATAC2 Clustering" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>and dimension reduction</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="requirements"/>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+export NUMBA_CACHE_DIR="\${TEMP:-/tmp}";
+@PREP_ADATA@
+@CMD@
+    ]]></command>
+    <!-- The Cheetah template below renders a Python script that calls exactly one SnapATAC2 function, selected by $method.method, on 'anndata.h5ad' -->
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+
+@CMD_imports@
+@CMD_read_inputs@
+
+#if $method.method == 'tl.spectral'
+    #if $method.features
+with open('$method.features') as f:
+    features_mask = [x.lower().capitalize() == "True" for x in f.read().splitlines()]
+    #end if
+sa.tl.spectral(
+    adata,
+    n_comps = $method.n_comps,
+    #if $method.features
+    features = features_mask,
+    #end if
+    random_state = $method.random_state,
+    #if $method.sample_size
+    sample_size = $method.sample_size,
+    #end if
+    chunk_size = $method.chunk_size,
+    distance_metric = '$method.distance_metric',
+    weighted_by_sd = $method.weighted_by_sd,
+    inplace = True
+)
+
+#else if $method.method == 'tl.umap'
+sa.tl.umap(
+    adata,
+    n_comps = $method.n_comps,
+    #if $method.use_dims != ''
+## use_dims is a comma separated list of column indices; cast to int (skipping empty items) so a list of integers, not strings, is rendered
+    #set $dims = ([int(x) for x in str($method.use_dims).split(',') if x.strip()])
+    use_dims=$dims,
+    #end if
+    use_rep = '$method.use_rep',
+    key_added = '$method.key_added',
+    random_state = $method.random_state,
+    inplace = True
+)
+
+#else if $method.method == 'pp.knn'
+sa.pp.knn(
+    adata,
+    n_neighbors = $method.n_neighbors,
+    #if $method.use_dims != ''
+## use_dims is a comma separated list of column indices; cast to int (skipping empty items) so a list of integers, not strings, is rendered
+    #set $dims = ([int(x) for x in str($method.use_dims).split(',') if x.strip()])
+    use_dims=$dims,
+    #end if
+    use_rep = '$method.use_rep',
+    method = '$method.algorithm',
+    inplace = True,
+    random_state = $method.random_state
+)
+
+#else if $method.method == 'tl.dbscan'
+sa.tl.dbscan(
+    adata,
+    eps = $method.eps,
+    min_samples = $method.min_samples,
+    leaf_size = $method.leaf_size,
+    use_rep = '$method.use_rep',
+    key_added = '$method.key_added'
+)
+
+#else if $method.method == 'tl.hdbscan'
+sa.tl.hdbscan(
+    adata,
+    min_cluster_size = $method.min_cluster_size,
+    #if $method.min_samples
+    min_samples = $method.min_samples,
+    #end if
+    cluster_selection_epsilon = $method.cluster_selection_epsilon,
+    alpha = $method.alpha,
+    cluster_selection_method = '$method.cluster_selection_method',
+    random_state = $method.random_state,
+    use_rep = '$method.use_rep',
+    key_added = '$method.key_added'
+)
+
+#else if $method.method == 'tl.leiden'
+sa.tl.leiden(
+    adata,
+    resolution = $method.resolution,
+    objective_function = '$method.objective_function',
+    min_cluster_size = $method.min_cluster_size,
+    n_iterations = $method.n_iterations,
+    random_state = $method.random_state,
+    key_added = '$method.key_added',
+    weighted = $method.weighted,
+    inplace = True
+)
+
+#else if $method.method == 'tl.kmeans'
+sa.tl.kmeans(
+    adata,
+    n_clusters = $method.n_clusters,
+    n_iterations = $method.n_iterations,
+    random_state = $method.random_state,
+    use_rep = '$method.use_rep',
+    key_added = '$method.key_added'
+)
+
+#else if $method.method == 'tl.aggregate_X'
+sa.tl.aggregate_X(
+    adata,
+    #if $method.groupby != ''
+    groupby = '$method.groupby',
+    #end if
+## normalize is an optional select: pass it only when a method was chosen, otherwise the literal string 'None' would be rendered
+    #if $method.normalize
+    normalize = '$method.normalize'
+    #end if
+)
+
+#else if $method.method == 'tl.aggregate_cells'
+sa.tl.aggregate_cells(
+    adata,
+    use_rep = '$method.use_rep',
+    #if $method.target_num_cells
+    target_num_cells = $method.target_num_cells,
+    #end if
+    min_cluster_size = $method.min_cluster_size,
+    random_state = $method.random_state,
+    key_added = '$method.key_added',
+    inplace = True
+)
+#end if
+
+@CMD_anndata_write_outputs@
+    ]]></configfile>
+    </configfiles>
+    <inputs>
+        <conditional name="method">
+            <param name="method" type="select" label="Dimension reduction and Clustering">
+                <option value="tl.spectral">Perform dimension reduction using Laplacian Eigenmap, using 'tl.spectral'</option>
+                <option value="tl.umap">Compute Umap, using 'tl.umap'</option>
+                <option value="pp.knn">Compute a neighborhood graph of observations, using 'pp.knn'</option>
+                <option value="tl.leiden">Cluster cells into subgroups, using 'tl.leiden'</option>
+                <option value="tl.kmeans">Cluster cells into subgroups using the K-means algorithm, using 'tl.kmeans'</option>
+                <option value="tl.dbscan">Cluster cells into subgroups using the DBSCAN algorithm, using 'tl.dbscan'</option>
+                <option value="tl.hdbscan">Cluster cells into subgroups using the HDBSCAN algorithm, using 'tl.hdbscan'</option>
+                <option value="tl.aggregate_X">Aggregate values in adata.X in a row-wise fashion, using 'tl.aggregate_X'</option>
+                <option value="tl.aggregate_cells">Aggregate cells into pseudo-cells, using 'tl.aggregate_cells'</option>
+            </param>
+            <when value="tl.spectral">
+                <expand macro="inputs_anndata"/>
+                <expand macro="param_n_comps"/>
+                <param argument="features" type="data" format="txt" optional="true" label="Text file indicating features to keep. Each line contains only one word (True/False)." help="True means that the feature is kept. False means the feature is removed"/>
+                <expand macro="param_random_state"/>
+                <param argument="sample_size" type="float" min="0" max="1" optional="true" label="Approximate the embedding using the Nystrom algorithm by selecting a subset of cells" help="Use this only when the number of cells is too large, e.g. &gt; 10,000,000, or the `distance_metric` is “jaccard”"/>
+                <param argument="chunk_size" type="integer" value="20000" label="chunk size"/>
+                <param argument="distance_metric" type="select" label="distance metric: “jaccard”, “cosine”">
+                    <option value="jaccard">jaccard</option>
+                    <option value="cosine">cosine</option>
+                </param>
+                <param argument="weighted_by_sd" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to weight the result eigenvectors by the square root of eigenvalues"/>
+            </when>
+            <when value="tl.umap">
+                <expand macro="inputs_anndata"/>
+                <param argument="n_comps" type="integer" value="2" label="Number of dimensions of embedding"/>
+                <param argument="use_dims" type="text" value="" optional="true" label="Use these dimensions in `use_rep`" help="comma separated list of dimensions (integer indices)">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="umap"/>
+                <expand macro="param_random_state"/>
+            </when>
+            <when value="pp.knn">
+                <expand macro="inputs_anndata"/>
+                <param argument="n_neighbors" type="integer" value="50" label="The number of nearest neighbors to be searched"/>
+                <param argument="use_dims" type="text" value="" optional="true" label="The dimensions used for computation" help="comma separated list of dimensions (integer indices)">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
+                <param argument="algorithm" type="select" label="Choose method">
+                    <option value="kdtree" selected="true">'kdtree': use the kdtree algorithm to find the nearest neighbors</option>
+                    <option value="hora">'hora': use the HNSW algorithm to find the approximate nearest neighbors</option>
+                    <option value="pynndescent">'pynndescent': use the pynndescent algorithm to find the approximate nearest neighbors</option>
+                </param>
+                <param argument="random_state" type="integer" value="0" label="Random seed for approximate nearest neighbor search"/>
+            </when>
+            <when value="tl.leiden">
+                <expand macro="inputs_anndata"/>
+                <param argument="resolution" type="float" value="1" label="Parameter value controlling the coarseness of the clustering" help="Higher values lead to more clusters"/>
+                <param argument="objective_function" type="select" label="Whether to use the Constant Potts Model (CPM) or modularity">
+                    <option value="CPM">CPM</option>
+                    <option value="modularity">modularity</option>
+                    <option value="RBConfiguration">RBConfiguration</option>
+                </param>
+                <param argument="min_cluster_size" type="integer" value="5" label="The minimum size of clusters"/>
+                <expand macro="param_n_iterations"/>
+                <expand macro="param_random_state"/>
+                <expand macro="param_key_added" key_added="leiden"/>
+                <param argument="weighted" type="boolean" truevalue="True" falsevalue="False" label="Whether to use the edge weights in the graph"/>
+            </when>
+            <when value="tl.kmeans">
+                <expand macro="inputs_anndata"/>
+                <param argument="n_clusters" type="integer" value="5" label="Number of clusters to return"/>
+                <expand macro="param_n_iterations"/>
+                <expand macro="param_random_state"/>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="kmeans"/>
+            </when>
+            <when value="tl.dbscan">
+                <expand macro="inputs_anndata"/>
+                <param argument="eps" type="float" value="0.5" label="The maximum distance between two samples for one to be considered as in the neighborhood of the other" help="This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function."/>
+                <param argument="min_samples" type="integer" value="5" label="The number of samples (or total weight) in a neighborhood for a point to be considered as a core point."/>
+                <param argument="leaf_size" type="integer" value="30" label="Leaf size passed to BallTree or cKDTree" help="This can affect the speed of the construction and query, as well as the memory required to store the tree."/>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="dbscan"/>
+            </when>
+            <when value="tl.hdbscan">
+                <expand macro="inputs_anndata"/>
+                <param argument="min_cluster_size" type="integer" value="5" label="The minimum size of clusters"/>
+                <param argument="min_samples" type="integer" value="" optional="true" label="The number of samples in a neighbourhood for a point to be considered a core point"/>
+                <param argument="cluster_selection_epsilon" type="float" value="0.0" label="A distance threshold. Clusters below this value will be merged"/>
+                <param argument="alpha" type="float" value="1.0" label="A distance scaling parameter as used in robust single linkage"/>
+                <param argument="cluster_selection_method" type="select" label="The method used to select clusters from the condensed tree">
+                    <option value="eom">Excess of Mass algorithm to find the most persistent clusters</option>
+                    <option value="leaf">Select the clusters at the leaves of the tree - this provides the most fine grained and homogeneous clusters</option>
+                </param>
+                <expand macro="param_random_state"/>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="hdbscan"/>
+            </when>
+            <when value="tl.aggregate_X">
+                <expand macro="inputs_anndata"/>
+                <expand macro="param_groupby"/>
+                <param argument="normalize" type="select" optional="true" label="normalization method">
+                    <option value="RPM">RPM</option>
+                    <option value="RPKM">RPKM</option>
+                </param>
+            </when>
+            <when value="tl.aggregate_cells">
+                <expand macro="inputs_anndata"/>
+                <expand macro="param_use_rep"/>
+                <param argument="target_num_cells" type="integer" value="" optional="true" label="target_num_cells" help="If None, `target_num_cells = num_cells / min_cluster_size`"/>
+                <param argument="min_cluster_size" type="integer" value="50" label="The minimum size of clusters"/>
+                <expand macro="param_random_state"/>
+                <expand macro="param_key_added" key_added="pseudo_cell"/>
+            </when>
+        </conditional>
+        <expand macro="inputs_common_advanced"/>
+    </inputs>
+    <outputs>
+        <data name="anndata_out" format="h5ad" from_work_dir="anndata.h5ad" label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix"/>
+        <data name="hidden_output" format="txt" label="Log file">
+            <filter>advanced_common['show_log']</filter>
+        </data>
+        <!-- NOTE(review): no option of the method select above is 'tl.diff_test', so this filter is always false in this tool and the output is never created -->
+        <data name="diff_peaks" format="tabular" from_work_dir="differential_peaks.tsv" label="${tool.name} on ${on_string}: Differential peaks">
+            <filter>method['method'] and 'tl.diff_test' in method['method']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Every test enables show_log and asserts on the rendered script text found in hidden_output, then compares the AnnData output by size -->
+        <test expect_num_outputs="2">
+            <!-- tl.spectral -->
+            <conditional name="method">
+                <param name="method" value="tl.spectral"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/pp.select_features.pbmc_500_chr21.h5ad"/>
+                <param name="n_comps" value="30"/>
+                <param name="random_state" value="0"/>
+                <param name="chunk_size" value="20000"/>
+                <param name="distance_metric" value="jaccard"/>
+                <param name="weighted_by_sd" value="True"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.spectral"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="n_comps = 30"/>
+                    <has_text_matching expression="chunk_size = 20000"/>
+                    <has_text_matching expression="distance_metric = 'jaccard'"/>
+                    <has_text_matching expression="weighted_by_sd = True"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.umap -->
+            <conditional name="method">
+                <param name="method" value="tl.umap"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="n_comps" value="2"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="umap"/>
+                <param name="random_state" value="0"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.umap"/>
+                    <has_text_matching expression="n_comps = 2"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'umap'"/>
+                    <has_text_matching expression="random_state = 0"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.umap.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.knn -->
+            <conditional name="method">
+                <param name="method" value="pp.knn"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.umap.pbmc_500_chr21.h5ad"/>
+                <param name="n_neighbors" value="50"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="algorithm" value="kdtree"/>
+                <param name="random_state" value="0"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.knn"/>
+                    <has_text_matching expression="n_neighbors = 50"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="method = 'kdtree'"/>
+                    <has_text_matching expression="inplace = True"/>
+                    <has_text_matching expression="random_state = 0"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/pp.knn.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.leiden -->
+            <conditional name="method">
+                <param name="method" value="tl.leiden"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/pp.knn.pbmc_500_chr21.h5ad"/>
+                <param name="resolution" value="2"/>
+                <param name="objective_function" value="modularity"/>
+                <param name="min_cluster_size" value="3"/>
+                <param name="n_iterations" value="-1"/>
+                <param name="random_state" value="0"/>
+                <param name="key_added" value="leiden"/>
+                <param name="weighted" value="False"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.leiden"/>
+                    <has_text_matching expression="resolution = 2"/>
+                    <has_text_matching expression="objective_function = 'modularity'"/>
+                    <has_text_matching expression="min_cluster_size = 3"/>
+                    <has_text_matching expression="n_iterations = -1"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="key_added = 'leiden'"/>
+                    <has_text_matching expression="weighted = False"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.leiden.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.kmeans -->
+            <conditional name="method">
+                <param name="method" value="tl.kmeans"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="n_iterations" value="-1"/>
+                <param name="random_state" value="0"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="kmeans"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.kmeans"/>
+                    <has_text_matching expression="n_iterations = -1"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'kmeans'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.kmeans.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.dbscan -->
+            <conditional name="method">
+                <param name="method" value="tl.dbscan"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="eps" value="0.5"/>
+                <param name="min_samples" value="3"/>
+                <param name="leaf_size" value="5"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="dbscan"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.dbscan"/>
+                    <has_text_matching expression="eps = 0.5"/>
+                    <has_text_matching expression="min_samples = 3"/>
+                    <has_text_matching expression="leaf_size = 5"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'dbscan'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.dbscan.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.hdbscan -->
+            <conditional name="method">
+                <param name="method" value="tl.hdbscan"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="min_cluster_size" value="3"/>
+                <param name="min_samples" value="3"/>
+                <param name="cluster_selection_method" value="eom"/>
+                <param name="random_state" value="0"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="hdbscan"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.hdbscan"/>
+                    <has_text_matching expression="min_cluster_size = 3"/>
+                    <has_text_matching expression="min_samples = 3"/>
+                    <has_text_matching expression="cluster_selection_method = 'eom'"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'hdbscan'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.hdbscan.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.aggregate_X -->
+            <conditional name="method">
+                <param name="method" value="tl.aggregate_X"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="normalize" value="RPKM"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.aggregate_X"/>
+                    <has_text_matching expression="normalize = 'RPKM'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.aggregate_X.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.aggregate_cells -->
+            <conditional name="method">
+                <param name="method" value="tl.aggregate_cells"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="target_num_cells" value="5"/>
+                <param name="min_cluster_size" value="3"/>
+                <param name="random_state" value="0"/>
+                <param name="key_added" value="pseudo_cell"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.aggregate_cells"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="target_num_cells = 5"/>
+                    <has_text_matching expression="min_cluster_size = 3"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="key_added = 'pseudo_cell'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.aggregate_cells.pbmc_500_chr21.h5ad"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Perform dimension reduction using Laplacian Eigenmap, using `tl.spectral`
+=========================================================================
+
+Perform dimension reduction using Laplacian Eigenmaps.
+
+Convert the cell-by-feature count matrix into lower dimensional representations using the spectrum of the normalized graph Laplacian defined by pairwise similarity between cells. This function utilizes the matrix-free spectral embedding algorithm to compute the embedding when `distance_metric` is “cosine”, which scales linearly with the number of cells. For other types of similarity metrics, the time and space complexity scale quadratically with the number of cells.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.spectral.html>`__
+
+Compute Umap, using `tl.umap`
+=============================
+
+Compute Umap
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.umap.html>`__
+
+Compute a neighborhood graph of observations, using `pp.knn`
+============================================================
+
+Compute a neighborhood graph of observations.
+
+Computes a neighborhood graph of observations stored in adata using the method specified by method. The distance metric used is Euclidean.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.knn.html>`__
+
+Cluster cells into subgroups, using `tl.leiden`
+===============================================
+
+Cluster cells into subgroups.
+
+Cluster cells using the Leiden algorithm, an improved version of the Louvain algorithm. It has been proposed for single-cell analysis. This requires having run `knn`.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.leiden.html>`__
+
+Cluster cells into subgroups using the K-means algorithm, using `tl.kmeans`
+===========================================================================
+
+Cluster cells into subgroups using the K-means algorithm, a classical algorithm in data mining.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.kmeans.html>`__
+
+Cluster cells into subgroups using the DBSCAN algorithm, using `tl.dbscan`
+==========================================================================
+
+Cluster cells into subgroups using the DBSCAN algorithm.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.dbscan.html>`__
+
+Cluster cells into subgroups using the HDBSCAN algorithm, using `tl.hdbscan`
+============================================================================
+
+Cluster cells into subgroups using the HDBSCAN algorithm.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.hdbscan.html>`__
+
+Aggregate values in adata.X in a row-wise fashion, using `tl.aggregate_X`
+=========================================================================
+
+Aggregate values in adata.X in a row-wise fashion.
+
+Aggregate values in adata.X in a row-wise fashion. This is used to compute RPKM or RPM values stratified by user-provided groupings.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_X.html>`__
+
+Aggregate cells into pseudo-cells, using `tl.aggregate_cells`
+=============================================================
+
+Aggregate cells into pseudo-cells.
+
+Aggregate cells into pseudo-cells by iterative clustering.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_cells.html>`__
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu May 16 13:15:57 2024 +0000
@@ -0,0 +1,190 @@
+<macros>
+    <token name="@TOOL_VERSION@">2.5.3</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">23.0</token>
+    <xml name="requirements">
+        <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement>
+        <requirement type="package" version="5.18.0">plotly</requirement>
+        <requirement type="package" version="0.2.1">python-kaleido</requirement>
+        <requirement type="package" version="0.19.19">polars</requirement>
+        <requirement type="package" version="14.0.1">pyarrow</requirement>
+        <requirement type="package" version="0.11.3">python-igraph</requirement>
+        <requirement type="package" version="0.8.33">hdbscan</requirement>
+        <requirement type="package" version="0.0.9">harmonypy</requirement>
+        <requirement type="package" version="1.7.4">scanorama</requirement>
+        <yield />
+    </xml>
+
+    <token name="@PREP_ADATA@"><![CDATA[
+        cp '$method.adata' 'anndata.h5ad' &&
+    ]]>
+    </token>
+
+    <!-- Copies the rendered script into the log output, runs it (appending its stdout to the log), then pretty-prints the contents of 'anndata_info.txt' -->
+    <token name="@CMD@"><![CDATA[
+        cat '$script_file' > '$hidden_output' &&
+        python '$script_file' >> '$hidden_output' &&
+        touch 'anndata_info.txt' &&
+        cat 'anndata_info.txt' @CMD_prettify_stdout@
+    ]]>
+    </token>
+
+    <token name="@CMD_prettify_stdout@"><![CDATA[ | sed -r '1 s|AnnData object with (.+) = (.*)\s*|\1: \2|g' | sed "s|'||g" | sed -r 's|^\s*(.*):\s(.*)|[\1]\n- \2|g' | sed 's|, |\n- |g'
+    ]]></token>
+
+    <token name="@CMD_imports@"><![CDATA[
+import snapatac2 as sa
+import os
+    ]]>
+    </token>
+    <!-- Sanitizer for free-text parameters: starts from the given valid character set (string.printable by default) and removes the single quote -->
+    <xml name="sanitize_query" token_validinitial="string.printable">
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="'" />
+            </valid>
+        </sanitizer>
+    </xml>
+
+    <xml name="inputs_anndata">
+        <param name="adata" type="data" format="h5ad" label="Annotated data matrix"/>
+    </xml>
+
+    <token name="@CMD_read_inputs@"><![CDATA[
+
+adata = sa.read('anndata.h5ad', backed = None)
+]]>
+    </token>
+
+    <xml name="dimentions_plot">
+        <param argument="width" type="integer" value="500" label="Width of the plot"/>
+        <param argument="height" type="integer" value="400" label="Height of the plot"/>
+    </xml>
+
+    <xml name="param_groupby">
+        <param argument="groupby" type="text" label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+    </xml>
+
+    <xml name="out_file">
+        <param name="out_file" type="select" optional="true" label="Type of output file">
+            <option value="png" selected="true">PNG</option>
+            <option value="svg">SVG</option>
+            <option value="pdf">PDF</option>
+        </param>
+    </xml>
+    <token name="@CMD_anndata_write_outputs@"><![CDATA[
+adata.write('anndata.h5ad')
+with open('anndata_info.txt','w', encoding='utf-8') as ainfo:
+    print(adata, file=ainfo)
+]]>
+    </token>
+    <xml name="inputs_common_advanced">
+        <section name="advanced_common" title="Advanced Options" expanded="false">
+            <param name="show_log" type="boolean" checked="false" label="Output Log?" />
+        </section>
+    </xml>
+    <xml name="params_render_plot">
+        <param argument="width" type="integer" value="600" label="Width of the plot"/>
+        <param argument="height" type="integer" value="400" label="Height of the plot"/>
+        <expand macro="out_file"/>
+    </xml>
+    <xml name="param_shift">
+        <param argument="shift_left" type="integer" value="4" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/>
+        <param argument="shift_right" type="integer" value="-5" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/>
+    </xml>
+    <xml name="param_chunk_size" tokens="size">
+        <param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/>
+    </xml>
+    <xml name="min_max_frag_size">
+        <param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/>
+        <param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/>
+    </xml>
+    <xml name="params_data_integration">
+        <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
+        <param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation">
+            <expand macro="sanitize_query"/>
+        </param>
+        <param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+        <param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/>
+    </xml>
+    <xml name="param_n_comps">
+        <param argument="n_comps" type="integer" value="30" label="Number of dimensions to keep" help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30."/>
+    </xml>
+    <xml name="param_random_state">
+        <param argument="random_state" type="integer" value="0" label="Seed of the random state generator"/>
+    </xml>
+    <xml name="param_key_added" tokens="key_added">
+        <param argument="key_added" type="text" value="@KEY_ADDED@" label="`adata.obs` key under which to add cluster labels"/>
+    </xml>
+    <xml name="param_use_rep">
+        <param argument="use_rep" type="text" value="X_spectral" label="Use the indicated representation in `.obsm`"/>
+    </xml>
+    <xml name="genome_fasta">
+        <param argument="genome_fasta" type="text" label="A fasta file containing the genome sequences or a Genome object"/>
+    </xml>
+    <xml name="background">
+        <param argument="background" type="text" optional="true" value="" label="A list of regions to be used as the background">
+            <expand macro="sanitize_query"/>
+        </param>
+    </xml>
+    <xml name="mat">
+        <param argument="peak_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by peak count matrix"/>
+        <param argument="gene_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by gene count matrix"/>
+    </xml>
+    <xml name="param_network">
+        <param argument="network" type="text" label="network"/>
+    </xml>
+    <xml name="param_n_iterations">
+        <param argument="n_iterations" type="integer" value="-1" label="How many iterations of the Leiden clustering algorithm to perform"
+            help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-023-02139-9</citation>
+        </citations>
+    </xml>
+    <xml name="render_plot_test">
+        <param name="width" value="650"/>
+        <param name="height" value="450"/>
+    </xml>
+    <xml name="render_plot_matching_text">
+        <has_text_matching expression="width = 650"/>
+        <has_text_matching expression="height = 450"/>
+    </xml>
+    <xml name="param_counting_strategy">
+        <param argument="counting_strategy" type="select" label="The strategy to compute feature counts">
+            <option value="fragment">fragment</option>
+            <option value="insertion" selected="true">insertion</option>
+            <option value="paired-insertion">paired-insertion</option>
+        </param>
+    </xml>
+
+    <!-- Keyword arguments shared by the data integration calls; use_dims is cast to a list of integers (empty items skipped), groupby stays a list of strings -->
+    <token name="@CMD_params_data_integration@"><![CDATA[
+use_rep = '$method.use_rep',
+#if $method.use_dims != ''
+#set $dims = ([int(x) for x in str($method.use_dims).split(',') if x.strip()])
+use_dims=$dims,
+#end if
+#if $method.groupby != ''
+#set $groupby = ([x.strip() for x in str($method.groupby).split(',')])
+groupby=$groupby,
+#end if
+#if $method.key_added != ''
+key_added = '$method.key_added',
+#end if
+    ]]>
+    </token>
+
+    <token name="@CMD_params_render_plot@"><![CDATA[
+    width = $method.width,
+    height = $method.height,
+    out_file = 'plot.$method.out_file',
+    ]]>
+    </token>
+</macros>