Mercurial > repos > iuc > snapatac2_clustering

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dimension_reduction_clustering.xml	Thu May 16 13:15:57 2024 +0000
@@ -0,0 +1,579 @@
+<tool id="snapatac2_clustering" name="SnapATAC2 Clustering" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>and dimension reduction</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="requirements"/>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+export NUMBA_CACHE_DIR="\${TEMP:-/tmp}";
+@PREP_ADATA@
+@CMD@
+    ]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+
+@CMD_imports@
+@CMD_read_inputs@
+
+#if $method.method == 'tl.spectral'
+	#if $method.features
+with open('$method.features') as f:
+	features_mask = [x.lower().capitalize() == "True" for x in f.read().splitlines()]
+	#end if
+sa.tl.spectral(
+	adata,
+	n_comps = $method.n_comps,
+	#if $method.features
+	features = features_mask,
+	#end if
+	random_state = $method.random_state,
+	#if $method.sample_size
+	sample_size = $method.sample_size,
+	#end if
+	chunk_size = $method.chunk_size,
+	distance_metric = '$method.distance_metric',
+	weighted_by_sd = $method.weighted_by_sd,
+	inplace = True
+)
+
+#else if $method.method == 'tl.umap'
+sa.tl.umap(
+	adata,
+	n_comps = $method.n_comps,
+	#if $method.use_dims != ''
+	    #set $dims = ([x.strip() for x in str($method.use_dims).split(',')])
+	use_dims=$dims,
+	#end if
+	use_rep = '$method.use_rep',
+	key_added = '$method.key_added',
+	random_state = $method.random_state,
+	inplace = True
+)
+
+#else if $method.method == 'pp.knn'
+sa.pp.knn(
+	adata,
+	n_neighbors = $method.n_neighbors,
+	#if $method.use_dims != ''
+	    #set $dims = ([x.strip() for x in str($method.use_dims).split(',')])
+	use_dims=$dims,
+	#end if
+	use_rep = '$method.use_rep',
+	method = '$method.algorithm',
+	inplace = True,
+	random_state = $method.random_state
+)
+
+#else if $method.method == 'tl.dbscan'
+sa.tl.dbscan(
+	adata,
+	eps = $method.eps,
+	min_samples = $method.min_samples,
+	leaf_size = $method.leaf_size,
+	use_rep = '$method.use_rep',
+	key_added = '$method.key_added'
+)
+
+#else if $method.method == 'tl.hdbscan'
+sa.tl.hdbscan(
+	adata,
+	min_cluster_size = $method.min_cluster_size,
+	#if $method.min_samples
+	min_samples = $method.min_samples,
+	#end if
+	cluster_selection_epsilon = $method.cluster_selection_epsilon,
+	alpha = $method.alpha,
+	cluster_selection_method = '$method.cluster_selection_method',
+	random_state = $method.random_state,
+	use_rep = '$method.use_rep',
+	key_added = '$method.key_added'
+)
+
+#else if $method.method == 'tl.leiden'
+sa.tl.leiden(
+	adata,
+	resolution = $method.resolution,
+	objective_function = '$method.objective_function',
+	min_cluster_size = $method.min_cluster_size,
+	n_iterations = $method.n_iterations,
+	random_state = $method.random_state,
+	key_added = '$method.key_added',
+	weighted = $method.weighted,
+	inplace = True
+)
+
+#else if $method.method == 'tl.kmeans'
+sa.tl.kmeans(
+	adata,
+	n_clusters = $method.n_clusters,
+	n_iterations = $method.n_iterations,
+	random_state = $method.random_state,
+	use_rep = '$method.use_rep',
+	key_added = '$method.key_added'
+)
+
+#else if $method.method == 'tl.aggregate_X'
+sa.tl.aggregate_X(
+	adata,
+	#if $method.groupby != ''
+	groupby = '$method.groupby',
+	#end if
+	normalize = '$method.normalize'
+)
+
+#else if $method.method == 'tl.aggregate_cells'
+sa.tl.aggregate_cells(
+	adata,
+	use_rep = '$method.use_rep',
+	#if $method.target_num_cells
+	target_num_cells = $method.target_num_cells,
+	#end if
+	min_cluster_size = $method.min_cluster_size,
+	random_state = $method.random_state,
+	key_added = '$method.key_added',
+	inplace = True
+)
+#end if
+
+@CMD_anndata_write_outputs@
+	]]></configfile>
+    </configfiles>
+    <inputs>
+        <conditional name="method">
+            <param name="method" type="select" label="Dimension reduction and Clustering">
+                <option value="tl.spectral">Perform dimension reduction using Laplacian Eigenmap, using 'tl.spectral'</option>
+                <option value="tl.umap">Compute Umap, using 'tl.umap'</option>
+                <option value="pp.knn">Compute a neighborhood graph of observations, using 'pp.knn'</option>
+                <option value="tl.leiden">Cluster cells into subgroups, using 'tl.leiden'</option>
+                <option value="tl.kmeans">Cluster cells into subgroups using the K-means algorithm, using 'tl.kmeans'</option>
+                <option value="tl.dbscan">Cluster cells into subgroups using the DBSCAN algorithm, using 'tl.dbscan'</option>
+                <option value="tl.hdbscan">Cluster cells into subgroups using the HDBSCAN algorithm, using 'tl.hdbscan'</option>
+                <option value="tl.aggregate_X">Aggregate values in adata.X in a row-wise fashion, using 'tl.aggregate_X'</option>
+                <option value="tl.aggregate_cells">Aggregate cells into pseudo-cells, using 'tl.aggregate_cells'</option>
+            </param>
+            <when value="tl.spectral">
+                <expand macro="inputs_anndata"/>
+                <expand macro="param_n_comps"/>
+                <param argument="features" type="data" format="txt" optional="true" label="Text file indicating features to keep. Each line contains only word (True/False)." help="True means that the feature is kept. False means the feature is removed"/>
+                <expand macro="param_random_state"/>
+                <param argument="sample_size" type="float" min="0" max="1" optional="true" label="Approximate the embedding using the Nystrom algorithm by selecting a subset of cells" help="Using this only when the number of cells is too large, e.g. &gt; 10,000,000, or the `distance_metric` is “jaccard”"/>
+                <param argument="chunk_size" type="integer" value="20000" label="chunk size"/>
+                <param argument="distance_metric" type="select" label="distance metric: “jaccard”, “cosine“">
+                    <option value="jaccard">jaccard</option>
+                    <option value="cosine">cosine</option>
+                </param>
+                <param argument="weighted_by_sd" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to weight the result eigenvectors by the square root of eigenvalues"/>
+            </when>
+            <when value="tl.umap">
+                <expand macro="inputs_anndata"/>
+                <param argument="n_comps" type="integer" value="2" label="Number of dimensions of embedding"/>
+                <param argument="use_dims" type="text" optional="true" label="Use these dimensions in `use_rep`" help="comma separated list of dimensions">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="umap"/>
+                <expand macro="param_random_state"/>
+            </when>
+            <when value="pp.knn">
+                <expand macro="inputs_anndata"/>
+                <param argument="n_neighbors" type="integer" value="50" label="The number of nearest neighbors to be searched"/>
+                <param argument="use_dims" type="text" value="" optional="true" label="The dimensions used for computation">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
+                <param argument="algorithm" type="select" label="Choose method">
+                    <option value="kdtree" selected="true">'kdtree': use the kdtree algorithm to find the nearest neighbors</option>
+                    <option value="hora">'hora': use the HNSW algorithm to find the approximate nearest neighbors</option>
+                    <option value="pynndescent">'pynndescent': use the pynndescent algorithm to find the approximate nearest neighbors</option>
+                </param>
+                <param argument="random_state" type="integer" value="0" label="Random seed for approximate nearest neighbor search"/>
+            </when>
+            <when value="tl.leiden">
+                <expand macro="inputs_anndata"/>
+                <param argument="resolution" type="float" value="1" label="Parameter value controlling the coarseness of the clustering" help="Higher values lead to more clusters"/>
+                <param argument="objective_function" type="select" label="Whether to use the Constant Potts Model (CPM) or modularity">
+                    <option value="CPM">CPM</option>
+                    <option value="modularity">modularity</option>
+                    <option value="RBConfiguration">RBConfiguration</option>
+                </param>
+                <param argument="min_cluster_size" type="integer" value="5" label="The minimum size of clusters"/>
+                <expand macro="param_n_iterations"/>
+                <expand macro="param_random_state"/>
+                <expand macro="param_key_added" key_added="leiden"/>
+                <param argument="weighted" type="boolean" truevalue="True" falsevalue="False" label="Whether to use the edge weights in the graph"/>
+            </when>
+            <when value="tl.kmeans">
+                <expand macro="inputs_anndata"/>
+                <param argument="n_clusters" type="integer" value="5" label="Number of clusters to return"/>
+                <expand macro="param_n_iterations"/>
+                <expand macro="param_random_state"/>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="kmeans"/>
+            </when>
+            <when value="tl.dbscan">
+                <expand macro="inputs_anndata"/>
+                <param argument="eps" type="float" value="0.5" label=" The maximum distance between two samples for one to be considered as in the neighborhood of the other" help="This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function."/>
+                <param argument="min_samples" type="integer" value="5" label="The number of samples (or total weight) in a neighborhood for a point to be considered as a core point."/>
+                <param argument="leaf_size" type="integer" value="30" label="Leaf size passed to BallTree or cKDTree" help="This can affect the speed of the construction and query, as well as the memory required to store the tree."/>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="dbscan"/>
+            </when>
+            <when value="tl.hdbscan">
+                <expand macro="inputs_anndata"/>
+                <param argument="min_cluster_size" type="integer" value="5" label="The minimum size of clusters"/>
+                <param argument="min_samples" type="integer" value="" optional="true" label="The number of samples in a neighbourhood for a point to be considered a core point"/>
+                <param argument="cluster_selection_epsilon" type="float" value="0.0" label="A distance threshold. Clusters below this value will be merged"/>
+                <param argument="alpha" type="float" value="1.0" label="A distance scaling parameter as used in robust single linkage"/>
+                <param argument="cluster_selection_method" type="select" label="The method used to select clusters from the condensed tree">
+                    <option value="eom">Excess of Mass algorithm to find the most persistent clusters</option>
+                    <option value="leaf">Select the clusters at the leaves of the tree - this provides the most fine grained and homogeneous clusters</option>
+                </param>
+                <expand macro="param_random_state"/>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="hdbscan"/>
+            </when>
+            <when value="tl.aggregate_X">
+                <expand macro="inputs_anndata"/>
+                <expand macro="param_groupby"/>
+                <param argument="normalize" type="select" optional="true" label="normalization method">
+                    <option value="RPM">RPM</option>
+                    <option value="RPKM">RPKM</option>
+                </param>
+            </when>
+            <when value="tl.aggregate_cells">
+                <expand macro="inputs_anndata"/>
+                <expand macro="param_use_rep"/>
+                <param argument="target_num_cells" type="integer" value="" optional="true" label="target_num_cells" help="If None, `target_num_cells = num_cells / min_cluster_size`"/>
+                <param argument="min_cluster_size" type="integer" value="50" label="The minimum size of clusters"/>
+                <expand macro="param_random_state"/>
+                <expand macro="param_key_added" key_added="pseudo_cell"/>
+            </when>
+        </conditional>
+        <expand macro="inputs_common_advanced"/>
+    </inputs>
+    <outputs>
+        <data name="anndata_out" format="h5ad" from_work_dir="anndata.h5ad" label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix"/>
+        <data name="hidden_output" format="txt" label="Log file">
+            <filter>advanced_common['show_log']</filter>
+        </data>
+        <data name="diff_peaks" format="tabular" from_work_dir="differential_peaks.tsv" label="${tool.name} on ${on_string}: Differential peaks">
+            <filter>method['method'] and 'tl.diff_test' in method['method']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2">
+            <!-- tl.spectral -->
+            <conditional name="method">
+                <param name="method" value="tl.spectral"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/pp.select_features.pbmc_500_chr21.h5ad"/>
+                <param name="n_comps" value="30"/>
+                <param name="random_state" value="0"/>
+                <param name="chunk_size" value="20000"/>
+                <param name="distance_metric" value="jaccard"/>
+                <param name="weighted_by_sd" value="True"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.spectral"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="n_comps = 30"/>
+                    <has_text_matching expression="chunk_size = 20000"/>
+                    <has_text_matching expression="distance_metric = 'jaccard'"/>
+                    <has_text_matching expression="weighted_by_sd = True"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.umap -->
+            <conditional name="method">
+                <param name="method" value="tl.umap"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="n_comps" value="2"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="umap"/>
+                <param name="random_state" value="0"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.umap"/>
+                    <has_text_matching expression="n_comps = 2"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'umap'"/>
+                    <has_text_matching expression="random_state = 0"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.umap.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.knn -->
+            <conditional name="method">
+                <param name="method" value="pp.knn"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.umap.pbmc_500_chr21.h5ad"/>
+                <param name="n_neighbors" value="50"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="method_" value="kdtree"/>
+                <param name="inplace" value="True"/>
+                <param name="random_state" value="0"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.knn"/>
+                    <has_text_matching expression="n_neighbors = 50"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="method = 'kdtree'"/>
+                    <has_text_matching expression="inplace = True"/>
+                    <has_text_matching expression="random_state = 0"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/pp.knn.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.leiden -->
+            <conditional name="method">
+                <param name="method" value="tl.leiden"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/pp.knn.pbmc_500_chr21.h5ad"/>
+                <param name="resolution" value="2"/>
+                <param name="objective_function" value="modularity"/>
+                <param name="min_cluster_size" value="3"/>
+                <param name="n_iterations" value="-1"/>
+                <param name="random_state" value="0"/>
+                <param name="key_added" value="leiden"/>
+                <param name="weighted" value="False"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.leiden"/>
+                    <has_text_matching expression="resolution = 2"/>
+                    <has_text_matching expression="objective_function = 'modularity'"/>
+                    <has_text_matching expression="min_cluster_size = 3"/>
+                    <has_text_matching expression="n_iterations = -1"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="key_added = 'leiden'"/>
+                    <has_text_matching expression="weighted = False"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.leiden.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.kmeans -->
+            <conditional name="method">
+                <param name="method" value="tl.kmeans"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="n_iterations" value="-1"/>
+                <param name="random_state" value="0"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="kmeans"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.kmeans"/>
+                    <has_text_matching expression="n_iterations = -1"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'kmeans'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.kmeans.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.dbscan -->
+            <conditional name="method">
+                <param name="method" value="tl.dbscan"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="eps" value="0.5"/>
+                <param name="min_samples" value="3"/>
+                <param name="leaf_size" value="5"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="dbscan"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.dbscan"/>
+                    <has_text_matching expression="eps = 0.5"/>
+                    <has_text_matching expression="min_samples = 3"/>
+                    <has_text_matching expression="leaf_size = 5"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'dbscan'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.dbscan.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.hdbscan -->
+            <conditional name="method">
+                <param name="method" value="tl.hdbscan"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="min_cluster_size" value="3"/>
+                <param name="min_samples" value="3"/>
+                <param name="cluster_selection_method" value="eom"/>
+                <param name="random_state" value="0"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="hdbscan"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.hdbscan"/>
+                    <has_text_matching expression="min_cluster_size = 3"/>
+                    <has_text_matching expression="min_samples = 3"/>
+                    <has_text_matching expression="cluster_selection_method = 'eom'"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'hdbscan'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.hdbscan.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.aggregate_X -->
+            <conditional name="method">
+                <param name="method" value="tl.aggregate_X"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="normalize" value="RPKM"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.aggregate_X"/>
+                    <has_text_matching expression="normalize = 'RPKM'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.aggregate_X.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.aggregate_cells -->
+            <conditional name="method">
+                <param name="method" value="tl.aggregate_cells"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="target_num_cells" value="5"/>
+                <param name="min_cluster_size" value="3"/>
+                <param name="random_state" value="0"/>
+                <param name="key_added" value="pseudo_cell"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.aggregate_cells"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="target_num_cells = 5"/>
+                    <has_text_matching expression="min_cluster_size = 3"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="key_added = 'pseudo_cell'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.aggregate_cells.pbmc_500_chr21.h5ad"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Perform dimension reduction using Laplacian Eigenmap, using `tl.spectral`
+=========================================================================
+
+Perform dimension reduction using Laplacian Eigenmaps.
+
+Convert the cell-by-feature count matrix into lower dimensional representations using the spectrum of the normalized graph Laplacian defined by pairwise similarity between cells. This function utilizes the matrix-free spectral embedding algorithm to compute the embedding when `distance_metric` is “cosine”, which scales linearly with the number of cells. For other types of similarity metrics, the time and space complexity scale quadratically with the number of cells.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.spectral.html>`__
+
+Compute Umap, using `tl.umap`
+=============================
+
+Compute Umap
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.umap.html>`__
+
+Compute a neighborhood graph of observations, using `pp.knn`
+============================================================
+
+Compute a neighborhood graph of observations.
+
+Computes a neighborhood graph of observations stored in adata using the method specified by method. The distance metric used is Euclidean.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.knn.html>`__
+
+Cluster cells into subgroups, using `tl.leiden`
+===============================================
+
+Cluster cells into subgroups.
+
+Cluster cells using the Leiden algorithm, an improved version of the Louvain algorithm. It has been proposed for single-cell analysis by. This requires having ran `knn`.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.leiden.html>`__
+
+Cluster cells into subgroups using the K-means algorithm, using `tl.kmeans`
+===========================================================================
+
+Cluster cells into subgroups using the K-means algorithm, a classical algorithm in data mining.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.kmeans.html>`__
+
+Cluster cells into subgroups using the DBSCAN algorithm, using `tl.dbscan`
+==========================================================================
+
+Cluster cells into subgroups using the DBSCAN algorithm.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.dbscan.html>`__
+
+Cluster cells into subgroups using the HDBSCAN algorithm, using `tl.hdbscan`
+============================================================================
+
+Cluster cells into subgroups using the HDBSCAN algorithm.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.hdbscan.html>`__
+
+Aggregate values in adata.X in a row-wise fashion, using `tl.aggregate_X`
+=========================================================================
+
+Aggregate values in adata.X in a row-wise fashion.
+
+Aggregate values in adata.X in a row-wise fashion. This is used to compute RPKM or RPM values stratified by user-provided groupings.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_X.html>`__
+
+Aggregate cells into pseudo-cells, using `tl.aggregate_cells`
+=============================================================
+
+Aggregate cells into pseudo-cells.
+
+Aggregate cells into pseudo-cells by iterative clustering.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_cells.html>`__
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu May 16 13:15:57 2024 +0000
@@ -0,0 +1,187 @@
+<macros>
+	<token name="@TOOL_VERSION@">2.5.3</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">23.0</token>
+    <xml name="requirements">
+        <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement>
+        <requirement type="package" version="5.18.0">plotly</requirement>
+        <requirement type="package" version="0.2.1">python-kaleido</requirement>
+        <requirement type="package" version="0.19.19">polars</requirement>
+        <requirement type="package" version="14.0.1">pyarrow</requirement>
+        <requirement type="package" version="0.11.3">python-igraph</requirement>
+        <requirement type="package" version="0.8.33">hdbscan</requirement>
+        <requirement type="package" version="0.0.9">harmonypy</requirement>
+        <requirement type="package" version="1.7.4">scanorama</requirement>
+        <yield />
+    </xml>
+
+    <token name="@PREP_ADATA@"><![CDATA[
+        cp '$method.adata' 'anndata.h5ad' &&
+        ]]>
+    </token>
+
+    <token name="@CMD@"><![CDATA[
+        cat '$script_file' > '$hidden_output' &&
+        python '$script_file' >> '$hidden_output' &&
+		touch 'anndata_info.txt' &&
+		cat 'anndata_info.txt' @CMD_prettify_stdout@
+        ]]>
+    </token>
+
+    <token name="@CMD_prettify_stdout@"><![CDATA[ | sed -r '1 s|AnnData object with (.+) = (.*)\s*|\1: \2|g' | sed "s|'||g"  | sed -r 's|^\s*(.*):\s(.*)|[\1]\n-    \2|g' | sed 's|, |\n-    |g'
+    ]]></token>
+
+    <token name="@CMD_imports@"><![CDATA[
+import snapatac2 as sa
+import os
+    ]]>
+    </token>
+    <xml name="sanitize_query" token_validinitial="string.printable">
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="&apos;" />
+            </valid>
+        </sanitizer>
+    </xml>
+
+    <xml name="inputs_anndata">
+        <param name="adata" type="data" format="h5ad" label="Annotated data matrix"/>
+    </xml>
+
+    <token name="@CMD_read_inputs@"><![CDATA[
+
+adata = sa.read('anndata.h5ad', backed = None)
+]]>
+    </token>
+
+    <xml name="dimentions_plot">
+        <param argument="width" type="integer" value="500" label="Width of the plot"/>
+		<param argument="height" type="integer" value="400" label="Height of the plot"/>
+    </xml>
+
+    <xml name="param_groupby">
+        <param argument="groupby" type="text" label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+    </xml>
+
+    <xml name="out_file">
+        <param name="out_file" type="select" optional="true" label="Type of output file">
+            <option value="png" selected="true">PNG</option>
+            <option value="svg">SVG</option>
+            <option value="pdf">PDF</option>
+		</param>
+    </xml>
+    <token name="@CMD_anndata_write_outputs@"><![CDATA[
+adata.write('anndata.h5ad')
+with open('anndata_info.txt','w', encoding='utf-8') as ainfo:
+    print(adata, file=ainfo)
+]]>
+    </token>
+    <xml name="inputs_common_advanced">
+        <section name="advanced_common" title="Advanced Options" expanded="false">
+            <param name="show_log" type="boolean" checked="false" label="Output Log?" />
+        </section>
+    </xml>
+    <xml name="params_render_plot">
+        <param argument="width" type="integer" value="600" label="Width of the plot"/>
+        <param argument="height" type="integer" value="400" label="Height of the plot"/>
+        <expand macro="out_file"/>
+    </xml>
+    <xml name="param_shift">
+    	<param argument="shift_left" type="integer" value="4" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/>
+    	<param argument="shift_right" type="integer" value="-5" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/>
+    </xml>
+    <xml name="param_chunk_size" tokens="size">
+    	<param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/>
+    </xml>
+	<xml name="min_max_frag_size">
+		<param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/>
+		<param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/>
+	</xml>
+	<xml name="params_data_integration">
+		<param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
+		<param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation">
+			<expand macro="sanitize_query"/>
+		</param>
+		<param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+		<param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/>
+	</xml>
+    <xml name="param_n_comps">
+s        <param argument="n_comps" type="integer" value="30" label="Number of dimensions to keep" help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30."/>
+    </xml>
+    <xml name="param_random_state">
+        <param argument="random_state" type="integer" value="0" label="Seed of the random state generator"/>
+    </xml>
+    <xml name="param_key_added" tokens="key_added">
+        <param argument="key_added" type="text" value="@KEY_ADDED@"  label="`adata.obs` key under which t add cluster labels"/>
+    </xml>
+    <xml name="param_use_rep">
+        <param argument="use_rep" type="text" value="X_spectral" label="Use the indicated representation in `.obsm`"/>
+    </xml>
+    <xml name="genome_fasta">
+        <param argument="genome_fasta" type="text" label="A fasta file containing the genome sequences or a Genome object"/>
+    </xml>
+    <xml name="background">
+        <param argument="background" type="text" optional="true" value="" label="A list of regions to be used as the background">
+			<expand macro="sanitize_query"/>
+		</param>
+    </xml>
+    <xml name="mat">
+        <param argument="peak_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by peak count matrix"/>
+		<param argument="gene_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by gene count matrix"/>
+    </xml>
+    <xml name="param_network">
+        <param argument="network" type="text" label="network"/>
+    </xml>
+    <xml name="param_n_iterations">
+        <param argument="n_iterations" type="integer" value="-1" label="How many iterations of the Leiden clustering algorithm to perform"
+            help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-023-02139-9</citation>
+        </citations>
+    </xml>
+    <xml name="render_plot_test">
+    	<param name="width" value="650"/>
+        <param name="height" value="450"/>
+    </xml>
+    <xml name="render_plot_matching_text">
+    	<has_text_matching expression="width = 650"/>
+        <has_text_matching expression="height = 450"/>
+    </xml>
+    <xml name="param_counting_strategy">
+        <param argument="counting_strategy" type="select" label="he strategy to compute feature counts">
+            <option value="fragment">fragment</option>
+            <option value="insertion" selected="true">insertion</option>
+            <option value="paired-insertion">paired-insertion</option>
+        </param>
+    </xml>
+
+    <token name="@CMD_params_data_integration@"><![CDATA[
+use_rep = '$method.use_rep',
+#if $method.use_dims != ''
+#set $dims = ([x.strip() for x in str($method.use_dims).split(',')])
+use_dims=$dims,
+#end if
+#if $method.groupby != ''
+#set $groupby = ([x.strip() for x in str($method.groupby).split(',')])
+groupby=$groupby,
+#end if
+#if $method.key_added != ''
+key_added = '$method.key_added',
+#end if
+    ]]>
+    </token>
+
+    <token name="@CMD_params_render_plot@"><![CDATA[
+    width = $method.width,
+    height = $method.height,
+    out_file = 'plot.$method.out_file',
+    ]]>
+    </token>
+</macros>