Mercurial > repos > iuc > snapatac2_clustering

--- a/dimension_reduction_clustering.xml	Thu Nov 07 13:07:58 2024 +0000
+++ b/dimension_reduction_clustering.xml	Tue Nov 25 16:40:54 2025 +0000
@@ -9,21 +9,49 @@
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
 export NUMBA_CACHE_DIR="\${TEMP:-/tmp}";
-@PREP_ADATA@
+#if $method.method == 'tl.multi_spectral'
+    #for $i in range(len($method.adata))
+        cp $method.adata[$i] 'adata_${i}.h5ad' &&
+    #end for
+#else
+    @CMD_PREP_ADATA@
+#end if
 @CMD@
     ]]></command>
     <configfiles>
         <configfile name="script_file"><![CDATA[

-@CMD_imports@
-@CMD_read_inputs@
+@CONF_IMPORTS@
+#if $method.method == 'tl.multi_spectral'
+## read the staged 'adata_*.h5ad' copies from the working directory (sorted by file name)
+import glob
+files = sorted(glob.glob('adata_*.h5ad'))
+
+adata_list = []
+for fn in files:
+    ad = snap.read(fn, backed=None)
+    adata_list.append(ad)
+#else
+@CONF_READ_INPUTS@
+#end if

 #if $method.method == 'tl.spectral'
     #if $method.features
 with open('$method.features') as f:
     features_mask = [x.lower().capitalize() == "True" for x in f.read().splitlines()]
     #end if
-sa.tl.spectral(
+
+## Somewhere in the SnapATAC2 code, a pandas Series is passed where a numpy array is expected,
+## and .nonzero() is then called on it; recent pandas releases no longer provide Series.nonzero().
+## Workaround: add the nonzero method back to pandas Series.
+import pandas as pd
+def series_nonzero(self):
+    return (self != 0).values.nonzero()
+
+pd.Series.nonzero = series_nonzero
+
+
+snap.tl.spectral(
     adata,
     n_comps = $method.n_comps,
     #if $method.features
@@ -39,8 +67,31 @@
     inplace = True
 )

+#else if $method.method == 'tl.multi_spectral'
+    #if $method.features
+with open('$method.features') as f:
+    features_mask = [x.lower().capitalize() == "True" for x in f.read().splitlines()]
+    #end if
+
+embedding = snap.tl.multi_spectral(
+    adatas = adata_list,
+    n_comps = $method.n_comps,
+    #if $method.features
+    features = features_mask,
+    #else
+    features = None,
+    #end if
+    weights = None,  # Will enable if requested by users
+    random_state = $method.random_state,
+    weighted_by_sd = $method.weighted_by_sd,
+)
+
+adata = adata_list[0].copy()
+adata.uns['spectral_eigenvalue_joint'] = embedding[0]
+adata.obsm['X_joint'] = embedding[1]
+
 #else if $method.method == 'tl.umap'
-sa.tl.umap(
+snap.tl.umap(
     adata,
     n_comps = $method.n_comps,
     #if $method.use_dims != ''
@@ -54,7 +105,7 @@
 )

 #else if $method.method == 'pp.knn'
-sa.pp.knn(
+snap.pp.knn(
     adata,
     n_neighbors = $method.n_neighbors,
     #if $method.use_dims != ''
@@ -67,18 +118,43 @@
     random_state = $method.random_state
 )

+#else if $method.method == 'tl.leiden'
+snap.tl.leiden(
+    adata,
+    resolution = $method.resolution,
+    objective_function = '$method.objective_function',
+    min_cluster_size = $method.min_cluster_size,
+    n_iterations = $method.n_iterations,
+    random_state = $method.random_state,
+    key_added = '$method.key_added',
+    use_leidenalg = $method.use_leidenalg,
+    weighted = $method.weighted,
+    inplace = True
+)
+
+#else if $method.method == 'tl.kmeans'
+snap.tl.kmeans(
+    adata,
+    n_clusters = $method.n_clusters,
+    n_iterations = $method.n_iterations,
+    random_state = $method.random_state,
+    use_rep = '$method.use_rep',
+    key_added = '$method.key_added'
+)
+
 #else if $method.method == 'tl.dbscan'
-sa.tl.dbscan(
+snap.tl.dbscan(
     adata,
     eps = $method.eps,
     min_samples = $method.min_samples,
     leaf_size = $method.leaf_size,
     use_rep = '$method.use_rep',
-    key_added = '$method.key_added'
+    key_added = '$method.key_added',
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
 )

 #else if $method.method == 'tl.hdbscan'
-sa.tl.hdbscan(
+snap.tl.hdbscan(
     adata,
     min_cluster_size = $method.min_cluster_size,
     #if $method.min_samples
@@ -92,43 +168,18 @@
     key_added = '$method.key_added'
 )

-#else if $method.method == 'tl.leiden'
-sa.tl.leiden(
-    adata,
-    resolution = $method.resolution,
-    objective_function = '$method.objective_function',
-    #if $method.objective_function == 'RBConfiguration'
-    use_leidenalg = True,
-    #end if
-    min_cluster_size = $method.min_cluster_size,
-    n_iterations = $method.n_iterations,
-    random_state = $method.random_state,
-    key_added = '$method.key_added',
-    weighted = $method.weighted,
-    inplace = True
-)
-
-#else if $method.method == 'tl.kmeans'
-sa.tl.kmeans(
-    adata,
-    n_clusters = $method.n_clusters,
-    n_iterations = $method.n_iterations,
-    random_state = $method.random_state,
-    use_rep = '$method.use_rep',
-    key_added = '$method.key_added'
-)
-
-#else if $method.method == 'tl.aggregate_X'
-sa.tl.aggregate_X(
-    adata,
-    #if $method.groupby != ''
-    groupby = '$method.groupby',
-    #end if
-    normalize = '$method.normalize'
-)
+## It is implemented in the select_feature function, and it is problematic if the user doesn't select a groupby (it will return an array). I think this can be skipped unless needed.
+## #else if $method.method == 'tl.aggregate_X'
+## snap.tl.aggregate_X(
+##     adata,
+##     #if $method.groupby != ''
+##     groupby = '$method.groupby',
+##     #end if
+##     normalize = '$method.normalize'
+## )

 #else if $method.method == 'tl.aggregate_cells'
-sa.tl.aggregate_cells(
+snap.tl.aggregate_cells(
     adata,
     use_rep = '$method.use_rep',
     #if $method.target_num_cells
@@ -141,26 +192,28 @@
 )
 #end if

-@CMD_anndata_write_outputs@
+@CONF_ANNDATA_WRITE_OUTPUTS@
     ]]></configfile>
     </configfiles>
     <inputs>
         <conditional name="method">
             <param name="method" type="select" label="Dimension reduction and Clustering">
                 <option value="tl.spectral">Perform dimension reduction using Laplacian Eigenmap, using 'tl.spectral'</option>
+                <option value="tl.multi_spectral">Perform dimension reduction on multiple modalities, similar to 'tl.spectral', using 'tl.multi_spectral'</option>
                 <option value="tl.umap">Compute Umap, using 'tl.umap'</option>
                 <option value="pp.knn">Compute a neighborhood graph of observations, using 'pp.knn'</option>
                 <option value="tl.leiden">Cluster cells into subgroups, using 'tl.leiden'</option>
                 <option value="tl.kmeans">Cluster cells into subgroups using the K-means algorithm, using 'tl.kmeans'</option>
                 <option value="tl.dbscan">Cluster cells into subgroups using the DBSCAN algorithm, using 'tl.dbscan'</option>
                 <option value="tl.hdbscan">Cluster cells into subgroups using the HDBSCAN algorithm, using 'tl.hdbscan'</option>
-                <option value="tl.aggregate_X">Aggregate values in adata.X in a row-wise fashion, using 'tl.aggregate_X'</option>
+                <!-- It is implemented in the select_feature function of the preprocessing.xml tool (in the upstream code, not in the XML). It is problematic if the user doesn't select a groupby (it will return an array). I think this can be skipped unless needed. -->
+                <!-- <option value="tl.aggregate_X">Aggregate values in adata.X in a row-wise fashion, using 'tl.aggregate_X'</option> -->
                 <option value="tl.aggregate_cells">Aggregate cells into pseudo-cells, using 'tl.aggregate_cells'</option>
             </param>
             <when value="tl.spectral">
-                <expand macro="inputs_anndata"/>
+                <expand macro="param_inputs_anndata"/>
                 <expand macro="param_n_comps"/>
-                <param argument="features" type="data" format="txt" optional="true" label="Text file indicating features to keep. Each line contains only word (True/False)." help="True means that the feature is kept. False means the feature is removed"/>
+                <param argument="features" type="data" format="txt,tabular" optional="true" label="Text file indicating features to keep. Each line contains only one word (True/False)." help="True means that the feature is kept. False means the feature is removed"/>
                 <expand macro="param_random_state"/>
                 <param argument="sample_size" type="float" min="0" max="1" optional="true" label="Approximate the embedding using the Nystrom algorithm by selecting a subset of cells" help="Using this only when the number of cells is too large, e.g. &gt; 10,000,000, or the `distance_metric` is “jaccard”"/>
                 <param argument="chunk_size" type="integer" value="20000" label="chunk size"/>
@@ -170,9 +223,18 @@
                 </param>
                 <param argument="weighted_by_sd" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to weight the result eigenvectors by the square root of eigenvalues"/>
             </when>
+            <when value="tl.multi_spectral">
+                <expand macro="param_inputs_anndata" multiple="true" label="list of Anndatas to use for multi_spectral" help="Please note that the embedding will be saved in the first Anndata"/>
+                <!-- Will enable if requested by users -->
+                <!-- <param name="weights" type="data" format="tabular" optional="true" label="Weights" help="Text file indicating weights for each modality. Each line contains only floats.If not provided, all modalities are weighted equally" /> -->
+                <expand macro="param_n_comps"/>
+                <param argument="features" type="data" format="txt,tabular" optional="true" label="Text file indicating features to keep. Each line contains only one word (True/False)." help="True means that the feature is kept. False means the feature is removed"/>
+                <expand macro="param_random_state"/>
+                <param argument="weighted_by_sd" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to weight the result eigenvectors by the square root of eigenvalues"/>
+            </when>
             <when value="tl.umap">
-                <expand macro="inputs_anndata"/>
-                <param argument="n_comps" type="integer" value="2" label="Number of dimensions of embedding"/>
+                <expand macro="param_inputs_anndata"/>
+                <expand macro="param_n_comps" value="2" label="Number of components" help=""/>
                 <param argument="use_dims" type="text" optional="true" label="Use these dimensions in `use_rep`" help="comma separated list of dimensions">
                     <expand macro="sanitize_query"/>
                 </param>
@@ -181,21 +243,21 @@
                 <expand macro="param_random_state"/>
             </when>
             <when value="pp.knn">
-                <expand macro="inputs_anndata"/>
+                <expand macro="param_inputs_anndata"/>
                 <param argument="n_neighbors" type="integer" value="50" label="The number of nearest neighbors to be searched"/>
                 <param argument="use_dims" type="text" value="" optional="true" label="The dimensions used for computation">
                     <expand macro="sanitize_query"/>
                 </param>
-                <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
+                <expand macro="param_use_rep" label="The key for the matrix"/>
                 <param argument="algorithm" type="select" label="Choose method">
                     <option value="kdtree" selected="true">'kdtree': use the kdtree algorithm to find the nearest neighbors</option>
                     <option value="hora">'hora': use the HNSW algorithm to find the approximate nearest neighbors</option>
                     <option value="pynndescent">'pynndescent': use the pynndescent algorithm to find the approximate nearest neighbors</option>
                 </param>
-                <param argument="random_state" type="integer" value="0" label="Random seed for approximate nearest neighbor search"/>
+                <expand macro="param_random_state" label="Random seed for approximate nearest neighbor search" help="Note that this is only used when method='pynndescent'. Currently hora does not support random seed, so the result of hora is not reproducible."/>
             </when>
             <when value="tl.leiden">
-                <expand macro="inputs_anndata"/>
+                <expand macro="param_inputs_anndata"/>
                 <param argument="resolution" type="float" value="1" label="Parameter value controlling the coarseness of the clustering" help="Higher values lead to more clusters"/>
                 <param argument="objective_function" type="select" label="Whether to use the Constant Potts Model (CPM) or modularity">
                     <option value="CPM">CPM</option>
@@ -206,10 +268,11 @@
                 <expand macro="param_n_iterations"/>
                 <expand macro="param_random_state"/>
                 <expand macro="param_key_added" key_added="leiden"/>
+                <param argument="use_leidenalg" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Whether to use the leidenalg package for Leiden clustering"/>
                 <param argument="weighted" type="boolean" truevalue="True" falsevalue="False" label="Whether to use the edge weights in the graph"/>
             </when>
             <when value="tl.kmeans">
-                <expand macro="inputs_anndata"/>
+                <expand macro="param_inputs_anndata"/>
                 <param argument="n_clusters" type="integer" value="5" label="Number of clusters to return"/>
                 <expand macro="param_n_iterations"/>
                 <expand macro="param_random_state"/>
@@ -217,7 +280,7 @@
                 <expand macro="param_key_added" key_added="kmeans"/>
             </when>
             <when value="tl.dbscan">
-                <expand macro="inputs_anndata"/>
+                <expand macro="param_inputs_anndata"/>
                 <param argument="eps" type="float" value="0.5" label=" The maximum distance between two samples for one to be considered as in the neighborhood of the other" help="This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function."/>
                 <param argument="min_samples" type="integer" value="5" label="The number of samples (or total weight) in a neighborhood for a point to be considered as a core point."/>
                 <param argument="leaf_size" type="integer" value="30" label="Leaf size passed to BallTree or cKDTree" help="This can affect the speed of the construction and query, as well as the memory required to store the tree."/>
@@ -225,9 +288,9 @@
                 <expand macro="param_key_added" key_added="dbscan"/>
             </when>
             <when value="tl.hdbscan">
-                <expand macro="inputs_anndata"/>
+                <expand macro="param_inputs_anndata"/>
                 <param argument="min_cluster_size" type="integer" value="5" label="The minimum size of clusters"/>
-                <param argument="min_samples" type="integer" value="" optional="true" label="The number of samples in a neighbourhood for a point to be considered a core point"/>
+                <param argument="min_samples" type="integer" value="" optional="true" label="The number of samples in a neighborhood for a point to be considered a core point"/>
                 <param argument="cluster_selection_epsilon" type="float" value="0.0" label="A distance threshold. Clusters below this value will be merged"/>
                 <param argument="alpha" type="float" value="1.0" label="A distance scaling parameter as used in robust single linkage"/>
                 <param argument="cluster_selection_method" type="select" label="The method used to select clusters from the condensed tree">
@@ -238,16 +301,16 @@
                 <expand macro="param_use_rep"/>
                 <expand macro="param_key_added" key_added="hdbscan"/>
             </when>
-            <when value="tl.aggregate_X">
-                <expand macro="inputs_anndata"/>
+            <!-- <when value="tl.aggregate_X">
+                <expand macro="param_inputs_anndata"/>
                 <expand macro="param_groupby"/>
                 <param argument="normalize" type="select" optional="true" label="normalization method">
                     <option value="RPM">RPM</option>
                     <option value="RPKM">RPKM</option>
                 </param>
-            </when>
+            </when> -->
             <when value="tl.aggregate_cells">
-                <expand macro="inputs_anndata"/>
+                <expand macro="param_inputs_anndata"/>
                 <expand macro="param_use_rep"/>
                 <param argument="target_num_cells" type="integer" value="" optional="true" label="target_num_cells" help="If None, `target_num_cells = num_cells / min_cluster_size`"/>
                 <param argument="min_cluster_size" type="integer" value="50" label="The minimum size of clusters"/>
@@ -255,23 +318,20 @@
                 <expand macro="param_key_added" key_added="pseudo_cell"/>
             </when>
         </conditional>
-        <expand macro="inputs_common_advanced"/>
+        <expand macro="param_common_advanced"/>
     </inputs>
     <outputs>
-        <data name="anndata_out" format="h5ad" from_work_dir="anndata.h5ad" label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix"/>
+        <data name="anndata_out" format="h5ad" from_work_dir="anndata.h5ad.gz" label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix"/>
         <data name="hidden_output" format="txt" label="Log file">
             <filter>advanced_common['show_log']</filter>
         </data>
-        <data name="diff_peaks" format="tabular" from_work_dir="differential_peaks.tsv" label="${tool.name} on ${on_string}: Differential peaks">
-            <filter>method['method'] and 'tl.diff_test' in method['method']</filter>
-        </data>
     </outputs>
     <tests>
         <test expect_num_outputs="2">
             <!-- tl.spectral -->
             <conditional name="method">
                 <param name="method" value="tl.spectral"/>
-                <param name="adata" location="https://zenodo.org/records/11260316/files/pp.select_features.pbmc_500_chr21.h5ad"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/pp.select_features.pbmc_500_chr21.h5ad"/>
                 <param name="n_comps" value="30"/>
                 <param name="random_state" value="0"/>
                 <param name="chunk_size" value="20000"/>
@@ -283,7 +343,7 @@
             </section>
             <output name="hidden_output">
                 <assert_contents>
-                    <has_text_matching expression="sa.tl.spectral"/>
+                    <has_text_matching expression="snap.tl.spectral"/>
                     <has_text_matching expression="random_state = 0"/>
                     <has_text_matching expression="n_comps = 30"/>
                     <has_text_matching expression="chunk_size = 20000"/>
@@ -291,13 +351,46 @@
                     <has_text_matching expression="weighted_by_sd = True"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="uns/spectral_eigenvalue"/>
+                    <has_h5_keys keys="obsm/X_spectral"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.multi_spectral -->
+            <conditional name="method">
+                <param name="method" value="tl.multi_spectral"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_RNA.h5ad.gz,https://zenodo.org/records/17512085/files/snap_datasets_pbmc10k_multiome_ATAC.h5ad.gz"/>
+                <param name="n_comps" value="30"/>
+                <param name="random_state" value="0"/>
+                <param name="weighted_by_sd" value="True"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="snap.tl.multi_spectral"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="n_comps = 30"/>
+                    <has_text_matching expression="weighted_by_sd = True"/>
+                    <has_text_matching expression="features = None"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="uns/spectral_eigenvalue_joint"/>
+                    <has_h5_keys keys="obsm/X_joint"/>
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="2">
             <!-- tl.umap -->
             <conditional name="method">
                 <param name="method" value="tl.umap"/>
-                <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                 <param name="n_comps" value="2"/>
                 <param name="use_rep" value="X_spectral"/>
                 <param name="key_added" value="umap"/>
@@ -308,24 +401,27 @@
             </section>
             <output name="hidden_output">
                 <assert_contents>
-                    <has_text_matching expression="sa.tl.umap"/>
+                    <has_text_matching expression="snap.tl.umap"/>
                     <has_text_matching expression="n_comps = 2"/>
                     <has_text_matching expression="use_rep = 'X_spectral'"/>
                     <has_text_matching expression="key_added = 'umap'"/>
                     <has_text_matching expression="random_state = 0"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.umap.pbmc_500_chr21.h5ad"/>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obsm/X_umap"/>
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="2">
             <!-- pp.knn -->
             <conditional name="method">
                 <param name="method" value="pp.knn"/>
-                <param name="adata" location="https://zenodo.org/records/11260316/files/tl.umap.pbmc_500_chr21.h5ad"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.umap.pbmc_500_chr21.h5ad"/>
                 <param name="n_neighbors" value="50"/>
                 <param name="use_rep" value="X_spectral"/>
-                <param name="method_" value="kdtree"/>
-                <param name="inplace" value="True"/>
+                <param name="algorithm" value="kdtree"/>
                 <param name="random_state" value="0"/>
             </conditional>
             <section name="advanced_common">
@@ -333,7 +429,7 @@
             </section>
             <output name="hidden_output">
                 <assert_contents>
-                    <has_text_matching expression="sa.pp.knn"/>
+                    <has_text_matching expression="snap.pp.knn"/>
                     <has_text_matching expression="n_neighbors = 50"/>
                     <has_text_matching expression="use_rep = 'X_spectral'"/>
                     <has_text_matching expression="method = 'kdtree'"/>
@@ -341,13 +437,17 @@
                     <has_text_matching expression="random_state = 0"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.knn.pbmc_500_chr21.h5ad"/>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obsp/distances"/>
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="2">
             <!-- tl.leiden -->
             <conditional name="method">
                 <param name="method" value="tl.leiden"/>
-                <param name="adata" location="https://zenodo.org/records/11260316/files/pp.knn.pbmc_500_chr21.h5ad"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/pp.knn.pbmc_500_chr21.h5ad"/>
                 <param name="resolution" value="2"/>
                 <param name="objective_function" value="modularity"/>
                 <param name="min_cluster_size" value="3"/>
@@ -361,7 +461,7 @@
             </section>
             <output name="hidden_output">
                 <assert_contents>
-                    <has_text_matching expression="sa.tl.leiden"/>
+                    <has_text_matching expression="snap.tl.leiden"/>
                     <has_text_matching expression="resolution = 2"/>
                     <has_text_matching expression="objective_function = 'modularity'"/>
                     <has_text_matching expression="min_cluster_size = 3"/>
@@ -371,13 +471,17 @@
                     <has_text_matching expression="weighted = False"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.leiden.modularity.pbmc_500_chr21.h5ad"/>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/leiden"/>
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="2">
             <!-- tl.leiden -->
             <conditional name="method">
                 <param name="method" value="tl.leiden"/>
-                <param name="adata" location="https://zenodo.org/records/11260316/files/pp.knn.pbmc_500_chr21.h5ad"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/pp.knn.pbmc_500_chr21.h5ad"/>
                 <param name="resolution" value="2"/>
                 <param name="objective_function" value="RBConfiguration"/>
                 <param name="min_cluster_size" value="3"/>
@@ -391,7 +495,7 @@
             </section>
             <output name="hidden_output">
                 <assert_contents>
-                    <has_text_matching expression="sa.tl.leiden"/>
+                    <has_text_matching expression="snap.tl.leiden"/>
                     <has_text_matching expression="resolution = 2"/>
                     <has_text_matching expression="objective_function = 'RBConfiguration'"/>
                     <has_text_matching expression="min_cluster_size = 3"/>
@@ -401,13 +505,17 @@
                     <has_text_matching expression="weighted = False"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.leiden.RBConfiguration.pbmc_500_chr21.h5ad"/>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/leiden"/>
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="2">
             <!-- tl.kmeans -->
             <conditional name="method">
                 <param name="method" value="tl.kmeans"/>
-                <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                 <param name="n_iterations" value="-1"/>
                 <param name="random_state" value="0"/>
                 <param name="use_rep" value="X_spectral"/>
@@ -418,20 +526,24 @@
             </section>
             <output name="hidden_output">
                 <assert_contents>
-                    <has_text_matching expression="sa.tl.kmeans"/>
+                    <has_text_matching expression="snap.tl.kmeans"/>
                     <has_text_matching expression="n_iterations = -1"/>
                     <has_text_matching expression="random_state = 0"/>
                     <has_text_matching expression="use_rep = 'X_spectral'"/>
                     <has_text_matching expression="key_added = 'kmeans'"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.kmeans.pbmc_500_chr21.h5ad"/>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/kmeans"/>
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="2">
             <!-- tl.dbscan -->
             <conditional name="method">
                 <param name="method" value="tl.dbscan"/>
-                <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                 <param name="eps" value="0.5"/>
                 <param name="min_samples" value="3"/>
                 <param name="leaf_size" value="5"/>
@@ -443,7 +555,7 @@
             </section>
             <output name="hidden_output">
                 <assert_contents>
-                    <has_text_matching expression="sa.tl.dbscan"/>
+                    <has_text_matching expression="snap.tl.dbscan"/>
                     <has_text_matching expression="eps = 0.5"/>
                     <has_text_matching expression="min_samples = 3"/>
                     <has_text_matching expression="leaf_size = 5"/>
@@ -451,13 +563,17 @@
                     <has_text_matching expression="key_added = 'dbscan'"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.dbscan.pbmc_500_chr21.h5ad"/>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/dbscan"/>
+                </assert_contents>
+            </output>
         </test>
         <test expect_num_outputs="2">
             <!-- tl.hdbscan -->
             <conditional name="method">
                 <param name="method" value="tl.hdbscan"/>
-                <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                 <param name="min_cluster_size" value="3"/>
                 <param name="min_samples" value="3"/>
                 <param name="cluster_selection_method" value="eom"/>
@@ -470,7 +586,7 @@
             </section>
             <output name="hidden_output">
                 <assert_contents>
-                    <has_text_matching expression="sa.tl.hdbscan"/>
+                    <has_text_matching expression="snap.tl.hdbscan"/>
                     <has_text_matching expression="min_cluster_size = 3"/>
                     <has_text_matching expression="min_samples = 3"/>
                     <has_text_matching expression="cluster_selection_method = 'eom'"/>
@@ -479,13 +595,17 @@
                     <has_text_matching expression="key_added = 'hdbscan'"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.hdbscan.pbmc_500_chr21.h5ad"/>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/hdbscan"/>
+                </assert_contents>
+            </output>
         </test>
-        <test expect_num_outputs="2">
-            <!-- tl.aggregate_X -->
+        <!-- <test expect_num_outputs="2">
+            tl.aggregate_X
             <conditional name="method">
                 <param name="method" value="tl.aggregate_X"/>
-                <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                 <param name="normalize" value="RPKM"/>
             </conditional>
             <section name="advanced_common">
@@ -493,17 +613,21 @@
             </section>
             <output name="hidden_output">
                 <assert_contents>
-                    <has_text_matching expression="sa.tl.aggregate_X"/>
+                    <has_text_matching expression="snap.tl.aggregate_X"/>
                     <has_text_matching expression="normalize = 'RPKM'"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.aggregate_X.pbmc_500_chr21.h5ad"/>
-        </test>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/n_fragment"/>
+                </assert_contents>
+            </output>
+        </test> -->
         <test expect_num_outputs="2">
             <!-- tl.aggregate_cells -->
             <conditional name="method">
                 <param name="method" value="tl.aggregate_cells"/>
-                <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="adata" location="https://zenodo.org/records/17512085/files/tl.spectral.pbmc_500_chr21.h5ad"/>
                 <param name="use_rep" value="X_spectral"/>
                 <param name="target_num_cells" value="5"/>
                 <param name="min_cluster_size" value="3"/>
@@ -515,7 +639,7 @@
             </section>
             <output name="hidden_output">
                 <assert_contents>
-                    <has_text_matching expression="sa.tl.aggregate_cells"/>
+                    <has_text_matching expression="snap.tl.aggregate_cells"/>
                     <has_text_matching expression="use_rep = 'X_spectral'"/>
                     <has_text_matching expression="target_num_cells = 5"/>
                     <has_text_matching expression="min_cluster_size = 3"/>
@@ -523,7 +647,11 @@
                     <has_text_matching expression="key_added = 'pseudo_cell'"/>
                 </assert_contents>
             </output>
-            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.aggregate_cells.pbmc_500_chr21.h5ad"/>
+            <output name="anndata_out" ftype="h5ad">
+                <assert_contents>
+                    <has_h5_keys keys="obs/pseudo_cell"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
@@ -535,7 +663,15 @@
 Convert the cell-by-feature count matrix into lower dimensional representations using the spectrum of the normalized graph Laplacian defined by pairwise similarity between cells. This function utilizes the matrix-free spectral embedding algorithm to compute the embedding when `distance_metric` is “cosine”, which scales linearly with the number of cells. For other types of similarity metrics, the time and space complexity scale quadratically with the number of cells.

 More details on the `SnapATAC2 documentation
-<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.spectral.html>`__
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.spectral.html>`__
+
+Compute Laplacian Eigenmaps simultaneously on multiple modalities, with linear space and time complexity, using `tl.multi_spectral`
+===================================================================================================================================
+
+This is similar to `spectral`, but it can work on multiple modalities.
+
+More details on the `SnapATAC2 documentation
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.multi_spectral.html>`__

 Compute Umap, using `tl.umap`
 =============================
@@ -543,7 +679,7 @@
 Compute Umap

 More details on the `SnapATAC2 documentation
-<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.umap.html>`__
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.umap.html>`__

 Compute a neighborhood graph of observations, using `pp.knn`
 ============================================================
@@ -553,7 +689,7 @@
 Computes a neighborhood graph of observations stored in adata using the method specified by method. The distance metric used is Euclidean.

 More details on the `SnapATAC2 documentation
-<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.knn.html>`__
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.pp.knn.html>`__

 Cluster cells into subgroups, using `tl.leiden`
 ===============================================
@@ -563,7 +699,7 @@
 Cluster cells using the Leiden algorithm, an improved version of the Louvain algorithm. It has been proposed for single-cell analysis by Levine et al. (2015). This requires having run `knn`.

 More details on the `SnapATAC2 documentation
-<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.leiden.html>`__
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.leiden.html>`__

 Cluster cells into subgroups using the K-means algorithm, using `tl.kmeans`
 ===========================================================================
@@ -571,7 +707,7 @@
 Cluster cells into subgroups using the K-means algorithm, a classical algorithm in data mining.

 More details on the `SnapATAC2 documentation
-<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.kmeans.html>`__
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.kmeans.html>`__

 Cluster cells into subgroups using the DBSCAN algorithm, using `tl.dbscan`
 ==========================================================================
@@ -579,7 +715,7 @@
 Cluster cells into subgroups using the DBSCAN algorithm.

 More details on the `SnapATAC2 documentation
-<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.dbscan.html>`__
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.dbscan.html>`__

 Cluster cells into subgroups using the HDBSCAN algorithm, using `tl.hdbscan`
 ============================================================================
@@ -587,17 +723,17 @@
 Cluster cells into subgroups using the HDBSCAN algorithm.

 More details on the `SnapATAC2 documentation
-<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.hdbscan.html>`__
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.hdbscan.html>`__

-Aggregate values in adata.X in a row-wise fashion, using `tl.aggregate_X`
-=========================================================================
+.. Aggregate values in adata.X in a row-wise fashion, using `tl.aggregate_X`
+.. =========================================================================

-Aggregate values in adata.X in a row-wise fashion.
+.. Aggregate values in adata.X in a row-wise fashion.

-Aggregate values in adata.X in a row-wise fashion. This is used to compute RPKM or RPM values stratified by user-provided groupings.
+.. Aggregate values in adata.X in a row-wise fashion. This is used to compute RPKM or RPM values stratified by user-provided groupings.

-More details on the `SnapATAC2 documentation
-<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_X.html>`__
+.. More details on the `SnapATAC2 documentation
+.. <https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_X.html>`__

 Aggregate cells into pseudo-cells, using `tl.aggregate_cells`
 =============================================================
@@ -607,7 +743,7 @@
 Aggregate cells into pseudo-cells by iterative clustering.

 More details on the `SnapATAC2 documentation
-<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_cells.html>`__
+<https://scverse.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_cells.html>`__
     ]]></help>
     <expand macro="citations"/>
 </tool>
--- a/macros.xml	Thu Nov 07 13:07:58 2024 +0000
+++ b/macros.xml	Tue Nov 25 16:40:54 2025 +0000
@@ -1,7 +1,7 @@
 <macros>
-    <token name="@TOOL_VERSION@">2.6.4</token>
-    <token name="@VERSION_SUFFIX@">1</token>
-    <token name="@PROFILE@">23.0</token>
+    <token name="@TOOL_VERSION@">2.8.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">24.0</token>
     <xml name="xrefs">
         <xrefs>
             <xref type="bio.tools">snapatac</xref>
@@ -9,168 +9,74 @@
     </xml>
     <xml name="requirements">
         <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement>
-        <requirement type="package" version="5.22.0">plotly</requirement>
-        <requirement type="package" version="0.2.1">python-kaleido</requirement>
-        <requirement type="package" version="1.1.0">polars</requirement>
-        <requirement type="package" version="16.1.0">pyarrow</requirement>
-        <requirement type="package" version="0.11.6">python-igraph</requirement>
-        <requirement type="package" version="0.8.37">hdbscan</requirement>
-        <requirement type="package" version="0.0.10">harmonypy</requirement>
-        <requirement type="package" version="1.7.4">scanorama</requirement>
-        <requirement type="package" version="3.0.1">macs3</requirement>
-        <requirement type="package" version="0.70.16">multiprocess</requirement>
-        <requirement type="package" version="0.10.2">leidenalg</requirement>
+            <requirement type="package" version="0.8.37">hdbscan</requirement>
+            <requirement type="package" version="0.10.2">leidenalg</requirement>
+            <requirement type="package" version="0.5.7">umap-learn</requirement>
+            <requirement type="package" version="3.0.4">xgboost</requirement>
+            <requirement type="package" version="0.2.1">python-kaleido</requirement>
+            <requirement type="package" version="1.31.0">polars</requirement>
+            <requirement type="package" version="5.24.1">plotly</requirement>
+            <requirement type="package" version="0.2.1">python-kaleido</requirement>
+            <requirement type="package" version="0.0.10">harmonypy</requirement>
+            <requirement type="package" version="1.7.4">scanorama</requirement>
         <yield />
     </xml>

-    <token name="@PREP_ADATA@"><![CDATA[
+    <!-- command section -->
+    <token name="@CMD_PREP_ADATA@"><![CDATA[
+        ## ln -s does not work here
         cp '$method.adata' 'anndata.h5ad' &&
-        ]]>
-    </token>
-
+    ]]></token>
     <token name="@CMD@"><![CDATA[
         cat '$script_file' > '$hidden_output' &&
         python '$script_file' >> '$hidden_output' &&
         touch 'anndata_info.txt' &&
-        cat 'anndata_info.txt' @CMD_prettify_stdout@
-        ]]>
-    </token>
-
-    <token name="@CMD_prettify_stdout@"><![CDATA[ | sed -r '1 s|AnnData object with (.+) = (.*)\s*|\1: \2|g' | sed "s|'||g"  | sed -r 's|^\s*(.*):\s(.*)|[\1]\n-    \2|g' | sed 's|, |\n-    |g'
+        cat 'anndata_info.txt' @CMD_PRETTIFY_STDOUT@
+    ]]></token>
+    <token name="@CMD_PRETTIFY_STDOUT@"><![CDATA[
+        | sed -r '1 s|AnnData object with (.+) = (.*)\s*|\1: \2|g' | sed "s|'||g"  | sed -r 's|^\s*(.*):\s(.*)|[\1]\n-    \2|g' | sed 's|, |\n-    |g'
+    ]]></token>
+    <token name="@CMD_GET_GFF@"><![CDATA[
+        #if $method.gff_file_condi.gffSource == 'cached':
+            ln -s '$method.gff_file_condi.gff_pre_installed.fields.path' gff &&
+        #else:
+            ln -s '$method.gff_file_condi.gff_history' gff &&
+        #end if
+    ]]></token>
+    <token name="@CMD_GET_FASTA@"><![CDATA[
+        #if $method.fasta_file_condi.fastaSource == 'indexed':
+            zcat '$method.fasta_file_condi.fasta_pre_installed.fields.path' > fasta.fa &&
+            echo "Using built-in FASTA: '$method.fasta_file_condi.fasta_pre_installed.fields.name'" >&2 &&
+        #else:
+            #if $method.fasta_file_condi.fasta_history.ext.endswith('.gz')
+                zcat '$method.fasta_file_condi.fasta_history' > fasta.fa &&
+            #else:
+            ln -s '$method.fasta_file_condi.fasta_history' fasta.fa &&
+            #end if
+        #end if
     ]]></token>

-    <token name="@CMD_imports@"><![CDATA[
-import snapatac2 as sa
+    <!-- Config section -->
+    <token name="@CONF_IMPORTS@"><![CDATA[
+import snapatac2 as snap
 import os
-    ]]>
-    </token>
-    <xml name="sanitize_query" token_validinitial="string.printable">
-        <sanitizer>
-            <valid initial="@VALIDINITIAL@">
-                <remove value="&apos;" />
-            </valid>
-        </sanitizer>
-    </xml>
-
-    <xml name="inputs_anndata">
-        <param name="adata" type="data" format="h5ad" label="Annotated data matrix"/>
-    </xml>
-
-    <token name="@CMD_read_inputs@"><![CDATA[
-
-adata = sa.read('anndata.h5ad', backed = None)
-]]>
-    </token>
-
-    <xml name="dimentions_plot">
-        <param argument="width" type="integer" value="500" label="Width of the plot"/>
-        <param argument="height" type="integer" value="400" label="Height of the plot"/>
-    </xml>
-
-    <xml name="param_groupby">
-        <param argument="groupby" type="text" label="The key of the observation grouping to consider">
-            <expand macro="sanitize_query" />
-        </param>
-    </xml>
-
-    <xml name="out_file">
-        <param name="out_file" type="select" optional="true" label="Type of output plot">
-            <option value="png" selected="true">PNG</option>
-            <option value="svg">SVG</option>
-            <option value="pdf">PDF</option>
-        </param>
-    </xml>
-    <token name="@CMD_anndata_write_outputs@"><![CDATA[
-adata.write('anndata.h5ad')
+    ]]></token>
+    <token name="@CONF_READ_INPUTS@"><![CDATA[
+adata = snap.read('anndata.h5ad', backed = None)
+    ]]></token>
+    <token name="@CONF_ANNDATA_WRITE_OUTPUTS@"><![CDATA[
+adata.write_h5ad('anndata.h5ad.gz', compression='gzip')
 with open('anndata_info.txt','w', encoding='utf-8') as ainfo:
     print(adata, file=ainfo)
-]]>
-    </token>
-    <xml name="inputs_common_advanced">
-        <section name="advanced_common" title="Advanced Options" expanded="false">
-            <param name="show_log" type="boolean" checked="false" label="Output Log?" />
-        </section>
-    </xml>
-    <xml name="params_render_plot">
-        <param argument="width" type="integer" value="600" label="Width of the plot"/>
-        <param argument="height" type="integer" value="400" label="Height of the plot"/>
-        <expand macro="out_file"/>
-    </xml>
-    <xml name="param_shift">
-        <param argument="shift_left" type="integer" value="4" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/>
-        <param argument="shift_right" type="integer" value="-5" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/>
-    </xml>
-    <xml name="param_chunk_size" tokens="size">
-        <param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/>
-    </xml>
-    <xml name="min_max_frag_size">
-        <param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/>
-        <param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/>
-    </xml>
-    <xml name="params_data_integration">
-        <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
-        <param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation">
-            <expand macro="sanitize_query"/>
-        </param>
-        <param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider">
-            <expand macro="sanitize_query" />
-        </param>
-        <param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/>
-    </xml>
-    <xml name="param_n_comps">
-        <param argument="n_comps" type="integer" value="30" label="Number of dimensions to keep" help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30."/>
-    </xml>
-    <xml name="param_random_state">
-        <param argument="random_state" type="integer" value="0" label="Seed of the random state generator"/>
-    </xml>
-    <xml name="param_key_added" tokens="key_added">
-        <param argument="key_added" type="text" value="@KEY_ADDED@"  label="`adata.obs` key under which t add cluster labels"/>
-    </xml>
-    <xml name="param_use_rep">
-        <param argument="use_rep" type="text" value="X_spectral" label="Use the indicated representation in `.obsm`"/>
-    </xml>
-    <xml name="genome_fasta">
-        <param argument="genome_fasta" type="text" label="A fasta file containing the genome sequences or a Genome object"/>
-    </xml>
-    <xml name="background">
-        <param argument="background" type="text" optional="true" value="" label="A list of regions to be used as the background">
-            <expand macro="sanitize_query"/>
-        </param>
-    </xml>
-    <xml name="mat">
-        <param argument="peak_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by peak count matrix"/>
-        <param argument="gene_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by gene count matrix"/>
-    </xml>
-    <xml name="param_network">
-        <param argument="network" type="text" label="network"/>
-    </xml>
-    <xml name="param_n_iterations">
-        <param argument="n_iterations" type="integer" value="-1" label="How many iterations of the Leiden clustering algorithm to perform"
-            help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/>
-    </xml>
-
-    <xml name="citations">
-        <citations>
-            <citation type="doi">10.1038/s41592-023-02139-9</citation>
-        </citations>
-    </xml>
-    <xml name="render_plot_test">
-        <param name="width" value="650"/>
-        <param name="height" value="450"/>
-    </xml>
-    <xml name="render_plot_matching_text">
-        <has_text_matching expression="width = 650"/>
-        <has_text_matching expression="height = 450"/>
-    </xml>
-    <xml name="param_counting_strategy">
-        <param argument="counting_strategy" type="select" label="The strategy to compute feature counts">
-            <option value="fragment">"fragment": based on the number of fragments that overlap with a region of interest</option>
-            <option value="insertion" selected="true">"insertion": based on the number of insertions that overlap with a region of interest</option>
-            <option value="paired-insertion">"paired-insertion": similar to "insertion", but it only counts the insertions once if the pair of insertions of a fragment are both within the same region of interest</option>
-        </param>
-    </xml>
-
-    <token name="@CMD_params_data_integration@"><![CDATA[
+    ]]></token>
+    <token name="@CONF_PARAMS_RENDER_PLOT@"><![CDATA[
+    width = $method.width,
+    height = $method.height,
+    show = False,
+    interactive = False,
+    out_file = 'plot.$method.out_file',
+    ]]></token>
+    <token name="@CONF_PARAMS_DATA_INTEGRATION@"><![CDATA[
 use_rep = '$method.use_rep',
 #if $method.use_dims != ''
 #set $dims = ([x.strip() for x in str($method.use_dims).split(',')])
@@ -183,13 +89,168 @@
 #if $method.key_added != ''
 key_added = '$method.key_added',
 #end if
-    ]]>
-    </token>
+    ]]></token>
+    <token name="@CONF_IMPORT_MEME@"><![CDATA[
+motifs = read_motifs("input.meme")
+for motif in motifs:
+    motif.name = motif.id.split('+')[0]
+
+unique_motifs = {}
+for motif in motifs:
+    name = motif.name
+    if (
+            name not in unique_motifs or
+            unique_motifs[name].info_content() < motif.info_content()
+        ):
+        unique_motifs[name] = motif
+motifs = list(unique_motifs.values())
+
+
+#else:
+motifs = read_motifs("input.meme")
+for motif in motifs:
+    motif.name = motif.id.split('_')[0]
+    motif.family = motif.id.split('+')[-1]
+    ]]></token>
+
+    <!-- input section -->
+    <xml name="sanitize_query" token_validinitial="string.printable">
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="&apos;" />
+                <yield/>
+            </valid>
+        </sanitizer>
+    </xml>

-    <token name="@CMD_params_render_plot@"><![CDATA[
-    width = $method.width,
-    height = $method.height,
-    out_file = 'plot.$method.out_file',
-    ]]>
-    </token>
+    <xml name="param_inputs_anndata" token_multiple="false" token_label="Annotated data matrix">
+        <param name="adata" type="data" multiple="@MULTIPLE@" format="h5ad" label="@LABEL@"/>
+    </xml>
+    <xml name="param_groupby">
+        <param argument="groupby" type="text" label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+    </xml>
+    <xml name="param_common_advanced">
+        <section name="advanced_common" title="Advanced Options" expanded="false">
+            <param name="show_log" type="boolean" checked="false" label="Output Log?" />
+        </section>
+    </xml>
+    <xml name="param_render_plot">
+        <param argument="width" type="integer" value="600" label="Width of the plot"/>
+        <param argument="height" type="integer" value="400" label="Height of the plot"/>
+        <param name="out_file" type="select" optional="true" label="Type of output plot">
+            <option value="png" selected="true">PNG</option>
+            <option value="svg">SVG</option>
+            <option value="pdf">PDF</option>
+            <option value="html">HTML</option>
+        </param>
+    </xml>
+    <xml name="param_shift" tokens="varname" token_value="0" token_label="Insertion site correction for the left end">
+        <param argument="@VARNAME@" type="integer" value="@VALUE@" label="@LABEL@" help="Note this has no effect on single-end reads"/>
+    </xml>
+    <xml name="param_chunk_size" tokens="size">
+        <param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/>
+    </xml>
+    <xml name="param_min_max_frag_size">
+        <param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/>
+        <param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/>
+    </xml>
+    <xml name="param_data_integration">
+        <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
+        <param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation">
+            <expand macro="sanitize_query"/>
+        </param>
+        <param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+        <param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/>
+    </xml>
+    <xml name="param_random_state" token_label="Seed of the random state generator" token_help="">
+        <param argument="random_state" type="integer" value="0" label="@LABEL@" help="@HELP@"/>
+    </xml>
+    <xml name="param_key_added" tokens="key_added">
+        <param argument="key_added" type="text" value="@KEY_ADDED@"  label="`adata.obs` key under which to add cluster labels"/>
+    </xml>
+    <xml name="param_use_rep" token_label="Use the indicated representation in `.obsm`">
+        <param argument="use_rep" type="text" value="X_spectral" label="@LABEL@"/>
+    </xml>
+    <xml name="param_n_iterations">
+        <param argument="n_iterations" type="integer" value="-1" label="How many iterations of the Leiden clustering algorithm to perform"
+            help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/>
+    </xml>
+    <xml name="param_counting_strategy">
+        <param argument="counting_strategy" type="select" label="The strategy to compute feature counts">
+            <option value="fragment">"fragment": based on the number of fragments that overlap with a region of interest</option>
+            <option value="insertion">"insertion": based on the number of insertions that overlap with a region of interest</option>
+            <option value="paired-insertion" selected="true">"paired-insertion": similar to "insertion", but it only counts the insertions once if the pair of insertions of a fragment are both within the same region of interest</option>
+        </param>
+    </xml>
+    <xml name="param_chrom_sizes">
+        <param argument="chrom_sizes" type="data" format="tabular" label="Chromosome sizes" help="First column the chromosome name and second column the size"/>
+    </xml>
+    <xml name="param_genome_fasta">
+        <conditional name="fasta_file_condi">
+            <param name="fastaSource" type="select" label="Select a built-in FASTA or one from your history" help="Choose history if you don't see the correct FASTA.">
+                <option value="indexed" selected="true">Use a built-in FASTA</option>
+                <option value="history">Use a FASTA from history</option>
+            </param>
+            <when value="indexed">
+                <param name="fasta_pre_installed" type="select" label="Select a FASTA file" help="Select the FASTA file from a list of pre-installed genomes">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="fasta_history" type="data" format="fasta,fasta.gz" label="FASTA file" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="param_gene_anno">
+        <conditional name="gff_file_condi">
+            <param name="gffSource" type="select" label="Select a built-in GFF file or one from your history"  help="Choose history if you don't see the correct GFF" >
+                <option value="cached" selected="true">Use a built-in GFF</option>
+                <option value="history">Use a GFF from history</option>
+            </param>
+            <when value="cached">
+                <param name="gff_pre_installed" type="select" label="Select a GFF file" help="Select the GFF from a list of pre-installed files">
+                    <options from_data_table="gene_sets">
+                        <filter type="sort_by" column="1" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="gff_history" type="data" format="gff3.gz" label="Select a GFF file" help="Make sure that the GFF corresponds to the same genome as the FASTA"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="param_n_comps" token_value="30" token_label="Number of dimensions to keep" token_help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30.">
+        <param argument="n_comps" type="integer" value="@VALUE@" label="@LABEL@" help="@HELP@"/>
+    </xml>
+    <xml name="param_meme_table">
+        <param name="motifs" type="select" label="Select list of transcription factor motifs">
+            <options from_data_table="meme">
+                <filter type="sort_by" column="2" />
+            </options>
+        </param>
+    </xml>
+
+
+    <!-- test section -->
+    <xml name="test_param_render_plot">
+        <param name="width" value="650"/>
+        <param name="height" value="450"/>
+    </xml>
+    <xml name="test_render_plot_matching_text">
+        <has_text_matching expression="width = 650"/>
+        <has_text_matching expression="height = 450"/>
+    </xml>
+
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-023-02139-9</citation>
+        </citations>
+    </xml>
 </macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Tue Nov 25 16:40:54 2025 +0000
@@ -0,0 +1,1 @@
+hg38	hg38	Human (hg38)	${__HERE__}/chr21_small.fasta.gz
\ No newline at end of file
Binary file test-data/chr21.gff3.gz has changed
Binary file test-data/chr21_small.fasta.gz has changed
Binary file test-data/cisBP_human.meme.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_sets.loc	Tue Nov 25 16:40:54 2025 +0000
@@ -0,0 +1,1 @@
+hg38	hg38	hg38GFF	${__HERE__}/chr21.gff3.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme.loc	Tue Nov 25 16:40:54 2025 +0000
@@ -0,0 +1,1 @@
+cisbp	snap.datasets.cis_bp(unique=True)	${__HERE__}/cisBP_human.meme.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Tue Nov 25 16:40:54 2025 +0000
@@ -0,0 +1,17 @@
+#This file lists the locations and dbkeys of all the genome and transcriptome fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel4.5	apiMel4.5	Honeybee (Apis mellifera): apiMel4.5	/path/to/genome/apiMel4.5/apiMel4.5.fa
+#hg38canon	hg38	Human (Homo sapiens): hg38 Canonical	/path/to/genome/hg38/hg38canon.fa
+#hg38full	hg38	Human (Homo sapiens): hg38 Full	/path/to/genome/hg38/hg38full.fa
+#hg38full.90	hg38	Human (Homo sapiens): hg38 Full Trans v90	/path/to/genome/hg38/hg38fulltrans.fa
+
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg38 above.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gene_sets.loc.sample	Tue Nov 25 16:40:54 2025 +0000
@@ -0,0 +1,14 @@
+# This is a sample file distributed with featureCounts that enables it and other tools to use gene/exon annotations in the GFF/GTF format.
+#
+# The gene_sets.loc file syntax is:
+#<unique_build_id>	<dbkey>	<display_name>	<path>
+#
+# Please ensure that the above fields are tab separated.
+#
+# In case you have TWO or MORE providers PER dbkey, the one mentioned
+# first in the file, should have the "default" priority.
+#
+#Example:
+#
+#Homo_sapiens.GRCh38.90	hg38	GRCh38 (hg38) annotation from Ensembl, release 90	/depot/data2/galaxy/hg38/gene_sets/Homo_sapiens.GRCh38.90.gtf
+#Homo_sapiens.GRCh37.87	hg19	GRCh37 (hg19) annotation from Ensembl, release 87	/depot/data2/galaxy/hg19/gene_sets/Homo_sapiens.GRCh37.87.gtf
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/meme.loc.sample	Tue Nov 25 16:40:54 2025 +0000
@@ -0,0 +1,13 @@
+# This is a sample file distributed with snapatac2 which enables the tool to perform motif enrichment analysis
+#
+# The meme.loc file syntax is:
+#<unique_id>	<display_name>	<path>
+#
+# Please ensure that the above fields are tab separated.
+#
+# Currently, the files must be downloaded manually.
+#
+#Example:
+#
+#cisbp	cis_bp(unique=True)	/path/to/cisBP_human.meme.gz
+#meuleman_2020	Meuleman_2020	/path/to/Meuleman_2020.meme.gz
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Nov 25 16:40:54 2025 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+    <!-- Locations of all gff files with annotations of genome builds -->
+    <table name="gene_sets" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/gene_sets.loc" />
+    </table>
+    <!-- Locations of all meme files -->
+    <table name="meme" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/meme.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue Nov 25 16:40:54 2025 +0000
@@ -0,0 +1,14 @@
+<tables>
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+    <table name="gene_sets" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/gene_sets.loc" />
+    </table>
+    <table name="meme" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/meme.loc" />
+    </table>
+</tables>
\ No newline at end of file