# HG changeset patch # User iuc # Date 1764088847 0 # Node ID 20c4011e145803bc0d9000e5147d6ce7b0001821 # Parent c5c94e01a1b5e55afee5286948e008cf185a7bf1 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/snapatac2 commit e0f59fae19e57f54ae0c351a16dd1805d12aba1d diff -r c5c94e01a1b5 -r 20c4011e1458 macros.xml --- a/macros.xml Thu Nov 07 13:07:49 2024 +0000 +++ b/macros.xml Tue Nov 25 16:40:47 2025 +0000 @@ -1,7 +1,7 @@ - 2.6.4 - 1 - 23.0 + 2.8.0 + 0 + 24.0 snapatac @@ -9,168 +9,74 @@ snapatac2 - plotly - python-kaleido - polars - pyarrow - python-igraph - hdbscan - harmonypy - scanorama - macs3 - multiprocess - leidenalg + hdbscan + leidenalg + umap-learn + xgboost + python-kaleido + polars + plotly + python-kaleido + harmonypy + scanorama - + - - + ]]> '$hidden_output' && python '$script_file' >> '$hidden_output' && touch 'anndata_info.txt' && - cat 'anndata_info.txt' @CMD_prettify_stdout@ - ]]> - - - + + + fasta.fa && + echo "Using built-in FASTA: '$method.fasta_file_condi.fasta_pre_installed.fields.name'" >&2 && + #else: + #if $method.fasta_file_condi.fasta_history.ext.endswith('.gz') + zcat '$method.fasta_file_condi.fasta_history' > fasta.fa && + #else: + ln -s '$method.fasta_file_condi.fasta_history' fasta.fa && + #end if + #end if ]]> - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - -

- -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 10.1038/s41592-023-02139-9 - - - - - - - - - - - - - - - - - - - + + - + ]]> + + + + + + + + + + + - - + + + + + + + + + +

+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1038/s41592-023-02139-9 + + diff -r c5c94e01a1b5 -r 20c4011e1458 preprocessing.xml --- a/preprocessing.xml Thu Nov 07 13:07:49 2024 +0000 +++ b/preprocessing.xml Tue Nov 25 16:40:47 2025 +0000 @@ -8,28 +8,28 @@ @@ -229,7 +333,12 @@ - + + + + + + @@ -239,15 +348,15 @@ - - + + - + @@ -259,58 +368,80 @@ - + + - - - - - - - + + + + + + + - - + + + + + + + + + + + + + - + - + - - + + + + + - + - + - + - + @@ -319,9 +450,9 @@ - + - + @@ -329,56 +460,88 @@ - + - + - + - + - + - + - + + + + + + + - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + - + method['method'] == 'pp.make_fragment_file' - - method['method'] != 'pp.make_fragment_file' + + method['method'] != 'pp.make_fragment_file' and method['method'] != 'ex.export_fragments' and method['method'] != 'ex.export_coverage' + + + + + + method['method'] == 'ex.export_fragments' + + + + method['method'] == 'ex.export_coverage' + advanced_common['show_log'] @@ -388,7 +551,7 @@ - + @@ -399,43 +562,50 @@ - + - + - - - - - + + + + + + - - +

- +

- - - + + - - + - + + + + + + + - + - - + + + + + @@ -450,7 +620,47 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+ +

+ + + @@ -461,52 +671,46 @@ - - - - - - - - - -

- -

- + - + - - - - + + +

+ + + - - - + + + - + + + + + - + @@ -517,20 +721,26 @@ - + - + + + + + + + - + @@ -541,20 +751,25 @@ - + - + + + + + + - +

@@ -562,17 +777,21 @@

- + - + + + + + - + @@ -581,39 +800,54 @@

+ + + - + - + + + + + - +

+ + + - + - + + + + + + - + @@ -624,7 +858,7 @@ - + @@ -632,13 +866,41 @@ - + + + + + - + + + + +

+ +

+ + + + + + + + + + + + + + + + + + @@ -647,55 +909,164 @@ - + - + + + + + - - + + - - - - + + +

- - - + + + + + - + + + + + - + - - - - + + +

- - + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `__ +`__ -Import data fragment file` and compute basic QC metrics, using `pp.import_data` -=============================================================================== +Generate cell by bin count matrix, using `pp.import_fragments` +============================================================== Import data fragment files and compute basic QC metrics. -A fragment refers to the sequence data originating from a distinct location in the genome. In single-ended sequencing, one read equates to a fragment. However, in paired-ended sequencing, a fragment is defined by a pair of reads. This function is designed to handle, store, and process input files with fragment data, further yielding a range of basic Quality Control (QC) metrics. These metrics include the total number of unique fragments, duplication rates, and the percentage of mitochondrial DNA detected. - -How fragments are stored is dependent on the sequencing approach utilized. For single-ended sequencing, fragments are found in `.obsm['fragment_single']`. In contrast, for paired-ended sequencing, they are located in `.obsm['fragment_paired']`. +This function is used to generate and add a cell by bin count matrix to the AnnData object. +This function accepts both single-end and paired-end reads. If the records in the fragment file contain 6 columns with the last column representing the strand of the fragment, the fragments are considered single-ended. Otherwise, the fragments are considered paired-ended. More details on the `SnapATAC2 documentation -`__ +`__ + +Generate cell by bin count matrix, using `pp.import_contacts` +============================================================= + +Import chromatin contacts. 
+ +More details on the `SnapATAC2 documentation +`__ Generate cell by bin count matrix, using `pp.add_tile_matrix` ============================================================= @@ -732,10 +1110,8 @@ This function is used to generate and add a cell by bin count matrix to the AnnData object. -`import_data` must be ran first in order to use this function. - More details on the `SnapATAC2 documentation -`__ +`__ Generate cell by gene activity matrix, using `pp.make_gene_matrix` ================================================================== @@ -744,10 +1120,8 @@ Generate cell by gene activity matrix by counting the TN5 insertions in gene body regions. The result will be stored in a new file and a new AnnData object will be created. -`import_data` must be ran first in order to use this function. - More details on the `SnapATAC2 documentation -`__ +`__ Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells` ============================================================================================ @@ -755,15 +1129,15 @@ Filter cell outliers based on counts and numbers of genes expressed. For instance, only keep cells with at least `min_counts` counts or `min_tsse` TSS enrichment scores. This is to filter measurement outliers, i.e. “unreliable” observations. 
More details on the `SnapATAC2 documentation -`__ +`__ Perform feature selection, using `pp.select_features` ===================================================== -Perform feature selection by selecting the most accessibile features across all cells unless `max_iter` > 1 +Perform feature selection by selecting the most accessible features across all cells unless `max_iter` > 1 More details on the `SnapATAC2 documentation -`__ +`__ Compute probability of being a doublet using the scrublet algorithm, using `pp.scrublet` ======================================================================================== @@ -773,7 +1147,7 @@ This function identifies doublets by generating simulated doublets by randomly pairing chromatin accessibility profiles of individual cells. The simulated doublets are then embedded alongside the original cells using the spectral embedding algorithm in this package. A k-nearest-neighbor classifier is trained to distinguish between the simulated doublets and the authentic cells. This trained classifier produces a “doublet score” for each cell. The doublet scores are then converted into probabilities using a Gaussian mixture model. More details on the `SnapATAC2 documentation -`__ +`__ Remove doublets according to the doublet probability or doublet score, using `pp.filter_doublets` ================================================================================================= @@ -783,7 +1157,7 @@ The user can choose to remove doublets by either the doublet probability or the doublet score. `scrublet` must be run first in order to use this function. More details on the `SnapATAC2 documentation -`__ +`__ A modified MNN-Correct algorithm based on cluster centroid, using `pp.mnc_correct` ================================================================================== @@ -791,7 +1165,7 @@ A modified MNN-Correct algorithm based on cluster centroid. 
More details on the `SnapATAC2 documentation -`__ +`__ Use harmonypy to integrate different experiments, using `pp.harmony` ==================================================================== @@ -801,37 +1175,33 @@ Harmony is an algorithm for integrating single-cell data from multiple experiments. This function uses the python port of Harmony, `harmonypy`, to integrate single-cell data stored in an AnnData object. This function should be run after performing dimension reduction. More details on the `SnapATAC2 documentation -`__ +`__ Use Scanorama to integrate different experiments, using `pp.scanorama_integrate` -======================================================================================== +================================================================================ Use Scanorama to integrate different experiments. Scanorama is an algorithm for integrating single-cell data from multiple experiments stored in an AnnData object. This function should be run after performing `tl.spectral` but before computing the neighbor graph. More details on the `SnapATAC2 documentation -`__ +`__ -Compute the fragment size distribution of the dataset, using `metrics.frag_size_distr` -====================================================================================== +Export fragments for each group of cells, using `ex.export_fragments` +===================================================================== -Compute the fragment size distribution of the dataset. - -This function computes the fragment size distribution of the dataset. Note that it does not operate at the single-cell level. The result is stored in a vector where each element represents the number of fragments and the index represents the fragment length. The first posision of the vector is reserved for fragments with size larger than the `max_recorded_size` parameter. `import_data` must be ran first in order to use this function. +Export and save fragments for a group of cells in a BED format file. 
More details on the `SnapATAC2 documentation -`__ +`__ -Compute the TSS enrichment score (TSSe) for each cell, using `metrics.tsse` -=========================================================================== +Export coverage for each group of cells, using `ex.export_coverage` +===================================================================== -Compute the TSS enrichment score (TSSe) for each cell. - -`import_data` must be ran first in order to use this function. +Export and save coverage for a group of cells in a bedgraph or bigwig format file. More details on the `SnapATAC2 documentation -`__ +`__ ]]> diff -r c5c94e01a1b5 -r 20c4011e1458 test-data/all_fasta.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/all_fasta.loc Tue Nov 25 16:40:47 2025 +0000 @@ -0,0 +1,1 @@ +hg38 hg38 Human (hg38) ${__HERE__}/chr21_small.fasta.gz \ No newline at end of file diff -r c5c94e01a1b5 -r 20c4011e1458 test-data/chr21.gff3.gz Binary file test-data/chr21.gff3.gz has changed diff -r c5c94e01a1b5 -r 20c4011e1458 test-data/chr21_small.fasta.gz Binary file test-data/chr21_small.fasta.gz has changed diff -r c5c94e01a1b5 -r 20c4011e1458 test-data/cisBP_human.meme.gz Binary file test-data/cisBP_human.meme.gz has changed diff -r c5c94e01a1b5 -r 20c4011e1458 test-data/gene_sets.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gene_sets.loc Tue Nov 25 16:40:47 2025 +0000 @@ -0,0 +1,1 @@ +hg38 hg38 hg38GFF ${__HERE__}/chr21.gff3.gz \ No newline at end of file diff -r c5c94e01a1b5 -r 20c4011e1458 test-data/meme.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/meme.loc Tue Nov 25 16:40:47 2025 +0000 @@ -0,0 +1,1 @@ +cisbp snap.datasets.cis_bp(unique=True) ${__HERE__}/cisBP_human.meme.gz \ No newline at end of file diff -r c5c94e01a1b5 -r 20c4011e1458 tool-data/all_fasta.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Tue Nov 25 16:40:47 2025 +0000 @@ -0,0 +1,17 @@ +#This file lists the locations and 
dbkeys of all the genome and transcriptome fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). This file has the format (white space characters are +#TAB characters): +# +# +# +#So, all_fasta.loc could look something like this: +# +#apiMel4.5 apiMel4.5 Honeybee (Apis mellifera): apiMel4.5 /path/to/genome/apiMel4.5/apiMel4.5.fa +#hg38canon hg38 Human (Homo sapiens): hg38 Canonical /path/to/genome/hg38/hg38canon.fa +#hg38full hg38 Human (Homo sapiens): hg38 Full /path/to/genome/hg38/hg38full.fa +#hg38full.90 hg38 Human (Homo sapiens): hg38 Full Trans v90 /path/to/genome/hg38/hg38fulltrans.fa + +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg38 above. \ No newline at end of file diff -r c5c94e01a1b5 -r 20c4011e1458 tool-data/gene_sets.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gene_sets.loc.sample Tue Nov 25 16:40:47 2025 +0000 @@ -0,0 +1,14 @@ +# This is a sample file distributed with featureCounts that enables it and other tools to use gene/exon annotations in the GFF/GTF format. +# +# The gene_sets.loc file syntax is: +# +# +# Please ensure that the above fields are tab separated. +# +# In case you have TWO or MORE providers PER dbkey, the one mentioned +# first in the file, should have the "default" priority. 
+# +#Example: +# +#Homo_sapiens.GRCh38.90 hg38 GRCh38 (hg38) annotation from Ensembl, release 90 /depot/data2/galaxy/hg38/gene_sets/Homo_sapiens.GRCh38.90.gtf +#Homo_sapiens.GRCh37.87 hg19 GRCh37 (hg19) annotation from Ensembl, release 87 /depot/data2/galaxy/hg19/gene_sets/Homo_sapiens.GRCh37.87.gtf \ No newline at end of file diff -r c5c94e01a1b5 -r 20c4011e1458 tool-data/meme.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/meme.loc.sample Tue Nov 25 16:40:47 2025 +0000 @@ -0,0 +1,13 @@ +# This is a sample file distributed with snapatac2 which enables the tool to perform motif enrichment analysis +# +# The meme.loc file syntax is: +# +# +# Please ensure that the above fields are tab separated. +# +# Currently the files should be downloaded manually +# +#Example: +# +#cisbp cis_bp(unique=True) /path/to/cisBP_human.meme.gz +#meuleman_2020 Meuleman_2020 /path/to/Meuleman_2020.meme.gz \ No newline at end of file diff -r c5c94e01a1b5 -r 20c4011e1458 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Nov 25 16:40:47 2025 +0000 @@ -0,0 +1,17 @@ + + + + value, dbkey, name, path + +

+ + + value, dbkey, name, path + +

+ + + value, name, path + +

+ \ No newline at end of file diff -r c5c94e01a1b5 -r 20c4011e1458 tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Tue Nov 25 16:40:47 2025 +0000 @@ -0,0 +1,14 @@ + + + value, dbkey, name, path + +

+ + value, dbkey, name, path + +

+ + value, name, path + +

+ \ No newline at end of file