Mercurial > repos > iuc > snapatac2_clustering
changeset 1:8f8bef61fd0b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/snapatac2 commit 1e34deee1e39c0c65e1e29a9d28becc7aaf23a4f
author | iuc |
---|---|
date | Thu, 23 May 2024 15:19:58 +0000 |
parents | af821711b356 |
children | 94ae400dde2f |
files | dimension_reduction_clustering.xml macros.xml |
diffstat | 2 files changed, 167 insertions(+), 131 deletions(-) [+] |
line wrap: on
line diff
--- a/dimension_reduction_clustering.xml Thu May 16 13:15:57 2024 +0000 +++ b/dimension_reduction_clustering.xml Thu May 23 15:19:58 2024 +0000 @@ -18,127 +18,130 @@ @CMD_read_inputs@ #if $method.method == 'tl.spectral' - #if $method.features + #if $method.features with open('$method.features') as f: - features_mask = [x.lower().capitalize() == "True" for x in f.read().splitlines()] - #end if + features_mask = [x.lower().capitalize() == "True" for x in f.read().splitlines()] + #end if sa.tl.spectral( - adata, - n_comps = $method.n_comps, - #if $method.features - features = features_mask, - #end if - random_state = $method.random_state, - #if $method.sample_size - sample_size = $method.sample_size, - #end if - chunk_size = $method.chunk_size, - distance_metric = '$method.distance_metric', - weighted_by_sd = $method.weighted_by_sd, - inplace = True + adata, + n_comps = $method.n_comps, + #if $method.features + features = features_mask, + #end if + random_state = $method.random_state, + #if $method.sample_size + sample_size = $method.sample_size, + #end if + chunk_size = $method.chunk_size, + distance_metric = '$method.distance_metric', + weighted_by_sd = $method.weighted_by_sd, + inplace = True ) #else if $method.method == 'tl.umap' sa.tl.umap( - adata, - n_comps = $method.n_comps, - #if $method.use_dims != '' - #set $dims = ([x.strip() for x in str($method.use_dims).split(',')]) - use_dims=$dims, - #end if - use_rep = '$method.use_rep', - key_added = '$method.key_added', - random_state = $method.random_state, - inplace = True + adata, + n_comps = $method.n_comps, + #if $method.use_dims != '' + #set $dims = ([x.strip() for x in str($method.use_dims).split(',')]) + use_dims=$dims, + #end if + use_rep = '$method.use_rep', + key_added = '$method.key_added', + random_state = $method.random_state, + inplace = True ) #else if $method.method == 'pp.knn' sa.pp.knn( - adata, - n_neighbors = $method.n_neighbors, - #if $method.use_dims != '' - #set $dims = ([x.strip() for x in str($method.use_dims).split(',')]) - use_dims=$dims, - #end if - use_rep = '$method.use_rep', - method = '$method.algorithm', - inplace = True, - random_state = $method.random_state + adata, + n_neighbors = $method.n_neighbors, + #if $method.use_dims != '' + #set $dims = ([x.strip() for x in str($method.use_dims).split(',')]) + use_dims=$dims, + #end if + use_rep = '$method.use_rep', + method = '$method.algorithm', + inplace = True, + random_state = $method.random_state ) #else if $method.method == 'tl.dbscan' sa.tl.dbscan( - adata, - eps = $method.eps, - min_samples = $method.min_samples, - leaf_size = $method.leaf_size, - use_rep = '$method.use_rep', - key_added = '$method.key_added' + adata, + eps = $method.eps, + min_samples = $method.min_samples, + leaf_size = $method.leaf_size, + use_rep = '$method.use_rep', + key_added = '$method.key_added' ) #else if $method.method == 'tl.hdbscan' sa.tl.hdbscan( - adata, - min_cluster_size = $method.min_cluster_size, - #if $method.min_samples - min_samples = $method.min_samples, - #end if - cluster_selection_epsilon = $method.cluster_selection_epsilon, - alpha = $method.alpha, - cluster_selection_method = '$method.cluster_selection_method', - random_state = $method.random_state, - use_rep = '$method.use_rep', - key_added = '$method.key_added' + adata, + min_cluster_size = $method.min_cluster_size, + #if $method.min_samples + min_samples = $method.min_samples, + #end if + cluster_selection_epsilon = $method.cluster_selection_epsilon, + alpha = $method.alpha, + cluster_selection_method = '$method.cluster_selection_method', + random_state = $method.random_state, + use_rep = '$method.use_rep', + key_added = '$method.key_added' ) #else if $method.method == 'tl.leiden' sa.tl.leiden( - adata, - resolution = $method.resolution, - objective_function = '$method.objective_function', - min_cluster_size = $method.min_cluster_size, - n_iterations = $method.n_iterations, - random_state = $method.random_state, - key_added = '$method.key_added', - weighted = $method.weighted, - inplace = True + adata, + resolution = $method.resolution, + objective_function = '$method.objective_function', + #if $method.objective_function == 'RBConfiguration' + use_leidenalg = True, + #end if + min_cluster_size = $method.min_cluster_size, + n_iterations = $method.n_iterations, + random_state = $method.random_state, + key_added = '$method.key_added', + weighted = $method.weighted, + inplace = True ) #else if $method.method == 'tl.kmeans' sa.tl.kmeans( - adata, - n_clusters = $method.n_clusters, - n_iterations = $method.n_iterations, - random_state = $method.random_state, - use_rep = '$method.use_rep', - key_added = '$method.key_added' + adata, + n_clusters = $method.n_clusters, + n_iterations = $method.n_iterations, + random_state = $method.random_state, + use_rep = '$method.use_rep', + key_added = '$method.key_added' ) #else if $method.method == 'tl.aggregate_X' sa.tl.aggregate_X( - adata, - #if $method.groupby != '' - groupby = '$method.groupby', - #end if - normalize = '$method.normalize' + adata, + #if $method.groupby != '' + groupby = '$method.groupby', + #end if + normalize = '$method.normalize' ) #else if $method.method == 'tl.aggregate_cells' sa.tl.aggregate_cells( - adata, - use_rep = '$method.use_rep', - #if $method.target_num_cells - target_num_cells = $method.target_num_cells, - #end if - min_cluster_size = $method.min_cluster_size, - random_state = $method.random_state, - key_added = '$method.key_added', - inplace = True + adata, + use_rep = '$method.use_rep', + #if $method.target_num_cells + target_num_cells = $method.target_num_cells, + #end if + min_cluster_size = $method.min_cluster_size, + random_state = $method.random_state, + key_added = '$method.key_added', + inplace = True ) #end if @CMD_anndata_write_outputs@ - ]]></configfile> + ]]></configfile> </configfiles> <inputs> <conditional name="method"> @@ -162,7 +165,7 @@ <param argument="chunk_size" type="integer" value="20000" label="chunk size"/> <param argument="distance_metric" type="select" label="distance metric: “jaccard”, “cosine“"> <option value="jaccard">jaccard</option> - <option value="cosine">cosine</option> + <option value="cosine" selected="true">cosine</option> </param> <param argument="weighted_by_sd" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to weight the result eigenvectors by the square root of eigenvalues"/> </when> @@ -195,7 +198,7 @@ <param argument="resolution" type="float" value="1" label="Parameter value controlling the coarseness of the clustering" help="Higher values lead to more clusters"/> <param argument="objective_function" type="select" label="Whether to use the Constant Potts Model (CPM) or modularity"> <option value="CPM">CPM</option> - <option value="modularity">modularity</option> + <option value="modularity" selected="true">modularity</option> <option value="RBConfiguration">RBConfiguration</option> </param> <param argument="min_cluster_size" type="integer" value="5" label="The minimum size of clusters"/> @@ -267,7 +270,7 @@ <!-- tl.spectral --> <conditional name="method"> <param name="method" value="tl.spectral"/> - <param name="adata" location="https://zenodo.org/records/11199963/files/pp.select_features.pbmc_500_chr21.h5ad"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pp.select_features.pbmc_500_chr21.h5ad"/> <param name="n_comps" value="30"/> <param name="random_state" value="0"/> <param name="chunk_size" value="20000"/> @@ -287,13 +290,13 @@ <has_text_matching expression="weighted_by_sd = True"/> </assert_contents> </output> - <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/> </test> <test expect_num_outputs="2"> <!-- tl.umap --> <conditional name="method"> <param name="method" value="tl.umap"/> - <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/> <param name="n_comps" value="2"/> <param name="use_rep" value="X_spectral"/> <param name="key_added" value="umap"/> @@ -311,13 +314,13 @@ <has_text_matching expression="random_state = 0"/> </assert_contents> </output> - <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.umap.pbmc_500_chr21.h5ad"/> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.umap.pbmc_500_chr21.h5ad"/> </test> <test expect_num_outputs="2"> <!-- pp.knn --> <conditional name="method"> <param name="method" value="pp.knn"/> - <param name="adata" location="https://zenodo.org/records/11199963/files/tl.umap.pbmc_500_chr21.h5ad"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/tl.umap.pbmc_500_chr21.h5ad"/> <param name="n_neighbors" value="50"/> <param name="use_rep" value="X_spectral"/> <param name="method_" value="kdtree"/> @@ -337,13 +340,13 @@ <has_text_matching expression="random_state = 0"/> </assert_contents> </output> - <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/pp.knn.pbmc_500_chr21.h5ad"/> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/pp.knn.pbmc_500_chr21.h5ad"/> </test> <test expect_num_outputs="2"> <!-- tl.leiden --> <conditional name="method"> <param name="method" value="tl.leiden"/> - <param name="adata" location="https://zenodo.org/records/11199963/files/pp.knn.pbmc_500_chr21.h5ad"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pp.knn.pbmc_500_chr21.h5ad"/> <param name="resolution" value="2"/> <param name="objective_function" value="modularity"/> <param name="min_cluster_size" value="3"/> @@ -367,13 +370,43 @@ <has_text_matching expression="weighted = False"/> </assert_contents> </output> - <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.leiden.pbmc_500_chr21.h5ad"/> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.leiden.modularity.pbmc_500_chr21.h5ad"/> + </test> + <test expect_num_outputs="2"> + <!-- tl.leiden --> + <conditional name="method"> + <param name="method" value="tl.leiden"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/pp.knn.pbmc_500_chr21.h5ad"/> + <param name="resolution" value="2"/> + <param name="objective_function" value="RBConfiguration"/> + <param name="min_cluster_size" value="3"/> + <param name="n_iterations" value="-1"/> + <param name="random_state" value="0"/> + <param name="key_added" value="leiden"/> + <param name="weighted" value="False"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true"/> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="sa.tl.leiden"/> + <has_text_matching expression="resolution = 2"/> + <has_text_matching expression="objective_function = 'RBConfiguration'"/> + <has_text_matching expression="min_cluster_size = 3"/> + <has_text_matching expression="n_iterations = -1"/> + <has_text_matching expression="random_state = 0"/> + <has_text_matching expression="key_added = 'leiden'"/> + <has_text_matching expression="weighted = False"/> + </assert_contents> + </output> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.leiden.RBConfiguration.pbmc_500_chr21.h5ad"/> </test> <test expect_num_outputs="2"> <!-- tl.kmeans --> <conditional name="method"> <param name="method" value="tl.kmeans"/> - <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/> <param name="n_iterations" value="-1"/> <param name="random_state" value="0"/> <param name="use_rep" value="X_spectral"/> @@ -391,13 +424,13 @@ <has_text_matching expression="key_added = 'kmeans'"/> </assert_contents> </output> - <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.kmeans.pbmc_500_chr21.h5ad"/> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.kmeans.pbmc_500_chr21.h5ad"/> </test> <test expect_num_outputs="2"> <!-- tl.dbscan --> <conditional name="method"> <param name="method" value="tl.dbscan"/> - <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/> <param name="eps" value="0.5"/> <param name="min_samples" value="3"/> <param name="leaf_size" value="5"/> @@ -417,13 +450,13 @@ <has_text_matching expression="key_added = 'dbscan'"/> </assert_contents> </output> - <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.dbscan.pbmc_500_chr21.h5ad"/> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.dbscan.pbmc_500_chr21.h5ad"/> </test> <test expect_num_outputs="2"> <!-- tl.hdbscan --> <conditional name="method"> <param name="method" value="tl.hdbscan"/> - <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/> <param name="min_cluster_size" value="3"/> <param name="min_samples" value="3"/> <param name="cluster_selection_method" value="eom"/> @@ -445,13 +478,13 @@ <has_text_matching expression="key_added = 'hdbscan'"/> </assert_contents> </output> - <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.hdbscan.pbmc_500_chr21.h5ad"/> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.hdbscan.pbmc_500_chr21.h5ad"/> </test> <test expect_num_outputs="2"> <!-- tl.aggregate_X --> <conditional name="method"> <param name="method" value="tl.aggregate_X"/> - <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/> <param name="normalize" value="RPKM"/> </conditional> <section name="advanced_common"> @@ -463,13 +496,13 @@ <has_text_matching expression="normalize = 'RPKM'"/> </assert_contents> </output> - <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.aggregate_X.pbmc_500_chr21.h5ad"/> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.aggregate_X.pbmc_500_chr21.h5ad"/> </test> <test expect_num_outputs="2"> <!-- tl.aggregate_cells --> <conditional name="method"> <param name="method" value="tl.aggregate_cells"/> - <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/> + <param name="adata" location="https://zenodo.org/records/11260316/files/tl.spectral.pbmc_500_chr21.h5ad"/> <param name="use_rep" value="X_spectral"/> <param name="target_num_cells" value="5"/> <param name="min_cluster_size" value="3"/> @@ -489,7 +522,7 @@ <has_text_matching expression="key_added = 'pseudo_cell'"/> </assert_contents> </output> - <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.aggregate_cells.pbmc_500_chr21.h5ad"/> + <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11260316/files/tl.aggregate_cells.pbmc_500_chr21.h5ad"/> </test> </tests> <help><![CDATA[
--- a/macros.xml Thu May 16 13:15:57 2024 +0000 +++ b/macros.xml Thu May 23 15:19:58 2024 +0000 @@ -1,6 +1,6 @@ <macros> - <token name="@TOOL_VERSION@">2.5.3</token> - <token name="@VERSION_SUFFIX@">0</token> + <token name="@TOOL_VERSION@">2.5.3</token> + <token name="@VERSION_SUFFIX@">1</token> <token name="@PROFILE@">23.0</token> <xml name="requirements"> <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement> @@ -12,6 +12,9 @@ <requirement type="package" version="0.8.33">hdbscan</requirement> <requirement type="package" version="0.0.9">harmonypy</requirement> <requirement type="package" version="1.7.4">scanorama</requirement> + <requirement type="package" version="3.0.1">macs3</requirement> + <requirement type="package" version="0.70.16">multiprocess</requirement> + <requirement type="package" version="0.10.2">leidenalg</requirement> <yield /> </xml> @@ -23,8 +26,8 @@ <token name="@CMD@"><![CDATA[ cat '$script_file' > '$hidden_output' && python '$script_file' >> '$hidden_output' && - touch 'anndata_info.txt' && - cat 'anndata_info.txt' @CMD_prettify_stdout@ + touch 'anndata_info.txt' && + cat 'anndata_info.txt' @CMD_prettify_stdout@ ]]> </token> @@ -56,7 +59,7 @@ <xml name="dimentions_plot"> <param argument="width" type="integer" value="500" label="Width of the plot"/> - <param argument="height" type="integer" value="400" label="Height of the plot"/> + <param argument="height" type="integer" value="400" label="Height of the plot"/> </xml> <xml name="param_groupby"> @@ -66,11 +69,11 @@ </xml> <xml name="out_file"> - <param name="out_file" type="select" optional="true" label="Type of output file"> + <param name="out_file" type="select" optional="true" label="Type of output plot"> <option value="png" selected="true">PNG</option> <option value="svg">SVG</option> <option value="pdf">PDF</option> - </param> + </param> </xml> <token name="@CMD_anndata_write_outputs@"><![CDATA[ adata.write('anndata.h5ad') @@ -89,28 +92,28 @@ <expand macro="out_file"/> </xml> <xml name="param_shift"> - <param argument="shift_left" type="integer" value="4" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/> - <param argument="shift_right" type="integer" value="-5" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/> + <param argument="shift_left" type="integer" value="4" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/> + <param argument="shift_right" type="integer" value="-5" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/> </xml> <xml name="param_chunk_size" tokens="size"> - <param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/> + <param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/> + </xml> + <xml name="min_max_frag_size"> + <param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/> + <param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/> </xml> - <xml name="min_max_frag_size"> - <param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/> - <param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/> - </xml> - <xml name="params_data_integration"> - <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/> - <param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation"> - <expand macro="sanitize_query"/> - </param> - <param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider"> + <xml name="params_data_integration"> + <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/> + <param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation"> + <expand macro="sanitize_query"/> + </param> + <param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider"> <expand macro="sanitize_query" /> </param> - <param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/> - </xml> + <param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/> + </xml> <xml name="param_n_comps"> -s <param argument="n_comps" type="integer" value="30" label="Number of dimensions to keep" help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30."/> + <param argument="n_comps" type="integer" value="30" label="Number of dimensions to keep" help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30."/> </xml> <xml name="param_random_state"> <param argument="random_state" type="integer" value="0" label="Seed of the random state generator"/> @@ -126,12 +129,12 @@ </xml> <xml name="background"> <param argument="background" type="text" optional="true" value="" label="A list of regions to be used as the background"> - <expand macro="sanitize_query"/> - </param> + <expand macro="sanitize_query"/> + </param> </xml> <xml name="mat"> <param argument="peak_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by peak count matrix"/> - <param argument="gene_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by gene count matrix"/> + <param argument="gene_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by gene count matrix"/> </xml> <xml name="param_network"> <param argument="network" type="text" label="network"/> @@ -147,11 +150,11 @@ </citations> </xml> <xml name="render_plot_test"> - <param name="width" value="650"/> + <param name="width" value="650"/> <param name="height" value="450"/> </xml> <xml name="render_plot_matching_text"> - <has_text_matching expression="width = 650"/> + <has_text_matching expression="width = 650"/> <has_text_matching expression="height = 450"/> </xml> <xml name="param_counting_strategy">