scanpy_normalize: normalize.xml comparison

comparison normalize.xml @ 17:5dada6f76047 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 91121b1e72696f17478dae383badaa71e9f96dbb

author	iuc
date	Sat, 14 Sep 2024 12:42:55 +0000
parents	d844935c906c
children

comparison

equal deleted inserted replaced

-:9f354c7c8c92
+:5dada6f76047
-<tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
+<tool id="scanpy_normalize" name="Scanpy normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
-<description>and impute with scanpy</description>
+<description>and impute</description>
 <macros>
 <import>macros.xml</import>
 </macros>
 <expand macro="bio_tools"/>
-<expand macro="requirements"/>
+<expand macro="requirements">
+<requirement type="package" version="3.0.0">magic-impute</requirement>
+</expand>
 <expand macro="version_command"/>
 <command detect_errors="exit_code"><![CDATA[
 @CMD@
 ]]></command>
 <configfiles>
 <configfile name="script_file"><![CDATA[
-@CMD_imports@
+@CMD_IMPORTS@
-@CMD_read_inputs@
+@CMD_READ_INPUTS@
-#if $method.method == "pp.normalize_total"
+#if str($method.method) == 'pp.normalize_total':
 sc.pp.normalize_total(
 adata,
-#if str($method.target_sum) != ''
+#if str($method.target_sum) != '':
 target_sum=$method.target_sum,
 #end if
 exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed,
-#if $method.exclude_highly_expressed.exclude_highly_expressed == "True"
+#if str($method.exclude_highly_expressed.exclude_highly_expressed) == 'True':
 max_fraction=$method.exclude_highly_expressed.max_fraction,
 #end if
-#if $method.key_added
+#if str($method.key_added) != '':
 key_added='$method.key_added',
 #end if
-#if $method.layers
+#if str($method.layer) != '':
-#if str($method.layers) != 'all'
+layer='$method.layer',
-layers[str(x.strip()) for x in str($method.layers).split(',')],
-#else
-layers='$method.layers',
-#end if
-#end if
-#if str($method.layer_norm) != "None"
-layer_norm='$method.layer_norm',
 #end if
 inplace=True)
-#else if $method.method == "pp.recipe_zheng17"
+#else if str($method.method) == 'pp.recipe_zheng17':
 sc.pp.recipe_zheng17(
 adata=adata,
 n_top_genes=$method.n_top_genes,
 log=$method.log,
 plot=False,
 copy=False)
-#else if $method.method == "pp.recipe_weinreb17"
+#else if str($method.method) == 'pp.recipe_weinreb17':
 sc.pp.recipe_weinreb17(
 adata=adata,
 log=$method.log,
-mean_threshold=$method.mean_threshold,
+mean_threshold=0.01,
-cv_threshold=$method.cv_threshold,
+cv_threshold=2,
-n_pcs=$method.n_pcs,
+n_pcs=50,
-svd_solver='$method.svd_solver',
+svd_solver='randomized',
-random_state=$method.random_state,
+random_state=0,
 copy=False)
-#else if $method.method == "pp.recipe_seurat"
+#else if str($method.method) == 'pp.recipe_seurat':
 sc.pp.recipe_seurat(
 adata=adata,
 log=$method.log,
 plot=False,
 copy=False)
-#else if $method.method == "external.pp.magic"
+#else if str($method.method) == 'external.pp.magic':
+print("stats before magic:", "min=", f"{adata.X.min():.5f}", "max=", f"{adata.X.max():.5f}", "mean=", f"{adata.X.mean():.5f}")
 sc.external.pp.magic(
 adata=adata,
 name_list='$method.name_list',
 knn=$method.knn,
-#if str($method.decay) != ''
+#if str($method.decay) != '':
 decay=$method.decay,
 #end if
-#if str($method.knn_max) != ''
+#if str($method.knn_max) != '':
 knn_max=$method.knn_max,
 #end if
-#if $method.t == -1
+#if $method.t == -1:
 t='auto',
 #else
 t=$method.t,
 #end if
-#if str($method.n_pca) != ''
+#if str($method.n_pca) != '':
 n_pca=$method.n_pca,
 #end if
 solver='$method.solver',
 knn_dist='$method.knn_dist',
+#if str($method.random_state) != '':
 random_state=$method.random_state,
+#else
+random_state=None,
+#end if
 copy=False)
+#if str($method.name_list) == 'all_genes':
+print("stats after magic:", "min=", f"{adata.X.min():.5f}", "max=", f"{adata.X.max():.5f}", "mean=", f"{adata.X.mean():.5f}")
+#end if
 #end if
-@CMD_anndata_write_outputs@
+@CMD_ANNDATA_WRITE_OUTPUTS@
+]]>
-]]></configfile>
+</configfile>
 </configfiles>
 <inputs>
 <expand macro="inputs_anndata"/>
 <conditional name="method">
 <param argument="method" type="select" label="Method used for normalization">
 </param>
 <when value="pp.normalize_total">
 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
 <conditional name="exclude_highly_expressed">
 <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum">
+<option value="False" selected="true">No</option>
 <option value="True">Yes</option>
-<option value="False" selected="true">No</option>
 </param>
 <when value="True">
-<param argument="max_fraction" type="float" value="0.05" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
+<param argument="max_fraction" type="float" value="0.05" label="Consider cells as highly expressed that have more counts than this value of the original total counts in at least one cell."/>
 </when>
 <when value="False"/>
 </conditional>
-<param argument="key_added" type="text" value="" optional="true" label="Name of the field in 'adata.obs' where the normalization factor is stored" help="">
+<param argument="key_added" type="text" value="" optional="true" label="Name of the field in 'adata.obs' where the normalization factor is stored">
-<expand macro="sanitize_query" />
+<expand macro="sanitize_query"/>
 </param>
-<param argument="layers" type="text" value="" optional="true" label="List of layers to normalize" help="'All' will normalize all layers. The list should be comma-separated.">
+<param argument="layer" type="text" value="" label="Layer to normalize instead of X. If not provided, X is normalized.">
-<expand macro="sanitize_query" />
+<expand macro="sanitize_query"/>
-</param>
-<param argument="layer_norm" type="select" label="How to normalize layers?">
-<option value="None">None: after normalization, for each layer in layers each cell has a total count equal to the median of the median of the total counts (cells) before normalization of the layer.</option>
-<option value="after">After: for each layer in layers each cell has a total count equal to target_sum.</option>
-<option value="X">X: for each layer in layers each cell has a total count equal to the median of total counts for observations (cells) of adata.X before normalization.</option>
 </param>
 </when>
 <when value="pp.recipe_zheng17">
-<param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/>
+<param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep"/>
-<expand macro="param_log"/>
+<expand macro="param_log" checked="true"/>
 </when>
 <when value="pp.recipe_weinreb17">
-<expand macro="param_log"/>
+<expand macro="param_log" checked="true"/>
-<param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/>
-<param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/>
-<param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal component" help=""/>
-<expand macro="svd_solver"/>
-<expand macro="pca_random_state"/>
 </when>
 <when value="pp.recipe_seurat">
-<expand macro="param_log"/>
+<expand macro="param_log" checked="true"/>
 </when>
 <when value="external.pp.magic">
 <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory">
-<option value="all_genes">All genes</option>
+<option value="all_genes" selected="true">All genes</option>
 <option value="pca_only">PCA only</option>
 </param>
-<param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/>
+<param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel"/>
-<param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails"
+<param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails" help="If not set, alpha decaying kernel is not used"/>
-help="If not set, alpha decaying kernel is not used" />
+<param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection" help="If not set, will be set to 3 * knn"/>
-<param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection"
+<param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion" help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data."/>
-help="If not set, will be set to 3 * knn" />
-<param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion"
-help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data." />
 <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods"
-help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." />
+help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed."/>
 <param name="solver" type="select" label="Which solver to use" help="Selecting all genes may require a large amount of memory">
-<option value="exact">"exact", the implementation described in van Dijk et al. (2018) </option>
+<option value="exact" selected="true">"exact", the implementation described in van Dijk et al. (2018) </option>
 <option value="approximate">"approximate", is faster that performs imputation in the PCA space and then projects back to the gene space</option>
 </param>
 <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html">
 <expand macro="distance_metric_options"/>
 </param>
-<expand macro="param_random_state"/>
+<param argument="random_state" type="integer" optional="true" label="Random seed" help="Defaults to the global numpy random number generator."/>
 </when>
 </conditional>
 <expand macro="inputs_common_advanced"/>
 </inputs>
 <outputs>
 <expand macro="anndata_outputs"/>
 </outputs>
 <tests>
-<test expect_num_outputs="2">
 <!-- test 1 -->
-<param name="adata" value="krumsiek11.h5ad" />
+<test expect_num_outputs="2">
+<param name="adata" value="krumsiek11.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.normalize_total"/>
-<conditional name="exclude_highly_expressed">
-<param name="exclude_highly_expressed" value="False"/>
-</conditional>
 <param name="key_added" value="n_counts"/>
-<param name="layers" value="all"/>
+</conditional>
-<param name="layer_norm" value="None"/>
+<section name="advanced_common">
-</conditional>
+<param name="show_log" value="true"/>
-<section name="advanced_common">
-<param name="show_log" value="true" />
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.pp.normalize_total"/>
 <has_text_matching expression="exclude_highly_expressed=False"/>
 <has_text_matching expression="key_added='n_counts'"/>
-<has_text_matching expression="layers='all'"/>
+</assert_contents>
-</assert_contents>
+</output>
-</output>
+<output name="anndata_out" ftype="h5ad">
-<output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+<assert_contents>
-</test>
+<has_h5_keys keys="obs/n_counts"/>
-<test expect_num_outputs="2">
+</assert_contents>
-<!-- test 2 -->
+</output>
+</test>
+<!-- test 2 -->
+<test expect_num_outputs="2">
 <param name="adata" value="random-randint.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.recipe_zheng17"/>
-<param name="n_top_genes" value="1000"/>
+</conditional>
-<param name="log" value="True"/>
+<section name="advanced_common">
-</conditional>
+<param name="show_log" value="true"/>
-<section name="advanced_common">
-<param name="show_log" value="true" />
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.pp.recipe_zheng17"/>
 <has_text_matching expression="n_top_genes=1000"/>
 <has_text_matching expression="log=True"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"/>
+<output name="anndata_out" ftype="h5ad">
-</test>
+<assert_contents>
-<test expect_num_outputs="2">
+<has_h5_keys keys="obs/n_counts_all"/>
-<!-- test 3 -->
+<has_h5_keys keys="var/n_counts,var/mean,var/std"/>
-<param name="adata" value="paul15_subsample.h5ad" />
+<has_h5_keys keys="uns/log1p"/>
+</assert_contents>
+</output>
+</test>
+<!-- test 3 -->
+<test expect_num_outputs="2">
+<param name="adata" value="paul15_subsample.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.recipe_weinreb17"/>
-<param name="log" value="True"/>
+</conditional>
-<param name="mean_threshold" value="0.01"/>
+<section name="advanced_common">
-<param name="cv_threshold" value="2.0"/>
+<param name="show_log" value="true"/>
-<param name="n_pcs" value="50"/>
-<param name="svd_solver" value="randomized"/>
-<param name="random_state" value="0"/>
-</conditional>
-<section name="advanced_common">
-<param name="show_log" value="true" />
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.pp.recipe_weinreb17"/>
 <has_text_matching expression="log=True"/>
 <has_text_matching expression="mean_threshold=0.01"/>
-<has_text_matching expression="cv_threshold=2.0"/>
+<has_text_matching expression="cv_threshold=2"/>
 <has_text_matching expression="n_pcs=50"/>
 <has_text_matching expression="svd_solver='randomized'"/>
 <has_text_matching expression="random_state=0"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/>
+<output name="anndata_out" ftype="h5ad">
-</test>
+<assert_contents>
-<test expect_num_outputs="2">
+<has_h5_keys keys="uns/log1p"/>
-<!-- test 4 -->
+</assert_contents>
-<param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" />
+</output>
+</test>
+<!-- test 4 -->
+<test expect_num_outputs="2">
+<param name="adata" value="pp.recipe_zheng17.random-randint.h5ad"/>
 <conditional name="method">
 <param name="method" value="pp.recipe_seurat"/>
-<param name="log" value="True"/>
+</conditional>
-</conditional>
+<section name="advanced_common">
-<section name="advanced_common">
+<param name="show_log" value="true"/>
-<param name="show_log" value="true" />
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="sc.pp.recipe_seurat"/>
 <has_text_matching expression="log=True"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/>
+<output name="anndata_out" ftype="h5ad">
-</test>
+<assert_contents>
-<test expect_num_outputs="2">
+<has_h5_keys keys="obs/n_genes"/>
-<!-- test 5 -->
+<has_h5_keys keys="var/n_cells"/>
-<param name="adata" value="krumsiek11.h5ad" />
+<has_h5_keys keys="uns/log1p"/>
+</assert_contents>
+</output>
+</test>
+<!-- test 5 -->
+<test expect_num_outputs="2">
+<param name="adata" value="krumsiek11.h5ad"/>
 <conditional name="method">
 <param name="method" value="external.pp.magic"/>
-<param name="name_list" value="all_genes"/>
 <param name="t" value="-1"/>
 <param name="n_pca" value="5"/>
 </conditional>
 <section name="advanced_common">
-<param name="show_log" value="true" />
+<param name="show_log" value="true"/>
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="external.pp.magic"/>
 <has_text_matching expression="name_list='all_genes'"/>
 <has_text_matching expression="t='auto'"/>
 <has_text_matching expression="n_pca=5"/>
-</assert_contents>
+<has_text_matching expression="stats before magic: min= -0.01630 max= 1.01060 mean= 0.28644"/>
-</output>
+<has_text_matching expression="stats after magic: min= -0.00857 max= 1.00546 mean= 0.28645"/>
-<output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+</assert_contents>
-</test>
+</output>
-<test expect_num_outputs="2">
+<output name="anndata_out" ftype="h5ad">
-<!-- test 6 -->
+<assert_contents>
-<param name="adata" value="krumsiek11.h5ad" />
+<has_h5_keys keys="obs/cell_type"/>
+</assert_contents>
+</output>
+</test>
+<!-- test 6 -->
+<test expect_num_outputs="2">
+<param name="adata" value="krumsiek11.h5ad"/>
 <conditional name="method">
 <param name="method" value="external.pp.magic"/>
 <param name="name_list" value="pca_only"/>
 <param name="t" value="3"/>
 <param name="n_pca" value="5"/>
 </conditional>
 <section name="advanced_common">
-<param name="show_log" value="true" />
+<param name="show_log" value="true"/>
 </section>
 <output name="hidden_output">
 <assert_contents>
 <has_text_matching expression="external.pp.magic"/>
 <has_text_matching expression="name_list='pca_only'"/>
 <has_text_matching expression="t=3"/>
 <has_text_matching expression="n_pca=5"/>
 </assert_contents>
 </output>
-<output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+<output name="anndata_out" ftype="h5ad">
+<assert_contents>
+<has_h5_keys keys="obsm/X_magic"/>
+</assert_contents>
+</output>
 <assert_stdout>
 <has_text text="X_magic"/>
 </assert_stdout>
 </test>
 </tests>
 <help><![CDATA[
-Normalize total counts per cell (`pp.normalize_per_cell`)
+Normalize total counts per cell (`pp.normalize_total`)
-=========================================================
+======================================================
-Normalize each cell by total counts over all genes, so that every cell has
+Normalize each cell by total counts over all genes, so that every cell has the same total count after normalization. If choosing target_sum=1e6, this is CPM normalization.
-the same total count after normalization.
 Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.
 More details on the `scanpy documentation
-<https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.normalize_per_cell.html>`__
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.normalize_total.html>`__
 Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`)
 ==================================================================================================================
 Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`)
 ============================================================================================================
 MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold.
 The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018).
 - Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability.
 - Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements.
 More details on the `scanpy documentation

Mercurial > repos > iuc > scanpy_normalize

comparison normalize.xml @ 17:5dada6f76047 draft