Mercurial > repos > iuc > scanpy_normalize

diff normalize.xml @ 0:ed64c90a9b93 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
author: iuc
date: Mon, 04 Mar 2019 10:16:12 -0500
children: a9f14e2d1655
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/normalize.xml	Mon Mar 04 10:16:12 2019 -0500
@@ -0,0 +1,380 @@
+<tool id="scanpy_normalize" name="Normalize with scanpy" version="@galaxy_version@">
+    <description></description>
+    <macros> <!-- the macros expanded in this tool and the @...@ tokens are presumably defined in macros.xml; verify there -->
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+@CMD@
+      ]]></command> <!-- the whole command line comes from the @CMD@ token; job failure is detected from the exit code -->
+    <configfiles> <!-- Cheetah template for the Python script; optional floats are tested with str(...) != '' so that an explicit 0 is still passed on -->
+        <configfile name="script_file"><![CDATA[
+@CMD_imports@
+@CMD_read_inputs@
+
+#if $method.method == "pp.normalize_per_cell"
+sc.pp.normalize_per_cell(
+    data=adata,
+    #if str($method.counts_per_cell_after) != ''
+    counts_per_cell_after=$method.counts_per_cell_after,
+    #end if
+    #if $method.counts_per_cell
+    counts_per_cell=np.loadtxt('$method.counts_per_cell'),
+    #end if
+    key_n_counts='$method.key_n_counts',
+    copy=False)
+adata.obs.to_csv('$anndata_obs', sep='\t')
+#elif $method.method == "pp.recipe_zheng17"
+sc.pp.recipe_zheng17(
+    adata=adata,
+    n_top_genes=$method.n_top_genes,
+    log=$method.log,
+    plot=False,
+    copy=False)
+#elif $method.method == "pp.recipe_weinreb17"
+sc.pp.recipe_weinreb17(
+    adata=adata,
+    log=$method.log,
+    mean_threshold=$method.mean_threshold,
+    cv_threshold=$method.cv_threshold,
+    n_pcs=$method.n_pcs,
+    svd_solver='$method.svd_solver',
+    random_state=$method.random_state,
+    copy=False)
+#elif $method.method == "pp.recipe_seurat"
+sc.pp.recipe_seurat(
+    adata=adata,
+    log=$method.log,
+    plot=False,
+    copy=False)
+#elif $method.method == "pp.log1p"
+sc.pp.log1p(
+    data=adata,
+    copy=False)
+#elif $method.method == "pp.scale"
+sc.pp.scale(
+    data=adata,
+    zero_center=$method.zero_center,
+    #if str($method.max_value) != ''
+    max_value=$method.max_value,
+    #end if
+    copy=False)
+#elif $method.method == "pp.sqrt"
+sc.pp.sqrt(
+    data=adata,
+    copy=False)
+#elif $method.method == "pp.downsample_counts"
+sc.pp.downsample_counts(
+    adata=adata,
+    target_counts=$method.target_counts,
+    random_state=$method.random_state,
+    copy=False)
+#end if
+
+@CMD_anndata_write_outputs@
+
+]]></configfile>
+    </configfiles>
+    <inputs> <!-- each <when> of the "method" conditional supplies the parameters read by the matching branch of the script template -->
+        <expand macro="inputs_anndata"/>
+        <conditional name="method">
+            <param argument="method" type="select" label="Method used for normalization">
+                <option value="pp.normalize_per_cell">Normalize total counts per cell, using `pp.normalize_per_cell`</option>
+                <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using `pp.recipe_zheng17`</option>
+                <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using `pp.recipe_weinreb17`</option>
+                <option value="pp.recipe_seurat">Normalization and filtering as of Seurat, Satija et al. (2015), using `pp.recipe_seurat`</option>
+                <option value="pp.log1p">Logarithmize the data matrix, using `pp.log1p`</option>
+                <option value="pp.scale">Scale data to unit variance and zero mean, using `pp.scale`</option>
+                <option value="pp.sqrt">Square root the data matrix, using `pp.sqrt`</option>
+                <option value="pp.downsample_counts">Downsample counts, using `pp.downsample_counts`</option>
+            </param>
+            <when value="pp.normalize_per_cell">
+                <param argument="counts_per_cell_after" type="float" value="" optional="true" label="Counts per cell after" help="If not provided, after normalization, each cell has a total count equal to the median of the *counts_per_cell* before normalization."/>
+                <param argument="counts_per_cell" type="data" format="tabular,txt" optional="true" label="Precomputed counts per cell" help=""/>
+                <param argument="key_n_counts" type="text" value="n_counts" label="Name of the field in `adata.obs` where the total counts per cell will be stored" help=""/>
+            </when>
+            <when value="pp.recipe_zheng17">
+                <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/>
+                <expand macro="param_log"/>
+            </when>
+            <when value="pp.recipe_weinreb17">
+                <expand macro="param_log"/>
+                <param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/>
+                <param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/>
+                <param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal components" help=""/>
+                <expand macro="svd_solver"/>
+                <expand macro="pca_random_state"/>
+            </when>
+            <when value="pp.recipe_seurat">
+                <expand macro="param_log"/>
+            </when>
+            <when value="pp.log1p"/>
+            <when value="pp.scale">
+                <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
+                    label="Zero center?" help="If not, it omits zero-centering variables, which allows to handle sparse input efficiently."/>
+                <param argument="max_value" type="float" value="" optional="true" label="Maximum value"
+                    help="Clip (truncate) to this value after scaling. If not set, it does not clip."/>
+            </when>
+            <when value="pp.sqrt"/>
+            <when value="pp.downsample_counts">
+                <param argument="target_counts" type="integer" min="0" value="20000"
+                    label="Target number of counts for downsampling" help="Cells with more counts than 'target_counts' will be downsampled to have 'target_counts' counts."/>
+                <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling" help=""/>
+            </when>
+        </conditional>
+        <expand macro="anndata_output_format"/>
+    </inputs>
+    <outputs> <!-- anndata_obs is written only by the pp.normalize_per_cell branch of the script, hence its filter -->
+        <expand macro="anndata_outputs"/>
+        <data name="anndata_obs" format="tabular" label="${tool.name} on ${on_string}: Annotation of observations">
+            <filter>method['method'] == 'pp.normalize_per_cell'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test> <!-- pp.normalize_per_cell with counts_per_cell_after, a precomputed counts_per_cell file and key_n_counts all set; also compares the obs table -->
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="krumsiek11.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="pp.normalize_per_cell"/>
+                <param name="counts_per_cell_after" value="2"/>
+                <param name="counts_per_cell" value="krumsiek11_counts_per_cell"/>
+                <param name="key_n_counts" value="n_counts"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad"/>
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.normalize_per_cell"/>
+                <has_text_matching expression="counts_per_cell_after=2.0"/>
+                <has_text_matching expression="counts_per_cell=np.loadtxt"/>
+                <has_text_matching expression="key_n_counts='n_counts'"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="pp.normalize_per_cell.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+            <output name="anndata_obs" file="pp.normalize_per_cell.obs.krumsiek11.tabular"/>
+        </test>
+        <test> <!-- pp.recipe_zheng17 keeping 1000 genes with log=True -->
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="random-randint.h5ad"/>
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="pp.recipe_zheng17"/>
+                <param name="n_top_genes" value="1000"/>
+                <param name="log" value="True"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad"/>
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.recipe_zheng17"/>
+                <has_text_matching expression="n_top_genes=1000"/>
+                <has_text_matching expression="log=True"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5" compare="sim_size"/>
+        </test>
+        <test> <!-- pp.recipe_weinreb17 with every parameter given explicitly -->
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="paul15_subsample.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="pp.recipe_weinreb17"/>
+                <param name="log" value="True"/>
+                <param name="mean_threshold" value="0.01"/>
+                <param name="cv_threshold" value="2.0"/>
+                <param name="n_pcs" value="50"/>
+                <param name="svd_solver" value="randomized"/>
+                <param name="random_state" value="0"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad" />
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.recipe_weinreb17"/>
+                <has_text_matching expression="log=True"/>
+                <has_text_matching expression="mean_threshold=0.01"/>
+                <has_text_matching expression="cv_threshold=2.0"/>
+                <has_text_matching expression="n_pcs=50"/>
+                <has_text_matching expression="svd_solver='randomized'"/>
+                <has_text_matching expression="random_state=0"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="pp.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5" compare="sim_size"/>
+        </test>
+        <test> <!-- pp.recipe_seurat; its input is the expected output file of the pp.recipe_zheng17 test -->
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="pp.recipe_seurat"/>
+                <param name="log" value="True"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad"/>
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.recipe_seurat"/>
+                <has_text_matching expression="log=True"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5" compare="sim_size"/>
+        </test>
+        <test> <!-- pp.log1p, which takes no method parameters -->
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="krumsiek11.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="pp.log1p"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad" />
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.log1p"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="pp.log1p.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+        </test>
+        <test> <!-- pp.scale with zero centering and max_value left unset -->
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="krumsiek11.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="pp.scale"/>
+                <param name="zero_center" value="true"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad" />
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.scale"/>
+                <has_text_matching expression="zero_center=True"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="pp.scale.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+        </test>
+        <test> <!-- pp.scale with zero centering and clipping at max_value=10 -->
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="krumsiek11.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="pp.scale"/>
+                <param name="zero_center" value="true"/>
+                <param name="max_value" value="10"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad" />
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.scale"/>
+                <has_text_matching expression="zero_center=True"/>
+                <has_text_matching expression="max_value=10.0"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="pp.scale_max_value.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+        </test>
+        <test> <!-- pp.sqrt, which takes no method parameters -->
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="krumsiek11.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="pp.sqrt"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad" />
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.sqrt"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" file="pp.sqrt.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
+        </test>
+        <test> <!-- pp.downsample_counts; the output is only checked for the X, obs and var keys, not against a reference file -->
+            <conditional name="input">
+                <param name="format" value="h5ad" />
+                <param name="adata" value="random-randint.h5ad" />
+            </conditional>
+            <conditional name="method">
+                <param name="method" value="pp.downsample_counts"/>
+                <param name="target_counts" value="20000"/>
+                <param name="random_state" value="0"/>
+            </conditional>
+            <param name="anndata_output_format" value="h5ad" />
+            <assert_stdout>
+                <has_text_matching expression="sc.pp.downsample_counts"/>
+                <has_text_matching expression="target_counts=20000"/>
+                <has_text_matching expression="random_state=0"/>
+            </assert_stdout>
+            <output name="anndata_out_h5ad" ftype="h5">
+                <assert_contents>
+                    <has_h5_keys keys="X, obs, var" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Normalize total counts per cell (`pp.normalize_per_cell`)
+=========================================================
+
+Normalize each cell by total counts over all genes, so that every cell has
+the same total count after normalization.
+
+Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.normalize_per_cell.html>`__
+
+
+Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`)
+==================================================================================================================
+
+Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
+
+The recipe runs the following steps:
+
+- only consider genes with more than 1 count
+- normalize with total UMI count per cell
+- select highly-variable genes
+- subset the genes
+- renormalize after filtering
+- log transform (if needed)
+- scale to unit variance and shift to zero mean
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_zheng17.html>`__
+
+
+Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`)
+==============================================================================
+
+Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_weinreb17.html>`__
+
+
+Normalization and filtering as of Seurat, Satija et al. (2015) (`pp.recipe_seurat`)
+===================================================================================
+
+This uses a particular preprocessing.
+
+Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.recipe_seurat.html>`__
+
+Logarithmize the data matrix (`pp.log1p`)
+=========================================
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.log1p.html>`__
+
+Scale data to unit variance and zero mean (`pp.scale`)
+======================================================
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.scale.html>`__
+
+Compute the square root of the data matrix (`pp.sqrt`)
+======================================================
+
+`X = sqrt(X)`
+
+Downsample counts (`pp.downsample_counts`)
+==========================================
+
+Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This
+has been implemented by M. D. Luecken.
+
+More details on the `scanpy documentation
+<https://scanpy.readthedocs.io/en/latest/api/scanpy.pp.downsample_counts.html>`__
+
+    ]]></help>
+    <expand macro="citations"/> <!-- citation entries come from a shared macro, presumably defined in macros.xml; verify there -->
+</tool>
author	iuc
date	Mon, 04 Mar 2019 10:16:12 -0500
parents
children	a9f14e2d1655