view normalize.xml @ 12:94c19fb1281c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 6fc3a3aa54d7ece1fd8bfffe62ad7e2b620539e0
author iuc
date Fri, 17 Nov 2023 09:16:03 +0000
parents c9cb76cf8d6c
children 7b9fafe32c86
line wrap: on
line source

<tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
    <description>with scanpy</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
@CMD@
      ]]></command>
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == "pp.normalize_total"
sc.pp.normalize_total(
    adata,
    #if str($method.target_sum) != ''
    target_sum=$method.target_sum,
    #end if
    exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed,
    #if $method.exclude_highly_expressed.exclude_highly_expressed == "True"
    max_fraction=$method.exclude_highly_expressed.max_fraction,
    #end if
    #if $method.key_added
    key_added='$method.key_added',
    #end if
    #if $method.layers
        #if str($method.layers) != 'all'
    layers[str(x.strip()) for x in str($method.layers).split(',')],
        #else
    layers='$method.layers',
        #end if
    #end if
    #if str($method.layer_norm) != "None"
        layer_norm='$method.layer_norm',
    #end if
    inplace=True)

#else if $method.method == "pp.recipe_zheng17"
sc.pp.recipe_zheng17(
    adata=adata,
    n_top_genes=$method.n_top_genes,
    log=$method.log,
    plot=False,
    copy=False)

#else if $method.method == "pp.recipe_weinreb17"
sc.pp.recipe_weinreb17(
    adata=adata,
    log=$method.log,
    mean_threshold=$method.mean_threshold,
    cv_threshold=$method.cv_threshold,
    n_pcs=$method.n_pcs,
    svd_solver='$method.svd_solver',
    random_state=$method.random_state,
    copy=False)

#else if $method.method == "pp.recipe_seurat"
sc.pp.recipe_seurat(
    adata=adata,
    log=$method.log,
    plot=False,
    copy=False)

#end if

@CMD_anndata_write_outputs@

]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="inputs_anndata"/>
        <conditional name="method">
            <param argument="method" type="select" label="Method used for normalization">
                <option value="pp.normalize_total">Normalize counts per cell, using 'pp.normalize_total'</option>
                <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using 'pp.recipe_zheng17'</option>
                <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option>
                <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option>
            </param>
            <when value="pp.normalize_total">
                <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
                <conditional name="exclude_highly_expressed">
                    <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum">
                        <option value="True">Yes</option>
                        <option value="False" selected="true">No</option>
                    </param>
                    <when value="True">
                        <param argument="max_fraction" type="float" value="0.05" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
                    </when>
                    <when value="False"/>
                </conditional>
                <param argument="key_added" type="text" value="" optional="true" label="Name of the field in 'adata.obs' where the normalization factor is stored" help="">
                    <expand macro="sanitize_query" />
                </param>
                <param argument="layers" type="text" value="" optional="true" label="List of layers to normalize" help="'All' will normalize all layers. The list should be comma-separated.">
                    <expand macro="sanitize_query" />
                </param>
                <param argument="layer_norm" type="select" label="How to normalize layers?">
                    <option value="None">None: after normalization, for each layer in layers each cell has a total count equal to the median of the median of the total counts (cells) before normalization of the layer.</option>
                    <option value="after">After: for each layer in layers each cell has a total count equal to target_sum.</option>
                    <option value="X">X: for each layer in layers each cell has a total count equal to the median of total counts for observations (cells) of adata.X before normalization.</option>
                </param>
            </when>
            <when value="pp.recipe_zheng17">
                <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/>
                <expand macro="param_log"/>
            </when>
            <when value="pp.recipe_weinreb17">
                <expand macro="param_log"/>
                <param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/>
                <param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/>
                <param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal component" help=""/>
                <expand macro="svd_solver"/>
                <expand macro="pca_random_state"/>
            </when>
            <when value="pp.recipe_seurat">
                <expand macro="param_log"/>
            </when>
        </conditional>
        <expand macro="inputs_common_advanced"/>
    </inputs>
    <outputs>
        <expand macro="anndata_outputs"/>
    </outputs>
    <tests>
        <test expect_num_outputs="2">
            <!-- test 1 -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.normalize_total"/>
                <conditional name="exclude_highly_expressed">
                    <param name="exclude_highly_expressed" value="False"/>
                </conditional>
                <param name="key_added" value="n_counts"/>
                <param name="layers" value="all"/>
                <param name="layer_norm" value="None"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.normalize_total"/>
                    <has_text_matching expression="exclude_highly_expressed=False"/>
                    <has_text_matching expression="key_added='n_counts'"/>
                    <has_text_matching expression="layers='all'"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test expect_num_outputs="2">
            <!-- test 2 -->
            <param name="adata" value="random-randint.h5ad"/>
            <conditional name="method">
                <param name="method" value="pp.recipe_zheng17"/>
                <param name="n_top_genes" value="1000"/>
                <param name="log" value="True"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.recipe_zheng17"/>
                    <has_text_matching expression="n_top_genes=1000"/>
                    <has_text_matching expression="log=True"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"/>
        </test>
        <test expect_num_outputs="2">
            <!-- test 3 -->
            <param name="adata" value="paul15_subsample.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.recipe_weinreb17"/>
                <param name="log" value="True"/>
                <param name="mean_threshold" value="0.01"/>
                <param name="cv_threshold" value="2.0"/>
                <param name="n_pcs" value="50"/>
                <param name="svd_solver" value="randomized"/>
                <param name="random_state" value="0"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.recipe_weinreb17"/>
                    <has_text_matching expression="log=True"/>
                    <has_text_matching expression="mean_threshold=0.01"/>
                    <has_text_matching expression="cv_threshold=2.0"/>
                    <has_text_matching expression="n_pcs=50"/>
                    <has_text_matching expression="svd_solver='randomized'"/>
                    <has_text_matching expression="random_state=0"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test expect_num_outputs="2">
            <!-- test 4 -->
            <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.recipe_seurat"/>
                <param name="log" value="True"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.recipe_seurat"/>
                    <has_text_matching expression="log=True"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/>
        </test>
    </tests>
    <help><![CDATA[
Normalize total counts per cell (`pp.normalize_per_cell`)
=========================================================

Normalize each cell by total counts over all genes, so that every cell has
the same total count after normalization.

Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.normalize_per_cell.html>`__


Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`)
==================================================================================================================

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

The recipe runs the following steps:

- only consider genes with more than 1 count
- normalize with total UMI count per cell
- select highly-variable genes
- subset the genes
- renormalize after filtering
- log transform (if needed)
- scale to unit variance and shift to zero mean

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_zheng17.html>`__


Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`)
==============================================================================

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_weinreb17.html>`__


Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`)
==========================================================================

This uses a particular preprocessing.

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_seurat.html>`__

    ]]></help>
    <expand macro="citations"/>
</tool>