view normalize.xml @ 5:ec32793ce1dd draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 6b5d0d6f038ebd0fae5dbca02ada51555518ed85"
author iuc
date Mon, 10 Feb 2020 05:28:35 -0500
parents d7ea9b5e6df1
children 3bcd5445cd7a
line wrap: on
line source

<tool id="scanpy_normalize" name="Normalize" version="@galaxy_version@" profile="@profile@">
    <description>with scanpy</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
@CMD@
      ]]></command>
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == "pp.normalize_total"
sc.pp.normalize_total(
    adata,
    #if str($method.target_sum)!= ''
    target_sum=$method.target_sum,
    #end if
    exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed,
    #if $method.exclude_highly_expressed.exclude_highly_expressed == "True"
    max_fraction=$method.exclude_highly_expressed.max_fraction,
    #end if
    #if str($method.key_added) != ''
    key_added='$method.key_added',
    #end if
    #if str($method.layers) != ''
        #if str($method.layers) != 'all'
    layers[str(x.strip()) for x in str($method.layers).split(',')],
        #else
    layers='$method.layers',
        #end if
    #end if
    #if str($method.layer_norm) != "None"
        layer_norm='$method.layer_norm',
    #end if
    inplace=True)

#else if $method.method == "pp.recipe_zheng17"
sc.pp.recipe_zheng17(
    adata=adata,
    n_top_genes=$method.n_top_genes,
    log=$method.log,
    plot=False,
    copy=False)

#else if $method.method == "pp.recipe_weinreb17"
sc.pp.recipe_weinreb17(
    adata=adata,
    log=$method.log,
    mean_threshold=$method.mean_threshold,
    cv_threshold=$method.cv_threshold,
    n_pcs=$method.n_pcs,
    svd_solver='$method.svd_solver',
    random_state=$method.random_state,
    copy=False)

#else if $method.method == "pp.recipe_seurat"
sc.pp.recipe_seurat(
    adata=adata,
    log=$method.log,
    plot=False,
    copy=False)

#end if

@CMD_anndata_write_outputs@

]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="inputs_anndata"/>
        <conditional name="method">
            <param argument="method" type="select" label="Method used for normalization">
                <option value="pp.normalize_total">Normalize counts per cell, using 'pp.normalize_total'</option>
                <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using 'pp.recipe_zheng17'</option>
                <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option>
                <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option>
            </param>
            <when value="pp.normalize_total">
                <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
                <conditional name="exclude_highly_expressed">
                    <param argument="exclude_highly_expressed" type="select" label="Exclude (very) highly expressed genes for the computation of the normalization factor (size factor) for each cell" help=" A gene is considered highly expressed, if it has more than max_fraction of the total counts in at least one cell. The not-excluded genes will sum up to target_sum">
                        <option value="True">Yes</option>
                        <option value="False" selected="true">No</option>
                    </param>
                    <when value="True">
                        <param argument="max_fraction" type="float" value="0.05" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
                    </when>
                    <when value="False"/>
                </conditional>
                <param argument="key_added" type="text" value="" optional="true" label="Name of the field in 'adata.obs' where the normalization factor is stored" help=""/>
                <param argument="layers" type="text" value="" optional="true" label="List of layers to normalize" help="'All' will normalize all layers. The list should be comma-separated."/>
                <param argument="layer_norm" type="select" label="How to normalize layers?">
                    <option value="None">None: after normalization, for each layer in layers each cell has a total count equal to the median of the median of the total counts (cells) before normalization of the layer.</option>
                    <option value="after">After: for each layer in layers each cell has a total count equal to target_sum.</option>
                    <option value="X">X: for each layer in layers each cell has a total count equal to the median of total counts for observations (cells) of adata.X before normalization.</option>
                </param>
            </when>
            <when value="pp.recipe_zheng17">
                <param argument="n_top_genes" type="integer" min="0" value="1000" label="Number of genes to keep" help=""/>
                <expand macro="param_log"/>
            </when>
            <when value="pp.recipe_weinreb17">
                <expand macro="param_log"/>
                <param argument="mean_threshold" type="float" value="0.01" label="Mean threshold" help=""/>
                <param argument="cv_threshold" type="float" value="2" label="CV threshold" help=""/>
                <param argument="n_pcs" type="integer" min="0" value="50" label="Number of principal component" help=""/>
                <expand macro="svd_solver"/>
                <expand macro="pca_random_state"/>
            </when>
            <when value="pp.recipe_seurat">
                <expand macro="param_log"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <expand macro="anndata_outputs"/>
    </outputs>
    <tests>
        <test>
            <!-- test 1 -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.normalize_total"/>
                <conditional name="exclude_highly_expressed">
                    <param name="exclude_highly_expressed" value="False"/>
                </conditional>
                <param name="key_added" value="n_counts"/>
                <param name="layers" value="all"/>
                <param name="layer_norm" value="None"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="sc.pp.normalize_total"/>
                <has_text_matching expression="exclude_highly_expressed=False"/>
                <has_text_matching expression="key_added='n_counts'"/>
                <has_text_matching expression="layers='all'"/>
            </assert_stdout>
            <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test>
            <!-- test 2 -->
            <param name="adata" value="random-randint.h5ad"/>
            <conditional name="method">
                <param name="method" value="pp.recipe_zheng17"/>
                <param name="n_top_genes" value="1000"/>
                <param name="log" value="True"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="sc.pp.recipe_zheng17"/>
                <has_text_matching expression="n_top_genes=1000"/>
                <has_text_matching expression="log=True"/>
            </assert_stdout>
            <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test>
            <!-- test 3 -->
            <param name="adata" value="paul15_subsample.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.recipe_weinreb17"/>
                <param name="log" value="True"/>
                <param name="mean_threshold" value="0.01"/>
                <param name="cv_threshold" value="2.0"/>
                <param name="n_pcs" value="50"/>
                <param name="svd_solver" value="randomized"/>
                <param name="random_state" value="0"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="sc.pp.recipe_weinreb17"/>
                <has_text_matching expression="log=True"/>
                <has_text_matching expression="mean_threshold=0.01"/>
                <has_text_matching expression="cv_threshold=2.0"/>
                <has_text_matching expression="n_pcs=50"/>
                <has_text_matching expression="svd_solver='randomized'"/>
                <has_text_matching expression="random_state=0"/>
            </assert_stdout>
            <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <test>
            <!-- test 4 -->
            <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.recipe_seurat"/>
                <param name="log" value="True"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="sc.pp.recipe_seurat"/>
                <has_text_matching expression="log=True"/>
            </assert_stdout>
            <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        
    </tests>
    <help><![CDATA[
Normalize total counts per cell (`pp.normalize_per_cell`)
=========================================================

Normalize each cell by total counts over all genes, so that every cell has
the same total count after normalization.

Similar functions are used, for example, by Seurat, Cell Ranger or SPRING.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.normalize_per_cell.html>`__


Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`)
==================================================================================================================

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

The recipe runs the following steps:

- only consider genes with more than 1 count
- normalize with total UMI count per cell
- select highly-variable genes
- subset the genes
- renormalize after filtering
- log transform (if needed)
- scale to unit variance and shift to zero mean

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_zheng17.html>`__


Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`)
==============================================================================

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_weinreb17.html>`__


Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`)
==========================================================================

This uses a particular preprocessing.

Expects non-logarithmized data. If using logarithmized data, pass `log=False`.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_seurat.html>`__

    ]]></help>
    <expand macro="citations"/>
</tool>