view remove_confounders.xml @ 16:a39040ae0a38 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 0ef475df22ba51263bb3d1db9f8797c723db35b0
author iuc
date Tue, 20 Aug 2024 09:51:42 +0000
parents c65fce500915
children b70219da4a96
line wrap: on
line source

<tool id="scanpy_remove_confounders" name="Remove confounders" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
    <description>with scanpy</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <!-- Command line executed for the job. The whole invocation comes from the CMD macro token
         (written between at-signs below), which is not defined in this file; presumably it is
         supplied by macros.xml and runs the generated script_file (confirm there).
         With detect_errors="exit_code" the job state is derived from the process exit code. -->
    <command detect_errors="exit_code"><![CDATA[
@CMD@
      ]]></command>
    <configfiles>
        <configfile name="script_file"><![CDATA[
## Cheetah template rendered into the Python script that performs the correction.
## Lines starting with a double hash are Cheetah comments and are not written to the script.
## Imports, loading of the input into the adata variable and writing of the outputs come
## from the CMD_* macro tokens, which are not defined in this file.
@CMD_imports@
@CMD_read_inputs@

#if $method.method == "pp.regress_out"
## Turn the comma-separated text field into a Python list of stripped keys.
    #set $keys = [str(x.strip()) for x in str($method.keys).split(',')]
sc.pp.regress_out(
   adata=adata,
   keys=$keys,
   copy=False)

#else if $method.method == "pp.mnn_correct"
## os is imported locally (a repeated import is harmless) to read GALAXY_SLOTS below.
import os
## Load every extra dataset into its own adata_<index> variable.
## NOTE(review): assumes the name ad (anndata) is provided by the imports token; confirm.
    #for i, filepath in enumerate($method.extra_adata)
adata_$i = ad.read('$filepath')
    #end for

## NOTE(review): the help section of this tool links to scanpy.external.pp.mnn_correct;
## confirm that sc.pp.mnn_correct exists in the pinned scanpy version and whether its
## return value has to be assigned back to adata before the outputs are written.
sc.pp.mnn_correct(
    adata,
    #for i, filepath in enumerate($method.extra_adata)
    adata_$i,
    #end for
    #if $method.var_subset
    #set $var_subset=([x.strip() for x in str($method.var_subset).split(',')])
    var_subset=$var_subset,
    #end if
    batch_key='$method.batch_key',
    index_unique='$method.index_unique',
    #if $method.batch_categories
    #set $batch_categories=([x.strip() for x in str($method.batch_categories).split(',')])
    batch_categories=$batch_categories,
    #end if
    k=$method.k,
    sigma=$method.sigma,
    cos_norm_in=$method.cos_norm_in,
    cos_norm_out=$method.cos_norm_out,
## svd_dim is optional: emit it only when a value was given, otherwise the rendered
## line would be an incomplete keyword argument.
    #if str($method.svd_dim) != ''
    svd_dim=$method.svd_dim,
    #end if
    var_adj=$method.var_adj,
    compute_angle=$method.compute_angle,
## mnn_order is optional: leave it out when empty so the library default applies.
    #if $method.mnn_order
    mnn_order='$method.mnn_order',
    #end if
    svd_mode='$method.svd_mode',
    do_concatenate=True,
    save_raw=True,
## The script is Python, so the slot count is read from the environment rather than
## through shell parameter expansion; 4 is kept as the fallback.
    n_jobs=int(os.environ.get('GALAXY_SLOTS', 4)))

#else if $method.method == "pp.combat"
sc.pp.combat(
    adata,
    key='$method.key',
    inplace=True)

#end if

@CMD_anndata_write_outputs@
]]></configfile>
    </configfiles>
    <!-- Tool form: the input dataset (from the inputs_anndata macro), the correction method with
         its method-specific parameters, and the common advanced options (from the
         inputs_common_advanced macro). The parameters inside the "method" conditional are read
         in the script template as $method.<name>. -->
    <inputs>
        <expand macro="inputs_anndata"/>
        <conditional name="method">
            <param argument="method" type="select" label="Method used to remove confounders">
                <option value="pp.regress_out">Regress out unwanted sources of variation, using 'pp.regress_out'</option>
                <option value="pp.mnn_correct">Correct batch effects by matching mutual nearest neighbors, using 'pp.mnn_correct'</option>
                <option value="pp.combat">Correct batch effects with ComBat function, using 'pp.combat'</option>
            </param>
            <when value="pp.regress_out">
                <param argument="keys" type="text" value="" label="Keys for observation annotation on which to regress on" help="Keys separated by a comma">
                    <expand macro="sanitize_query" />
                </param>
            </when>
            <when value="pp.mnn_correct">
                <param name="extra_adata" type="data" multiple="true" optional="true" format="h5ad" label="Extra annotated data matrix" help="They should have same number of variables."/>
                <param argument="var_subset" type="text" value="" optional="true" label="The subset of vars to be used when performing MNN correction" help="List of comma-separated key from '.var_names'. If not set, all vars are used">
                    <expand macro="sanitize_query" />
                </param>
                <param argument="batch_key" type="text" value="batch" label="Batch key for the concatenate">
                    <expand macro="sanitize_query" />
                </param>
                <!-- NOTE(review): the help below says the value can be left empty, but the select
                     offers no empty option; confirm the intended behaviour. -->
                <param name="index_unique" type="select" label="Separator to join the existing index names with the batch category" help="Leave it empty to keep existing indices">
                    <option value="-">-</option>
                    <option value="_">_</option>
                    <option value=" "> </option>
                    <option value="/">/</option>
                </param>
                <param argument="batch_categories" type="text" value="" optional="true" label="Batch categories for the concatenate" help="List of comma-separated key">
                    <expand macro="sanitize_query" />
                </param>
                <param argument="k" type="integer" value="20" label="Number of mutual nearest neighbors"/>
                <param argument="sigma" type="float" value="1" label="The bandwidth of the Gaussian smoothing kernel used to compute the correction vectors"/>
                <param argument="cos_norm_in" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed on the input data prior to calculating distances between cells?"/>
                <param argument="cos_norm_out" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed prior to computing corrected expression values?"/>
                <param argument="svd_dim" type="integer" value="" optional="true" label="Number of dimensions to use for summarizing biological substructure within each batch" help="If not set, biological components will not be removed from the correction vectors."/>
                <param argument="var_adj" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Adjust variance of the correction vectors?" help="This step takes most computing time."/>
                <param argument="compute_angle" type="boolean" truevalue="True" falsevalue="False" checked="false" label="compute the angle between each cell’s correction vector and the biological subspace of the reference batch?"/>
                <!-- NOTE(review): the help describes a comma-separated list, while the script template
                     passes this value as one quoted string; confirm the expected format. -->
                <param argument="mnn_order" type="text" value="" optional="true" label="The order in which batches are to be corrected" help="List of comma-separated key. If not set, datas are corrected sequentially">
                    <expand macro="sanitize_query" />
                </param>
                <param name="svd_mode" type="select" label="SVD mode">
                    <option value="svd">svd: SVD using a non-randomized SVD-via-ID algorithm</option>
                    <option value="rsvd" selected="true">rsvd: SVD using a randomized SVD-via-ID algorithm</option>
                    <option value="irlb">irlb: truncated SVD by implicitly restarted Lanczos bidiagonalization</option>
                </param>
            </when>
            <when value="pp.combat">
                <param argument="key" type="text" value="batch" label="Key to a categorical annotation from adata.obs that will be used for batch effect removal">
                    <expand macro="sanitize_query" />
                </param>
            </when>
        </conditional>
        <expand macro="inputs_common_advanced"/>
    </inputs>
    <!-- Output datasets are declared by the anndata_outputs macro, which is not defined in this file.
         The tests in this file refer to outputs named hidden_output and anndata_out. -->
    <outputs>
        <expand macro="anndata_outputs"/>
    </outputs>
    <!-- Functional tests. Each active test enables show_log, checks that hidden_output contains the
         expected call text (presumably the logged script; confirm in the macros), and compares the
         AnnData output with a reference file by size (compare="sim_size"). -->
    <tests>
        <test expect_num_outputs="2">
            <!-- test 0: pp.regress_out on krumsiek11.h5ad, regressing on the cell_type key -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.regress_out"/>
                <param name="keys" value="cell_type"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.regress_out"/>
                    <has_text_matching expression="keys=\['cell_type'\]"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.regress_out.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <!-- Disabled test for pp.mnn_correct (kept commented out below).
             NOTE(review): it sets a reg_keys parameter and asserts on keys='cell_type', neither of
             which matches the pp.mnn_correct inputs of this tool; it needs rewriting before it can
             be enabled. -->
        <!--<test expect_num_outputs="2">
            < test 2 >
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.mnn_correct"/>
                <param name="reg_keys" value="cell_type"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="sc.pp.mnn_correct"/>
                <has_text_matching expression="keys='cell_type'"/>
            </assert_stdout>
            <output name="anndata_out" file="pp.mnn_correct.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>-->
        <test expect_num_outputs="2">
            <!-- test 1: pp.combat on blobs.h5ad, using the blobs annotation as batch key -->
            <param name="adata" value="blobs.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.combat"/>
                <param name="key" value="blobs"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.combat"/>
                    <has_text_matching expression="key='blobs'"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.combat.blobs.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
Regress out unwanted sources of variation, using `pp.regress_out`
=================================================================

Regress out unwanted sources of variation, using simple linear regression. This is 
inspired by Seurat's `regressOut` function in R.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.regress_out.html>`__

Correct batch effects by matching mutual nearest neighbors, using `pp.mnn_correct`
==================================================================================

This uses the implementation of mnnpy. Depending on do_concatenate, it returns either the AnnData objects in their
original order, containing corrected expression values, or a single concatenated matrix or AnnData object.

Be reminded that it is not advised to use the corrected data matrices for differential expression testing.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.mnn_correct.html>`__


Correct batch effects with ComBat function (`pp.combat`)
========================================================

Corrects for batch effects by fitting linear models and gains statistical power via an empirical Bayes (EB) framework in which information is borrowed across genes. This uses the implementation of ComBat.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.combat.html>`__


    ]]></help>
    <expand macro="citations"/>
</tool>