view remove_confounders.xml @ 11:73ab2ac53d06 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit aba2a85f5da6e1094f382d1f0d94c4b8f2544a7d
author iuc
date Wed, 08 Nov 2023 14:45:35 +0000
parents bf2017df9837
children 458e8f43a775
line wrap: on
line source

<tool id="scanpy_remove_confounders" name="Remove confounders" version="@galaxy_version@" profile="@profile@">
    <description>with scanpy</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
@CMD@
      ]]></command>
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == "pp.regress_out"
sc.pp.regress_out(
   adata=adata,
   #set $keys = [str(x.strip()) for x in str($method.keys).split(',')]
   keys=$keys,
   copy=False)

#else if $method.method == "pp.mnn_correct"
    #for i, filepath in enumerate($methods.extra_adata)
adata_$i = ad.read('$filepath')
    #end for

sc.pp.mnn_correct(
    adata,
    #for i, filepath in enumerate($methods.extra_adata)
    adata_$i,
    #end for
    #if str($methods.var_subset) != ''
    #set $var_subset=([x.strip() for x in str($method.var_subset).split(',')])
    var_subset=$var_subset,
    #end if
    batch_key='$method.batch_key',
    index_unique='$method.index_unique'
    #if str($methods.batch_categories) != ''
    #set $batch_categories=([x.strip() for x in str($method.batch_categories).split(',')])
    batch_categories=$batch_categories,
    #end if
    k=$method.k,
    sigma=$method.sigma,
    cos_norm_in=$method.cos_norm_in,
    cos_norm_out=$method.cos_norm_out,
    svd_dim=$method.svd_dim,
    var_adj=$method.var_adj,
    compute_angle=$method.compute_angle,
    mnn_order='$method.mnn_order',
    svd_mode='$method.svd_mode',
    do_concatenate=True,
    save_raw=True,
    n_jobs=\${GALAXY_SLOTS:-4})

#else if $method.method == "pp.combat"
sc.pp.combat(
    adata,
    key='$method.key',
    inplace=True)

#end if

@CMD_anndata_write_outputs@
]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="inputs_anndata"/>
        <conditional name="method">
            <param argument="method" type="select" label="Method used for plotting">
                <option value="pp.regress_out">Regress out unwanted sources of variation, using 'pp.regress_out'</option>
                <option value="pp.mnn_correct">Correct batch effects by matching mutual nearest neighbors, using 'pp.mnn_correct'</option>
                <option value="pp.combat">Correct batch effects with ComBat function, using 'pp.combat'</option>
            </param>
            <when value="pp.regress_out">
                <param argument="keys" type="text" value="" label="Keys for observation annotation on which to regress on" help="Keys separated by a comma">
                    <expand macro="sanitize_query" />
                </param>
            </when>
            <when value="pp.mnn_correct">
                <param name="extra_adata" type="data" multiple="true" optional="true" format="h5ad" label="Extra annotated data matrix" help="They should have same number of variables."/>
                <param argument="var_subset" type="text" value="" optional="true" label="The subset of vars to be used when performing MNN correction" help="List of comma-separated key from '.var_names'. If not set, all vars are used">
                    <expand macro="sanitize_query" />
                </param>
                <param argument="batch_key" type="text" value="batch" label="Batch key for the concatenate">
                    <expand macro="sanitize_query" />
                </param>
                <param name="index_unique" type="select" label="Separator to join the existing index names with the batch category" help="Leave it empty to keep existing indices">
                    <option value="-">-</option>
                    <option value="_">_</option>
                    <option value=" "> </option>
                    <option value="/">/</option>
                </param>
                <param argument="batch_categories" type="text" value="" optional="true" label="Batch categories for the concatenate" help="List of comma-separated key">
                    <expand macro="sanitize_query" />
                </param>
                <param argument="k" type="integer" value="20" label="Number of mutual nearest neighbors"/>
                <param argument="sigma" type="float" value="1" label="The bandwidth of the Gaussian smoothing kernel used to compute the correction vectors"/>
                <param argument="cos_norm_in" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed on the input data prior to calculating distances between cells?"/>
                <param argument="cos_norm_out" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Should cosine normalization be performed prior to computing corrected expression values?"/>
                <param argument="svd_dim" type="integer" value="" optional="true" label="Number of dimensions to use for summarizing biological substructure within each batch" help="If not set, biological components will not be removed from the correction vectors."/>
                <param argument="var_adj" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Adjust variance of the correction vectors?" help="This step takes most computing time."/>
                <param argument="compute_angle" type="boolean" truevalue="True" falsevalue="False" checked="false" label="compute the angle between each cell’s correction vector and the biological subspace of the reference batch?"/>
                <param argument="mnn_order" type="text" value="" optional="true" label="The order in which batches are to be corrected" help="List of comma-separated key. If not set, datas are corrected sequentially">
                    <expand macro="sanitize_query" />
                </param>
                <param name="svd_mode" type="select" label="SVD mode">
                    <option value="svd">svd: SVD using a non-randomized SVD-via-ID algorithm</option>
                    <option value="rsvd" selected="true">rsvd: SVD using a randomized SVD-via-ID algorithm</option>
                    <option value="irlb">irlb: truncated SVD by implicitly restarted Lanczos bidiagonalization</option>
                </param>
            </when>
            <when value="pp.combat">
                <param argument="key" type="text" value="batch" label="Key to a categorical annotation from adata.obs that will be used for batch effect removal">
                    <expand macro="sanitize_query" />
                </param>
            </when>
        </conditional>
        <expand macro="inputs_common_advanced"/>
    </inputs>
    <outputs>
        <expand macro="anndata_outputs"/>
    </outputs>
    <tests>
        <test expect_num_outputs="2">
            <!-- test 0 -->
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.regress_out"/>
                <param name="keys" value="cell_type"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.regress_out"/>
                    <has_text_matching expression="keys=\['cell_type'\]"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.regress_out.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
        <!--<test expect_num_outputs="2">
            < test 2 >
            <param name="adata" value="krumsiek11.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.mnn_correct"/>
                <param name="reg_keys" value="cell_type"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="sc.pp.mnn_correct"/>
                <has_text_matching expression="keys='cell_type'"/>
            </assert_stdout>
            <output name="anndata_out" file="pp.mnn_correct.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>-->
        <test expect_num_outputs="2">
            <!-- test 1 -->
            <param name="adata" value="blobs.h5ad" />
            <conditional name="method">
                <param name="method" value="pp.combat"/>
                <param name="key" value="blobs"/>
            </conditional>
            <section name="advanced_common">
                <param name="show_log" value="true" />
            </section>
            <output name="hidden_output">
                <assert_contents>
                    <has_text_matching expression="sc.pp.combat"/>
                    <has_text_matching expression="key='blobs'"/>
                </assert_contents>
            </output>
            <output name="anndata_out" file="pp.combat.blobs.h5ad" ftype="h5ad" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
Regress out unwanted sources of variation, using `pp.regress_out`
=================================================================

Regress out unwanted sources of variation, using simple linear regression. This is 
inspired by Seurat's `regressOut` function in R.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.regress_out.html>`__

Correct batch effects by matching mutual nearest neighbors, using `pp.mnn_correct`
==================================================================================

This uses the implementation of mnnpy. Depending on do_concatenate, it returns AnnData objects in the 
original order containing corrected expression values or a concatenated matrix or AnnData object.

Be reminded that it is not advised to use the corrected data matrices for differential expression testing.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/stable/generated/scanpy.external.pp.mnn_correct.html>`__


Correct batch effects with ComBat function (`pp.combat`)
========================================================

Corrects for batch effects by fitting linear models, gains statistical power via an EB framework where information is borrowed across genes. This uses the implementation of ComBat

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.combat.html>`__


    ]]></help>
    <expand macro="citations"/>
</tool>