view remove_confounders.xml @ 0:9ca360dde8e3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 92f85afaed0097d1879317a9f513093fce5481d6
author iuc
date Mon, 04 Mar 2019 10:16:47 -0500
parents
children a89ee42625ad
line wrap: on
line source

<tool id="scanpy_remove_confounders" name="Remove confounders with scanpy" version="@version@">
    <description></description>
    <macros>
        <!-- Macros and tokens that are expanded in this tool but not defined here
             (e.g. `requirements`, `citations`, `param_use_raw`) are expected to come from macros.xml. -->
        <import>macros.xml</import>
        <!-- Parameters shared by the `tl.score_genes` and `tl.score_genes_cell_cycle`
             branches of the `method` conditional. -->
        <xml name="score_genes_params">
            <param argument="n_bins" type="integer" value="25" label="Number of expression level bins for sampling" help=""/>
            <param argument="random_state" type="integer" value="0" label="Random seed for sampling" help=""/>
            <expand macro="param_use_raw"/>
        </xml>
        <!-- Keyword arguments matching the parameters of `score_genes_params`.
             The text is pasted as the last arguments of both gene-scoring calls in the
             script configfile, which is why the final line carries no trailing comma. -->
        <token name="@CMD_score_genes_inputs@"><![CDATA[
    n_bins=$method.n_bins,
    random_state=$method.random_state,
    use_raw=$method.use_raw,
    copy=False
        ]]></token>
    </macros>
    <expand macro="requirements"/>
    <!-- The whole command line is supplied by the @CMD@ token, which is not defined in this file.
         NOTE(review): the tests assert on stdout for fragments of the generated script, so @CMD@
         presumably prints the script before running it; confirm in macros.xml. -->
    <command detect_errors="exit_code"><![CDATA[
@CMD@
      ]]></command>
    <configfiles>
        <!-- Cheetah template rendered into the Python script that performs the selected method.
             Exactly one of the three branches below is emitted, depending on `method.method`. -->
        <configfile name="script_file"><![CDATA[
@CMD_imports@
@CMD_read_inputs@

#if $method.method == "pp.regress_out"
## Several keys may be given separated by commas; they are passed as a list.
## A single key is still written as a plain quoted string, exactly as before.
#set $reg_keys = [str(x.strip()) for x in str($method.reg_keys).split(',')]
sc.pp.regress_out(
    adata=adata,
    #if len($reg_keys) == 1
    keys='${reg_keys[0]}',
    #else
    keys=$reg_keys,
    #end if
    copy=False)
#elif $method.method == "tl.score_genes"
## Comma-separated gene names are turned into Python lists of stripped strings.
#set $gene_list = [str(x.strip()) for x in str($method.gene_list).split(',')]
sc.tl.score_genes(
    adata=adata,
    gene_list=$gene_list,
    ctrl_size=$method.ctrl_size,
    score_name='$method.score_name',
    #if $method.gene_pool
        #set $gene_pool = [str(x.strip()) for x in str($method.gene_pool).split(',')]
    gene_pool=$gene_pool,
    #end if
    @CMD_score_genes_inputs@)
## The scores are stored in the observation annotation, which is exported as a table.
adata.obs.to_csv('$obs', sep='\t')
#elif $method.method == "tl.score_genes_cell_cycle"
#set $s_genes = [str(x.strip()) for x in str($method.s_genes).split(',')]
#set $g2m_genes = [str(x.strip()) for x in str($method.g2m_genes).split(',')]
sc.tl.score_genes_cell_cycle(
    adata=adata,
    s_genes=$s_genes,
    g2m_genes=$g2m_genes,
    @CMD_score_genes_inputs@)
adata.obs.to_csv('$obs', sep='\t')
#end if

@CMD_anndata_write_outputs@
]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="inputs_anndata"/>
        <!-- Selects which scanpy function is run; each branch exposes only the parameters
             that the script template reads for that function. -->
        <conditional name="method">
            <param argument="method" type="select" label="Method used">
                <option value="pp.regress_out">Regress out unwanted sources of variation, using `pp.regress_out`</option>
                <!--<option value="pp.mnn_correct">, using `pp.mnn_correct`</option>!-->
                <!--<option value="pp.dca">, using `pp.dca`</option>!-->
                <!--<option value="pp.magic">, using `pp.magic`</option>!-->
                <!--<option value="tl.sim">, using `tl.sim`</option>!-->
                <!--<option value="pp.calculate_qc_metrics">, using `pp.calculate_qc_metrics`</option>!-->
                <option value="tl.score_genes">Score a set of genes, using `tl.score_genes`</option>
                <option value="tl.score_genes_cell_cycle">Score cell cycle genes, using `tl.score_genes_cell_cycle`</option>
                <!--<option value="tl.cyclone">, using `tl.cyclone`</option>!-->
                <!--<option value="tl.andbag">, using `tl.andbag`</option>!-->
            </param>
            <when value="pp.regress_out">
                <param argument="reg_keys" type="text" value="" label="Keys for observation annotation on which to regress on" help=""/>
            </when>
            <when value="tl.score_genes">
                <param argument="gene_list" type="text" value="" label="The list of gene names used for score calculation" help="Genes separated by a comma"/>
                <param argument="ctrl_size" type="integer" value="50" label="Number of reference genes to be sampled"
                    help="If `len(gene_list)` is not too low, you can set `ctrl_size=len(gene_list)`."/>
                <param argument="gene_pool" type="text" value="" optional="true" label="Genes for sampling the reference set"
                    help="Default is all genes. Genes separated by a comma"/>
                <expand macro="score_genes_params"/>
                <param argument="score_name" type="text" value="score" label="Name of the field to be added in `.obs`" help=""/>
            </when>
            <when value="tl.score_genes_cell_cycle">
                <!-- Declared with `argument` like every other parameter of this conditional;
                     the resulting parameter names (`s_genes`, `g2m_genes`) are unchanged. -->
                <param argument="s_genes" type="text" value="" label="List of genes associated with S phase" help="Genes separated by a comma"/>
                <param argument="g2m_genes" type="text" value="" label="List of genes associated with G2M phase" help="Genes separated by a comma"/>
                <expand macro="score_genes_params"/>
            </when>
        </conditional>
        <expand macro="anndata_output_format"/>
    </inputs>
    <outputs>
        <expand macro="anndata_outputs"/>
        <!-- Tabular export of `adata.obs`; only the two gene-scoring methods write this file,
             so the dataset is filtered out for every other method. -->
        <data name="obs" format="tabular" label="${tool.name} on ${on_string}: Observations annotation">
            <filter>method['method'] in ('tl.score_genes', 'tl.score_genes_cell_cycle')</filter>
        </data>
    </outputs>
    <tests>
        <!-- pp.regress_out on krumsiek11.h5ad with the `cell_type` observation key:
             checks the generated call on stdout and compares the h5ad output by size. -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="krumsiek11.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="pp.regress_out"/>
                <param name="reg_keys" value="cell_type"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad" />
            <assert_stdout>
                <has_text_matching expression="sc.pp.regress_out"/>
                <has_text_matching expression="keys='cell_type'"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="pp.regress_out.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
        </test>
        <!-- tl.score_genes with two genes and no gene pool: checks every rendered keyword
             argument on stdout and compares both the h5ad and the observation table by size. -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="krumsiek11.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="tl.score_genes"/>
                <param name="gene_list" value="Gata2, Fog1"/>
                <param name="ctrl_size" value="2"/>
                <param name="n_bins" value="2"/>
                <param name="random_state" value="2"/>
                <param name="use_raw" value="False"/>
                <param name="score_name" value="score"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad"/>
            <assert_stdout>
                <has_text_matching expression="sc.tl.score_genes" />
                <has_text_matching expression="gene_list=\['Gata2', 'Fog1'\]" />
                <has_text_matching expression="ctrl_size=2" />
                <has_text_matching expression="score_name='score'" />
                <has_text_matching expression="n_bins=2" />
                <has_text_matching expression="random_state=2" />
                <has_text_matching expression="use_raw=False" />
                <has_text_matching expression="copy=False" />
            </assert_stdout>
            <output name="anndata_out_h5ad" file="tl.score_genes.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
            <output name="obs" file="tl.score_genes.krumsiek11.obs.tabular" ftype="tabular" compare="sim_size"/>
        </test>
        <!-- tl.score_genes_cell_cycle with the same three genes for both phases: checks the
             rendered gene lists and sampling arguments on stdout and compares both outputs by size. -->
        <test>
            <conditional name="input">
                <param name="format" value="h5ad" />
                <param name="adata" value="krumsiek11.h5ad" />
            </conditional>
            <conditional name="method">
                <param name="method" value="tl.score_genes_cell_cycle"/>
                <param name="s_genes" value="Gata2, Fog1, EgrNab"/>
                <param name="g2m_genes" value="Gata2, Fog1, EgrNab"/>
                <param name="n_bins" value="2"/>
                <param name="random_state" value="1"/>
                <param name="use_raw" value="False"/>
            </conditional>
            <param name="anndata_output_format" value="h5ad"/>
            <assert_stdout>
                <has_text_matching expression="sc.tl.score_genes_cell_cycle"/>
                <has_text_matching expression="s_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
                <has_text_matching expression="g2m_genes=\['Gata2', 'Fog1', 'EgrNab'\]"/>
                <has_text_matching expression="n_bins=2"/>
                <has_text_matching expression="random_state=1"/>
                <has_text_matching expression="use_raw=False"/>
            </assert_stdout>
            <output name="anndata_out_h5ad" file="tl.score_genes_cell_cycle.krumsiek11.h5ad" ftype="h5" compare="sim_size"/>
            <output name="obs" file="tl.score_genes_cell_cycle.krumsiek11.obs.tabular" ftype="tabular" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
Regress out unwanted sources of variation, using `pp.regress_out`
=================================================================

Regress out unwanted sources of variation, using simple linear regression. This is 
inspired by Seurat's `regressOut` function in R.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.pp.regress_out.html#scanpy.api.pp.regress_out>`__

Score a set of genes, using `tl.score_genes`
============================================

The score is the average expression of a set of genes minus the
average expression of a reference set of genes. The reference set is
randomly sampled from the `gene_pool` for each binned expression value.

This reproduces the approach in Seurat (Satija et al., 2015) and has been implemented
for Scanpy by Davide Cittaro.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.score_genes.html#scanpy.api.tl.score_genes>`__

Score cell cycle genes, using `tl.score_genes_cell_cycle`
=========================================================

Given two lists of genes associated with S phase and G2M phase, calculates
scores and assigns a cell cycle phase (G1, S or G2M). See
`score_genes` for more explanation.

More details on the `scanpy documentation
<https://scanpy.readthedocs.io/en/latest/api/scanpy.api.tl.score_genes_cell_cycle.html#scanpy.api.tl.score_genes_cell_cycle>`__
    ]]></help>
    <expand macro="citations"/>
</tool>