view gemini_burden.xml @ 8:f0bf88e9e689 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit f7bdf08922aaf4119aefe7041e754a69cf64aebd
author iuc
date Wed, 13 Jul 2022 15:28:45 +0000
parents 699ce2631e79
children
line wrap: on
line source

<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@">
    <description>perform sample-wise gene-level burden calculations</description>
    <!-- The expand elements in this preamble (bio_tools, requirements, stdio,
         version_command) reference macros that are not defined in this file;
         presumably they are provided by the imported gemini_macros.xml
         (TODO confirm against that file). -->
    <expand macro="bio_tools"/>
    <!-- Tokens defined locally for this wrapper:
         @BINARY@ names the GEMINI subcommand; it is substituted into the tool
         id and name and into the command line.
         @GEMINI_BUG_WARNING@ is reused as the help text of the min_aaf and
         max_aaf parameters.
         @VERSION@ (used in the tool version attribute) is not defined here and
         is presumably supplied by gemini_macros.xml (TODO confirm). -->
    <macros>
        <import>gemini_macros.xml</import>
        <token name="@BINARY@">burden</token>
        <token name="@GEMINI_BUG_WARNING@">Caveat: due to a bug in this version of GEMINI, filtering on alternate allele frequency will get disabled when you select 'All non-synonymous variants' as the types of variants to be used for the burden calculations above.</token>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <command>
<![CDATA[
        ## Run the GEMINI subcommand named by the BINARY token. The nonsynonymous
        ## select expands to either an empty string or the literal flag.
        gemini @BINARY@
            $nonsynonymous
            #if str($analysis.mode) == "c-alpha":
                --calpha
                #if str($analysis.casecontrol.source) == "custom":
                    ## Each whitespace-separated sample name is passed as its own
                    ## single-quoted shell argument.
                    --cases
                    #echo (" ".join("'" + token + "'" for token in $analysis.casecontrol.cases.strip().split()))#
                    --controls
                    #echo (" ".join("'" + token + "'" for token in $analysis.casecontrol.controls.strip().split()))#
                #end if
                --min-aaf ${analysis.min_aaf}
                --max-aaf ${analysis.max_aaf}
                --permutations ${analysis.permutations}
                ${analysis.save_tscores}
            #end if
            '$infile'
            > '$outfile'
        #if str($analysis.mode) == "c-alpha" and $analysis.save_tscores:
            ## The touch guarantees that the file exists, so the following mv
            ## cannot fail on a missing source file. The destination is quoted
            ## like the other dataset paths above.
            && touch permutated_t_scores.txt
            && mv permutated_t_scores.txt '$t_scores'
        #end if
]]>

    </command>
    <inputs>
        <expand macro="infile" />
        <!-- The selected option value is inserted verbatim into the command
             line, so the argument attribute must spell the flag exactly as
             the option value does. -->
        <param argument="--nonsynonymous" name="nonsynonymous" type="select"
        label="Use the following types of variants for the burden calculation">
            <option value="">High impact variants only</option>
            <option value="--nonsynonymous">All non-synonymous variants</option>
        </param>
        <!-- Switches between plain per-gene/per-sample counts and the C-alpha
             association test; all further options apply to C-alpha only. -->
        <conditional name="analysis">
            <param argument="--calpha" name="mode" type="select"
            label="Which type of burden calculation should be performed?">
                <option value="counts">Count of variants per gene and sample</option>
                <option value="c-alpha">C-alpha association test per gene</option>
            </param>
            <when value="counts" />
            <when value="c-alpha">
                <!-- Case/control assignment: either rely on the PED information
                     or let the user list the sample names explicitly. -->
                <conditional name="casecontrol">
                    <param name="source" type="select"
                    label="Selection of case and control samples"
                    help="Specify how case and control samples for the C-alpha association test should be selected">
                        <option value="ped">Use PED file information</option>
                        <option value="custom">Custom selection</option>
                    </param>
                    <when value="ped" />
                    <when value="custom">
                        <!-- Both lists are split on whitespace by the command
                             template; the flag name is shown via the argument
                             attribute, so no extra help text is needed. -->
                        <param argument="--cases" name="cases" type="text" value=""
                        label="Space separated list of cases for association testing"/>
                        <param argument="--controls" name="controls" type="text" value=""
                        label="Space separated list of controls for association testing"/>
                    </when>
                </conditional>
                <param argument="--min-aaf" name="min_aaf" type="float" min="0" max="1" value="0"
                label="The min. alt. allele frequency for a variant to be included"
                help="@GEMINI_BUG_WARNING@" />
                <param argument="--max-aaf" name="max_aaf" type="float" min="0" max="1" value="1"
                label="The max. alt. allele frequency for a variant to be included"
                help="@GEMINI_BUG_WARNING@" />
                <param argument="--permutations" name="permutations" type="integer" min="1" value="100" label="Number of permutations to run for the C-alpha test" />
                <!-- When checked, the flag is added to the command line and the
                     extra t_scores output dataset is created. -->
                <param argument="--save_tscores" name="save_tscores" type="boolean" truevalue="--save_tscores" falsevalue="" checked="False"
                    label="Save the list of individual T-scores from all permutations as an extra dataset" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Main result table; it receives the standard output of the command. -->
        <data name="outfile" format="tabular" label="GEMINI burden ${analysis.mode} on ${on_string}" />
        <!-- Optional table of permuted T scores; only created when the C-alpha
             test is selected and saving of T-scores was requested. -->
        <data name="t_scores" format="tabular" label="GEMINI burden permuted T scores on ${on_string}">
            <filter>analysis['mode'] == 'c-alpha' and analysis['save_tscores']</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- counts mode with every other setting left at its default;
                 only the main output is expected -->
            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
            <conditional name="analysis">
                <param name="mode" value="counts" />
            </conditional>
            <output name="outfile" file="gemini_burden_count_highimpact_result.tabular" />
        </test>
        <test expect_num_outputs="1">
            <!-- counts mode with the nonsynonymous option switched on -->
            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
            <param name="nonsynonymous" value="--nonsynonymous" />
            <conditional name="analysis">
                <param name="mode" value="counts" />
            </conditional>
            <output name="outfile" file="gemini_burden_count_nonsynonymous_result.tabular" />
        </test>
        <test expect_num_outputs="1">
            <!-- c-alpha mode with case/control assignment taken from the PED
                 information; the result is compared by regular expression
                 (re_match) against a template file -->
            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
            <conditional name="analysis">
                <param name="mode" value="c-alpha" />
                <conditional name="casecontrol">
                    <param name="source" value="ped" />
                </conditional>
            </conditional>
            <output name="outfile" file="gemini_burden_calpha_template.tabular" compare="re_match" />
        </test>
        <test expect_num_outputs="1">
            <!-- c-alpha mode with user-supplied case and control sample lists;
                 compared against the same template as the PED-based test -->
            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
            <conditional name="analysis">
                <param name="mode" value="c-alpha" />
                <conditional name="casecontrol">
                    <param name="source" value="custom" />
                    <param name="controls" value="1_kid 3_kid" />
                    <param name="cases" value="1_dad 1_mom 3_dad 3_mom" />
                </conditional>
            </conditional>
            <output name="outfile" file="gemini_burden_calpha_template.tabular" compare="re_match" />
        </test>
        <test expect_num_outputs="2">
            <!-- c-alpha mode with saving of T scores enabled and 2 permutations;
                 two outputs are expected, and the T scores dataset must contain
                 a WDR37 line followed by at least two tab-separated fields -->
            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
            <conditional name="analysis">
                <param name="mode" value="c-alpha" />
                <conditional name="casecontrol">
                    <param name="source" value="custom" />
                    <param name="controls" value="1_kid 3_kid" />
                    <param name="cases" value="1_dad 1_mom 3_dad 3_mom" />
                </conditional>
                <param name="permutations" value="2" />
                <param name="save_tscores" value="True" />
            </conditional>
            <output name="t_scores">
                <assert_contents>
                    <has_line_matching expression="WDR37&#009;.+&#009;.+" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Burden performs sample-wise gene-level burden calculations.

The burden tool provides a set of utilities to perform burden summaries on a per-gene, per-sample basis. By default, it outputs a table of gene-wise counts of all high impact variants in coding regions for each sample:

GEMINI burden example::

 gene    M10475  M10478  M10500  M128215
 WDR37   2       2       2       2
 CTBP2   0       0       0       1
 DHODH   1       0       0       0

**Setting examples**

**--nonsynonymous**

If you want to be a little bit less restrictive, you can include all non-synonymous variants instead.

GEMINI output with setting --nonsynonymous::

 gene    M10475  M10478  M10500  M128215
 SYCE1   0       1       1       0
 WDR37   2       2       2       2
 CTBP2   0       0       0       1
 ASAH2C  2       1       1       0
 DHODH   1       0       0       0

**--calpha**

If your database has been loaded with a PED file describing case and control samples, you can calculate the c-alpha statistic for cases vs. controls.

GEMINI output with setting --calpha::

 gene    T       c       Z       p_value
 SYCE1   -0.5    0.25    -1.0    0.841344746069
 WDR37   -1.0    1.5     -0.816496580928 0.792891910879
 CTBP2   0.0     0.0     nan     nan
 ASAH2C  -0.5    0.75    -0.57735026919  0.718148569175
 DHODH   0.0     0.0     nan     nan

To calculate the **P-value** using a permutation test, use the --permutations option, specifying the number of permutations of the case/control labels you want to use.

**--min-aaf and --max-aaf for --calpha**

By default, all variants affecting a given gene will be included in the C-alpha computation. However, one may establish alternate allele frequency boundaries for the variants included using the --min-aaf and --max-aaf options.

Used settings:

  - ``--calpha`` test.burden.db
  - ``--min-aaf 0.0``
  - ``--max-aaf 0.01``

**--cases and --controls for --calpha**

If you do not have a PED file loaded, or your PED file does not follow the standard PED phenotype encoding format you can still perform the c-alpha test, but you have to specify which samples are the control samples and which are the case samples.

Used settings:

 - ``--controls M10475 M10478``
 - ``--cases M10500 M128215``
 - ``--calpha``

Output::

 gene    T       c       Z       p_value
 SYCE1   -0.5    0.25    -1.0    0.841344746069
 WDR37   -1.0    1.5     -0.816496580928 0.792891910879
 CTBP2   0.0     0.0     nan     nan
 ASAH2C  -0.5    0.75    -0.57735026919  0.718148569175
 DHODH   0.0     0.0     nan     nan

**--nonsynonymous --calpha**

If you would rather consider all nonsynonymous variants for the C-alpha test rather than just the medium and high impact variants, add the --nonsynonymous flag.


    ]]></help>
    <!-- Expands the shared "citations" macro with one additional citation
         nested inside it; how the nested element is merged depends on the
         macro definition, which is not visible in this file. -->
    <expand macro="citations">
        <citation type="doi">10.1371/journal.pgen.1001322</citation><!-- c-alpha citation -->
    </expand>
</tool>