Mercurial > repos > iuc > gemini_burden
view gemini_burden.xml @ 8:f0bf88e9e689 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit f7bdf08922aaf4119aefe7041e754a69cf64aebd
author | iuc |
---|---|
date | Wed, 13 Jul 2022 15:28:45 +0000 |
parents | 699ce2631e79 |
children |
line wrap: on
line source
<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@">
    <!--
        Galaxy wrapper for "gemini burden".

        Two modes are exposed through the "analysis" conditional:
          * counts:  per-gene, per-sample variant counts (no extra flags passed)
          * c-alpha: C-alpha association test, with optional custom case/control
                     sample lists, allele-frequency bounds, a permutation count
                     and an optional extra dataset holding the permuted T-scores.

        The requirements, stdio, version_command, infile, citations and
        bio_tools macros, as well as the VERSION token, are not defined here;
        they are expected to be provided by gemini_macros.xml.
    -->
    <description>perform sample-wise gene-level burden calculations</description>
    <expand macro="bio_tools"/>
    <macros>
        <import>gemini_macros.xml</import>
        <token name="@BINARY@">burden</token>
        <token name="@GEMINI_BUG_WARNING@">Caveat: due to a bug in this version of GEMINI, filtering on alternate allele frequency will get disabled when you select 'All non-synonymous variants' as the types of variants to be used for the burden calculations above.</token>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <command>
<![CDATA[
        gemini @BINARY@
            ## Empty string (high impact only) or the literal --nonsynonymous flag
            $nonsynonymous

            #if str($analysis.mode) == "c-alpha":
                --calpha
                #if str($analysis.casecontrol.source) == "custom":
                    ## Each whitespace-separated sample name is single-quoted on its own
                    --cases
                    #echo (" ".join("'" + token + "'" for token in $analysis.casecontrol.cases.strip().split()))#
                    --controls
                    #echo (" ".join("'" + token + "'" for token in $analysis.casecontrol.controls.strip().split()))#
                #end if
                --min-aaf ${analysis.min_aaf}
                --max-aaf ${analysis.max_aaf}
                --permutations ${analysis.permutations}
                ${analysis.save_tscores}
            #end if

            '$infile'
            > '$outfile'

        #if str($analysis.mode) == "c-alpha" and $analysis.save_tscores:
            ## touch first so that mv has a source file even if none was written
            && touch permutated_t_scores.txt
            && mv permutated_t_scores.txt '$t_scores'
        #end if
]]>
    </command>
    <inputs>
        <expand macro="infile" />
        <param argument="--nonsynonymous" name="nonsynonymous" type="select"
               label="Use the following types of variants for the burden calculation">
            <option value="">High impact variants only</option>
            <option value="--nonsynonymous">All non-synonymous variants</option>
        </param>
        <conditional name="analysis">
            <param argument="--calpha" name="mode" type="select"
                   label="Which type of burden calculation should be performed?">
                <option value="counts">Count of variants per gene and sample</option>
                <option value="c-alpha">C-alpha association test per gene</option>
            </param>
            <when value="counts" />
            <when value="c-alpha">
                <conditional name="casecontrol">
                    <param name="source" type="select"
                           label="Selection of case and control samples"
                           help="Specify how case and control samples for the C-alpha association test should be selected">
                        <option value="ped">Use PED file information</option>
                        <option value="custom">Custom selection</option>
                    </param>
                    <when value="ped" />
                    <when value="custom">
                        <param argument="--cases" name="cases" type="text" value=""
                               label="Space separated list of cases for association testing"/>
                        <param argument="--controls" name="controls" type="text" value=""
                               label="Space separated list of controls for association testing"/>
                    </when>
                </conditional>
                <param argument="--min-aaf" name="min_aaf" type="float" min="0" max="1" value="0"
                       label="The min. alt. allele frequency for a variant to be included"
                       help="@GEMINI_BUG_WARNING@" />
                <param argument="--max-aaf" name="max_aaf" type="float" min="0" max="1" value="1"
                       label="The max. alt. allele frequency for a variant to be included"
                       help="@GEMINI_BUG_WARNING@" />
                <param argument="--permutations" name="permutations" type="integer" min="1" value="100"
                       label="Number of permutations to run for the C-alpha test" />
                <param argument="--save_tscores" name="save_tscores" type="boolean"
                       truevalue="--save_tscores" falsevalue="" checked="False"
                       label="Save the list of individual T-scores from all permutations as an extra dataset" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="outfile" format="tabular" label="GEMINI burden ${analysis.mode} on ${on_string}" />
        <!-- Only produced when the C-alpha test is run with T-score saving enabled -->
        <data name="t_scores" format="tabular" label="GEMINI burden permuted T scores on ${on_string}">
            <filter>analysis['mode'] == 'c-alpha' and analysis['save_tscores']</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- test counts mode with defaults -->
            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
            <conditional name="analysis">
                <param name="mode" value="counts" />
            </conditional>
            <output name="outfile" file="gemini_burden_count_highimpact_result.tabular" />
        </test>
        <test expect_num_outputs="1">
            <!-- test nonsynonymous parameter -->
            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
            <param name="nonsynonymous" value="--nonsynonymous" />
            <conditional name="analysis">
                <param name="mode" value="counts" />
            </conditional>
            <output name="outfile" file="gemini_burden_count_nonsynonymous_result.tabular" />
        </test>
        <test expect_num_outputs="1">
            <!-- test c-alpha mode with defaults -->
            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
            <conditional name="analysis">
                <param name="mode" value="c-alpha" />
                <conditional name="casecontrol">
                    <param name="source" value="ped" />
                </conditional>
            </conditional>
            <output name="outfile" file="gemini_burden_calpha_template.tabular" compare="re_match" />
        </test>
        <test expect_num_outputs="1">
            <!-- test c-alpha mode with user-supplied case/control samples -->
            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
            <conditional name="analysis">
                <param name="mode" value="c-alpha" />
                <conditional name="casecontrol">
                    <param name="source" value="custom" />
                    <param name="controls" value="1_kid 3_kid" />
                    <param name="cases" value="1_dad 1_mom 3_dad 3_mom" />
                </conditional>
            </conditional>
            <output name="outfile" file="gemini_burden_calpha_template.tabular" compare="re_match" />
        </test>
        <test expect_num_outputs="2">
            <!-- test additional T scores output generation -->
            <param name="infile" value="gemini_de_novo_input.db" ftype="gemini.sqlite" />
            <conditional name="analysis">
                <param name="mode" value="c-alpha" />
                <conditional name="casecontrol">
                    <param name="source" value="custom" />
                    <param name="controls" value="1_kid 3_kid" />
                    <param name="cases" value="1_dad 1_mom 3_dad 3_mom" />
                </conditional>
                <param name="permutations" value="2" />
                <param name="save_tscores" value="True" />
            </conditional>
            <output name="t_scores">
                <assert_contents>
                    <!-- The regex escape \t is used rather than a literal tab: XML
                         attribute-value normalization turns literal tabs into spaces,
                         which would never match tab-separated output. -->
                    <has_line_matching expression="WDR37\t.+\t.+" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Burden performs sample-wise gene-level burden calculations.

The burden tool provides a set of utilities to perform burden summaries on a
per-gene, per sample basis. By default, it outputs a table of gene-wise counts
of all high impact variants in coding regions for each sample:

GEMINI burden example::

    gene    M10475  M10478  M10500  M128215
    WDR37   2       2       2       2
    CTBP2   0       0       0       1
    DHODH   1       0       0       0

**Setting examples**

**--nonsynonymous**

If you want to be a little bit less restrictive, you can include all
non-synonymous variants instead.

GEMINI output with setting --nonsynonymous::

    gene    M10475  M10478  M10500  M128215
    SYCE1   0       1       1       0
    WDR37   2       2       2       2
    CTBP2   0       0       0       1
    ASAH2C  2       1       1       0
    DHODH   1       0       0       0

**--calpha**

If your database has been loaded with a PED file describing case and control
samples, you can calculate the c-alpha statistic for cases vs. control.

GEMINI output with setting --calpha::

    gene    T     c     Z                p_value
    SYCE1   -0.5  0.25  -1.0             0.841344746069
    WDR37   -1.0  1.5   -0.816496580928  0.792891910879
    CTBP2   0.0   0.0   nan              nan
    ASAH2C  -0.5  0.75  -0.57735026919   0.718148569175
    DHODH   0.0   0.0   nan              nan

To calculate the **P-value** using a permutation test, use the --permutations
option, specifying the number of permutations of the case/control labels you
want to use.

**--min-aaf and --max-aaf for --calpha**

By default, all variants affecting a given gene will be included in the C-alpha
computation. However, one may establish alternate allele frequency boundaries
for the variants included using the --min-aaf and --max-aaf options.

Used settings::

    --calpha test.burden.db
    --min-aaf 0.0
    --max-aaf 0.01

**--cases and --controls for --calpha**

If you do not have a PED file loaded, or your PED file does not follow the
standard PED phenotype encoding format you can still perform the c-alpha test,
but you have to specify which samples are the control samples and which are the
case samples.

Used settings::

    --controls M10475 M10478
    --cases M10500 M128215
    --calpha

Output::

    gene    T     c     Z                p_value
    SYCE1   -0.5  0.25  -1.0             0.841344746069
    WDR37   -1.0  1.5   -0.816496580928  0.792891910879
    CTBP2   0.0   0.0   nan              nan
    ASAH2C  -0.5  0.75  -0.57735026919   0.718148569175
    DHODH   0.0   0.0   nan              nan

**--nonsynonymous --calpha**

If you would rather consider all nonsynonymous variants for the C-alpha test
rather than just the medium and high impact variants, add the --nonsynonymous
flag.
]]></help>
    <expand macro="citations">
        <citation type="doi">10.1371/journal.pgen.1001322</citation><!-- c-alpha citation -->
    </expand>
</tool>