Mercurial > repos > iuc > gemini_stats
diff gemini_stats.xml @ 0:ac761838cdaf draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
author | iuc |
---|---|
date | Thu, 18 Feb 2016 08:55:47 -0500 |
parents | |
children | ee894347fcd6 |
line wrap: on
line diff
<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
    <!--
        Galaxy wrapper around the "gemini stats" sub-command.
        The @BINARY@ token defined below is substituted into the tool id, the
        tool name and the command line. @VERSION@ and the expanded macros
        (requirements, stdio, version_command, infile, sanitize_query,
        citations) are expected to come from gemini_macros.xml, which is not
        part of this file.
    -->
    <description>Compute useful variant statistics</description>
    <macros>
        <import>gemini_macros.xml</import>
        <token name="@BINARY@">stats</token>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <!--
        Command template (Cheetah). Exactly one statistic flag is always
        emitted via $stats_type. The genotype filter and the summarize query
        are optional free-text inputs and are only added when non-blank.
        NOTE(review): a statistic flag is always passed together with the
        summarize option; confirm that gemini honours the summarize query in
        that combination.
    -->
    <command>
<![CDATA[
        gemini @BINARY@
            $stats_type

            #if $gt_filter.strip():
                --gt-filter "${gt_filter}"
            #end if

            #if $summarize.strip():
                --summarize "${summarize}"
            #end if

            "${ infile }"
            > "${ outfile }"
]]>
    </command>
    <inputs>
        <expand macro="infile" />

        <!-- Each option value is the literal command-line flag handed to gemini. -->
        <param name="stats_type" type="select" label="Studying ..." help="">
            <option value="--tstv">Compute the transition and transversion ratios for the snps (--tstv)</option>
            <option value="--tstv-coding">Compute the transition/transversion ratios for the snps in the coding regions (--tstv-coding)</option>
            <option value="--tstv-noncoding">Compute the transition/transversion ratios for the snps in the non-coding regions (--tstv-noncoding)</option>
            <option value="--snp-counts">Compute the type and count of the snps (--snp-counts)</option>
            <option value="--sfs">Calculate the site frequency spectrum of the variants (--sfs)</option>
            <option value="--mds">Compute the pair-wise genetic distance between each sample (--mds)</option>
            <option value="--vars-by-sample">Return the total variants per sample, sum of homozygous and heterozygous variants (--vars-by-sample)</option>
            <option value="--gts-by-sample">Return the count of each genotype class observed per sample (--gts-by-sample)</option>
        </param>

        <!-- Free-text inputs; both are interpolated into the shell command, so they rely on the sanitize_query macro. -->
        <param name="gt_filter" type="text" area="True" size="5x50" label="Restrictions to apply to genotype values" help="(--gt-filter)">
            <expand macro="sanitize_query" />
        </param>

        <param name="summarize" type="text" area="True" size="5x50" label="The query to be issued to the database to summarize" help="(--summarize)">
            <expand macro="sanitize_query" />
        </param>

    </inputs>
    <outputs>
        <data name="outfile" format="tabular" />
    </outputs>
    <tests>
        <test>
            <param name="infile" value="gemini_burden_input.db" ftype="gemini.sqlite" />
            <param name="stats_type" value="--vars-by-sample" />
            <output name="outfile" file="gemini_stats_result.tabular" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

The stats tool computes some useful variant statistics for a GEMINI database.
Like computing the transition and transversion ratios for the snps.

**Settings and examples**

--tstv-coding:
    Compute the transition/transversion ratios for the snps in the coding regions.

--tstv-noncoding:
    Compute the transition/transversion ratios for the snps in the non-coding regions.

EXAMPLE Compute the type and count of the snps; --snp-counts::

    type  count
    A->G  2
    C->T  1
    G->A  1

EXAMPLE Calculate the site frequency spectrum of the variants; --sfs::

    aaf    count
    0.125  2
    0.375  1

EXAMPLE Compute the pair-wise genetic distance between each sample; --mds::

    sample1  sample2  distance
    M10500   M10500   0.0
    M10475   M10478   1.25
    M10500   M10475   2.0
    M10500   M10478   0.5714

EXAMPLE Return a count of the types of genotypes per sample; --gts-by-sample::

    sample  num_hom_ref  num_het  num_hom_alt  num_unknown  total
    M10475  4            1        3            1            9
    M10478  2            2        4            1            9

EXAMPLE Return the total variants per sample (sum of homozygous and heterozygous variants); --vars-by-sample::

    sample  total
    M10475  4
    M10478  6

**Final solution**

--summarize:
    If none of these tools are exactly what you want, you can summarize the variants per sample of an arbitrary query using the --summarize flag.

EXAMPLE If you wanted to know, for each sample, how many variants are on chromosome 1 that are also in dbSNP;--summarize "select * from variants where in_dbsnp=1 and chrom='chr1'"::

    sample   total  num_het  num_hom_alt
    M10475   1      1        0
    M128215  1      1        0
    M10478   2      2        0
    M10500   2      1        1

    ]]></help>
    <expand macro="citations"/>
</tool>