diff gemini_stats.xml @ 0:ac761838cdaf draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
author iuc
date Thu, 18 Feb 2016 08:55:47 -0500
parents
children ee894347fcd6
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_stats.xml	Thu Feb 18 08:55:47 2016 -0500
@@ -0,0 +1,125 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <description>Compute useful variant statistics</description>
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">stats</token>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+        gemini @BINARY@
+            $stats_type
+
+            #if $gt_filter.strip():
+                --gt-filter "${gt_filter}"
+            #end if
+
+            #if $summarize.strip():
+                --gt-filter "${gt_filter}"
+            #end if
+
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <inputs>
+        <expand macro="infile" />
+
+        <param name="stats_type" type="select" label="Studying ..." help="">
+            <option value="--tstv">Compute the transition and transversion ratios for the snps (--tstv)</option>
+            <option value="--tstv-coding">Compute the transition/transversion ratios for the snps in the coding regions (--tstv-coding)</option>
+            <option value="--tstv-noncoding">Compute the transition/transversion ratios for the snps in the non-coding regions (--tstv-noncoding)</option>
+            <option value="--snp-counts">Compute the type and count of the snps (--snp-counts)</option>
+            <option value="--sfs">Calculate the site frequency spectrum of the variants (--sfs)</option>
+            <option value="--mds">Compute the pair-wise genetic distance between each sample (--mds)</option>
+            <option value="--vars-by-sample">Return the total variants per sample, sum of homozygous and heterozygous variants (--vars-by-sample)</option>
+            <option value="--gts-by-sample">Return the count of each genotype class observed per sample (--gts-by-sample)</option>
+        </param>
+
+        <param name="gt_filter" type="text" area="True" size="5x50" label="Restrictions to apply to genotype values" help="(--gt-filer)">
+            <expand macro="sanitize_query" />
+        </param>
+
+        <param name="summarize" type="text" area="True" size="5x50" label="The query to be issued to the database to summarize" help="(--summarize)">
+            <expand macro="sanitize_query" />
+        </param>
+
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="gemini_burden_input.db" ftype="gemini.sqlite" />
+            <param name="stats_type" value="--vars-by-sample" />
+            <output name="outfile" file="gemini_stats_result.tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+The stats tool computes some useful variant statistics for a GEMINI database.
+Like computing the transition and transversion ratios for the snps.
+
+**Settings and examples**
+
+--tstv-coding:
+ Compute the transition/transversion ratios for the snps in the coding regions.
+
+--tstv-noncoding:
+ Compute the transition/transversion ratios for the snps in the non-coding regions.
+
+EXAMPLE Compute the type and count of the snps; --snp-counts::
+
+ type    count
+ A->G    2
+ C->T    1
+ G->A    1
+
+EXAMPLE Calculate the site frequency spectrum of the variants; --sfs::
+
+ aaf     count
+ 0.125   2
+ 0.375   1
+
+EXAMPLE Compute the pair-wise genetic distance between each sample; --mds::
+
+ sample1  sample2  distance
+ M10500   M10500   0.0
+ M10475   M10478   1.25
+ M10500   M10475   2.0
+ M10500   M10478   0.5714
+
+EXAMPLE Return a count of the types of genotypes per sample; --gts-by-sample::
+
+ sample   num_hom_ref   num_het   num_hom_alt   num_unknown   total
+ M10475   4             1         3             1             9
+ M10478   2             2         4             1             9
+
+
+
+EXAMPLE Return the total variants per sample (sum of homozygous and heterozygous variants); --vars-by-sample::
+
+ sample  total
+ M10475  4
+ M10478  6
+
+**Final solution**
+
+--summarize:
+ If none of these tools are exactly what you want, you can summarize the variants per sample of an arbitrary query using the –summarize flag. 
+
+EXAMPLE If you wanted to know, for each sample, how many variants are on chromosome 1 that are also in dbSNP;--summarize "select * from variants where in_dbsnp=1 and chrom='chr1'":: 
+
+ sample   total  num_het  num_hom_alt
+ M10475   1      1        0
+ M128215  1      1        0
+ M10478   2      2        0
+ M10500   2      1        1
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>