diff gemini_roh.xml @ 0:720cbfb4190d draft

Imported from capsule None
author iuc
date Mon, 25 Aug 2014 17:15:54 -0400
parents
children 93bb0cfacefb
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_roh.xml	Mon Aug 25 17:15:54 2014 -0400
@@ -0,0 +1,106 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <description>Identifying runs of homozygosity</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">roh</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini @BINARY@
+            --min-snps $min_snps
+            --min-total-depth $min_total_depth
+            --min-gt-depth $min_gt_depth
+            --min-size $min_size
+            --max-hets $max_hets
+            --max-unknowns $max_unknowns
+            #if $samples.strip() != '':
+                -s "${samples}"
+            #end if
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+
+        <param name="min_snps" type="integer" value="25" size="5" label="Minimum number of expected homozygous SNPs" help="default: 25 (--min-snps)">
+            <validator type="in_range" min="0"/>
+        </param>
+        <param name="min_total_depth" type="integer" value="20" size="10" label="The minimum overall sequencing depth requiredfor a SNP to be considered" help="default: 20 (--min-total-depth)">
+            <validator type="in_range" min="0"/>
+        </param>
+        <param name="min_gt_depth" type="integer" value="0" size="10" label="The minimum required sequencing depth underlying a given sample's genotype for a SNP to be considered" 
+            help="default: 0 (--min-gt-depth)">
+            <validator type="in_range" min="0"/>
+        </param>
+        <param name="min_size" type="integer" value="100000" size="10" label="Minimum run size in base pairs" help="default: 100000 (--min-size)">
+            <validator type="in_range" min="1"/>
+        </param>
+        <param name="max_hets" type="integer" value="1" size="5" label="Maximum number of allowed hets in the run" help="default: 1 (--max-hets)">
+            <validator type="in_range" min="1"/>
+        </param>
+        <param name="max_unknowns" type="integer" value="3" size="5" label="Maximum number of allowed unknowns in the run" help="default: 3 (-max-unknowns)">
+            <validator type="in_range" min="0"/>
+        </param>
+
+        <param name="samples" size="30" type="text" value="" label="Comma separated list of samples to screen for ROHs" help="e.g S120,S450 (-s)"/>
+
+    </inputs>
+
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+===========================================================================
+``ROH``: Identifying runs of homozygosity
+===========================================================================
+Runs of homozygosity are long stretches of homozygous genotypes that reflect
+segments shared identically by descent and are a result of consanguinity or
+natural selection. Consanguinity elevates the occurrence of rare recessive 
+diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious 
+mutations. Hence, the identification of these runs holds medical value. 
+
+The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data. 
+The tool basically looks at every homozygous position on the chromosome as a possible
+start site for the run and looks for those that could give rise to a potentially long 
+stretch of homozygous genotypes. 
+
+For e.g. for the given example allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u) 
+the possible roh runs (H) would be:
+
+
+::
+
+	genotype_run = H H H H h H H H H u H H H H H u H H H H H H H h H H H H H h H H H H H
+	roh_run1     = H H H H h H H H H u H H H H H u H H H H H H H
+	roh_run2     =           H H H H u H H H H H u H H H H H H H h H H H H H
+	roh_run3     =                     H H H H H u H H H H H H H h H H H H H
+	roh_run4     =                                 H H H H H H H h H H H H H
+
+roh returned for --min-snps = 20 would be:
+
+::
+	
+	roh_run1     = H H H H h H H H H u H H H H H u H H H H H H H
+	roh_run2     =           H H H H u H H H H H u H H H H H H H h H H H H H
+
+
+As you can see, the immediate homozygous position right of a break (h or u) would be the possible 
+start of a new roh run and genotypes to the left of a break are pruned since they cannot 
+be part of a longer run than we have seen before.
+
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>