Mercurial > repos > iuc > gemini_roh
diff gemini_roh.xml @ 0:91b4db3e6df4 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
author | iuc |
---|---|
date | Thu, 18 Feb 2016 08:57:56 -0500 |
parents | |
children | ce54eb6fd5f9 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gemini_roh.xml Thu Feb 18 08:57:56 2016 -0500 @@ -0,0 +1,110 @@ +<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> + <description>Identifying runs of homozygosity</description> + <macros> + <import>gemini_macros.xml</import> + <token name="@BINARY@">roh</token> + </macros> + <expand macro="requirements" /> + <expand macro="stdio" /> + <expand macro="version_command" /> + <command> +<![CDATA[ + gemini @BINARY@ + --min-snps $min_snps + --min-total-depth $min_total_depth + --min-gt-depth $min_gt_depth + --min-size $min_size + --max-hets $max_hets + --max-unknowns $max_unknowns + #if $samples.strip(): + -s "${samples}" + #end if + "${ infile }" + > "${ outfile }" +]]> + </command> + <inputs> + <expand macro="infile" /> + + <param name="min_snps" type="integer" value="25" label="Minimum number of expected homozygous SNPs" help="default: 25 (--min-snps)"> + <validator type="in_range" min="0"/> + </param> + <param name="min_total_depth" type="integer" value="20" label="The minimum overall sequencing depth requiredfor a SNP to be considered" help="default: 20 (--min-total-depth)"> + <validator type="in_range" min="0"/> + </param> + <param name="min_gt_depth" type="integer" value="0" label="The minimum required sequencing depth underlying a given sample's genotype for a SNP to be considered" + help="default: 0 (--min-gt-depth)"> + <validator type="in_range" min="0"/> + </param> + <param name="min_size" type="integer" value="100000" label="Minimum run size in base pairs" help="default: 100000 (--min-size)"> + <validator type="in_range" min="1"/> + </param> + <param name="max_hets" type="integer" value="1" label="Maximum number of allowed hets in the run" help="default: 1 (--max-hets)"> + <validator type="in_range" min="1"/> + </param> + <param name="max_unknowns" type="integer" value="3" label="Maximum number of allowed unknowns in the run" help="default: 3 (-max-unknowns)"> + <validator type="in_range" min="0"/> + </param> + + <param name="samples" type="text" value="" label="Comma separated list of samples to screen for ROHs" help="e.g S120,S450 (-s)"/> + + </inputs> + + <outputs> + <data name="outfile" format="tabular" /> + </outputs> + <tests> + <test> + <param name="infile" value="gemini_burden_input.db" ftype="gemini.sqlite" /> + <param name="min_snps" value="3" /> + <param name="min_size" value="10" /> + <param name="min_total_depth" value="0" /> + <output name="outfile" file="gemini_roh_result.tabular" /> + </test> + </tests> + <help><![CDATA[ + +**What it does** + +=========================================================================== +``ROH``: Identifying runs of homozygosity +=========================================================================== +Runs of homozygosity are long stretches of homozygous genotypes that reflect +segments shared identically by descent and are a result of consanguinity or +natural selection. Consanguinity elevates the occurrence of rare recessive +diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious +mutations. Hence, the identification of these runs holds medical value. + +The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data. +The tool basically looks at every homozygous position on the chromosome as a possible +start site for the run and looks for those that could give rise to a potentially long +stretch of homozygous genotypes. + +For e.g. for the given example allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u) +the possible roh runs (H) would be: + + +:: + + genotype_run = H H H H h H H H H u H H H H H u H H H H H H H h H H H H H h H H H H H + roh_run1 = H H H H h H H H H u H H H H H u H H H H H H H + roh_run2 = H H H H u H H H H H u H H H H H H H h H H H H H + roh_run3 = H H H H H u H H H H H H H h H H H H H + roh_run4 = H H H H H H H h H H H H H + +roh returned for --min-snps = 20 would be: + +:: + + roh_run1 = H H H H h H H H H u H H H H H u H H H H H H H + roh_run2 = H H H H u H H H H H u H H H H H H H h H H H H H + + +As you can see, the immediate homozygous position right of a break (h or u) would be the possible +start of a new roh run and genotypes to the left of a break are pruned since they cannot +be part of a longer run than we have seen before. + + + ]]></help> + <expand macro="citations"/> +</tool>