# HG changeset patch # User wolma # Date 1418509170 18000 # Node ID 2cd8b25e36852061f8e195cfcec94bf4ff3df4f3 Imported from capsule None diff -r 000000000000 -r 2cd8b25e3685 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Sat Dec 13 17:19:30 2014 -0500 @@ -0,0 +1,6 @@ + + + + + + diff -r 000000000000 -r 2cd8b25e3685 vcf_filter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vcf_filter.xml Sat Dec 13 17:19:30 2014 -0500 @@ -0,0 +1,127 @@ + + Extracts lines from a vcf variant file based on field-specific filters + + mimodd + + mimodd version -q + + mimodd vcf-filter + $inputfile + -o $outputfile + #if len($datasets): + -s + #for $i in $datasets + "$i.sample" + #end for + --gt + #for $i in $datasets + ## remove whitespace from free-text input + #echo ("".join($i.GT.split()) or "ANY") + #echo " " + #end for + --dp + #for $i in $datasets + $i.DP + #end for + --gq + #for $i in $datasets + $i.GQ + #end for + #end if + #if len($regions): + -r + #for $i in $regions + #if $i.stop: + $i.chrom:$i.start-$i.stop + #else: + $i.chrom:$i.start + #end if + #end for + #end if + #if $vfilter: + --vfilter + ## remove ',' (and possibly adjacent whitespace) and replace with ' ' + #echo (" ".join("".join($vfilter.split()).split(','))) + #end if + $vartype + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + + **What it does** + +The tool filters a variant file in VCF format to generate a new VCF file with only a subset of the original variants. + +The following types of variant filters can be set up: + +1) Sample-specific filters: + + Filter variants based on their characteristics in the sequenced reads of a specific sample. Multiple sample-specific filters are combined by logical AND, i.e., only variants that pass ALL sample-specific filters are kept. + +2) Region filters: + + Filter variants based on the genomic region they affect. 
Multiple region filters are combined by logical OR, i.e., variants passing ANY region filter are kept. + +3) Variant type filter: + + Filter variants by their type, i.e. whether they are single nucleotide variations (SNVs) or indels + +In addition, the *sample* filter can be used to reduce the samples encoded in a multi-sample VCF file to just those specified by the filter. +The *sample* filter is included mainly for compatibility reasons: if an external tool cannot deal with the multi-sample file format, but instead looks only at the first sample-specific column of the file, you can use the filter to turn the multi-sample file into a single-sample file. Besides, the filter can also be used to change the order of the samples since it will sort the samples in the order specified in the filter field. + +**Examples of sample-specific filters:** + +*Simple genotype pattern* + +genotype pattern: 1/1 ==> keep all variants in the VCF input file for which the specified sample's genotype is homozygous mutant + +*Complex genotype pattern* + +genotype pattern: 0/1, 0/0 ==> keep all variants for which the sample's genotype is either heterozygous or homozygous wildtype + +*Multiple sample-specific filters* + +Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern: 1/1: +==> keep all variants for which the first sample's genotype is homozygous wildtype **and** the second sample's genotype is homozygous mutant + +*Combining sample-specific filter criteria* + +genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9 +==> keep variants for which the sample's genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9 +**and** at least three reads from the sample cover the variant site + +**TIP:** + +As in the example above, genotype quality is typically most useful in combination with a genotype pattern. +It then effectively acts to make the genotype filter more stringent. + + + + +