comparison vcf_filter.xml @ 0:2cd8b25e3685 draft

Imported from capsule None
author wolma
date Sat, 13 Dec 2014 17:19:30 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2cd8b25e3685
1 <tool id="vcf_filter" name="VCF Filter">
<!-- One-line summary of the tool. -->
2 <description>Extracts lines from a vcf variant file based on field-specific filters</description>
<!-- Declares the mimodd package, pinned to version 0.1.5. The command section calls a `mimodd` executable, presumably provided by this package (confirm against the package recipe). -->
3 <requirements>
4 <requirement type="package" version="0.1.5">mimodd</requirement>
5 </requirements>
<!-- Command used to report the version of the underlying mimodd program. -->
6 <version_command>mimodd version -q</version_command>
7 <command>
## Assembles the `mimodd vcf-filter` command line from the tool form.
## Dataset paths, sample names and region strings are single-quoted so that each
## one reaches mimodd as exactly one literal argument, whatever characters it contains.
8 mimodd vcf-filter
9 '$inputfile'
10 -o '$outputfile'
11 #if len($datasets):
## The per-sample options are parallel lists: each loop below walks the same
## repeat in the same order, so entry N of every list belongs to filter N.
12 -s
13 #for $i in $datasets
14 '$i.sample'
15 #end for
16 --gt
17 #for $i in $datasets
18 ## remove whitespace from free-text input; an empty pattern becomes ANY
19 #echo ("".join($i.GT.split()) or "ANY")
20 #echo " "
21 #end for
22 --dp
23 #for $i in $datasets
24 $i.DP
25 #end for
26 --gq
27 #for $i in $datasets
28 $i.GQ
29 #end for
30 #end if
31 #if len($regions):
32 -r
33 #for $i in $regions
## a region without an end coordinate is passed as chrom:start only
34 #if $i.stop:
35 '${i.chrom}:${i.start}-${i.stop}'
36 #else:
37 '${i.chrom}:${i.start}'
38 #end if
39 #end for
40 #end if
41 #if $vfilter:
42 --vfilter
43 ## strip ALL whitespace from the input, then turn the comma-separated list into space-separated arguments
44 #echo (" ".join("".join($vfilter.split()).split(',')))
45 #end if
## expands to the selected variant-type flag, or to nothing for "all types of variants"
46 $vartype
47 </command>
48
49 <inputs>
<!-- Variant file to filter; the command section passes it to mimodd as the positional argument. -->
50 <param name="inputfile" type="data" format="vcf" label="VCF input file" />
<!-- Zero or more per-sample filters. The command section loops over this repeat once per option, so the sample, GT, DP and GQ values of one filter end up at the same position in their respective argument lists. -->
51 <repeat name="datasets" title="Sample-specific Filter" default="0" min="0">
52 <param name="sample" type="text" label="sample" help="name of a sample as it appears in the VCF input file and that indicates the sample that this filter should be applied to." />
<!-- Free text; the command section strips whitespace and substitutes ANY when the field is left empty. -->
53 <param name="GT" type="text" label="genotype pattern(s) for the inclusion of variants" help="keep only variants for which the genotype of the sample matches the specified pattern; format: x/x where x = 0 is wildtype and x = 1 is mutant. Multiple genotypes can be specified as a comma-separated list." />
<!-- DP and GQ are "at least" thresholds, so negative values are rejected in the form via min. -->
54 <param name="DP" type="integer" label="depth of coverage for the sample at the variant site" value="0" min="0" help="keep only variants with at least this sample-specific coverage at the variant site" />
55 <param name="GQ" type="integer" label="genotype quality for the variant in the sample" value="0" min="0" help="keep only variants for which the genotype prediction for the sample has at least this quality" />
56 </repeat>
<!-- Zero or more genomic regions. When Region End is left empty the command section emits chrom:start without an end coordinate. -->
57 <repeat name="regions" title="Region Filter" default="0" min="0" help="Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported.">
58 <param name="chrom" type="text" label="Chromosome" />
59 <param name="start" type="text" label="Region Start" />
60 <param name="stop" type="text" label="Region End" />
61 </repeat>
<!-- The selected option value is inserted verbatim at the end of the command line; the empty value adds no flag. -->
62 <param name="vartype" type="select" label="Select the types of variants to include in the output">
63 <option value="">all types of variants</option>
64 <option value="--no-indels">exclude indels</option>
65 <option value="--indels-only">only indels</option>
66 </param>
<!-- Comma-separated sample names; the command section only emits the vfilter option when this field is non-empty. The label matches the "sample filter" wording used in the help text. -->
67 <param name="vfilter" type="text" label="sample" help="Filter output by sample name; only the sample-specific columns with their sample name matching any of the comma separated filters will be retained in the output." />
68 </inputs>
69
70 <outputs>
<!-- Single VCF dataset; the command section hands its path to mimodd through the -o option. -->
71 <data name="outputfile" format="vcf" />
72 </outputs>
73
74 <help>
75 .. class:: infomark
76
77 **What it does**
78
79 The tool filters a variant file in VCF format to generate a new VCF file with only a subset of the original variants.
80
81 The following types of variant filters can be set up:
82
83 1) Sample-specific filters:
84
85 Filter variants based on their characteristics in the sequenced reads of a specific sample. Multiple sample-specific filters are combined by logical AND, i.e., only variants that pass ALL sample-specific filters are kept.
86
87 2) Region filters:
88
89 Filter variants based on the genomic region they affect. Multiple region filters are combined by logical OR, i.e., variants passing ANY region filter are kept.
90
91 3) Variant type filter:
92
93 Filter variants by their type, i.e., whether they are single nucleotide variations (SNVs) or indels.
94
95 In addition, the *sample* filter can be used to reduce the samples encoded in a multi-sample VCF file to just those specified by the filter.
96 The *sample* filter is included mainly for compatibility reasons: if an external tool cannot deal with the multisample file format, but instead looks only at the first sample-specific column of the file, you can use the filter to turn the multi-sample file into a single-sample file. Besides, the filter can also be used to change the order of the samples since it will sort the samples in the order specified in the filter field.
97
98 **Examples of sample-specific filters:**
99
100 *Simple genotype pattern*
101
102 genotype pattern: 1/1 ==> keep all variants in the vcf input file for which the specified sample's genotype is homozygous mutant
103
104 *Complex genotype pattern*
105
106 genotype pattern: 0/1, 0/0 ==> keep all variants for which the sample's genotype is either heterozygous or homozygous wildtype
107
108 *Multiple sample-specific filters*
109
110 Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern 1/1:
111 ==> keep all variants for which the first sample's genotype is homozygous wildtype **and** the second sample's genotype is homozygous mutant
112
113 *Combining sample-specific filter criteria*
114
115 genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9
116 ==> keep variants for which the sample's genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9
117 **and** at least three reads from the sample cover the variant site
118
119 **TIP:**
120
121 As in the example above, genotype quality is typically most useful in combination with a genotype pattern.
122 In that case it effectively makes the genotype filter more stringent.
123
124
125
126 </help>
127 </tool>