annotate vcf_filter.xml @ 3:cc1b68d2795f draft default tip

upgrade to v0.1.5.1
author wolma
date Fri, 16 Jan 2015 11:20:32 -0500
parents 2cd8b25e3685
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
1 <tool id="vcf_filter" name="VCF Filter">
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
2 <description>Extracts lines from a vcf variant file based on field-specific filters</description>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
3 <requirements>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
4 <requirement type="package" version="0.1.5">mimodd</requirement>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
5 </requirements>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
6 <version_command>mimodd version -q</version_command>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
7 <command>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
8 mimodd vcf-filter
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
9 $inputfile
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
10 -o $outputfile
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
11 #if len($datasets):
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
12 -s
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
13 #for $i in $datasets
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
14 "$i.sample"
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
15 #end for
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
16 --gt
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
17 #for $i in $datasets
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
18 ## remove whitespace from free-text input
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
19 #echo ("".join($i.GT.split()) or "ANY")
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
20 #echo " "
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
21 #end for
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
22 --dp
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
23 #for $i in $datasets
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
24 $i.DP
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
25 #end for
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
26 --gq
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
27 #for $i in $datasets
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
28 $i.GQ
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
29 #end for
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
30 #end if
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
31 #if len($regions):
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
32 -r
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
33 #for $i in $regions
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
34 #if $i.stop:
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
35 $i.chrom:$i.start-$i.stop
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
36 #else:
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
37 $i.chrom:$i.start
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
38 #end if
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
39 #end for
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
40 #end if
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
41 #if $vfilter:
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
42 --vfilter
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
43 ## remove ',' (and possibly adjacent whitespace) and replace with ' '
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
44 #echo (" ".join("".join($vfilter.split()).split(',')))
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
45 #end if
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
46 $vartype
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
47 </command>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
48
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
49 <inputs>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
50 <param name="inputfile" type="data" format="vcf" label="VCF input file" />
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
51 <repeat name="datasets" title="Sample-specific Filter" default="0" min="0">
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
52 <param name="sample" type="text" label="sample" help="name of a sample as it appears in the VCF input file and that indicates the sample that this filter should be applied to." />
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
53 <param name="GT" type="text" label="genotype pattern(s) for the inclusion of variants" help="keep only variants for which the genotype of the sample matches the specified pattern; format: x/x where x = 0 is wildtype and x = 1 is mutant. Multiple genotypes can be specified as a comma-separated list." />
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
54 <param name="DP" type="integer" label="depth of coverage for the sample at the variant site" value = "0" help="keep only variants with at least this sample-specific coverage at the variant site" />
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
55 <param name="GQ" type="integer" label="genotype quality for the variant in the sample" value = "0" help="keep only variants for which the genotype prediction for the sample has at least this quality" />
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
56 </repeat>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
57 <repeat name="regions" title="Region Filter" default="0" min="0" help = "Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported.">
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
58 <param name="chrom" type="text" label="Chromosome" />
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
59 <param name="start" type="text" label="Region Start" />
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
60 <param name="stop" type="text" label="Region End" />
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
61 </repeat>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
62 <param name="vartype" type="select" label="Select the types of variants to include in the output">
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
63 <option value="">all types of variants</option>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
64 <option value="--no-indels">exclude indels</option>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
65 <option value="--indels-only">only indels</option>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
66 </param>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
67 <param name="vfilter" type="text" label="sample" help="Filter output by sample name; only the sample-specific columns with their sample name matching any of the comma separated filters will be retained in the output." />
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
68 </inputs>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
69
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
70 <outputs>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
71 <data name="outputfile" format="vcf" />
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
72 </outputs>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
73
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
74 <help>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
75 .. class:: infomark
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
76
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
77 **What it does**
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
78
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
79 The tool filters a variant file in VCF format to generate a new VCF file with only a subset of the original variants.
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
80
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
81 The following types of variant filters can be set up:
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
82
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
83 1) Sample-specific filters:
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
84
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
85 Filter variants based on their characteristics in the sequenced reads of a specific sample. Multiple sample-specific filters are combined by logical AND, i.e., only variants that pass ALL sample-specific filters are kept.
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
86
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
87 2) Region filters:
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
88
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
89 Filter variants based on the genomic region they affect. Multiple region filters are combined by logical OR, i.e., variants passing ANY region filter are kept.
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
90
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
91 3) Variant type filter:
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
92
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
93 Filter variants by their type, i.e., whether they are single nucleotide variations (SNVs) or indels.
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
94
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
95 In addition, the *sample* filter can be used to reduce the samples encoded in a multi-sample VCF file to just those specified by the filter.
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
96 The *sample* filter is included mainly for compatibility reasons: if an external tool cannot deal with the multi-sample file format, but instead looks only at the first sample-specific column of the file, you can use the filter to turn the multi-sample file into a single-sample file. The filter can also be used to change the order of the samples, since it sorts the samples in the order specified in the filter field.
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
97
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
98 **Examples of sample-specific filters:**
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
99
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
100 *Simple genotype pattern*
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
101
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
102 genotype pattern: 1/1 ==> keep all variants in the vcf input file for which the specified sample's genotype is homozygous mutant
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
103
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
104 *Complex genotype pattern*
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
105
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
106 genotype pattern: 0/1, 0/0 ==> keep all variants for which the sample's genotype is either heterozygous or homozygous wildtype
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
107
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
108 *Multiple sample-specific filters*
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
109
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
110 Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern: 1/1:
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
111 ==> keep all variants for which the first sample's genotype is homozygous wildtype **and** the second sample's genotype is homozygous mutant
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
112
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
113 *Combining sample-specific filter criteria*
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
114
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
115 genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
116 ==> keep variants for which the sample's genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
117 **and** at least three reads from the sample cover the variant site
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
118
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
119 **TIP:**
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
120
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
121 As in the example above, genotype quality is typically most useful in combination with a genotype pattern.
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
122 Used this way, it effectively makes the genotype filter more stringent.
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
123
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
124
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
125
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
126 </help>
2cd8b25e3685 Imported from capsule None
wolma
parents:
diff changeset
127 </tool>