annotate vcf_filter.xml @ 2:72d20758ba2c

final upload
author wolma
date Wed, 11 Feb 2015 09:23:43 -0500
parents a548b3c6ed00
children ffee8534a5c4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
1 <tool id="vcf_filter" name="VCF Filter">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
2 <description>Extracts lines from a vcf variant file based on field-specific filters</description>
2
72d20758ba2c final upload
wolma
parents: 1
diff changeset
3 <macros>
72d20758ba2c final upload
wolma
parents: 1
diff changeset
4 <import>toolshed_macros.xml</import>
72d20758ba2c final upload
wolma
parents: 1
diff changeset
5 </macros>
72d20758ba2c final upload
wolma
parents: 1
diff changeset
6 <expand macro="requirements"/>
0
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
7 <version_command>mimodd version -q</version_command>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
8 <command>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
9 mimodd vcf-filter
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
10 "$inputfile"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
11 -o "$outputfile"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
12 #if len($datasets):
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
13 -s
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
14 #for $i in $datasets
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
15 "$i.sample"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
16 #end for
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
17 --gt
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
18 #for $i in $datasets
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
19 ## remove whitespace from free-text input
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
20 "#echo ("".join($i.GT.split()) or "ANY")#"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
21 #echo " "
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
22 #end for
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
23 --dp
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
24 #for $i in $datasets
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
25 "$i.DP"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
26 #end for
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
27 --gq
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
28 #for $i in $datasets
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
29 "$i.GQ"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
30 #end for
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
31 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
32 #if len($regions):
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
33 -r
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
34 #for $i in $regions
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
35 #if $i.stop:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
36 "$i.chrom:$i.start-$i.stop"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
37 #else:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
38 "$i.chrom:$i.start"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
39 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
40 #end for
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
41 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
42 #if $vfilter:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
43 --vfilter
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
44 ## remove ',' (and possibly adjacent whitespace) and replace with ' '
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
45 "#echo ('" "'.join($vfilter.split(',')))#"
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
46 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
47 $vartype
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
48 </command>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
49
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
50 <inputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
51 <param name="inputfile" type="data" format="vcf" label="VCF input file" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
52 <repeat name="datasets" title="Sample-specific Filter" default="0" min="0">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
53 <param name="sample" type="text" label="sample" help="name of a sample as it appears in the VCF input file and that indicates the sample that this filter should be applied to." />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
54 <param name="GT" type="text" label="genotype pattern(s) for the inclusion of variants" help="keep only variants for which the genotype of the sample matches the specified pattern; format: x/x where x = 0 is wildtype and x = 1 is mutant. Multiple genotypes can be specified as a comma-separated list." />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
55 <param name="DP" type="integer" label="depth of coverage for the sample at the variant site" value = "0" help="keep only variants with at least this sample-specific coverage at the variant site" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
56 <param name="GQ" type="integer" label="genotype quality for the variant in the sample" value = "0" help="keep only variants for which the genotype prediction for the sample has at least this quality" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
57 </repeat>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
58 <repeat name="regions" title="Region Filter" default="0" min="0" help = "Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported.">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
59 <param name="chrom" type="text" label="Chromosome" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
60 <param name="start" type="text" label="Region Start" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
61 <param name="stop" type="text" label="Region End" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
62 </repeat>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
63 <param name="vartype" type="select" label="Select the types of variants to include in the output">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
64 <option value="">all types of variants</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
65 <option value="--no-indels">exclude indels</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
66 <option value="--indels-only">only indels</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
67 </param>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
68 <param name="vfilter" type="text" label="sample" help="Filter output by sample name; only the sample-specific columns with their sample name matching any of the comma separated filters will be retained in the output." />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
69 </inputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
70
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
71 <outputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
72 <data name="outputfile" format="vcf" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
73 </outputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
74
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
75 <help>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
76 .. class:: infomark
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
77
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
78 **What it does**
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
79
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
80 The tool filters a variant file in VCF format to generate a new VCF file with only a subset of the original variants.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
81
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
82 The following types of variant filters can be set up:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
83
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
84 1) Sample-specific filters:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
85
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
86 Filter variants based on their characteristics in the sequenced reads of a specific sample. Multiple sample-specific filters are combined by logical AND, i.e., only variants that pass ALL sample-specific filters are kept.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
87
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
88 2) Region filters:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
89
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
90 Filter variants based on the genomic region they affect. Multiple region filters are combined by logical OR, i.e., variants passing ANY region filter are kept.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
91
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
92 3) Variant type filter:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
93
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
94 Filter variants by their type, i.e., whether they are single nucleotide variations (SNVs) or indels.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
95
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
96 In addition, the *sample* filter can be used to reduce the samples encoded in a multi-sample VCF file to just those specified by the filter.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
97 The *sample* filter is included mainly for compatibility reasons: if an external tool cannot deal with the multi-sample file format, but instead looks only at the first sample-specific column of the file, you can use the filter to turn the multi-sample file into a single-sample file. Besides, the filter can also be used to change the order of the samples since it will sort the samples in the order specified in the filter field.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
98
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
99 **Examples of sample-specific filters:**
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
100
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
101 *Simple genotype pattern*
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
102
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
103 genotype pattern: 1/1 ==> keep all variants in the VCF input file for which the specified sample's genotype is homozygous mutant
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
104
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
105 *Complex genotype pattern*
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
106
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
107 genotype pattern: 0/1, 0/0 ==> keep all variants for which the sample's genotype is either heterozygous or homozygous wildtype
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
108
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
109 *Multiple sample-specific filters*
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
110
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
111 Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern: 1/1
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
112 ==> keep all variants for which the first sample's genotype is homozygous wildtype **and** the second sample's genotype is homozygous mutant
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
113
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
114 *Combining sample-specific filter criteria*
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
115
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
116 genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
117 ==> keep variants for which the sample's genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
118 **and** at least three reads from the sample cover the variant site
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
119
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
120 **TIP:**
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
121
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
122 As in the example above, genotype quality is typically most useful in combination with a genotype pattern.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
123 It then effectively makes the genotype filter more stringent.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
124
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
125
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
126
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
127 </help>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
128 </tool>