comparison vcf_filter.xml @ 23:5db0545b9004 draft

update to v0.1.7.3
author wolma
date Thu, 21 Jul 2016 03:55:49 -0400
parents c46406466625
children
comparison
equal deleted inserted replaced
22:24154c580718 23:5db0545b9004
1 <tool id="vcf_filter" name="VCF Filter" version="0.1.7.2"> 1 <tool id="vcf_filter" name="VCF Filter" version="0.1.7.3">
2 <description>Extracts lines from a vcf variant file based on field-specific filters</description> 2 <description>Extracts lines from a vcf variant file based on field-specific filters</description>
3 <macros> 3 <macros>
4 <import>toolshed_macros.xml</import> 4 <import>toolshed_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"/> 6 <expand macro="requirements" />
7 <version_command>python3 -m MiModD version -q</version_command> 7 <version_command>python3 -m MiModD version -q</version_command>
8 <command> 8 <command>
9 python3 -m MiModD vcf-filter 9 python3 -m MiModD vcf-filter
10 "$inputfile" 10 "$inputfile"
11 -o "$outputfile" 11 -o "$outputfile"
50 #end if 50 #end if
51 $vartype 51 $vartype
52 </command> 52 </command>
53 53
54 <inputs> 54 <inputs>
55 <param name="inputfile" type="data" format="vcf" label="VCF input file" /> 55 <param format="vcf" label="VCF input file" name="inputfile" type="data" />
56 <repeat name="datasets" title="Sample-specific Filter" default="0" min="0"> 56 <repeat default="0" min="0" name="datasets" title="Sample-specific Filter">
57 <param name="sample" type="text" label="sample" help="name of a sample as it appears in the VCF input file and that indicates the sample that this filter should be applied to." /> 57 <param help="name of a sample as it appears in the VCF input file and that indicates the sample that this filter should be applied to." label="sample" name="sample" type="text" />
58 <param name="GT" type="text" label="genotype pattern(s) for the inclusion of variants" help="keep only variants for which the genotype of the sample matches the specified pattern; format: x/x where x = 0 is wildtype and x = 1 is mutant. Multiple genotypes can be specified as a comma-separated list." /> 58 <param help="keep only variants for which the genotype of the sample matches the specified pattern; format: x/x where x = 0 is wildtype and x = 1 is mutant. Multiple genotypes can be specified as a comma-separated list." label="genotype pattern(s) for the inclusion of variants" name="GT" type="text" />
59 <param name="DP" type="integer" label="depth of coverage for the sample at the variant site" value = "0" help="keep only variants with at least this sample-specific coverage at the variant site" /> 59 <param help="keep only variants with at least this sample-specific coverage at the variant site" label="depth of coverage for the sample at the variant site" name="DP" type="integer" value="0" />
60 <param name="GQ" type="integer" label="genotype quality for the variant in the sample" value = "0" help="keep only variants for which the genotype prediction for the sample has at least this quality" /> 60 <param help="keep only variants for which the genotype prediction for the sample has at least this quality" label="genotype quality for the variant in the sample" name="GQ" type="integer" value="0" />
61 <param name="AF" type="text" label="allelic fraction filter" help="expected format: [allele number]:[minimal fraction]:[maximal fraction]; keep only variants for which the fraction of sample-specific reads supporting a given allele number is between minimal and maximal fraction; if allele number is omitted, the filter operates on the most frequent non-reference allele instead" /> 61 <param help="expected format: [allele number]:[minimal fraction]:[maximal fraction]; keep only variants for which the fraction of sample-specific reads supporting a given allele number is between minimal and maximal fraction; if allele number is omitted, the filter operates on the most frequent non-reference allele instead" label="allelic fraction filter" name="AF" type="text" />
62 </repeat> 62 </repeat>
63 <repeat name="regions" title="Region Filter" default="0" min="0" help = "Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported."> 63 <repeat default="0" help="Filter variant sites by their position in the genome. If multiple Region Filters are specified, all variants that fall in ONE of the regions are reported." min="0" name="regions" title="Region Filter">
64 <param name="chrom" type="text" label="Chromosome" /> 64 <param label="Chromosome" name="chrom" type="text" />
65 <param name="start" type="text" label="Region Start" /> 65 <param label="Region Start" name="start" type="text" />
66 <param name="stop" type="text" label="Region End" /> 66 <param label="Region End" name="stop" type="text" />
67 </repeat> 67 </repeat>
68 <param name="vartype" type="select" label="Select the types of variants to include in the output"> 68 <param label="Select the types of variants to include in the output" name="vartype" type="select">
69 <option value="">all types of variants</option> 69 <option value="">all types of variants</option>
70 <option value="--no-indels">exclude indels</option> 70 <option value="--no-indels">exclude indels</option>
71 <option value="--indels-only">only indels</option> 71 <option value="--indels-only">only indels</option>
72 </param> 72 </param>
73 <param name="vfilter" type="text" label="sample" help="Filter output by sample name; only the sample-specific columns with their sample name matching any of the comma separated filters will be retained in the output." /> 73 <param help="Filter output by sample name; only the sample-specific columns with their sample name matching any of the comma separated filters will be retained in the output." label="sample" name="vfilter" type="text" />
74 </inputs> 74 </inputs>
75 75
76 <outputs> 76 <outputs>
77 <data name="outputfile" format="vcf" /> 77 <data format="vcf" name="outputfile" />
78 </outputs> 78 </outputs>
79 79
80 <help> 80 <help>
81 .. class:: infomark 81 .. class:: infomark
82 82
103 103
104 **Examples of sample-specific filters:** 104 **Examples of sample-specific filters:**
105 105
106 *Simple genotype pattern* 106 *Simple genotype pattern*
107 107
108 genotype pattern: 1/1 ==> keep all variants in the vcf input file for which the specified sample's genotype is homozygous mutant 108 genotype pattern: 1/1 ==&gt; keep all variants in the vcf input file for which the specified sample's genotype is homozygous mutant
109 109
110 *Complex genotype pattern* 110 *Complex genotype pattern*
111 111
112 genotype pattern: 0/1, 0/0 ==> keep all variants for which the sample's genotype is either heterozygous or homozygous wildtype 112 genotype pattern: 0/1, 0/0 ==&gt; keep all variants for which the sample's genotype is either heterozygous or homozygous wildtype
113 113
114 *Multiple sample-specific filters* 114 *Multiple sample-specific filters*
115 115
116 Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern: 1/1: 116 Filter 1: genotype pattern: 0/0, Filter 2: genotype pattern: 1/1:
117 ==> keep all variants for which the first sample's genotype is homozygous wildtype **and** the second sample's genotype is homozygous mutant 117 ==&gt; keep all variants for which the first sample's genotype is homozygous wildtype **and** the second sample's genotype is homozygous mutant
118 118
119 *Combining sample-specific filter criteria* 119 *Combining sample-specific filter criteria*
120 120
121 genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9 121 genotype pattern: 1/1, depth of coverage: 3, genotype quality: 9
122 ==> keep variants for which the sample's genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9 122 ==&gt; keep variants for which the sample's genotype is homozygous mutant **and** for which this genotype assignment is corroborated by a genotype quality score of at least 9
123 **and** at least three reads from the sample cover the variant site 123 **and** at least three reads from the sample cover the variant site
124 124
125 **TIP:** 125 **TIP:**
126 126
127 As in the example above, genotype quality is typically most useful in combination with a genotype pattern. 127 As in the example above, genotype quality is typically most useful in combination with a genotype pattern.