annotate libexec/filterSomaticVariants.pl @ 19:bddc6981a6a7

change version
author mini
date Wed, 15 Oct 2014 15:30:28 +0200
parents 0e8e6011082b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
1 #!/usr/bin/env perl
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
2
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
3 =head1 LICENSE
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
4
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
5 Strelka Workflow Software
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
6 Copyright (c) 2009-2013 Illumina, Inc.
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
7
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
8 This software is provided under the terms and conditions of the
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
9 Illumina Open Source Software License 1.
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
10
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
11 You should have received a copy of the Illumina Open Source
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
12 Software License 1 along with this program. If not, see
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
13 <https://github.com/downloads/sequencing/licenses/>.
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
14
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
15 =head1 SYNOPSIS
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
16
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
17 filterSomaticVariants.pl [options] | --help
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
18
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
19 =head2 SUMMARY
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
20
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
21 Aggregate and filter the variant caller results by chromosome
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
22
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
23 =cut
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
24
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
25 use warnings FATAL => 'all';
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
26 use strict;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
27
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
28 use Carp;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
29 $SIG{__DIE__} = \&Carp::confess;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
30
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
31 use File::Spec;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
32 use Getopt::Long;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
33 use Pod::Usage;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
34
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
35 my $baseDir;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
36 my $libDir;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
37 BEGIN {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
38 my $thisDir=(File::Spec->splitpath($0))[1];
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
39 $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
40 $libDir=File::Spec->catdir($baseDir,'lib');
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
41 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
42 use lib $libDir;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
43 use Utils;
9
0e8e6011082b Strelka Workflow 0.0.2
mini
parents: 6
diff changeset
44 use Vcf;
0e8e6011082b Strelka Workflow 0.0.2
mini
parents: 6
diff changeset
45 print "all imported in filter";
6
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
46
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
47
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
48 my $scriptName=(File::Spec->splitpath($0))[2];
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
49 my $argCount=scalar(@ARGV);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
50 my $cmdline = join(' ',$0,@ARGV);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
51
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
52
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
53 my ($chrom, $configFile);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
54 my $help;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
55
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
56 GetOptions( "chrom=s" => \$chrom,
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
57 "config=s" => \$configFile,
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
58 "help|h" => \$help) or pod2usage(2);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
59
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
60 pod2usage(2) if($help);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
61 pod2usage(2) unless(defined($chrom));
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
62 pod2usage(2) unless(defined($configFile));
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
63
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
64
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
65
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
66 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
67 # read config and validate values
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
68 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
69 checkFile($configFile,"configuration ini");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
70 my $config = parseConfigIni($configFile);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
71
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
72 my $chromSizeKey = "chrom_" . $chrom . "_size";
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
73 my $chromKnownSizeKey = "chrom_" . $chrom . "_knownSize";
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
74
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
75 for (("outDir", $chromSizeKey, $chromKnownSizeKey)) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
76 errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
77 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
78 for (qw(binSize ssnvQuality_LowerBound sindelQuality_LowerBound
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
79 isSkipDepthFilters depthFilterMultiple
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
80 snvMaxFilteredBasecallFrac snvMaxSpanningDeletionFrac
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
81 indelMaxRefRepeat indelMaxWindowFilteredBasecallFrac
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
82 indelMaxIntHpolLength)) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
83 errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
84 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
85
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
86 my $outDir = $config->{derived}{outDir};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
87 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
88 checkDir($outDir,"output");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
89 checkDir($chromDir,"output chromosome");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
90
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
91 my $userconfig = $config->{user};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
92
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
93 my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize});
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
94 for my $binId (@$binList) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
95 my $dir = File::Spec->catdir($chromDir,'bins',$binId);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
96 checkDir($dir,"input bin");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
97 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
98
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
99 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
100 # parameters from user config file:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
101 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
102
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
103 # minimum passed ssnv_nt Q-score:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
104 my $minQSSNT=$userconfig->{ssnvQuality_LowerBound};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
105 # minimum passed sindel_nt Q-score:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
106 my $minQSINT=$userconfig->{sindelQuality_LowerBound};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
107
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
108 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
109 # filtration parameters from user config file:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
110 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
111
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
112 # skip depth filters for targeted resequencing:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
113 my $isUseDepthFilter=(! $userconfig->{isSkipDepthFilters});
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
114 # multiple of the normal mean coverage to filter snvs and indels
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
115 my $depthFilterMultiple=$userconfig->{depthFilterMultiple};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
116 # max filtered basecall fraction for any sample
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
117 my $snvMaxFilteredBasecallFrac=$userconfig->{snvMaxFilteredBasecallFrac};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
118 # max snv spanning-deletion fraction for any sample
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
119 my $snvMaxSpanningDeletionFrac=$userconfig->{snvMaxSpanningDeletionFrac};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
120 # max indel reference repeat length
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
121 my $indelMaxRefRepeat=$userconfig->{indelMaxRefRepeat};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
122 # max indel window filter fraction
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
123 my $indelMaxWindowFilteredBasecallFrac=$userconfig->{indelMaxWindowFilteredBasecallFrac};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
124 # max indel interupted hompolymer length:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
125 my $indelMaxIntHpolLength=$userconfig->{indelMaxIntHpolLength};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
126
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
127
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
128 # first we want the normal sample mean chromosome depth:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
129 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
130 my $filterCoverage;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
131 if($isUseDepthFilter) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
132 my $totalCoverage = 0;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
133 for my $binId (@$binList) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
134 my $sFile = File::Spec->catfile($chromDir,'bins',$binId,'strelka.stats');
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
135 checkFile($sFile,"strelka bin stats");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
136 open(my $sFH, '<', $sFile)
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
137 || errorX("Can't open file: '$sFile' $!");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
138
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
139 my $is_found=0;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
140 while(<$sFH>) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
141 next unless(/^NORMAL_NO_REF_N_COVERAGE\s/);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
142 my $is_bad = 0;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
143 if(not /sample_size:\s*(\d+)\s+/) { $is_bad=1; }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
144 my $ss=$1;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
145 # leave the regex for mean fairly loose to pick up scientific notation, etc..
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
146 if(not /mean:\s*(\d[^\s]*|nan)\s+/) { $is_bad=1; }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
147 my $mean=$1;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
148 errorX("Unexpected format in file: '$sFile'") if($is_bad);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
149
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
150 my $coverage = ( $ss==0 ? 0 : int($ss*$mean) );
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
151
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
152 $totalCoverage += $coverage;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
153 $is_found=1;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
154 last;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
155 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
156 close($sFH);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
157 errorX("Unexpected format in file: '$sFile'") unless($is_found);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
158 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
159
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
160 my $chromKnownSize = $config->{derived}{$chromKnownSizeKey};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
161 my $normalMeanCoverage = ($totalCoverage/$chromKnownSize);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
162 $filterCoverage = $normalMeanCoverage*$depthFilterMultiple;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
163 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
164
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
165
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
166 # add filter description to vcf header unless it already exists
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
167 # return 1 if filter id already exists, client can decide if this is an error
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
168 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
169 sub add_vcf_filter($$$) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
170 my ($vcf,$id,$desc) = @_;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
171 return 1 if(scalar(@{$vcf->get_header_line(key=>'FILTER', ID=>$id)}));
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
172 $vcf->add_header_line({key=>'FILTER', ID=>$id,Description=>$desc});
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
173 return 0;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
174 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
175
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
176
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
177 sub check_vcf_for_sample($$$) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
178 my ($vcf,$sample,$file) = @_;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
179 my $is_found=0;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
180 for ($vcf->get_samples()) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
181 $is_found=1 if($_ eq $sample);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
182 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
183 errorX("Failed to find sample '$sample' in vcf file '$file'") unless($is_found);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
184 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
185
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
186
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
187
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
188 my $depthFiltId="DP";
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
189
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
190
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
191 # Runs all post-call vcf filters:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
192 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
193 sub filterSnvFileList(\@$$$) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
194 my ($ifiles,$depthFilterVal,$acceptFileName,$isUseDepthFilter) = @_;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
195
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
196 my $baseFiltId="BCNoise";
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
197 my $spanFiltId="SpanDel";
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
198 my $qFiltId="QSS_ref";
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
199
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
200 open(my $aFH,'>',$acceptFileName)
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
201 or errorX("Failed to open file: '$acceptFileName'");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
202
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
203 my $is_first=1;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
204 for my $ifile (@$ifiles) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
205 open(my $iFH,'<',"$ifile")
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
206 or errorX("Failed to open file: '$ifile'");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
207 my $vcf = Vcf->new(fh=>$iFH);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
208
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
209 # run some simple header validation on each vcf:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
210 $vcf->parse_header();
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
211 check_vcf_for_sample($vcf,'NORMAL',$ifile);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
212 check_vcf_for_sample($vcf,'TUMOR',$ifile);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
213
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
214 if($is_first) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
215 # TODO: update vcf meta-data for chromosome-level filtered files
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
216 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
217 $vcf->remove_header_line(key=>"cmdline");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
218 $vcf->add_header_line({key=>"cmdline",value=>$cmdline});
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
219 if($isUseDepthFilter) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
220 $vcf->add_header_line({key=>"maxDepth_$chrom",value=>$depthFilterVal});
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
221 add_vcf_filter($vcf,$depthFiltId,"Greater than ${depthFilterMultiple}x chromosomal mean depth in Normal sample");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
222 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
223 add_vcf_filter($vcf,$baseFiltId,"Fraction of basecalls filtered at this site in either sample is at or above $snvMaxFilteredBasecallFrac");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
224 add_vcf_filter($vcf,$spanFiltId,"Fraction of reads crossing site with spanning deletions in either sample exceeeds $snvMaxSpanningDeletionFrac");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
225 add_vcf_filter($vcf,$qFiltId,"Normal sample is not homozygous ref or ssnv Q-score < $minQSSNT, ie calls with NT!=ref or QSS_NT < $minQSSNT");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
226 print $aFH $vcf->format_header();
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
227 $is_first=0;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
228 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
229
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
230 while(my $x=$vcf->next_data_hash()) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
231 my $norm=$x->{gtypes}->{NORMAL};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
232 my $tumr=$x->{gtypes}->{TUMOR};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
233
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
234 my %filters;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
235
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
236 # normal depth filter:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
237 my $normalDP=$norm->{DP};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
238 if($isUseDepthFilter) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
239 $filters{$depthFiltId} = ($normalDP > $depthFilterVal);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
240 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
241
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
242 # filtered basecall fraction:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
243 my $normal_filt=($normalDP>0 ? $norm->{FDP}/$normalDP : 0);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
244
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
245 my $tumorDP=$tumr->{DP};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
246 my $tumor_filt=($tumorDP>0 ? $tumr->{FDP}/$tumorDP : 0);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
247
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
248 $filters{$baseFiltId}=(($normal_filt >= $snvMaxFilteredBasecallFrac) or
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
249 ($tumor_filt >= $snvMaxFilteredBasecallFrac));
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
250
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
251 # spanning deletion fraction:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
252 my $normalSDP=$norm->{SDP};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
253 my $normalSpanTot=($normalDP+$normalSDP);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
254 my $normalSpanDelFrac=($normalSpanTot>0 ? ($normalSDP/$normalSpanTot) : 0);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
255
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
256 my $tumorSDP=$tumr->{SDP};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
257 my $tumorSpanTot=($tumorDP+$tumorSDP);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
258 my $tumorSpanDelFrac=($tumorSpanTot>0 ? ($tumorSDP/$tumorSpanTot) : 0);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
259
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
260 $filters{$spanFiltId}=(($normalSpanDelFrac > $snvMaxSpanningDeletionFrac) or
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
261 ($tumorSpanDelFrac > $snvMaxSpanningDeletionFrac));
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
262
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
263 # Q-val filter:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
264 $filters{$qFiltId}=(($x->{INFO}->{NT} ne "ref") or
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
265 ($x->{INFO}->{QSS_NT} < $minQSSNT));
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
266
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
267 $x->{FILTER} = $vcf->add_filter($x->{FILTER},%filters);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
268
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
269 print $aFH $vcf->format_line($x);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
270 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
271
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
272 $vcf->close();
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
273 close($iFH);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
274 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
275
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
276 close($aFH);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
277 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
278
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
279
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
280
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
281 sub updateA2(\@$) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
282 my ($a2,$FH) = @_;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
283 my $line=<$FH>;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
284 unless(defined($line)) { errorX("Unexpected end of somatic indel window file"); }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
285 chomp $line;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
286 @$a2 = split("\t",$line);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
287 unless(scalar(@$a2)) { errorX("Unexpected format in somatic indel window file"); }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
288 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
289
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
290
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
291
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
292 sub filterIndelFileList(\@$$$) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
293 my ($ifiles,$depthFilterVal,$acceptFileName,$isUseDepthFilter) = @_;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
294
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
295 my $repeatFiltId="Repeat";
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
296 my $iHpolFiltId="iHpol";
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
297 my $baseFiltId="BCNoise";
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
298 my $qFiltId="QSI_ref";
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
299
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
300 open(my $aFH,'>',$acceptFileName)
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
301 or errorX("Failed to open file: '$acceptFileName'");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
302
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
303 my $is_first=1;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
304 for my $ifile (@$ifiles) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
305 open(my $iFH,'<',"$ifile")
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
306 or errorX("Failed to open somatic indel file: '$ifile'");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
307
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
308 my $iwfile = $ifile . ".window";
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
309 open(my $iwFH,'<',"$iwfile")
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
310 or errorX("Failed to open somatic indel window file: '$iwfile'");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
311
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
312 my @a2; # hold window file data for one line in case we overstep...
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
313
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
314 my $vcf = Vcf->new(fh=>$iFH);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
315
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
316 # run some simple header validation on each vcf:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
317 $vcf->parse_header();
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
318 check_vcf_for_sample($vcf,'NORMAL',$ifile);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
319 check_vcf_for_sample($vcf,'TUMOR',$ifile);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
320
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
321 if($is_first) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
322 # TODO: update all vcf meta-data for chromosome-level filtered files
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
323 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
324 $vcf->remove_header_line(key=>"cmdline");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
325 $vcf->add_header_line({key=>"cmdline",value=>$cmdline});
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
326 if($isUseDepthFilter) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
327 $vcf->add_header_line({key=>"maxDepth_$chrom",value=>$depthFilterVal});
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
328 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
329 $vcf->add_header_line({key=>'FORMAT', ID=>'DP50',Number=>1,Type=>'Float',Description=>'Average tier1 read depth within 50 bases'});
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
330 $vcf->add_header_line({key=>'FORMAT', ID=>'FDP50',Number=>1,Type=>'Float',Description=>'Average tier1 number of basecalls filtered from original read depth within 50 bases'});
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
331 $vcf->add_header_line({key=>'FORMAT', ID=>'SUBDP50',Number=>1,Type=>'Float',Description=>'Average number of reads below tier1 mapping quality threshold aligned across sites within 50 bases'});
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
332 if($isUseDepthFilter) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
333 add_vcf_filter($vcf,$depthFiltId,"Greater than ${depthFilterMultiple}x chromosomal mean depth in Normal sample");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
334 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
335 add_vcf_filter($vcf,$repeatFiltId,"Sequence repeat of more than ${indelMaxRefRepeat}x in the reference sequence");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
336 add_vcf_filter($vcf,$iHpolFiltId,"Indel overlaps an interupted homopolymer longer than ${indelMaxIntHpolLength}x in the reference sequence");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
337 add_vcf_filter($vcf,$baseFiltId,"Average fraction of filtered basecalls within 50 bases of the indel exceeds $indelMaxWindowFilteredBasecallFrac");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
338 add_vcf_filter($vcf,$qFiltId,"Normal sample is not homozygous ref or sindel Q-score < $minQSINT, ie calls with NT!=ref or QSI_NT < $minQSINT");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
339 print $aFH $vcf->format_header();
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
340 $is_first=0;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
341 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
342
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
343 while(my $x=$vcf->next_data_hash()) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
344 $vcf->add_format_field($x,'DP50');
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
345 $vcf->add_format_field($x,'FDP50');
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
346 $vcf->add_format_field($x,'SUBDP50');
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
347
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
348 my $norm=$x->{gtypes}->{NORMAL};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
349 my $tumr=$x->{gtypes}->{TUMOR};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
350
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
351 my $chrom=$x->{CHROM};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
352 my $pos=int($x->{POS});
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
353
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
354 # get matching line from window file:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
355 while((scalar(@a2)<2) or
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
356 (($a2[0] le $chrom) and (int($a2[1]) < $pos))) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
357 updateA2(@a2,$iwFH);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
358 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
359 unless(scalar(@a2) and ($a2[0] eq $chrom) and (int($a2[1]) == $pos))
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
360 { errorX("Can't find somatic indel window position.\nIndel line: " . $vcf->format_line($x) ); }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
361
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
362 # add window data to vcf record:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
363 #
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
364 $norm->{DP50} = $a2[2]+$a2[3];
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
365 $norm->{FDP50} = $a2[3];
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
366 $norm->{SUBDP50} = $a2[4];
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
367 $tumr->{DP50} = $a2[5]+$a2[6];
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
368 $tumr->{FDP50} = $a2[6];
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
369 $tumr->{SUBDP50} = $a2[7];
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
370
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
371 my %filters;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
372
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
373 # normal depth filter:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
374 my $normalDP=$norm->{DP};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
375 if($isUseDepthFilter) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
376 $filters{$depthFiltId}=($normalDP > $depthFilterVal);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
377 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
378
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
379 # ref repeat
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
380 my $refRep=$x->{INFO}->{RC};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
381 $filters{$repeatFiltId}=(defined($refRep) and
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
382 ($refRep > $indelMaxRefRepeat));
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
383
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
384 # ihpol
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
385 my $iHpol=$x->{INFO}->{IHP};
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
386 $filters{$iHpolFiltId}=(defined($iHpol) and
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
387 ($iHpol > $indelMaxIntHpolLength));
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
388
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
389 # base filt:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
390 my $normWinFrac=( $norm->{DP50} ? $norm->{FDP50}/$norm->{DP50} : 0 );
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
391 my $tumrWinFrac=( $tumr->{DP50} ? $tumr->{FDP50}/$tumr->{DP50} : 0 );
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
392 $filters{$baseFiltId}=( ($normWinFrac >= $indelMaxWindowFilteredBasecallFrac) or
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
393 ($tumrWinFrac >= $indelMaxWindowFilteredBasecallFrac) );
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
394
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
395 # Q-val filter:
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
396 $filters{$qFiltId}=(($x->{INFO}->{NT} ne "ref") or
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
397 ($x->{INFO}->{QSI_NT} < $minQSINT));
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
398
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
399 $x->{FILTER} = $vcf->add_filter($x->{FILTER},%filters);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
400
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
401 print $aFH $vcf->format_line($x);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
402 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
403
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
404 $vcf->close();
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
405 close($iFH);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
406 close($iwFH);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
407 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
408
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
409 close($aFH);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
410 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
411
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
412
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
413
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
414 my @ssnvFiles;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
415 my @sindelFiles;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
416 for my $binId (@$binList) {
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
417 my $ssnvFile = File::Spec->catfile($chromDir,'bins',$binId,'somatic.snvs.unfiltered.vcf');
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
418 my $sindelFile = File::Spec->catfile($chromDir,'bins',$binId,'somatic.indels.unfiltered.vcf');
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
419 checkFile($ssnvFile,"bin snv file");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
420 checkFile($sindelFile,"bin indel file");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
421 push @ssnvFiles,$ssnvFile;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
422 push @sindelFiles,$sindelFile;
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
423 }
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
424
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
425 my $ssnvOutFile = File::Spec->catfile($chromDir,"somatic.snvs.vcf");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
426 filterSnvFileList(@ssnvFiles,$filterCoverage,$ssnvOutFile,$isUseDepthFilter);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
427
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
428 my $sindelOutFile = File::Spec->catfile($chromDir,"somatic.indels.vcf");
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
429 filterIndelFileList(@sindelFiles,$filterCoverage,$sindelOutFile,$isUseDepthFilter);
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
430
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
431
87568e5a7d4f Testing strelka version 0.0.1
mini
parents:
diff changeset
432 1;