6
|
1 #!/usr/bin/env perl
|
|
2
|
|
3 =head1 LICENSE
|
|
4
|
|
5 Strelka Workflow Software
|
|
6 Copyright (c) 2009-2013 Illumina, Inc.
|
|
7
|
|
8 This software is provided under the terms and conditions of the
|
|
9 Illumina Open Source Software License 1.
|
|
10
|
|
11 You should have received a copy of the Illumina Open Source
|
|
12 Software License 1 along with this program. If not, see
|
|
13 <https://github.com/downloads/sequencing/licenses/>.
|
|
14
|
|
15 =head1 SYNOPSIS
|
|
16
|
|
17 consolidateSomaticVariants.pl [options] | --help
|
|
18
|
|
19 =head2 SUMMARY
|
|
20
|
|
21 Aggregate final results from all chromosomes
|
|
22
|
|
23 =cut
|
|
24
|
|
25 use warnings FATAL => 'all';
|
|
26 use strict;
|
|
27
|
|
28 use Carp;
|
|
29 $SIG{__DIE__} = \&Carp::confess;
|
|
30
|
|
31 use File::Spec;
|
|
32 use File::Temp;
|
|
33 use Getopt::Long;
|
|
34 use Pod::Usage;
|
|
35
|
|
# Work out the install layout at compile time so that the 'use lib' line that
# follows can already see $libDir: the base directory is the parent of the
# directory component of $0, and the library directory is its 'lib' child.
my ($baseDir, $libDir);
BEGIN {
    my $scriptDir = (File::Spec->splitpath($0))[1];
    $baseDir = File::Spec->catdir($scriptDir, File::Spec->updir());
    $libDir  = File::Spec->catdir($baseDir, 'lib');
}
|
|
43 use lib $libDir;
|
|
44 use Utils;
|
|
45 use Vcf;
|
|
46
|
|
# A true return from getAbsPath() is treated as failure to resolve the install
# directory. NOTE(review): presumably it also rewrites $baseDir in place on
# success -- confirm against Utils.pm.
errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'")
    if(getAbsPath($baseDir));


# Script name, argument count and the full command line as invoked. These are
# captured here, before GetOptions() removes the parsed options from @ARGV.
my $scriptName = (File::Spec->splitpath($0))[2];
my $argCount   = scalar(@ARGV);
my $cmdline    = join(' ', $0, @ARGV);


my ($configFile, $help);

# An option parsing failure, an explicit help request, or a missing --config
# each end in pod2usage(2).
pod2usage(2) unless(GetOptions("config=s" => \$configFile,
                               "help|h"   => \$help));
pod2usage(2) if($help or not defined($configFile));
|
|
65
|
|
#
# read config and validate values
#
checkFile($configFile,"configuration ini");
my $config = parseConfigIni($configFile);


# keys that must be present under the 'derived' and 'user' parts of the
# parsed configuration, respectively:
for (qw(outDir chromOrder)) {
    errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
}
for (qw(isWriteRealignedBam binSize)) {
    errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
}

my $userconfig = $config->{user};

# chromOrder is a tab-delimited list of chromosome names. Each name must have
# a "chrom_<name>_size" value under 'derived', because that value is handed to
# getBinList() further down.
my @chromOrder = split(/\t/,$config->{derived}{chromOrder});
for my $chrom (@chromOrder) {
    my $chromSizeKey = "chrom_" . $chrom . "_size";
    # check the configured value; the key string itself is always defined:
    errorX("Undefined configuration option: '$chromSizeKey'")
        unless(defined($config->{derived}{$chromSizeKey}));
}
|
|
87
|
|
my $outDir = $config->{derived}{outDir};
checkDir($outDir, "output");


my $isWriteRealignedBam = $userconfig->{isWriteRealignedBam};

# Every chromosome must have its input directory. The per-bin directories are
# only checked when realigned bam output is switched on.
for my $chrom (@chromOrder) {
    my $chromDir = File::Spec->catdir($outDir, 'chromosomes', $chrom);
    checkDir($chromDir, "input chromosome");

    if($isWriteRealignedBam) {
        my $chromSize = $config->{derived}{"chrom_${chrom}_size"};
        my $bins = getBinList($chromSize, $userconfig->{binSize});
        for my $binId (@$bins) {
            checkDir(File::Spec->catdir($chromDir, 'bins', $binId), "input bin");
        }
    }
}



# suffix used for large result file intermediates:
my $itag = ".incomplete";
|
|
111
|
|
112
|
|
#
# concatenate vcfs:
#
# For every chromosome in @chromOrder, read
# <outDir>/chromosomes/<chrom>/$fileName and write two files under
# <outDir>/results:
#
#   all.$fileName    -- one merged header followed by the data lines of every
#                       chromosome, in @chromOrder order
#   passed.$fileName -- the same content restricted to header lines and to
#                       records whose 7th tab-delimited field is "PASS"
#                       (lines with fewer than 7 fields are kept as-is)
#
# Both files are written with the $itag suffix and are only moved to their
# final names once they have been written and closed without error, so a
# failed write never ends up under the final file name.
#
# Argument: $fileName -- file name of the per-chromosome vcf
#
sub concatenateVcfs($) {
    my $fileName = shift;

    # without at least one chromosome there is no header to build from:
    errorX("No chromosomes available to concatenate file: '$fileName'")
        unless(scalar(@chromOrder));

    my $is_first = 1;

    my $allFileName = "all." . $fileName;
    my $allFile = File::Spec->catfile($outDir,'results',$allFileName . $itag);
    open(my $aFH,'>',"$allFile")
        || errorX("Failed to open file: '$allFile'");

    # loop over all chroms once to create the header, and one more time for all the data:
    my $headervcf;
    for my $chrom (@chromOrder) {
        my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
        my $iFile = File::Spec->catfile($chromDir,$fileName);
        checkFile($iFile);

        my $depthKey="maxDepth_${chrom}";

        if($is_first) {
            # the first chromosome supplies the header template; its cmdline
            # entry is replaced by this script's own command line and its
            # maxDepth entry is dropped here and re-added by the block below
            open(my $iFH,'<',"$iFile")
                || errorX("Failed to open file: '$iFile'");
            $headervcf = Vcf->new(fh=>$iFH);
            $headervcf->parse_header();
            $headervcf->remove_header_line(key=>"cmdline");
            $headervcf->add_header_line({key=>"cmdline",value=>$cmdline});
            $headervcf->remove_header_line(key=>"$depthKey");
            close($iFH);
            $is_first=0;
        }

        {
            # copy this chromosome's maxDepth header line(s) into the merged header
            open(my $iFH,'<',"$iFile")
                || errorX("Failed to open file: '$iFile'");
            my $vcf = Vcf->new(fh=>$iFH);
            $vcf->parse_header();
            for my $line (@{$vcf->get_header_line(key=>"$depthKey")}) {
                # $line seems to be returned as a length 1 array ref to a hash
                $headervcf->add_header_line($line->[0]);
            }
            $vcf->close();
            close($iFH);
        }
    }
    print $aFH $headervcf->format_header();
    $headervcf->close();

    for my $chrom (@chromOrder) {
        my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
        my $iFile = File::Spec->catfile($chromDir,$fileName);

        open(my $iFH,'<',"$iFile")
            || errorX("Failed to open file: '$iFile'");

        # parse_header() is used to skip past this file's header so that the
        # raw read below only sees data lines.
        # NOTE(review): relies on Vcf leaving the handle positioned right
        # after the header -- confirm against Vcf.pm.
        my $vcf = Vcf->new(fh=>$iFH);
        $vcf->parse_header();
        print $aFH $_ while(<$iFH>);

        # release the reader the same way the header pass does
        $vcf->close();
        close($iFH);
    }

    # a false return from close also reports earlier failed writes on the handle
    close($aFH)
        || errorX("Failed to close file: '$allFile': $!");

    # make a second set of files with only the passed variants:
    my $passedFileName = "passed." . $fileName;
    my $passedFile = File::Spec->catfile($outDir,'results',$passedFileName . $itag);
    open(my $pFH,'>',"$passedFile")
        || errorX("Failed to open file: '$passedFile'");

    open(my $arFH,'<',"$allFile")
        || errorX("Failed to open file: '$allFile'");

    while(<$arFH>) {
        chomp;
        unless(/^#/) {
            my @F = split(/\t/);
            next if((scalar(@F)>=7) && ($F[6] ne "PASS"));
        }
        print $pFH "$_\n";
    }

    close($arFH);
    close($pFH)
        || errorX("Failed to close file: '$passedFile': $!");

    # only now promote the intermediates to their final names:
    my $allFileFinished = File::Spec->catfile($outDir,'results',$allFileName);
    checkMove($allFile,$allFileFinished);

    my $passedFileFinished = File::Spec->catfile($outDir,'results',$passedFileName);
    checkMove($passedFile,$passedFileFinished);
}
|
|
204
|
|
# Consolidate the snv calls first, then the indel calls. A named loop variable
# is used because concatenateVcfs() assigns to the global $_ while reading.
for my $vcfName ("somatic.snvs.vcf", "somatic.indels.vcf") {
    concatenateVcfs($vcfName);
}


# file name suffix shared by the per-bin and the merged realigned bams:
my $bamSuffix = ".realigned.bam";
|
|
210
|
|
# Merge the per-bin realigned bams of one sample into a single indexed bam.
#
# The inputs are <outDir>/chromosomes/<chrom>/bins/<bin>/<label>.realigned.bam
# for every chromosome in @chromOrder and every bin from getBinList(). The
# merged file is written to <outDir>/realigned/ under the $itag suffix, moved
# to its final name, and then indexed. The header of the first bin bam is
# passed to 'samtools merge -h'.
#
# Argument: $label -- sample label used as the bam file name prefix
#
# The output directory is checked/created first; after that the sub returns
# early if there are no bin bams to merge.
#
sub consolidateBam($) {
    my $label = shift;

    my $bamName = $label . $bamSuffix;

    my $mergedDir = File::Spec->catdir($outDir,'realigned');
    checkMakeDir($mergedDir);

    # collect the bin-level bams in chromosome order, then bin order:
    my @binBams;
    for my $chrom (@chromOrder) {
        my $chromDir  = File::Spec->catdir($outDir,'chromosomes',$chrom);
        my $chromSize = $config->{derived}{"chrom_${chrom}_size"};

        my $bins = getBinList($chromSize,$userconfig->{binSize});
        for my $binId (@$bins) {
            my $binDir = File::Spec->catdir($chromDir,'bins',$binId);
            my $binBam = File::Spec->catfile($binDir,$bamName);
            checkFile($binBam,"bin realigned bam file");

            push @binBams,$binBam;
        }
    }

    return unless(scalar(@binBams));

    # The File::Temp object is interpolated as a string in the commands below
    # and has to stay in scope until the merge command has run.
    my $headerTmp = File::Temp->new();
    executeCmd("bash -c 'samtools view -H $binBams[0] > $headerTmp'");

    my $mergedTmp = File::Spec->catfile($mergedDir,$bamName . $itag);
    executeCmd("samtools merge -h $headerTmp $mergedTmp " . join(" ",@binBams));

    my $mergedFinal = File::Spec->catfile($mergedDir,$bamName);
    checkMove($mergedTmp,$mergedFinal);

    executeCmd("samtools index $mergedFinal");

    # for now don't remove all the bin realignments...
    # unlink(@binBams);
}
|
|
253
|
|
# Realigned bams are merged only when the user configuration asks for them:
# the normal sample first, then the tumor sample.
if($isWriteRealignedBam) {
    for my $sampleLabel ("normal", "tumor") {
        consolidateBam($sampleLabel);
    }
}


1;
|
|
261
|
|
262 __END__
|
|
263
|