Mercurial > repos > mini > strelka
comparison strelka2/libexec/consolidateResults.pl @ 0:7a9f20ca4ad5
Uploaded
author | mini |
---|---|
date | Thu, 25 Sep 2014 11:59:08 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7a9f20ca4ad5 |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 =head1 LICENSE | |
4 | |
5 Strelka Workflow Software | |
6 Copyright (c) 2009-2013 Illumina, Inc. | |
7 | |
8 This software is provided under the terms and conditions of the | |
9 Illumina Open Source Software License 1. | |
10 | |
11 You should have received a copy of the Illumina Open Source | |
12 Software License 1 along with this program. If not, see | |
13 <https://github.com/downloads/sequencing/licenses/>. | |
14 | |
15 =head1 SYNOPSIS | |
16 | |
17 consolidateSomaticVariants.pl [options] | --help | |
18 | |
19 =head2 SUMMARY | |
20 | |
21 Aggregate final results from all chromosomes | |
22 | |
23 =cut | |
24 | |
25 use warnings FATAL => 'all'; | |
26 use strict; | |
27 | |
28 use Carp; | |
29 $SIG{__DIE__} = \&Carp::confess; | |
30 | |
31 use File::Spec; | |
32 use File::Temp; | |
33 use Getopt::Long; | |
34 use Pod::Usage; | |
35 | |
36 my $baseDir; | |
37 my $libDir; | |
38 #my $optDir; | |
39 #my $vcftDir; | |
40 BEGIN { | |
41 my $thisDir=(File::Spec->splitpath($0))[1]; | |
42 $baseDir=File::Spec->catdir($thisDir,File::Spec->updir()); | |
43 $libDir=File::Spec->catdir($baseDir,'lib'); | |
44 #$optDir=File::Spec->catdir($baseDir,'opt'); | |
45 #$vcftDir=File::Spec->catdir($optDir,'vcftools','lib','perl5','site_perl'); | |
46 } | |
47 use lib $libDir; | |
48 use Utils; | |
49 #use lib $vcftDir; | |
50 use Vcf; | |
51 | |
52 if(getAbsPath($baseDir)) { | |
53 errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'"); | |
54 } | |
55 #$optDir=File::Spec->catdir($baseDir,'opt'); | |
56 | |
57 | |
58 my $scriptName=(File::Spec->splitpath($0))[2]; | |
59 my $argCount=scalar(@ARGV); | |
60 my $cmdline=join(' ',$0,@ARGV); | |
61 | |
62 | |
63 my $configFile; | |
64 my $help; | |
65 | |
66 GetOptions( "config=s" => \$configFile, | |
67 "help|h" => \$help) or pod2usage(2); | |
68 | |
69 pod2usage(2) if($help); | |
70 pod2usage(2) unless(defined($configFile)); | |
71 | |
72 # | |
73 # check fixed paths | |
74 # | |
75 #my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools'); | |
76 #checkFile($samtoolsBin,"samtools binary"); | |
77 | |
78 | |
79 # | |
80 # read config and validate values | |
81 # | |
82 checkFile($configFile,"configuration ini"); | |
83 my $config = parseConfigIni($configFile); | |
84 | |
85 | |
86 for (qw(outDir chromOrder)) { | |
87 errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_})); | |
88 } | |
89 for (qw(isWriteRealignedBam binSize)) { | |
90 errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_})); | |
91 } | |
92 | |
93 my $userconfig = $config->{user}; | |
94 | |
95 my @chromOrder = split(/\t/,$config->{derived}{chromOrder}); | |
96 for my $chrom (@chromOrder) { | |
97 my $chromSizeKey = "chrom_" . $chrom . "_size"; | |
98 errorX("Undefined configuration option: '$_'") unless(defined($chromSizeKey)); | |
99 } | |
100 | |
101 my $outDir = $config->{derived}{outDir}; | |
102 checkDir($outDir,"output"); | |
103 | |
104 | |
105 my $isWriteRealignedBam = $userconfig->{isWriteRealignedBam}; | |
106 | |
107 for my $chrom (@chromOrder) { | |
108 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); | |
109 checkDir($chromDir,"input chromosome"); | |
110 | |
111 next unless($isWriteRealignedBam); | |
112 my $chromSizeKey = "chrom_" . $chrom . "_size"; | |
113 my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize}); | |
114 for my $binId (@$binList) { | |
115 my $dir = File::Spec->catdir($chromDir,'bins',$binId); | |
116 checkDir($dir,"input bin"); | |
117 } | |
118 } | |
119 | |
120 | |
121 | |
122 # suffix used for large result file intermediates: | |
123 my $itag = ".incomplete"; | |
124 | |
125 | |
126 # | |
127 # concatenate vcfs: | |
128 # | |
129 sub concatenateVcfs($) { | |
130 my $fileName = shift; | |
131 | |
132 my $is_first = 1; | |
133 | |
134 my $allFileName = "all." . $fileName; | |
135 my $allFile = File::Spec->catfile($outDir,'results',$allFileName . $itag); | |
136 open(my $aFH,'>',"$allFile") | |
137 || errorX("Failed to open file: '$allFile'"); | |
138 | |
139 # loop over all chroms once to create the header, and one more time for all the data: | |
140 my $headervcf; | |
141 for my $chrom (@chromOrder) { | |
142 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); | |
143 my $iFile = File::Spec->catfile($chromDir,$fileName); | |
144 checkFile($iFile); | |
145 | |
146 my $depthKey="maxDepth_${chrom}"; | |
147 | |
148 if($is_first) { | |
149 open(my $iFH,'<',"$iFile") | |
150 || errorX("Failed to open file: '$iFile'"); | |
151 $headervcf = Vcf->new(fh=>$iFH); | |
152 $headervcf->parse_header(); | |
153 $headervcf->remove_header_line(key=>"cmdline"); | |
154 $headervcf->add_header_line({key=>"cmdline",value=>$cmdline}); | |
155 $headervcf->remove_header_line(key=>"$depthKey"); | |
156 close($iFH); | |
157 $is_first=0; | |
158 } | |
159 | |
160 { | |
161 open(my $iFH,'<',"$iFile") | |
162 || errorX("Failed to open file: '$iFile'"); | |
163 my $vcf = Vcf->new(fh=>$iFH); | |
164 $vcf->parse_header(); | |
165 for my $line (@{$vcf->get_header_line(key=>"$depthKey")}) { | |
166 # $line seems to be returned as a length 1 array ref to a hash -- ??!?!??!! | |
167 $headervcf->add_header_line($line->[0]); | |
168 } | |
169 $vcf->close(); | |
170 close($iFH); | |
171 } | |
172 } | |
173 print $aFH $headervcf->format_header(); | |
174 $headervcf->close(); | |
175 | |
176 for my $chrom (@chromOrder) { | |
177 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); | |
178 my $iFile = File::Spec->catfile($chromDir,$fileName); | |
179 | |
180 open(my $iFH,'<',"$iFile") | |
181 || errorX("Failed to open file: '$iFile'"); | |
182 | |
183 my $vcf = Vcf->new(fh=>$iFH); | |
184 $vcf->parse_header(); | |
185 print $aFH $_ while(<$iFH>); | |
186 } | |
187 | |
188 close($aFH); | |
189 | |
190 # make a second set of files with only the passed variants: | |
191 my $passedFileName = "passed." . $fileName; | |
192 my $passedFile = File::Spec->catfile($outDir,'results',$passedFileName . $itag); | |
193 open(my $pFH,'>',"$passedFile") | |
194 || errorX("Failed to open file: '$passedFile'"); | |
195 | |
196 open(my $arFH,'<',"$allFile") | |
197 || errorX("Failed to open file: '$allFile'"); | |
198 | |
199 while(<$arFH>) { | |
200 chomp; | |
201 unless(/^#/) { | |
202 my @F = split(/\t/); | |
203 next if((scalar(@F)>=7) && ($F[6] ne "PASS")); | |
204 } | |
205 print $pFH "$_\n"; | |
206 } | |
207 | |
208 close($arFH); | |
209 close($pFH); | |
210 | |
211 my $allFileFinished = File::Spec->catfile($outDir,'results',$allFileName); | |
212 checkMove($allFile,$allFileFinished); | |
213 | |
214 my $passedFileFinished = File::Spec->catfile($outDir,'results',$passedFileName); | |
215 checkMove($passedFile,$passedFileFinished); | |
216 } | |
217 | |
218 concatenateVcfs("somatic.snvs.vcf"); | |
219 concatenateVcfs("somatic.indels.vcf"); | |
220 | |
221 | |
222 my $bamSuffix = ".realigned.bam"; | |
223 | |
224 sub consolidateBam($) { | |
225 my $label = shift; | |
226 | |
227 my $fileName = $label . $bamSuffix; | |
228 | |
229 my $reDir = File::Spec->catdir($outDir,'realigned'); | |
230 checkMakeDir($reDir); | |
231 | |
232 my @bamList; | |
233 for my $chrom (@chromOrder) { | |
234 my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); | |
235 | |
236 my $chromSizeKey = "chrom_" . $chrom . "_size"; | |
237 my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize}); | |
238 for my $binId (@$binList) { | |
239 my $binDir = File::Spec->catdir($chromDir,'bins',$binId); | |
240 my $rbamFile = File::Spec->catfile($binDir,$fileName); | |
241 checkFile($rbamFile,"bin realigned bam file"); | |
242 | |
243 push @bamList,$rbamFile; | |
244 } | |
245 } | |
246 | |
247 return unless(scalar(@bamList)); | |
248 | |
249 my $headerFH = File::Temp->new(); | |
250 my $getHeaderCmd = "bash -c '$samtoolsBin view -H ".$bamList[0]." > $headerFH'"; | |
251 executeCmd($getHeaderCmd); | |
252 | |
253 my $allFile = File::Spec->catfile($reDir,$fileName . $itag); | |
254 my $cmd="$samtoolsBin merge -h $headerFH $allFile ". join(" ",@bamList); | |
255 executeCmd($cmd); | |
256 | |
257 my $allFileFinished = File::Spec->catfile($reDir,$fileName); | |
258 checkMove($allFile,$allFileFinished); | |
259 | |
260 my $indexCmd="$samtoolsBin index $allFileFinished"; | |
261 executeCmd($indexCmd); | |
262 | |
263 # for now don't remove all the bin realignments... | |
264 # unlink(@bamList); | |
265 } | |
266 | |
267 if($isWriteRealignedBam) { | |
268 consolidateBam("normal"); | |
269 consolidateBam("tumor"); | |
270 } | |
271 | |
272 | |
273 1; | |
274 | |
275 __END__ | |
276 |