6
|
1 #!/usr/bin/env perl
|
9
|
2
|
6
|
3 =head1 LICENSE
|
|
4
|
|
5 Strelka Workflow Software
|
|
6 Copyright (c) 2009-2013 Illumina, Inc.
|
|
7
|
|
8 This software is provided under the terms and conditions of the
|
|
9 Illumina Open Source Software License 1.
|
|
10
|
|
11 You should have received a copy of the Illumina Open Source
|
|
12 Software License 1 along with this program. If not, see
|
|
13 <https://github.com/downloads/sequencing/licenses/>.
|
|
14
|
|
15 =head1 SYNOPSIS
|
|
16
|
|
17 callSomaticVariants.pl [options] | --help
|
|
18
|
|
19 =head2 SUMMARY
|
|
20
|
|
21 Run the somatic variant caller for snvs and indels on a single
|
|
22 chromosome bin.
|
|
23
|
|
24 =cut
|
|
25
|
|
26 use warnings FATAL => 'all';
|
|
27 use strict;
|
|
28
|
|
29 use Carp;
|
|
30 $SIG{__DIE__} = \&Carp::confess;
|
|
31
|
|
32 use File::Spec;
|
|
33 use Getopt::Long;
|
|
34 use Pod::Usage;
|
|
35
|
|
36 my $baseDir;
|
|
37 my $libDir;
|
|
38 BEGIN {
|
|
39
|
|
40 my $thisDir=(File::Spec->splitpath($0))[1];
|
|
41 $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
|
|
42 $libDir=File::Spec->catdir($baseDir,'lib');
|
|
43 }
|
|
44 use lib $libDir;
|
|
45 use Utils;
|
9
|
46 print "all imported call";
|
6
|
47
|
|
48 if(getAbsPath($baseDir)) {
|
|
49 errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
|
|
50 }
|
|
51 my $libexecDir=File::Spec->catdir($baseDir,'libexec');
|
|
52
|
|
53
|
|
54 my $scriptName=(File::Spec->splitpath($0))[2];
|
|
55 my $argCount=scalar(@ARGV);
|
|
56 my $cmdline = join(' ',$0,@ARGV);
|
|
57
|
|
58
|
|
59 my ($chrom, $binId, $configFile);
|
|
60 my $help;
|
|
61
|
|
62 GetOptions(
|
|
63 "chrom=s" => \$chrom,
|
|
64 "bin=s" => \$binId,
|
|
65 "config=s" => \$configFile,
|
|
66 "help|h" => \$help) or pod2usage(2);
|
|
67
|
|
68 pod2usage(2) if($help);
|
|
69 pod2usage(2) unless(defined($chrom));
|
|
70 pod2usage(2) unless(defined($binId));
|
|
71 pod2usage(2) unless(defined($configFile));
|
|
72
|
|
73
|
|
74
|
|
75 #
|
|
76 # check all fixed paths (not based on commandline arguments):
|
|
77 #
|
|
78 checkDir($baseDir);
|
|
79 checkDir($libexecDir);
|
|
80
|
|
81 my $strelkaBin=File::Spec->catdir($libexecDir,'strelka2');
|
|
82 checkFile($strelkaBin,"strelka binary");
|
|
83
|
|
84
|
|
85
|
|
86 #
|
|
87 # read config and validate values
|
|
88 #
|
|
89 checkFile($configFile,"configuration ini");
|
|
90 my $config = parseConfigIni($configFile);
|
|
91
|
|
92 for (qw(knownGenomeSize tumorBam normalBam refFile outDir)) {
|
|
93 errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
|
|
94 }
|
|
95
|
|
96 # we skip maxInputDepth,minTier2Mapq here to allow older config files
|
|
97 for (qw(minTier1Mapq isWriteRealignedBam binSize ssnvPrior sindelPrior
|
|
98 ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac)) {
|
|
99 errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
|
|
100 }
|
|
101
|
|
102 my $outDir = $config->{derived}{outDir};
|
|
103 my $binDir = File::Spec->catdir($outDir,'chromosomes',$chrom,'bins',$binId);
|
|
104 checkDir($outDir,"output");
|
|
105 checkDir($binDir,"output bin");
|
|
106
|
|
107
|
|
108 my $tumorBam = $config->{derived}{tumorBam};
|
|
109 my $normalBam = $config->{derived}{normalBam};
|
|
110 my $refFile = $config->{derived}{refFile};
|
|
111 checkFile($tumorBam,"tumor BAM");
|
|
112 checkFile($normalBam,"normal BAM");
|
|
113 checkFile($refFile,"reference");
|
|
114
|
|
115
|
|
116 # pull out some config options for convenience:
|
|
117 my $binSize=$config->{user}{binSize};
|
|
118 my $isWriteRealignedBam=$config->{user}{isWriteRealignedBam};
|
|
119 my $knownGenomeSize = $config->{derived}{knownGenomeSize};
|
|
120
|
|
121
|
|
122 my $begin = (int($binId)*$binSize)+1;
|
|
123 my $end = ((int($binId)+1)*$binSize);
|
|
124 #my $end = $begin+100000; #debug mode
|
|
125
|
|
126 my $useroptions = $config->{user};
|
|
127
|
|
128 # set previous default value if an older config file is being used:
|
|
129 if(! defined($useroptions->{minTier2Mapq})) {
|
|
130 $useroptions->{minTier2Mapq} = 5;
|
|
131 }
|
|
132
|
|
133
|
|
134 #
|
|
135 # setup the strelka command-line:
|
|
136 #
|
|
137 my $strelka_base_opts= "-clobber" .
|
|
138 " -filter-unanchored" .
|
|
139 " -min-paired-align-score " . $useroptions->{minTier1Mapq} .
|
|
140 " -min-single-align-score 10" .
|
|
141 " -min-qscore 0" .
|
|
142 " -report-range-begin $begin -report-range-end $end" .
|
|
143 " -samtools-reference '$refFile'" .
|
|
144 " -max-window-mismatch 3 20 -print-used-allele-counts" .
|
|
145 " -bam-seq-name '" . $chrom . "'" .
|
|
146 " -genome-size $knownGenomeSize" .
|
|
147 " -max-indel-size 50" .
|
|
148 " -indel-nonsite-match-prob 0.5" .
|
|
149 " --min-contig-open-end-support 35" .
|
|
150 " --somatic-snv-rate " . $useroptions->{ssnvPrior} .
|
|
151 " --shared-site-error-rate " . $useroptions->{ssnvNoise} .
|
|
152 " --shared-site-error-strand-bias-fraction " . $useroptions->{ssnvNoiseStrandBiasFrac} .
|
|
153 " --somatic-indel-rate " . $useroptions->{sindelPrior} .
|
|
154 " --shared-indel-error-rate " . $useroptions->{sindelNoise} .
|
|
155 " --tier2-min-single-align-score " . $useroptions->{minTier2Mapq} .
|
|
156 " --tier2-min-paired-align-score " . $useroptions->{minTier2Mapq} .
|
|
157 " --tier2-single-align-score-rescue-mode" .
|
|
158 " --tier2-mismatch-density-filter-count 10" .
|
|
159 " --tier2-no-filter-unanchored" .
|
|
160 " --tier2-indel-nonsite-match-prob 0.25" .
|
|
161 " --tier2-include-singleton" .
|
|
162 " --tier2-include-anomalous";
|
|
163
|
|
164
|
|
165 my $somSnvFile='somatic.snvs.unfiltered.vcf';
|
|
166 my $somIndelFile='somatic.indels.unfiltered.vcf';
|
|
167
|
|
168 my $cmd = "$strelkaBin $strelka_base_opts" .
|
|
169 " -bam-file " . $normalBam .
|
|
170 " --tumor-bam-file " . $tumorBam .
|
|
171 " --somatic-snv-file " . File::Spec->catfile($binDir,$somSnvFile) .
|
|
172 " --somatic-indel-file " . File::Spec->catfile($binDir,$somIndelFile) .
|
|
173 " --variant-window-flank-file 50 " . File::Spec->catfile($binDir,$somIndelFile . '.window');
|
|
174
|
|
175
|
|
176 sub ualignFile($) {
|
|
177 return File::Spec->catfile($binDir,$_[0] . ".unsorted.realigned.bam");
|
|
178 }
|
|
179 sub alignFile($) {
|
|
180 return File::Spec->catfile($binDir,$_[0] . ".realigned");
|
|
181 }
|
|
182
|
|
183
|
|
184 if(exists($useroptions->{maxInputDepth}) && ($useroptions->{maxInputDepth} > 0)) {
|
|
185 $cmd .= " --max-input-depth " . $useroptions->{maxInputDepth};
|
|
186 }
|
|
187
|
|
188
|
|
189 if($isWriteRealignedBam) {
|
|
190 $cmd .= " -realigned-read-file " . ualignFile("normal") .
|
|
191 " --tumor-realigned-read-file " . ualignFile("tumor");
|
|
192 }
|
|
193
|
|
194 if(defined($useroptions->{extraStrelkaArguments})){
|
|
195 my $arg=$useroptions->{extraStrelkaArguments};
|
|
196 if($arg !~ /^\s*$/) {
|
|
197 $cmd .= " " . $arg;
|
|
198 }
|
|
199 }
|
|
200
|
|
201 # this file contains site stats that used to be printed on stdout:
|
|
202 #
|
|
203 $cmd .=
|
|
204 " --report-file " . File::Spec->catfile($binDir,'strelka.stats');
|
|
205
|
|
206 $cmd .=
|
|
207 " >| " . File::Spec->catfile($binDir,'strelka.stdout') .
|
|
208 " 2>| " . File::Spec->catfile($binDir,'strelka.stderr');
|
|
209
|
|
210
|
|
211 executeCmd($cmd,0);
|
|
212
|
|
213
|
|
214 if($isWriteRealignedBam) {
|
|
215 for my $label (qw(normal tumor)) {
|
|
216 my $ufile = ualignFile($label);
|
|
217 if( -f $ufile ) {
|
|
218 my $afile = alignFile($label);
|
|
219 my $cmd = "samtools sort " . $ufile . " " . $afile;
|
|
220 executeCmd($cmd,0);
|
|
221 unlink($ufile);
|
|
222 } else {
|
|
223 logX("Can't find unsorted realigned BAM file: '$ufile'");
|
|
224 }
|
|
225 }
|
|
226 }
|
|
227
|
|
228
|
|
229 1;
|
|
230
|
|
231 __END__
|
|
232
|
|
233
|