6
|
1 #!/usr/bin/env perl
|
|
2 print "\n";
|
|
3 print @INC;
|
|
4 print "\n";
|
|
5 =head1 LICENSE
|
|
6
|
|
7 Strelka Workflow Software
|
|
8 Copyright (c) 2009-2013 Illumina, Inc.
|
|
9
|
|
10 This software is provided under the terms and conditions of the
|
|
11 Illumina Open Source Software License 1.
|
|
12
|
|
13 You should have received a copy of the Illumina Open Source
|
|
14 Software License 1 along with this program. If not, see
|
|
15 <https://github.com/downloads/sequencing/licenses/>.
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 callSomaticVariants.pl [options] | --help
|
|
20
|
|
21 =head2 SUMMARY
|
|
22
|
|
23 Run the somatic variant caller for snvs and indels on a single
|
|
24 chromosome bin.
|
|
25
|
|
26 =cut
|
|
27
|
|
28 use warnings FATAL => 'all';
|
|
29 use strict;
|
|
30
|
|
31 use Carp;
|
|
32 $SIG{__DIE__} = \&Carp::confess;
|
|
33
|
|
34 use File::Spec;
|
|
35 use Getopt::Long;
|
|
36 use Pod::Usage;
|
|
37
|
|
38 my $baseDir;
|
|
39 my $libDir;
|
|
40 BEGIN {
|
|
41 print "\n";
|
|
42 print @INC;
|
|
43 print "\n";
|
|
44
|
|
45 my $thisDir=(File::Spec->splitpath($0))[1];
|
|
46 $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
|
|
47 $libDir=File::Spec->catdir($baseDir,'lib');
|
|
48 }
|
|
49 use lib $libDir;
|
|
50 use Utils;
|
|
51
|
|
52 if(getAbsPath($baseDir)) {
|
|
53 errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
|
|
54 }
|
|
55 my $libexecDir=File::Spec->catdir($baseDir,'libexec');
|
|
56 #my $optDir=File::Spec->catdir($baseDir,'opt');
|
|
57
|
|
58
|
|
59 my $scriptName=(File::Spec->splitpath($0))[2];
|
|
60 my $argCount=scalar(@ARGV);
|
|
61 my $cmdline = join(' ',$0,@ARGV);
|
|
62
|
|
63
|
|
64 my ($chrom, $binId, $configFile);
|
|
65 my $help;
|
|
66
|
|
67 GetOptions(
|
|
68 "chrom=s" => \$chrom,
|
|
69 "bin=s" => \$binId,
|
|
70 "config=s" => \$configFile,
|
|
71 "help|h" => \$help) or pod2usage(2);
|
|
72
|
|
73 pod2usage(2) if($help);
|
|
74 pod2usage(2) unless(defined($chrom));
|
|
75 pod2usage(2) unless(defined($binId));
|
|
76 pod2usage(2) unless(defined($configFile));
|
|
77
|
|
78
|
|
79
|
|
80 #
|
|
81 # check all fixed paths (not based on commandline arguments):
|
|
82 #
|
|
83 checkDir($baseDir);
|
|
84 checkDir($libexecDir);
|
|
85 #checkDir($optDir);
|
|
86
|
|
87 my $strelkaBin=File::Spec->catdir($libexecDir,'strelka2');
|
|
88 checkFile($strelkaBin,"strelka binary");
|
|
89 #my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools');
|
|
90 #checkFile($samtoolsBin,"samtools binary");
|
|
91
|
|
92
|
|
93
|
|
94 #
|
|
95 # read config and validate values
|
|
96 #
|
|
97 checkFile($configFile,"configuration ini");
|
|
98 my $config = parseConfigIni($configFile);
|
|
99
|
|
100 for (qw(knownGenomeSize tumorBam normalBam refFile outDir)) {
|
|
101 errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
|
|
102 }
|
|
103
|
|
104 # we skip maxInputDepth,minTier2Mapq here to allow older config files
|
|
105 for (qw(minTier1Mapq isWriteRealignedBam binSize ssnvPrior sindelPrior
|
|
106 ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac)) {
|
|
107 errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
|
|
108 }
|
|
109
|
|
110 my $outDir = $config->{derived}{outDir};
|
|
111 my $binDir = File::Spec->catdir($outDir,'chromosomes',$chrom,'bins',$binId);
|
|
112 checkDir($outDir,"output");
|
|
113 checkDir($binDir,"output bin");
|
|
114
|
|
115
|
|
116 my $tumorBam = $config->{derived}{tumorBam};
|
|
117 my $normalBam = $config->{derived}{normalBam};
|
|
118 my $refFile = $config->{derived}{refFile};
|
|
119 checkFile($tumorBam,"tumor BAM");
|
|
120 checkFile($normalBam,"normal BAM");
|
|
121 checkFile($refFile,"reference");
|
|
122
|
|
123
|
|
124 # pull out some config options for convenience:
|
|
125 my $binSize=$config->{user}{binSize};
|
|
126 my $isWriteRealignedBam=$config->{user}{isWriteRealignedBam};
|
|
127 my $knownGenomeSize = $config->{derived}{knownGenomeSize};
|
|
128
|
|
129
|
|
130 my $begin = (int($binId)*$binSize)+1;
|
|
131 my $end = ((int($binId)+1)*$binSize);
|
|
132 #my $end = $begin+100000; #debug mode
|
|
133
|
|
134 my $useroptions = $config->{user};
|
|
135
|
|
136 # set previous default value if an older config file is being used:
|
|
137 if(! defined($useroptions->{minTier2Mapq})) {
|
|
138 $useroptions->{minTier2Mapq} = 5;
|
|
139 }
|
|
140
|
|
141
|
|
142 #
|
|
143 # setup the strelka command-line:
|
|
144 #
|
|
145 my $strelka_base_opts= "-clobber" .
|
|
146 " -filter-unanchored" .
|
|
147 " -min-paired-align-score " . $useroptions->{minTier1Mapq} .
|
|
148 " -min-single-align-score 10" .
|
|
149 " -min-qscore 0" .
|
|
150 " -report-range-begin $begin -report-range-end $end" .
|
|
151 " -samtools-reference '$refFile'" .
|
|
152 " -max-window-mismatch 3 20 -print-used-allele-counts" .
|
|
153 " -bam-seq-name '" . $chrom . "'" .
|
|
154 " -genome-size $knownGenomeSize" .
|
|
155 " -max-indel-size 50" .
|
|
156 " -indel-nonsite-match-prob 0.5" .
|
|
157 " --min-contig-open-end-support 35" .
|
|
158 " --somatic-snv-rate " . $useroptions->{ssnvPrior} .
|
|
159 " --shared-site-error-rate " . $useroptions->{ssnvNoise} .
|
|
160 " --shared-site-error-strand-bias-fraction " . $useroptions->{ssnvNoiseStrandBiasFrac} .
|
|
161 " --somatic-indel-rate " . $useroptions->{sindelPrior} .
|
|
162 " --shared-indel-error-rate " . $useroptions->{sindelNoise} .
|
|
163 " --tier2-min-single-align-score " . $useroptions->{minTier2Mapq} .
|
|
164 " --tier2-min-paired-align-score " . $useroptions->{minTier2Mapq} .
|
|
165 " --tier2-single-align-score-rescue-mode" .
|
|
166 " --tier2-mismatch-density-filter-count 10" .
|
|
167 " --tier2-no-filter-unanchored" .
|
|
168 " --tier2-indel-nonsite-match-prob 0.25" .
|
|
169 " --tier2-include-singleton" .
|
|
170 " --tier2-include-anomalous";
|
|
171
|
|
172
|
|
173 my $somSnvFile='somatic.snvs.unfiltered.vcf';
|
|
174 my $somIndelFile='somatic.indels.unfiltered.vcf';
|
|
175
|
|
176 my $cmd = "$strelkaBin $strelka_base_opts" .
|
|
177 " -bam-file " . $normalBam .
|
|
178 " --tumor-bam-file " . $tumorBam .
|
|
179 " --somatic-snv-file " . File::Spec->catfile($binDir,$somSnvFile) .
|
|
180 " --somatic-indel-file " . File::Spec->catfile($binDir,$somIndelFile) .
|
|
181 " --variant-window-flank-file 50 " . File::Spec->catfile($binDir,$somIndelFile . '.window');
|
|
182
|
|
183
|
|
184 sub ualignFile($) {
|
|
185 return File::Spec->catfile($binDir,$_[0] . ".unsorted.realigned.bam");
|
|
186 }
|
|
187 sub alignFile($) {
|
|
188 return File::Spec->catfile($binDir,$_[0] . ".realigned");
|
|
189 }
|
|
190
|
|
191
|
|
192 if(exists($useroptions->{maxInputDepth}) && ($useroptions->{maxInputDepth} > 0)) {
|
|
193 $cmd .= " --max-input-depth " . $useroptions->{maxInputDepth};
|
|
194 }
|
|
195
|
|
196
|
|
197 if($isWriteRealignedBam) {
|
|
198 $cmd .= " -realigned-read-file " . ualignFile("normal") .
|
|
199 " --tumor-realigned-read-file " . ualignFile("tumor");
|
|
200 }
|
|
201
|
|
202 if(defined($useroptions->{extraStrelkaArguments})){
|
|
203 my $arg=$useroptions->{extraStrelkaArguments};
|
|
204 if($arg !~ /^\s*$/) {
|
|
205 $cmd .= " " . $arg;
|
|
206 }
|
|
207 }
|
|
208
|
|
209 # this file contains site stats that used to be printed on stdout:
|
|
210 #
|
|
211 $cmd .=
|
|
212 " --report-file " . File::Spec->catfile($binDir,'strelka.stats');
|
|
213
|
|
214 $cmd .=
|
|
215 " >| " . File::Spec->catfile($binDir,'strelka.stdout') .
|
|
216 " 2>| " . File::Spec->catfile($binDir,'strelka.stderr');
|
|
217
|
|
218
|
|
219 executeCmd($cmd,0);
|
|
220
|
|
221
|
|
222 if($isWriteRealignedBam) {
|
|
223 for my $label (qw(normal tumor)) {
|
|
224 my $ufile = ualignFile($label);
|
|
225 if( -f $ufile ) {
|
|
226 my $afile = alignFile($label);
|
|
227 my $cmd = "samtools sort " . $ufile . " " . $afile;
|
|
228 executeCmd($cmd,0);
|
|
229 unlink($ufile);
|
|
230 } else {
|
|
231 logX("Can't find unsorted realigned BAM file: '$ufile'");
|
|
232 }
|
|
233 }
|
|
234 }
|
|
235
|
|
236
|
|
237 1;
|
|
238
|
|
239 __END__
|
|
240
|
|
241
|