comparison libexec/callSomaticVariants.pl @ 6:87568e5a7d4f

Testing strelka version 0.0.1
author mini
date Fri, 26 Sep 2014 13:24:13 +0200
parents
children 0e8e6011082b
comparison
equal deleted inserted replaced
5:07cbbd662111 6:87568e5a7d4f
1 #!/usr/bin/env perl
2 print "\n";
3 print @INC;
4 print "\n";
5 =head1 LICENSE
6
7 Strelka Workflow Software
8 Copyright (c) 2009-2013 Illumina, Inc.
9
10 This software is provided under the terms and conditions of the
11 Illumina Open Source Software License 1.
12
13 You should have received a copy of the Illumina Open Source
14 Software License 1 along with this program. If not, see
15 <https://github.com/downloads/sequencing/licenses/>.
16
17 =head1 SYNOPSIS
18
19 callSomaticVariants.pl [options] | --help
20
21 =head2 SUMMARY
22
23 Run the somatic variant caller for snvs and indels on a single
24 chromosome bin.
25
26 =cut
27
28 use warnings FATAL => 'all';
29 use strict;
30
31 use Carp;
32 $SIG{__DIE__} = \&Carp::confess;
33
34 use File::Spec;
35 use Getopt::Long;
36 use Pod::Usage;
37
38 my $baseDir;
39 my $libDir;
40 BEGIN {
41 print "\n";
42 print @INC;
43 print "\n";
44
45 my $thisDir=(File::Spec->splitpath($0))[1];
46 $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
47 $libDir=File::Spec->catdir($baseDir,'lib');
48 }
49 use lib $libDir;
50 use Utils;
51
52 if(getAbsPath($baseDir)) {
53 errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
54 }
55 my $libexecDir=File::Spec->catdir($baseDir,'libexec');
56 #my $optDir=File::Spec->catdir($baseDir,'opt');
57
58
59 my $scriptName=(File::Spec->splitpath($0))[2];
60 my $argCount=scalar(@ARGV);
61 my $cmdline = join(' ',$0,@ARGV);
62
63
64 my ($chrom, $binId, $configFile);
65 my $help;
66
67 GetOptions(
68 "chrom=s" => \$chrom,
69 "bin=s" => \$binId,
70 "config=s" => \$configFile,
71 "help|h" => \$help) or pod2usage(2);
72
73 pod2usage(2) if($help);
74 pod2usage(2) unless(defined($chrom));
75 pod2usage(2) unless(defined($binId));
76 pod2usage(2) unless(defined($configFile));
77
78
79
80 #
81 # check all fixed paths (not based on commandline arguments):
82 #
83 checkDir($baseDir);
84 checkDir($libexecDir);
85 #checkDir($optDir);
86
87 my $strelkaBin=File::Spec->catdir($libexecDir,'strelka2');
88 checkFile($strelkaBin,"strelka binary");
89 #my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools');
90 #checkFile($samtoolsBin,"samtools binary");
91
92
93
94 #
95 # read config and validate values
96 #
97 checkFile($configFile,"configuration ini");
98 my $config = parseConfigIni($configFile);
99
100 for (qw(knownGenomeSize tumorBam normalBam refFile outDir)) {
101 errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
102 }
103
104 # we skip maxInputDepth,minTier2Mapq here to allow older config files
105 for (qw(minTier1Mapq isWriteRealignedBam binSize ssnvPrior sindelPrior
106 ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac)) {
107 errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
108 }
109
110 my $outDir = $config->{derived}{outDir};
111 my $binDir = File::Spec->catdir($outDir,'chromosomes',$chrom,'bins',$binId);
112 checkDir($outDir,"output");
113 checkDir($binDir,"output bin");
114
115
116 my $tumorBam = $config->{derived}{tumorBam};
117 my $normalBam = $config->{derived}{normalBam};
118 my $refFile = $config->{derived}{refFile};
119 checkFile($tumorBam,"tumor BAM");
120 checkFile($normalBam,"normal BAM");
121 checkFile($refFile,"reference");
122
123
124 # pull out some config options for convenience:
125 my $binSize=$config->{user}{binSize};
126 my $isWriteRealignedBam=$config->{user}{isWriteRealignedBam};
127 my $knownGenomeSize = $config->{derived}{knownGenomeSize};
128
129
130 my $begin = (int($binId)*$binSize)+1;
131 my $end = ((int($binId)+1)*$binSize);
132 #my $end = $begin+100000; #debug mode
133
134 my $useroptions = $config->{user};
135
136 # set previous default value if an older config file is being used:
137 if(! defined($useroptions->{minTier2Mapq})) {
138 $useroptions->{minTier2Mapq} = 5;
139 }
140
141
142 #
143 # setup the strelka command-line:
144 #
145 my $strelka_base_opts= "-clobber" .
146 " -filter-unanchored" .
147 " -min-paired-align-score " . $useroptions->{minTier1Mapq} .
148 " -min-single-align-score 10" .
149 " -min-qscore 0" .
150 " -report-range-begin $begin -report-range-end $end" .
151 " -samtools-reference '$refFile'" .
152 " -max-window-mismatch 3 20 -print-used-allele-counts" .
153 " -bam-seq-name '" . $chrom . "'" .
154 " -genome-size $knownGenomeSize" .
155 " -max-indel-size 50" .
156 " -indel-nonsite-match-prob 0.5" .
157 " --min-contig-open-end-support 35" .
158 " --somatic-snv-rate " . $useroptions->{ssnvPrior} .
159 " --shared-site-error-rate " . $useroptions->{ssnvNoise} .
160 " --shared-site-error-strand-bias-fraction " . $useroptions->{ssnvNoiseStrandBiasFrac} .
161 " --somatic-indel-rate " . $useroptions->{sindelPrior} .
162 " --shared-indel-error-rate " . $useroptions->{sindelNoise} .
163 " --tier2-min-single-align-score " . $useroptions->{minTier2Mapq} .
164 " --tier2-min-paired-align-score " . $useroptions->{minTier2Mapq} .
165 " --tier2-single-align-score-rescue-mode" .
166 " --tier2-mismatch-density-filter-count 10" .
167 " --tier2-no-filter-unanchored" .
168 " --tier2-indel-nonsite-match-prob 0.25" .
169 " --tier2-include-singleton" .
170 " --tier2-include-anomalous";
171
172
173 my $somSnvFile='somatic.snvs.unfiltered.vcf';
174 my $somIndelFile='somatic.indels.unfiltered.vcf';
175
176 my $cmd = "$strelkaBin $strelka_base_opts" .
177 " -bam-file " . $normalBam .
178 " --tumor-bam-file " . $tumorBam .
179 " --somatic-snv-file " . File::Spec->catfile($binDir,$somSnvFile) .
180 " --somatic-indel-file " . File::Spec->catfile($binDir,$somIndelFile) .
181 " --variant-window-flank-file 50 " . File::Spec->catfile($binDir,$somIndelFile . '.window');
182
183
184 sub ualignFile($) {
185 return File::Spec->catfile($binDir,$_[0] . ".unsorted.realigned.bam");
186 }
187 sub alignFile($) {
188 return File::Spec->catfile($binDir,$_[0] . ".realigned");
189 }
190
191
192 if(exists($useroptions->{maxInputDepth}) && ($useroptions->{maxInputDepth} > 0)) {
193 $cmd .= " --max-input-depth " . $useroptions->{maxInputDepth};
194 }
195
196
197 if($isWriteRealignedBam) {
198 $cmd .= " -realigned-read-file " . ualignFile("normal") .
199 " --tumor-realigned-read-file " . ualignFile("tumor");
200 }
201
202 if(defined($useroptions->{extraStrelkaArguments})){
203 my $arg=$useroptions->{extraStrelkaArguments};
204 if($arg !~ /^\s*$/) {
205 $cmd .= " " . $arg;
206 }
207 }
208
209 # this file contains site stats that used to be printed on stdout:
210 #
211 $cmd .=
212 " --report-file " . File::Spec->catfile($binDir,'strelka.stats');
213
214 $cmd .=
215 " >| " . File::Spec->catfile($binDir,'strelka.stdout') .
216 " 2>| " . File::Spec->catfile($binDir,'strelka.stderr');
217
218
219 executeCmd($cmd,0);
220
221
222 if($isWriteRealignedBam) {
223 for my $label (qw(normal tumor)) {
224 my $ufile = ualignFile($label);
225 if( -f $ufile ) {
226 my $afile = alignFile($label);
227 my $cmd = "samtools sort " . $ufile . " " . $afile;
228 executeCmd($cmd,0);
229 unlink($ufile);
230 } else {
231 logX("Can't find unsorted realigned BAM file: '$ufile'");
232 }
233 }
234 }
235
236
237 1;
238
239 __END__
240
241