Mercurial > repos > mini > strelka
comparison libexec/callSomaticVariants.pl @ 6:87568e5a7d4f
Testing strelka version 0.0.1
author | mini |
---|---|
date | Fri, 26 Sep 2014 13:24:13 +0200 |
parents | |
children | 0e8e6011082b |
comparison
equal
deleted
inserted
replaced
5:07cbbd662111 | 6:87568e5a7d4f |
---|---|
1 #!/usr/bin/env perl | |
2 print "\n"; | |
3 print @INC; | |
4 print "\n"; | |
5 =head1 LICENSE | |
6 | |
7 Strelka Workflow Software | |
8 Copyright (c) 2009-2013 Illumina, Inc. | |
9 | |
10 This software is provided under the terms and conditions of the | |
11 Illumina Open Source Software License 1. | |
12 | |
13 You should have received a copy of the Illumina Open Source | |
14 Software License 1 along with this program. If not, see | |
15 <https://github.com/downloads/sequencing/licenses/>. | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 callSomaticVariants.pl [options] | --help | |
20 | |
21 =head2 SUMMARY | |
22 | |
23 Run the somatic variant caller for snvs and indels on a single | |
24 chromosome bin. | |
25 | |
26 =cut | |
27 | |
28 use warnings FATAL => 'all'; | |
29 use strict; | |
30 | |
31 use Carp; | |
32 $SIG{__DIE__} = \&Carp::confess; | |
33 | |
34 use File::Spec; | |
35 use Getopt::Long; | |
36 use Pod::Usage; | |
37 | |
38 my $baseDir; | |
39 my $libDir; | |
40 BEGIN { | |
41 print "\n"; | |
42 print @INC; | |
43 print "\n"; | |
44 | |
45 my $thisDir=(File::Spec->splitpath($0))[1]; | |
46 $baseDir=File::Spec->catdir($thisDir,File::Spec->updir()); | |
47 $libDir=File::Spec->catdir($baseDir,'lib'); | |
48 } | |
49 use lib $libDir; | |
50 use Utils; | |
51 | |
52 if(getAbsPath($baseDir)) { | |
53 errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'"); | |
54 } | |
55 my $libexecDir=File::Spec->catdir($baseDir,'libexec'); | |
56 #my $optDir=File::Spec->catdir($baseDir,'opt'); | |
57 | |
58 | |
59 my $scriptName=(File::Spec->splitpath($0))[2]; | |
60 my $argCount=scalar(@ARGV); | |
61 my $cmdline = join(' ',$0,@ARGV); | |
62 | |
63 | |
64 my ($chrom, $binId, $configFile); | |
65 my $help; | |
66 | |
67 GetOptions( | |
68 "chrom=s" => \$chrom, | |
69 "bin=s" => \$binId, | |
70 "config=s" => \$configFile, | |
71 "help|h" => \$help) or pod2usage(2); | |
72 | |
73 pod2usage(2) if($help); | |
74 pod2usage(2) unless(defined($chrom)); | |
75 pod2usage(2) unless(defined($binId)); | |
76 pod2usage(2) unless(defined($configFile)); | |
77 | |
78 | |
79 | |
80 # | |
81 # check all fixed paths (not based on commandline arguments): | |
82 # | |
83 checkDir($baseDir); | |
84 checkDir($libexecDir); | |
85 #checkDir($optDir); | |
86 | |
87 my $strelkaBin=File::Spec->catdir($libexecDir,'strelka2'); | |
88 checkFile($strelkaBin,"strelka binary"); | |
89 #my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools'); | |
90 #checkFile($samtoolsBin,"samtools binary"); | |
91 | |
92 | |
93 | |
94 # | |
95 # read config and validate values | |
96 # | |
97 checkFile($configFile,"configuration ini"); | |
98 my $config = parseConfigIni($configFile); | |
99 | |
100 for (qw(knownGenomeSize tumorBam normalBam refFile outDir)) { | |
101 errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_})); | |
102 } | |
103 | |
104 # we skip maxInputDepth,minTier2Mapq here to allow older config files | |
105 for (qw(minTier1Mapq isWriteRealignedBam binSize ssnvPrior sindelPrior | |
106 ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac)) { | |
107 errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_})); | |
108 } | |
109 | |
110 my $outDir = $config->{derived}{outDir}; | |
111 my $binDir = File::Spec->catdir($outDir,'chromosomes',$chrom,'bins',$binId); | |
112 checkDir($outDir,"output"); | |
113 checkDir($binDir,"output bin"); | |
114 | |
115 | |
116 my $tumorBam = $config->{derived}{tumorBam}; | |
117 my $normalBam = $config->{derived}{normalBam}; | |
118 my $refFile = $config->{derived}{refFile}; | |
119 checkFile($tumorBam,"tumor BAM"); | |
120 checkFile($normalBam,"normal BAM"); | |
121 checkFile($refFile,"reference"); | |
122 | |
123 | |
124 # pull out some config options for convenience: | |
125 my $binSize=$config->{user}{binSize}; | |
126 my $isWriteRealignedBam=$config->{user}{isWriteRealignedBam}; | |
127 my $knownGenomeSize = $config->{derived}{knownGenomeSize}; | |
128 | |
129 | |
130 my $begin = (int($binId)*$binSize)+1; | |
131 my $end = ((int($binId)+1)*$binSize); | |
132 #my $end = $begin+100000; #debug mode | |
133 | |
134 my $useroptions = $config->{user}; | |
135 | |
136 # set previous default value if an older config file is being used: | |
137 if(! defined($useroptions->{minTier2Mapq})) { | |
138 $useroptions->{minTier2Mapq} = 5; | |
139 } | |
140 | |
141 | |
142 # | |
143 # setup the strelka command-line: | |
144 # | |
145 my $strelka_base_opts= "-clobber" . | |
146 " -filter-unanchored" . | |
147 " -min-paired-align-score " . $useroptions->{minTier1Mapq} . | |
148 " -min-single-align-score 10" . | |
149 " -min-qscore 0" . | |
150 " -report-range-begin $begin -report-range-end $end" . | |
151 " -samtools-reference '$refFile'" . | |
152 " -max-window-mismatch 3 20 -print-used-allele-counts" . | |
153 " -bam-seq-name '" . $chrom . "'" . | |
154 " -genome-size $knownGenomeSize" . | |
155 " -max-indel-size 50" . | |
156 " -indel-nonsite-match-prob 0.5" . | |
157 " --min-contig-open-end-support 35" . | |
158 " --somatic-snv-rate " . $useroptions->{ssnvPrior} . | |
159 " --shared-site-error-rate " . $useroptions->{ssnvNoise} . | |
160 " --shared-site-error-strand-bias-fraction " . $useroptions->{ssnvNoiseStrandBiasFrac} . | |
161 " --somatic-indel-rate " . $useroptions->{sindelPrior} . | |
162 " --shared-indel-error-rate " . $useroptions->{sindelNoise} . | |
163 " --tier2-min-single-align-score " . $useroptions->{minTier2Mapq} . | |
164 " --tier2-min-paired-align-score " . $useroptions->{minTier2Mapq} . | |
165 " --tier2-single-align-score-rescue-mode" . | |
166 " --tier2-mismatch-density-filter-count 10" . | |
167 " --tier2-no-filter-unanchored" . | |
168 " --tier2-indel-nonsite-match-prob 0.25" . | |
169 " --tier2-include-singleton" . | |
170 " --tier2-include-anomalous"; | |
171 | |
172 | |
173 my $somSnvFile='somatic.snvs.unfiltered.vcf'; | |
174 my $somIndelFile='somatic.indels.unfiltered.vcf'; | |
175 | |
176 my $cmd = "$strelkaBin $strelka_base_opts" . | |
177 " -bam-file " . $normalBam . | |
178 " --tumor-bam-file " . $tumorBam . | |
179 " --somatic-snv-file " . File::Spec->catfile($binDir,$somSnvFile) . | |
180 " --somatic-indel-file " . File::Spec->catfile($binDir,$somIndelFile) . | |
181 " --variant-window-flank-file 50 " . File::Spec->catfile($binDir,$somIndelFile . '.window'); | |
182 | |
183 | |
184 sub ualignFile($) { | |
185 return File::Spec->catfile($binDir,$_[0] . ".unsorted.realigned.bam"); | |
186 } | |
187 sub alignFile($) { | |
188 return File::Spec->catfile($binDir,$_[0] . ".realigned"); | |
189 } | |
190 | |
191 | |
192 if(exists($useroptions->{maxInputDepth}) && ($useroptions->{maxInputDepth} > 0)) { | |
193 $cmd .= " --max-input-depth " . $useroptions->{maxInputDepth}; | |
194 } | |
195 | |
196 | |
197 if($isWriteRealignedBam) { | |
198 $cmd .= " -realigned-read-file " . ualignFile("normal") . | |
199 " --tumor-realigned-read-file " . ualignFile("tumor"); | |
200 } | |
201 | |
202 if(defined($useroptions->{extraStrelkaArguments})){ | |
203 my $arg=$useroptions->{extraStrelkaArguments}; | |
204 if($arg !~ /^\s*$/) { | |
205 $cmd .= " " . $arg; | |
206 } | |
207 } | |
208 | |
209 # this file contains site stats that used to be printed on stdout: | |
210 # | |
211 $cmd .= | |
212 " --report-file " . File::Spec->catfile($binDir,'strelka.stats'); | |
213 | |
214 $cmd .= | |
215 " >| " . File::Spec->catfile($binDir,'strelka.stdout') . | |
216 " 2>| " . File::Spec->catfile($binDir,'strelka.stderr'); | |
217 | |
218 | |
219 executeCmd($cmd,0); | |
220 | |
221 | |
222 if($isWriteRealignedBam) { | |
223 for my $label (qw(normal tumor)) { | |
224 my $ufile = ualignFile($label); | |
225 if( -f $ufile ) { | |
226 my $afile = alignFile($label); | |
227 my $cmd = "samtools sort " . $ufile . " " . $afile; | |
228 executeCmd($cmd,0); | |
229 unlink($ufile); | |
230 } else { | |
231 logX("Can't find unsorted realigned BAM file: '$ufile'"); | |
232 } | |
233 } | |
234 } | |
235 | |
236 | |
237 1; | |
238 | |
239 __END__ | |
240 | |
241 |