Mercurial > repos > mini > strelka
diff libexec/callSomaticVariants.pl @ 6:87568e5a7d4f
Testing strelka version 0.0.1
author | mini |
---|---|
date | Fri, 26 Sep 2014 13:24:13 +0200 |
parents | |
children | 0e8e6011082b |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libexec/callSomaticVariants.pl Fri Sep 26 13:24:13 2014 +0200 @@ -0,0 +1,241 @@ +#!/usr/bin/env perl +print "\n"; +print @INC; +print "\n"; +=head1 LICENSE + +Strelka Workflow Software +Copyright (c) 2009-2013 Illumina, Inc. + +This software is provided under the terms and conditions of the +Illumina Open Source Software License 1. + +You should have received a copy of the Illumina Open Source +Software License 1 along with this program. If not, see +<https://github.com/downloads/sequencing/licenses/>. + +=head1 SYNOPSIS + +callSomaticVariants.pl [options] | --help + +=head2 SUMMARY + +Run the somatic variant caller for snvs and indels on a single +chromosome bin. + +=cut + +use warnings FATAL => 'all'; +use strict; + +use Carp; +$SIG{__DIE__} = \&Carp::confess; + +use File::Spec; +use Getopt::Long; +use Pod::Usage; + +my $baseDir; +my $libDir; +BEGIN { +print "\n"; +print @INC; +print "\n"; + + my $thisDir=(File::Spec->splitpath($0))[1]; + $baseDir=File::Spec->catdir($thisDir,File::Spec->updir()); + $libDir=File::Spec->catdir($baseDir,'lib'); +} +use lib $libDir; +use Utils; + +if(getAbsPath($baseDir)) { + errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'"); +} +my $libexecDir=File::Spec->catdir($baseDir,'libexec'); +#my $optDir=File::Spec->catdir($baseDir,'opt'); + + +my $scriptName=(File::Spec->splitpath($0))[2]; +my $argCount=scalar(@ARGV); +my $cmdline = join(' ',$0,@ARGV); + + +my ($chrom, $binId, $configFile); +my $help; + +GetOptions( + "chrom=s" => \$chrom, + "bin=s" => \$binId, + "config=s" => \$configFile, + "help|h" => \$help) or pod2usage(2); + +pod2usage(2) if($help); +pod2usage(2) unless(defined($chrom)); +pod2usage(2) unless(defined($binId)); +pod2usage(2) unless(defined($configFile)); + + + +# +# check all fixed paths (not based on commandline arguments): +# +checkDir($baseDir); +checkDir($libexecDir); +#checkDir($optDir); + +my $strelkaBin=File::Spec->catdir($libexecDir,'strelka2'); +checkFile($strelkaBin,"strelka binary"); +#my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools'); +#checkFile($samtoolsBin,"samtools binary"); + + + +# +# read config and validate values +# +checkFile($configFile,"configuration ini"); +my $config = parseConfigIni($configFile); + +for (qw(knownGenomeSize tumorBam normalBam refFile outDir)) { + errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_})); +} + +# we skip maxInputDepth,minTier2Mapq here to allow older config files +for (qw(minTier1Mapq isWriteRealignedBam binSize ssnvPrior sindelPrior + ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac)) { + errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_})); +} + +my $outDir = $config->{derived}{outDir}; +my $binDir = File::Spec->catdir($outDir,'chromosomes',$chrom,'bins',$binId); +checkDir($outDir,"output"); +checkDir($binDir,"output bin"); + + +my $tumorBam = $config->{derived}{tumorBam}; +my $normalBam = $config->{derived}{normalBam}; +my $refFile = $config->{derived}{refFile}; +checkFile($tumorBam,"tumor BAM"); +checkFile($normalBam,"normal BAM"); +checkFile($refFile,"reference"); + + +# pull out some config options for convenience: +my $binSize=$config->{user}{binSize}; +my $isWriteRealignedBam=$config->{user}{isWriteRealignedBam}; +my $knownGenomeSize = $config->{derived}{knownGenomeSize}; + + +my $begin = (int($binId)*$binSize)+1; +my $end = ((int($binId)+1)*$binSize); +#my $end = $begin+100000; #debug mode + +my $useroptions = $config->{user}; + +# set previous default value if an older config file is being used: +if(! defined($useroptions->{minTier2Mapq})) { + $useroptions->{minTier2Mapq} = 5; +} + + +# +# setup the strelka command-line: +# +my $strelka_base_opts= "-clobber" . +" -filter-unanchored" . +" -min-paired-align-score " . $useroptions->{minTier1Mapq} . +" -min-single-align-score 10" . +" -min-qscore 0" . +" -report-range-begin $begin -report-range-end $end" . +" -samtools-reference '$refFile'" . +" -max-window-mismatch 3 20 -print-used-allele-counts" . +" -bam-seq-name '" . $chrom . "'" . +" -genome-size $knownGenomeSize" . +" -max-indel-size 50" . +" -indel-nonsite-match-prob 0.5" . +" --min-contig-open-end-support 35" . +" --somatic-snv-rate " . $useroptions->{ssnvPrior} . +" --shared-site-error-rate " . $useroptions->{ssnvNoise} . +" --shared-site-error-strand-bias-fraction " . $useroptions->{ssnvNoiseStrandBiasFrac} . +" --somatic-indel-rate " . $useroptions->{sindelPrior} . +" --shared-indel-error-rate " . $useroptions->{sindelNoise} . +" --tier2-min-single-align-score " . $useroptions->{minTier2Mapq} . +" --tier2-min-paired-align-score " . $useroptions->{minTier2Mapq} . +" --tier2-single-align-score-rescue-mode" . +" --tier2-mismatch-density-filter-count 10" . +" --tier2-no-filter-unanchored" . +" --tier2-indel-nonsite-match-prob 0.25" . +" --tier2-include-singleton" . +" --tier2-include-anomalous"; + + +my $somSnvFile='somatic.snvs.unfiltered.vcf'; +my $somIndelFile='somatic.indels.unfiltered.vcf'; + +my $cmd = "$strelkaBin $strelka_base_opts" . +" -bam-file " . $normalBam . +" --tumor-bam-file " . $tumorBam . +" --somatic-snv-file " . File::Spec->catfile($binDir,$somSnvFile) . +" --somatic-indel-file " . File::Spec->catfile($binDir,$somIndelFile) . +" --variant-window-flank-file 50 " . File::Spec->catfile($binDir,$somIndelFile . '.window'); + + +sub ualignFile($) { + return File::Spec->catfile($binDir,$_[0] . ".unsorted.realigned.bam"); +} +sub alignFile($) { + return File::Spec->catfile($binDir,$_[0] . ".realigned"); +} + + +if(exists($useroptions->{maxInputDepth}) && ($useroptions->{maxInputDepth} > 0)) { + $cmd .= " --max-input-depth " . $useroptions->{maxInputDepth}; +} + + +if($isWriteRealignedBam) { + $cmd .= " -realigned-read-file " . ualignFile("normal") . + " --tumor-realigned-read-file " . ualignFile("tumor"); +} + +if(defined($useroptions->{extraStrelkaArguments})){ + my $arg=$useroptions->{extraStrelkaArguments}; + if($arg !~ /^\s*$/) { + $cmd .= " " . $arg; + } +} + +# this file contains site stats that used to be printed on stdout: +# +$cmd .= +" --report-file " . File::Spec->catfile($binDir,'strelka.stats'); + +$cmd .= +" >| " . File::Spec->catfile($binDir,'strelka.stdout') . +" 2>| " . File::Spec->catfile($binDir,'strelka.stderr'); + + +executeCmd($cmd,0); + + +if($isWriteRealignedBam) { + for my $label (qw(normal tumor)) { + my $ufile = ualignFile($label); + if( -f $ufile ) { + my $afile = alignFile($label); + my $cmd = "samtools sort " . $ufile . " " . $afile; + executeCmd($cmd,0); + unlink($ufile); + } else { + logX("Can't find unsorted realigned BAM file: '$ufile'"); + } + } +} + + +1; + +__END__ + +