| 
6
 | 
     1 #!/usr/bin/env perl
 | 
| 
 | 
     2 print "\n";
 | 
| 
 | 
     3 print @INC;
 | 
| 
 | 
     4 print "\n";
 | 
| 
 | 
     5 =head1 LICENSE
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 Strelka Workflow Software
 | 
| 
 | 
     8 Copyright (c) 2009-2013 Illumina, Inc.
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 This software is provided under the terms and conditions of the
 | 
| 
 | 
    11 Illumina Open Source Software License 1.
 | 
| 
 | 
    12 
 | 
| 
 | 
    13 You should have received a copy of the Illumina Open Source
 | 
| 
 | 
    14 Software License 1 along with this program. If not, see
 | 
| 
 | 
    15 <https://github.com/downloads/sequencing/licenses/>.
 | 
| 
 | 
    16 
 | 
| 
 | 
    17 =head1 SYNOPSIS
 | 
| 
 | 
    18 
 | 
| 
 | 
    19 callSomaticVariants.pl [options] | --help
 | 
| 
 | 
    20 
 | 
| 
 | 
    21 =head2 SUMMARY
 | 
| 
 | 
    22 
 | 
| 
 | 
    23 Run the somatic variant caller for snvs and indels on a single
 | 
| 
 | 
    24 chromosome bin.
 | 
| 
 | 
    25 
 | 
| 
 | 
    26 =cut
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 use warnings FATAL => 'all';
 | 
| 
 | 
    29 use strict;
 | 
| 
 | 
    30 
 | 
| 
 | 
    31 use Carp;
 | 
| 
 | 
    32 $SIG{__DIE__} = \&Carp::confess;
 | 
| 
 | 
    33 
 | 
| 
 | 
    34 use File::Spec;
 | 
| 
 | 
    35 use Getopt::Long;
 | 
| 
 | 
    36 use Pod::Usage;
 | 
| 
 | 
    37 
 | 
| 
 | 
    38 my $baseDir;
 | 
| 
 | 
    39 my $libDir;
 | 
| 
 | 
    40 BEGIN {
 | 
| 
 | 
    41 print "\n";
 | 
| 
 | 
    42 print @INC;
 | 
| 
 | 
    43 print "\n";
 | 
| 
 | 
    44 
 | 
| 
 | 
    45     my $thisDir=(File::Spec->splitpath($0))[1];
 | 
| 
 | 
    46     $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
 | 
| 
 | 
    47     $libDir=File::Spec->catdir($baseDir,'lib');
 | 
| 
 | 
    48 }
 | 
| 
 | 
    49 use lib $libDir;
 | 
| 
 | 
    50 use Utils;
 | 
| 
 | 
    51 
 | 
| 
 | 
    52 if(getAbsPath($baseDir)) {
 | 
| 
 | 
    53     errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
 | 
| 
 | 
    54 }
 | 
| 
 | 
    55 my $libexecDir=File::Spec->catdir($baseDir,'libexec');
 | 
| 
 | 
    56 #my $optDir=File::Spec->catdir($baseDir,'opt');
 | 
| 
 | 
    57 
 | 
| 
 | 
    58 
 | 
| 
 | 
    59 my $scriptName=(File::Spec->splitpath($0))[2];
 | 
| 
 | 
    60 my $argCount=scalar(@ARGV);
 | 
| 
 | 
    61 my $cmdline = join(' ',$0,@ARGV);
 | 
| 
 | 
    62 
 | 
| 
 | 
    63 
 | 
| 
 | 
    64 my ($chrom, $binId, $configFile);
 | 
| 
 | 
    65 my $help;
 | 
| 
 | 
    66 
 | 
| 
 | 
    67 GetOptions(
 | 
| 
 | 
    68             "chrom=s" => \$chrom,
 | 
| 
 | 
    69             "bin=s" => \$binId,
 | 
| 
 | 
    70             "config=s" => \$configFile,
 | 
| 
 | 
    71             "help|h" => \$help) or pod2usage(2);
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 pod2usage(2) if($help);
 | 
| 
 | 
    74 pod2usage(2) unless(defined($chrom));
 | 
| 
 | 
    75 pod2usage(2) unless(defined($binId));
 | 
| 
 | 
    76 pod2usage(2) unless(defined($configFile));
 | 
| 
 | 
    77 
 | 
| 
 | 
    78 
 | 
| 
 | 
    79 
 | 
| 
 | 
    80 #
 | 
| 
 | 
    81 # check all fixed paths (not based on commandline arguments):
 | 
| 
 | 
    82 #
 | 
| 
 | 
    83 checkDir($baseDir);
 | 
| 
 | 
    84 checkDir($libexecDir);
 | 
| 
 | 
    85 #checkDir($optDir);
 | 
| 
 | 
    86 
 | 
| 
 | 
    87 my $strelkaBin=File::Spec->catdir($libexecDir,'strelka2');
 | 
| 
 | 
    88 checkFile($strelkaBin,"strelka binary");
 | 
| 
 | 
    89 #my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools');
 | 
| 
 | 
    90 #checkFile($samtoolsBin,"samtools binary");
 | 
| 
 | 
    91 
 | 
| 
 | 
    92 
 | 
| 
 | 
    93 
 | 
| 
 | 
    94 #
 | 
| 
 | 
    95 # read config and validate values
 | 
| 
 | 
    96 #
 | 
| 
 | 
    97 checkFile($configFile,"configuration ini");
 | 
| 
 | 
    98 my $config  = parseConfigIni($configFile);
 | 
| 
 | 
    99 
 | 
| 
 | 
   100 for (qw(knownGenomeSize tumorBam normalBam refFile outDir)) {
 | 
| 
 | 
   101     errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
 | 
| 
 | 
   102 }
 | 
| 
 | 
   103 
 | 
| 
 | 
   104 # we skip maxInputDepth,minTier2Mapq here to allow older config files
 | 
| 
 | 
   105 for (qw(minTier1Mapq isWriteRealignedBam binSize ssnvPrior sindelPrior
 | 
| 
 | 
   106         ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac)) {
 | 
| 
 | 
   107     errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
 | 
| 
 | 
   108 }
 | 
| 
 | 
   109 
 | 
| 
 | 
   110 my $outDir = $config->{derived}{outDir};
 | 
| 
 | 
   111 my $binDir = File::Spec->catdir($outDir,'chromosomes',$chrom,'bins',$binId);
 | 
| 
 | 
   112 checkDir($outDir,"output");
 | 
| 
 | 
   113 checkDir($binDir,"output bin");
 | 
| 
 | 
   114 
 | 
| 
 | 
   115 
 | 
| 
 | 
   116 my $tumorBam = $config->{derived}{tumorBam};
 | 
| 
 | 
   117 my $normalBam = $config->{derived}{normalBam};
 | 
| 
 | 
   118 my $refFile = $config->{derived}{refFile};
 | 
| 
 | 
   119 checkFile($tumorBam,"tumor BAM");
 | 
| 
 | 
   120 checkFile($normalBam,"normal BAM");
 | 
| 
 | 
   121 checkFile($refFile,"reference");
 | 
| 
 | 
   122 
 | 
| 
 | 
   123 
 | 
| 
 | 
   124 # pull out some config options for convenience:
 | 
| 
 | 
   125 my $binSize=$config->{user}{binSize};
 | 
| 
 | 
   126 my $isWriteRealignedBam=$config->{user}{isWriteRealignedBam};
 | 
| 
 | 
   127 my $knownGenomeSize = $config->{derived}{knownGenomeSize};
 | 
| 
 | 
   128 
 | 
| 
 | 
   129 
 | 
| 
 | 
   130 my $begin = (int($binId)*$binSize)+1;
 | 
| 
 | 
   131 my $end = ((int($binId)+1)*$binSize);
 | 
| 
 | 
   132 #my $end = $begin+100000;  #debug mode
 | 
| 
 | 
   133 
 | 
| 
 | 
   134 my $useroptions = $config->{user};
 | 
| 
 | 
   135 
 | 
| 
 | 
   136 # set previous default value if an older config file is being used:
 | 
| 
 | 
   137 if(! defined($useroptions->{minTier2Mapq})) {
 | 
| 
 | 
   138     $useroptions->{minTier2Mapq} = 5;
 | 
| 
 | 
   139 }
 | 
| 
 | 
   140 
 | 
| 
 | 
   141 
 | 
| 
 | 
   142 #
 | 
| 
 | 
   143 # setup the strelka command-line:
 | 
| 
 | 
   144 #
 | 
| 
 | 
   145 my $strelka_base_opts= "-clobber" .
 | 
| 
 | 
   146 " -filter-unanchored" .
 | 
| 
 | 
   147 " -min-paired-align-score " . $useroptions->{minTier1Mapq} .
 | 
| 
 | 
   148 " -min-single-align-score 10" .
 | 
| 
 | 
   149 " -min-qscore 0" .
 | 
| 
 | 
   150 " -report-range-begin $begin -report-range-end $end" .
 | 
| 
 | 
   151 " -samtools-reference '$refFile'" .
 | 
| 
 | 
   152 " -max-window-mismatch 3 20 -print-used-allele-counts" .
 | 
| 
 | 
   153 " -bam-seq-name '" . $chrom . "'" .
 | 
| 
 | 
   154 " -genome-size $knownGenomeSize" .
 | 
| 
 | 
   155 " -max-indel-size 50" .
 | 
| 
 | 
   156 " -indel-nonsite-match-prob 0.5" .
 | 
| 
 | 
   157 " --min-contig-open-end-support 35" .
 | 
| 
 | 
   158 " --somatic-snv-rate " . $useroptions->{ssnvPrior} .
 | 
| 
 | 
   159 " --shared-site-error-rate " . $useroptions->{ssnvNoise} .
 | 
| 
 | 
   160 " --shared-site-error-strand-bias-fraction " . $useroptions->{ssnvNoiseStrandBiasFrac} .
 | 
| 
 | 
   161 " --somatic-indel-rate " . $useroptions->{sindelPrior} .
 | 
| 
 | 
   162 " --shared-indel-error-rate " . $useroptions->{sindelNoise} .
 | 
| 
 | 
   163 " --tier2-min-single-align-score " . $useroptions->{minTier2Mapq} .
 | 
| 
 | 
   164 " --tier2-min-paired-align-score " . $useroptions->{minTier2Mapq} .
 | 
| 
 | 
   165 " --tier2-single-align-score-rescue-mode" .
 | 
| 
 | 
   166 " --tier2-mismatch-density-filter-count 10" .
 | 
| 
 | 
   167 " --tier2-no-filter-unanchored" .
 | 
| 
 | 
   168 " --tier2-indel-nonsite-match-prob 0.25" .
 | 
| 
 | 
   169 " --tier2-include-singleton" .
 | 
| 
 | 
   170 " --tier2-include-anomalous";
 | 
| 
 | 
   171 
 | 
| 
 | 
   172 
 | 
| 
 | 
   173 my $somSnvFile='somatic.snvs.unfiltered.vcf';
 | 
| 
 | 
   174 my $somIndelFile='somatic.indels.unfiltered.vcf';
 | 
| 
 | 
   175 
 | 
| 
 | 
   176 my $cmd =  "$strelkaBin $strelka_base_opts" .
 | 
| 
 | 
   177 " -bam-file " . $normalBam .
 | 
| 
 | 
   178 " --tumor-bam-file " . $tumorBam .
 | 
| 
 | 
   179 " --somatic-snv-file " . File::Spec->catfile($binDir,$somSnvFile) .
 | 
| 
 | 
   180 " --somatic-indel-file " . File::Spec->catfile($binDir,$somIndelFile) .
 | 
| 
 | 
   181 " --variant-window-flank-file 50 " . File::Spec->catfile($binDir,$somIndelFile . '.window');
 | 
| 
 | 
   182 
 | 
| 
 | 
   183 
 | 
| 
 | 
   184 sub ualignFile($) {
 | 
| 
 | 
   185     return File::Spec->catfile($binDir,$_[0] . ".unsorted.realigned.bam");
 | 
| 
 | 
   186 }
 | 
| 
 | 
   187 sub alignFile($) {
 | 
| 
 | 
   188     return File::Spec->catfile($binDir,$_[0] . ".realigned");
 | 
| 
 | 
   189 }
 | 
| 
 | 
   190 
 | 
| 
 | 
   191 
 | 
| 
 | 
   192 if(exists($useroptions->{maxInputDepth}) && ($useroptions->{maxInputDepth} > 0)) {
 | 
| 
 | 
   193     $cmd .= " --max-input-depth " . $useroptions->{maxInputDepth};
 | 
| 
 | 
   194 }
 | 
| 
 | 
   195 
 | 
| 
 | 
   196 
 | 
| 
 | 
   197 if($isWriteRealignedBam) {
 | 
| 
 | 
   198     $cmd .= " -realigned-read-file " . ualignFile("normal") .
 | 
| 
 | 
   199             " --tumor-realigned-read-file " . ualignFile("tumor");
 | 
| 
 | 
   200 }
 | 
| 
 | 
   201 
 | 
| 
 | 
   202 if(defined($useroptions->{extraStrelkaArguments})){
 | 
| 
 | 
   203     my $arg=$useroptions->{extraStrelkaArguments};
 | 
| 
 | 
   204     if($arg !~ /^\s*$/) {
 | 
| 
 | 
   205         $cmd .= " " . $arg;
 | 
| 
 | 
   206     }
 | 
| 
 | 
   207 }
 | 
| 
 | 
   208 
 | 
| 
 | 
   209 # this file contains site stats that used to be printed on stdout:
 | 
| 
 | 
   210 #
 | 
| 
 | 
   211 $cmd .=
 | 
| 
 | 
   212 " --report-file " . File::Spec->catfile($binDir,'strelka.stats');
 | 
| 
 | 
   213 
 | 
| 
 | 
   214 $cmd .=
 | 
| 
 | 
   215 " >| " . File::Spec->catfile($binDir,'strelka.stdout') .
 | 
| 
 | 
   216 " 2>| " . File::Spec->catfile($binDir,'strelka.stderr');
 | 
| 
 | 
   217 
 | 
| 
 | 
   218 
 | 
| 
 | 
   219 executeCmd($cmd,0);
 | 
| 
 | 
   220 
 | 
| 
 | 
   221 
 | 
| 
 | 
   222 if($isWriteRealignedBam) {
 | 
| 
 | 
   223     for my $label (qw(normal tumor)) {
 | 
| 
 | 
   224         my $ufile = ualignFile($label);
 | 
| 
 | 
   225         if( -f $ufile ) {
 | 
| 
 | 
   226             my $afile = alignFile($label);
 | 
| 
 | 
   227             my $cmd = "samtools sort " . $ufile .  " " . $afile;
 | 
| 
 | 
   228             executeCmd($cmd,0);
 | 
| 
 | 
   229             unlink($ufile);
 | 
| 
 | 
   230         } else {
 | 
| 
 | 
   231             logX("Can't find unsorted realigned BAM file: '$ufile'");
 | 
| 
 | 
   232         }
 | 
| 
 | 
   233     }
 | 
| 
 | 
   234 }
 | 
| 
 | 
   235 
 | 
| 
 | 
   236 
 | 
| 
 | 
   237 1;
 | 
| 
 | 
   238 
 | 
| 
 | 
   239 __END__
 | 
| 
 | 
   240 
 | 
| 
 | 
   241 
 |