# HG changeset patch # User mini # Date 1411724094 14400 # Node ID 7c35d3efd1e75e029e8d00cdfd06623d8e43f424 # Parent f48854499d41f09fb85b6e6bc793888f989d4968 Deleted selected files diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/.configureStrelkaWorkflow.pl.swp Binary file strelka2/.configureStrelkaWorkflow.pl.swp has changed diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/.strelka.xml.swp Binary file strelka2/.strelka.xml.swp has changed diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/.strelka_wrapper.bash.swo Binary file strelka2/.strelka_wrapper.bash.swo has changed diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/.strelka_wrapper.py.swp Binary file strelka2/.strelka_wrapper.py.swp has changed diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/configureStrelkaWorkflow.pl --- a/strelka2/configureStrelkaWorkflow.pl Thu Sep 25 12:02:14 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,564 +0,0 @@ -#!/usr/bin/env perl - -=head1 LICENSE - -Strelka Workflow Software -Copyright (c) 2009-2013 Illumina, Inc. - -This software is provided under the terms and conditions of the -Illumina Open Source Software License 1. - -You should have received a copy of the Illumina Open Source -Software License 1 along with this program. If not, see -. - -=head1 SYNOPSIS - -configureStrelkaWorkflow.pl --tumor FILE --normal FILE --ref FILE --config FILE [options] - -This script configures the strelka workflow for somatic variant -calling on matched tumor-normal BAM files. The configuration process -will produce an analysis makefile and directory structure. The -makefile can be used to run the analysis on a workstation or compute -cluster via make/qmake or other makefile compatible process. - -=head1 ARGUMENTS - -=over 4 - -=item --tumor FILE - -Path to tumor sample BAM file (required) - -=item --normal FILE - -Path to normal sample BAM file (required) - -=item --ref FILE - -Path to reference genome fasta (required) - -=item --config FILE - -Strelka configuration file. Default config files can be found in -${STRELKA_INSTALL_ROOT}/etc/ for both ELAND and BWA alignments. (required) - -=back - -=head1 OPTIONS - -=over 4 - -=item --output-dir DIRECTORY - -Root of the analysis directory. This script will place all -configuration files in the analysis directory, and after configuration -all results and intermediate files will be written within the analysis -directory during a run. This directory must not already -exist. (default: ./strelkaAnalysis) - -=back - -=cut - -use warnings FATAL => 'all'; -use strict; - -use Carp; -$SIG{__DIE__} = \&Carp::confess; - -use Cwd qw(getcwd); -use File::Spec; -use Getopt::Long; -use Pod::Usage; - -my $baseDir; -my $libDir; -BEGIN { - my $thisDir=(File::Spec->splitpath($0))[1]; - $baseDir=$thisDir;#$baseDir=File::Spec->catdir($thisDir,File::Spec->updir()); - $libDir=File::Spec->catdir($baseDir,'lib'); -} -use lib $libDir; -use Utils; - -if(getAbsPath($baseDir)) { - errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'"); -} -my $libexecDir=File::Spec->catdir($baseDir,'libexec'); -#my $optDir=File::Spec->catdir($baseDir,'opt'); - - -my $scriptName=(File::Spec->splitpath($0))[2]; -my $argCount=scalar(@ARGV); -my $cmdline = join(' ',$0,@ARGV); - - -sub usage() { pod2usage(-verbose => 1, - -exitval => 2); } - -# -# user configuration: -# - -my ($tumorBam, $normalBam, $refFile, $configFile, $outDir); -my $help; - -GetOptions( "tumor=s" => \$tumorBam, - "normal=s" => \$normalBam, - "ref=s" => \$refFile, - "config=s" => \$configFile, - "output-dir=s" => \$outDir, - "help|h" => \$help) || usage(); - -usage() if($help); -usage() unless($argCount); - - -# -# Validate input conditions: -# - -sub checkFileArg($$) { - my ($file,$label) = @_; - - errorX("Must specify $label file") unless(defined($file)); #raise an error if file not defined - checkFile($file,$label); -} - -checkFileArg($tumorBam,"tumor BAM"); -checkFileArg($normalBam,"normal BAM"); -checkFileArg($refFile,"reference fasta"); -checkFileArg($configFile,"configuration ini"); - -sub makeAbsoluteFilePaths(\$) { - my ($filePathRef) = @_; - - my ($v,$fileDir,$fileName) = File::Spec->splitpath($$filePathRef); - if(getAbsPath($fileDir)) { - errorX("Can't resolve directory path for '$fileDir' from input file argument: '$$filePathRef'"); - } - $$filePathRef = File::Spec->catfile($fileDir,$fileName); -} - -makeAbsoluteFilePaths($tumorBam); -makeAbsoluteFilePaths($normalBam); -makeAbsoluteFilePaths($refFile); -makeAbsoluteFilePaths($configFile); - -# also check for BAM index files: -sub checkBamIndex($) { - my ($file) = @_; - my $ifile = $file . ".bai"; - if(! -f $ifile) { - errorX("Can't find index for BAM file '$file'"); - } -} - -checkBamIndex($tumorBam); -checkBamIndex($normalBam); - - -sub checkFaIndex($) { - my ($file) = @_; - my $ifile = $file . ".fai"; - if(! -f $ifile) { - errorX("Can't find index for fasta file '$file'"); - } - # check that fai file isn't improperly formatted (a la the GATK bundle NCBI 37 fai files) - open(my $FH,"< $ifile") || errorX("Can't open fai file '$ifile'"); - my $lineno=1; - while(<$FH>) { - chomp; - my @F=split(); - if(scalar(@F) != 5) { - errorX("Unexpected format for line number '$lineno' of fasta index file: '$ifile'\n\tRe-running fasta indexing may fix the issue. To do so, run: \"samtools faidx $file\""); - } - $lineno++; - } - close($FH); -} - -checkFaIndex($refFile); - - -if(defined($outDir)) { - if(getAbsPath($outDir)) { - errorX("Can't resolve path for ouput directory: '$outDir'"); - } -} else { - $outDir=File::Spec->catdir(Cwd::getcwd(),'strelkaAnalysis'); -} - -if(-e $outDir) { - errorX("Output path already exists: '$outDir'"); -} - -if(getAbsPath($baseDir)) { - errorX("Can't resolve path for strelka install directory: '$baseDir'"); -} - - #my $samtoolsDir = File::Spec->catdir($optDir,'samtools'); -checkDir($libexecDir,"strelka libexec"); - #checkDir($samtoolsDir,"samtools"); - -my $callScriptName = "callSomaticVariants.pl"; -my $filterScriptName = "filterSomaticVariants.pl"; -my $finishScriptName = "consolidateResults.pl"; -my $callScript = File::Spec->catfile($libexecDir,$callScriptName); -my $filterScript = File::Spec->catfile($libexecDir,$filterScriptName); -my $finishScript = File::Spec->catfile($libexecDir,$finishScriptName); -my $countFasta = File::Spec->catfile($libexecDir,"countFastaBases"); - #my $samtoolsBin = File::Spec->catfile($samtoolsDir,"samtools"); -checkFile($callScript,"somatic variant call script"); -checkFile($filterScript,"somatic variant filter script"); -checkFile($finishScript,"result consolidation script"); -checkFile($countFasta,"fasta scanner"); -#checkFile($samtoolsBin,"samtools"); - -# -# Configure bin runs: -# -checkMakeDir($outDir); - -# -# Configure bin runs: open and validate config ini -# -my $config = parseConfigIni($configFile); - -sub checkConfigKeys($) { - my ($keyref) = @_; - for (@$keyref) { - errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_})); - } -} - -# these are the keys we need at configuration time: -my @config_keys = qw(binSize); - -# these are additional keys we will need to run the workflow: -# (note we don't check for (maxInputDepth,minTier2Mapq) for back compatibility with older config files) -my @workflow_keys = qw( -minTier1Mapq isWriteRealignedBam ssnvPrior sindelPrior ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac -ssnvQuality_LowerBound sindelQuality_LowerBound isSkipDepthFilters depthFilterMultiple -snvMaxFilteredBasecallFrac snvMaxSpanningDeletionFrac indelMaxRefRepeat -indelMaxWindowFilteredBasecallFrac indelMaxIntHpolLength); - -checkConfigKeys(\@config_keys); -checkConfigKeys(\@workflow_keys); - - -my $binSize = int($config->{user}{binSize}); - -$config->{derived}{configurationCmdline} = $cmdline; -$config->{derived}{normalBam} = $normalBam; -$config->{derived}{tumorBam} = $tumorBam; -$config->{derived}{refFile} = $refFile; -$config->{derived}{outDir} = $outDir; - -# -# Configure bin runs: check for consistent chrom info between BAMs and reference -# -sub getBamChromInfo($) { - my $file = shift; - my $cmd = "samtools view -H $file |"; - open(my $FH,$cmd) || errorX("Can't open process $cmd"); - - my %info; - my $n=0; - while(<$FH>) { - next unless(/^\@SQ/); - chomp; - my @F = split(/\t/); - scalar(@F) >= 3 || errorX("Unexpected bam header for file '$file'"); - - my %h = (); - foreach (@F) { - my @vals = split(':'); - $h{$vals[0]} = $vals[1]; - } - $F[1] = $h{'SN'}; - $F[2] = $h{'LN'}; - - my $size = int($F[2]); - ($size > 0) || errorX("Unexpected chromosome size '$size' in bam header for file '$file'"); - $info{$F[1]}{size} = $size; - $info{$F[1]}{order} = $n; - $n++; - } - close($FH) || errorX("Can't close process $cmd"); - return %info; -} - - -my %chromInfo = getBamChromInfo($normalBam); -my @chroms = sort { $chromInfo{$a}{order} <=> $chromInfo{$b}{order} } (keys(%chromInfo)); - -{ - #consistency check: - my %tumorChromInfo = getBamChromInfo($tumorBam); - for my $chrom (@chroms) { - my $ln = $chromInfo{$chrom}{size}; - my $tln = $tumorChromInfo{$chrom}{size}; - my $order = $chromInfo{$chrom}{order}; - my $torder = $tumorChromInfo{$chrom}{order}; - unless(defined($tln) && ($tln==$ln) && ($torder==$order)) { - errorX("Tumor and normal BAM file headers disagree on chromosome: '$chrom'"); - } - delete $tumorChromInfo{$chrom}; - } - for my $chrom (keys(%tumorChromInfo)) { - errorX("Tumor and normal BAM file headers disagree on chromosome: '$chrom'"); - } -} - - -my %refChromInfo; -logX("Scanning reference genome"); -{ - my $knownGenomeSize=0; - my $cmd="$countFasta $refFile |"; - open(my $FFH,$cmd) || errorX("Failed to open process '$cmd'"); - - while(<$FFH>) { - chomp; - my @F = split(/\t/); - scalar(@F) == 4 || errorX("Unexpected value from '$cmd'"); - $knownGenomeSize += int($F[2]); - $refChromInfo{$F[1]}{knownSize} = int($F[2]); - $refChromInfo{$F[1]}{size} = int($F[3]); - } - close($FFH) || errorX("Failed to close process '$cmd'"); - - #consistency check: - for my $chrom (@chroms) { - my $ln = $chromInfo{$chrom}{size}; - my $rln = $refChromInfo{$chrom}{size}; - unless(defined($rln) && ($rln==$ln)) { - errorX("BAM headers and reference fasta disagree on chromosome: '$chrom'"); - } - $config->{derived}{"chrom_${chrom}_size"} = $rln; - $config->{derived}{"chrom_${chrom}_knownSize"} = $refChromInfo{$chrom}{knownSize}; - } - $config->{derived}{chromOrder} = join("\t",@chroms); - - $config->{derived}{knownGenomeSize} = $knownGenomeSize; -} -logX("Scanning reference genome complete"); - - - -# -# Configure bin runs: create directory structure -# -my $resultsDir = File::Spec->catdir($outDir,'results'); -checkMakeDir($resultsDir); -if($config->{user}{isWriteRealignedBam}) { - my $bamDir = File::Spec->catdir($outDir,'realigned'); - checkMakeDir($bamDir); -} -my $chromRootDir = File::Spec->catdir($outDir,'chromosomes'); -checkMakeDir($chromRootDir); -for my $chrom (@chroms) { - my $chromDir = File::Spec->catdir($chromRootDir,$chrom); - checkMakeDir($chromDir); - - my $chromRef = $chromInfo{$chrom}; - $chromRef->{dir} = $chromDir; - $chromRef->{binList} = getBinList($chromRef->{size},$binSize); - - my $binRootDir = File::Spec->catdir($chromDir,'bins'); - checkMakeDir($binRootDir); - - for my $binId ( @{$chromRef->{binList}} ) { - my $binDir = File::Spec->catdir($binRootDir,$binId); - checkMakeDir($binDir); - } -} - - - -# -# write run config file: -# -my $runConfigFile; -{ - my $cstr = <catdir($outDir,'config'); - checkMakeDir($configDir); - $runConfigFile = File::Spec->catdir($configDir,'run.config.ini'); - open(my $FH,"> $runConfigFile") || errorX("Can't open file '$runConfigFile'"); - print $FH $cstr; - close($FH); -} - - - -# -# create makefile -# -my $makeFile = File::Spec->catfile($outDir,"Makefile"); -open(my $MAKEFH, "> $makeFile") || errorX("Can't open file: '$makeFile'"); - -my $completeFile = "task.complete"; - -print $MAKEFH <. - -=cut - - -package Utils; - -use base 'Exporter'; - -our @EXPORT = qw( - errorX logX executeCmd checkFile checkDir checkMove - getAbsPath checkMakeDir getBinList - parseConfigIni writeConfigIni - ); - -use warnings FATAL => 'all'; -use strict; - -use Carp; -use Cwd qw(realpath); -use File::Copy qw(move); -use File::Path qw(mkpath); - - -sub errorX($) { - confess "\nERROR: " . $_[0] . "\n\n"; -} - -sub logX($) { - print STDERR "INFO: " . $_[0] . "\n"; -} - - -sub executeCmd($;$) { - my $cmd = shift; - my $isVerbose = shift; - - logX("Running: '$cmd'") if(defined($isVerbose) and $isVerbose); - system($cmd) == 0 - or errorX("Failed system call: '$cmd'"); -} -#return an error if file does not exist -sub checkFile($;$) { - my $file = shift; - return if(-f $file); - my $label = shift; - errorX("Can't find" . (defined($label) ? " $label" : "") . " file: '$file'"); -} -#return an error if file does not Exist -sub checkDir($;$) { - my $dir = shift; - return if(-d $dir); - my $label = shift; - errorX("Can't find" . (defined($label) ? " $label" : "") . " directory: '$dir'"); -} - -sub checkMove($$) { - my ($old,$new) = @_; - move($old,$new) || errorX("File move failed: $!\n\tAttempting to move '$old' to '$new'"); -} - - - - -=item getAbsPath($path) - -This procedure attempts to convert a path provided by the user on the -command line into an absolute path. It should be able to handle "~" -paths and conventional relative paths using ".." or ".". Resolution of -links should follow the convention of "Cwd::realpath". - -B - - $dirRef - path (converted to absolute path in place) - -B - - returns zero if successful, non-zero otherwise. - -=cut -sub getAbsPath(\$) { - my ($dirRef) = @_; - my @tmp=glob($$dirRef); - return 1 if(scalar(@tmp) != 1); - my $ret = Cwd::realpath($tmp[0]); - return 1 if !$ret && !($ret = File::Spec->rel2abs($tmp[0])); - $$dirRef = $ret; - return 0; -} - - -#verify path is not a file, then create a directory with this name if does not exist -sub checkMakeDir($) { - my $dir = shift; - unless (-e $dir) { - File::Path::mkpath($dir) || errorX("Can't create directory '$dir'"); - } else { - errorX("Path is not a directory '$dir'\n") unless(-d $dir); - } -} - - - -sub getBinList($$) { - my ($chromSize,$binSize) = @_; - - my $nm1 = (($chromSize-1) / $binSize); - return [ map {sprintf("%04i",$_)} (0..$nm1) ]; -} - - - -sub parseConfigError($$) { - my ($file,$line) = @_; - errorX("Config file '$file' contains unexpected line '$line'\n"); -} - -#lis le fichier de config, si la ligne est de type : some space + [ some character ] + some space then register ther character in $section. ( [user] , then $section=user). -sub parseConfigIni($) { - my $file = shift; - my %config; - open(my $FH,"< $file") || errorX("Can't open config file '$file'"); - my $section = "noSection"; - while(<$FH>) { - next if(/^[;#]/); - next if(/^\s*$/); - chomp; - my $line=$_; - my @ncl = split(/[;#]/); - next unless(scalar(@ncl)); - my $nc = $ncl[0]; - if($nc =~ /^\s*\[([^\]]*)\]\s*$/) { - $section = $1; - next; - } - my ($key,$val) = map { s/^\s+//; s/\s+$//; $_ } split(/=/,$nc,2); - unless(defined($key) && defined($val) && ($key ne "")) { parseConfigError($file,$line); } - - $config{$section}{$key} = $val; - } - close($FH); - return \%config; -} - - -# minimal ini stringifier: -# -sub writeConfigIni($) { - my $config = shift; - my $val = ""; - for my $section (sort(keys(%$config))) { - $val .= "\n[$section]\n"; - for my $key (sort(keys(%{$config->{$section}}))) { - $val .= "$key = " . $config->{$section}{$key} . "\n"; - } - } - return $val; -} - - -1; diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/libexec/callSomaticVariants.pl --- a/strelka2/libexec/callSomaticVariants.pl Thu Sep 25 12:02:14 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,241 +0,0 @@ -#!/usr/bin/env perl -print "\n"; -print @INC; -print "\n"; -=head1 LICENSE - -Strelka Workflow Software -Copyright (c) 2009-2013 Illumina, Inc. - -This software is provided under the terms and conditions of the -Illumina Open Source Software License 1. - -You should have received a copy of the Illumina Open Source -Software License 1 along with this program. If not, see -. - -=head1 SYNOPSIS - -callSomaticVariants.pl [options] | --help - -=head2 SUMMARY - -Run the somatic variant caller for snvs and indels on a single -chromosome bin. - -=cut - -use warnings FATAL => 'all'; -use strict; - -use Carp; -$SIG{__DIE__} = \&Carp::confess; - -use File::Spec; -use Getopt::Long; -use Pod::Usage; - -my $baseDir; -my $libDir; -BEGIN { -print "\n"; -print @INC; -print "\n"; - - my $thisDir=(File::Spec->splitpath($0))[1]; - $baseDir=File::Spec->catdir($thisDir,File::Spec->updir()); - $libDir=File::Spec->catdir($baseDir,'lib'); -} -use lib $libDir; -use Utils; - -if(getAbsPath($baseDir)) { - errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'"); -} -my $libexecDir=File::Spec->catdir($baseDir,'libexec'); -#my $optDir=File::Spec->catdir($baseDir,'opt'); - - -my $scriptName=(File::Spec->splitpath($0))[2]; -my $argCount=scalar(@ARGV); -my $cmdline = join(' ',$0,@ARGV); - - -my ($chrom, $binId, $configFile); -my $help; - -GetOptions( - "chrom=s" => \$chrom, - "bin=s" => \$binId, - "config=s" => \$configFile, - "help|h" => \$help) or pod2usage(2); - -pod2usage(2) if($help); -pod2usage(2) unless(defined($chrom)); -pod2usage(2) unless(defined($binId)); -pod2usage(2) unless(defined($configFile)); - - - -# -# check all fixed paths (not based on commandline arguments): -# -checkDir($baseDir); -checkDir($libexecDir); -#checkDir($optDir); - -my $strelkaBin=File::Spec->catdir($libexecDir,'strelka2'); -checkFile($strelkaBin,"strelka binary"); -#my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools'); -#checkFile($samtoolsBin,"samtools binary"); - - - -# -# read config and validate values -# -checkFile($configFile,"configuration ini"); -my $config = parseConfigIni($configFile); - -for (qw(knownGenomeSize tumorBam normalBam refFile outDir)) { - errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_})); -} - -# we skip maxInputDepth,minTier2Mapq here to allow older config files -for (qw(minTier1Mapq isWriteRealignedBam binSize ssnvPrior sindelPrior - ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac)) { - errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_})); -} - -my $outDir = $config->{derived}{outDir}; -my $binDir = File::Spec->catdir($outDir,'chromosomes',$chrom,'bins',$binId); -checkDir($outDir,"output"); -checkDir($binDir,"output bin"); - - -my $tumorBam = $config->{derived}{tumorBam}; -my $normalBam = $config->{derived}{normalBam}; -my $refFile = $config->{derived}{refFile}; -checkFile($tumorBam,"tumor BAM"); -checkFile($normalBam,"normal BAM"); -checkFile($refFile,"reference"); - - -# pull out some config options for convenience: -my $binSize=$config->{user}{binSize}; -my $isWriteRealignedBam=$config->{user}{isWriteRealignedBam}; -my $knownGenomeSize = $config->{derived}{knownGenomeSize}; - - -my $begin = (int($binId)*$binSize)+1; -my $end = ((int($binId)+1)*$binSize); -#my $end = $begin+100000; #debug mode - -my $useroptions = $config->{user}; - -# set previous default value if an older config file is being used: -if(! defined($useroptions->{minTier2Mapq})) { - $useroptions->{minTier2Mapq} = 5; -} - - -# -# setup the strelka command-line: -# -my $strelka_base_opts= "-clobber" . -" -filter-unanchored" . -" -min-paired-align-score " . $useroptions->{minTier1Mapq} . -" -min-single-align-score 10" . -" -min-qscore 0" . -" -report-range-begin $begin -report-range-end $end" . -" -samtools-reference '$refFile'" . -" -max-window-mismatch 3 20 -print-used-allele-counts" . -" -bam-seq-name '" . $chrom . "'" . -" -genome-size $knownGenomeSize" . -" -max-indel-size 50" . -" -indel-nonsite-match-prob 0.5" . -" --min-contig-open-end-support 35" . -" --somatic-snv-rate " . $useroptions->{ssnvPrior} . -" --shared-site-error-rate " . $useroptions->{ssnvNoise} . -" --shared-site-error-strand-bias-fraction " . $useroptions->{ssnvNoiseStrandBiasFrac} . -" --somatic-indel-rate " . $useroptions->{sindelPrior} . -" --shared-indel-error-rate " . $useroptions->{sindelNoise} . -" --tier2-min-single-align-score " . $useroptions->{minTier2Mapq} . -" --tier2-min-paired-align-score " . $useroptions->{minTier2Mapq} . -" --tier2-single-align-score-rescue-mode" . -" --tier2-mismatch-density-filter-count 10" . -" --tier2-no-filter-unanchored" . -" --tier2-indel-nonsite-match-prob 0.25" . -" --tier2-include-singleton" . -" --tier2-include-anomalous"; - - -my $somSnvFile='somatic.snvs.unfiltered.vcf'; -my $somIndelFile='somatic.indels.unfiltered.vcf'; - -my $cmd = "$strelkaBin $strelka_base_opts" . -" -bam-file " . $normalBam . -" --tumor-bam-file " . $tumorBam . -" --somatic-snv-file " . File::Spec->catfile($binDir,$somSnvFile) . -" --somatic-indel-file " . File::Spec->catfile($binDir,$somIndelFile) . -" --variant-window-flank-file 50 " . File::Spec->catfile($binDir,$somIndelFile . '.window'); - - -sub ualignFile($) { - return File::Spec->catfile($binDir,$_[0] . ".unsorted.realigned.bam"); -} -sub alignFile($) { - return File::Spec->catfile($binDir,$_[0] . ".realigned"); -} - - -if(exists($useroptions->{maxInputDepth}) && ($useroptions->{maxInputDepth} > 0)) { - $cmd .= " --max-input-depth " . $useroptions->{maxInputDepth}; -} - - -if($isWriteRealignedBam) { - $cmd .= " -realigned-read-file " . ualignFile("normal") . - " --tumor-realigned-read-file " . ualignFile("tumor"); -} - -if(defined($useroptions->{extraStrelkaArguments})){ - my $arg=$useroptions->{extraStrelkaArguments}; - if($arg !~ /^\s*$/) { - $cmd .= " " . $arg; - } -} - -# this file contains site stats that used to be printed on stdout: -# -$cmd .= -" --report-file " . File::Spec->catfile($binDir,'strelka.stats'); - -$cmd .= -" >| " . File::Spec->catfile($binDir,'strelka.stdout') . -" 2>| " . File::Spec->catfile($binDir,'strelka.stderr'); - - -executeCmd($cmd,0); - - -if($isWriteRealignedBam) { - for my $label (qw(normal tumor)) { - my $ufile = ualignFile($label); - if( -f $ufile ) { - my $afile = alignFile($label); - my $cmd = "samtools sort " . $ufile . " " . $afile; - executeCmd($cmd,0); - unlink($ufile); - } else { - logX("Can't find unsorted realigned BAM file: '$ufile'"); - } - } -} - - -1; - -__END__ - - diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/libexec/consolidateResults.pl --- a/strelka2/libexec/consolidateResults.pl Thu Sep 25 12:02:14 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,276 +0,0 @@ -#!/usr/bin/env perl - -=head1 LICENSE - -Strelka Workflow Software -Copyright (c) 2009-2013 Illumina, Inc. - -This software is provided under the terms and conditions of the -Illumina Open Source Software License 1. - -You should have received a copy of the Illumina Open Source -Software License 1 along with this program. If not, see -. - -=head1 SYNOPSIS - -consolidateSomaticVariants.pl [options] | --help - -=head2 SUMMARY - -Aggregate final results from all chromosomes - -=cut - -use warnings FATAL => 'all'; -use strict; - -use Carp; -$SIG{__DIE__} = \&Carp::confess; - -use File::Spec; -use File::Temp; -use Getopt::Long; -use Pod::Usage; - -my $baseDir; -my $libDir; -#my $optDir; -#my $vcftDir; -BEGIN { - my $thisDir=(File::Spec->splitpath($0))[1]; - $baseDir=File::Spec->catdir($thisDir,File::Spec->updir()); - $libDir=File::Spec->catdir($baseDir,'lib'); - #$optDir=File::Spec->catdir($baseDir,'opt'); - #$vcftDir=File::Spec->catdir($optDir,'vcftools','lib','perl5','site_perl'); -} -use lib $libDir; -use Utils; -#use lib $vcftDir; -use Vcf; - -if(getAbsPath($baseDir)) { - errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'"); -} -#$optDir=File::Spec->catdir($baseDir,'opt'); - - -my $scriptName=(File::Spec->splitpath($0))[2]; -my $argCount=scalar(@ARGV); -my $cmdline=join(' ',$0,@ARGV); - - -my $configFile; -my $help; - -GetOptions( "config=s" => \$configFile, - "help|h" => \$help) or pod2usage(2); - -pod2usage(2) if($help); -pod2usage(2) unless(defined($configFile)); - -# -# check fixed paths -# -#my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools'); -#checkFile($samtoolsBin,"samtools binary"); - - -# -# read config and validate values -# -checkFile($configFile,"configuration ini"); -my $config = parseConfigIni($configFile); - - -for (qw(outDir chromOrder)) { - errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_})); -} -for (qw(isWriteRealignedBam binSize)) { - errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_})); -} - -my $userconfig = $config->{user}; - -my @chromOrder = split(/\t/,$config->{derived}{chromOrder}); -for my $chrom (@chromOrder) { - my $chromSizeKey = "chrom_" . $chrom . "_size"; - errorX("Undefined configuration option: '$_'") unless(defined($chromSizeKey)); -} - -my $outDir = $config->{derived}{outDir}; -checkDir($outDir,"output"); - - -my $isWriteRealignedBam = $userconfig->{isWriteRealignedBam}; - -for my $chrom (@chromOrder) { - my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); - checkDir($chromDir,"input chromosome"); - - next unless($isWriteRealignedBam); - my $chromSizeKey = "chrom_" . $chrom . "_size"; - my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize}); - for my $binId (@$binList) { - my $dir = File::Spec->catdir($chromDir,'bins',$binId); - checkDir($dir,"input bin"); - } -} - - - -# suffix used for large result file intermediates: -my $itag = ".incomplete"; - - -# -# concatenate vcfs: -# -sub concatenateVcfs($) { - my $fileName = shift; - - my $is_first = 1; - - my $allFileName = "all." . $fileName; - my $allFile = File::Spec->catfile($outDir,'results',$allFileName . $itag); - open(my $aFH,'>',"$allFile") - || errorX("Failed to open file: '$allFile'"); - - # loop over all chroms once to create the header, and one more time for all the data: - my $headervcf; - for my $chrom (@chromOrder) { - my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); - my $iFile = File::Spec->catfile($chromDir,$fileName); - checkFile($iFile); - - my $depthKey="maxDepth_${chrom}"; - - if($is_first) { - open(my $iFH,'<',"$iFile") - || errorX("Failed to open file: '$iFile'"); - $headervcf = Vcf->new(fh=>$iFH); - $headervcf->parse_header(); - $headervcf->remove_header_line(key=>"cmdline"); - $headervcf->add_header_line({key=>"cmdline",value=>$cmdline}); - $headervcf->remove_header_line(key=>"$depthKey"); - close($iFH); - $is_first=0; - } - - { - open(my $iFH,'<',"$iFile") - || errorX("Failed to open file: '$iFile'"); - my $vcf = Vcf->new(fh=>$iFH); - $vcf->parse_header(); - for my $line (@{$vcf->get_header_line(key=>"$depthKey")}) { - # $line seems to be returned as a length 1 array ref to a hash -- ??!?!??!! - $headervcf->add_header_line($line->[0]); - } - $vcf->close(); - close($iFH); - } - } - print $aFH $headervcf->format_header(); - $headervcf->close(); - - for my $chrom (@chromOrder) { - my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); - my $iFile = File::Spec->catfile($chromDir,$fileName); - - open(my $iFH,'<',"$iFile") - || errorX("Failed to open file: '$iFile'"); - - my $vcf = Vcf->new(fh=>$iFH); - $vcf->parse_header(); - print $aFH $_ while(<$iFH>); - } - - close($aFH); - - # make a second set of files with only the passed variants: - my $passedFileName = "passed." . $fileName; - my $passedFile = File::Spec->catfile($outDir,'results',$passedFileName . $itag); - open(my $pFH,'>',"$passedFile") - || errorX("Failed to open file: '$passedFile'"); - - open(my $arFH,'<',"$allFile") - || errorX("Failed to open file: '$allFile'"); - - while(<$arFH>) { - chomp; - unless(/^#/) { - my @F = split(/\t/); - next if((scalar(@F)>=7) && ($F[6] ne "PASS")); - } - print $pFH "$_\n"; - } - - close($arFH); - close($pFH); - - my $allFileFinished = File::Spec->catfile($outDir,'results',$allFileName); - checkMove($allFile,$allFileFinished); - - my $passedFileFinished = File::Spec->catfile($outDir,'results',$passedFileName); - checkMove($passedFile,$passedFileFinished); -} - -concatenateVcfs("somatic.snvs.vcf"); -concatenateVcfs("somatic.indels.vcf"); - - -my $bamSuffix = ".realigned.bam"; - -sub consolidateBam($) { - my $label = shift; - - my $fileName = $label . $bamSuffix; - - my $reDir = File::Spec->catdir($outDir,'realigned'); - checkMakeDir($reDir); - - my @bamList; - for my $chrom (@chromOrder) { - my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); - - my $chromSizeKey = "chrom_" . $chrom . "_size"; - my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize}); - for my $binId (@$binList) { - my $binDir = File::Spec->catdir($chromDir,'bins',$binId); - my $rbamFile = File::Spec->catfile($binDir,$fileName); - checkFile($rbamFile,"bin realigned bam file"); - - push @bamList,$rbamFile; - } - } - - return unless(scalar(@bamList)); - - my $headerFH = File::Temp->new(); - my $getHeaderCmd = "bash -c '$samtoolsBin view -H ".$bamList[0]." > $headerFH'"; - executeCmd($getHeaderCmd); - - my $allFile = File::Spec->catfile($reDir,$fileName . $itag); - my $cmd="$samtoolsBin merge -h $headerFH $allFile ". join(" ",@bamList); - executeCmd($cmd); - - my $allFileFinished = File::Spec->catfile($reDir,$fileName); - checkMove($allFile,$allFileFinished); - - my $indexCmd="$samtoolsBin index $allFileFinished"; - executeCmd($indexCmd); - - # for now don't remove all the bin realignments... - # unlink(@bamList); -} - -if($isWriteRealignedBam) { - consolidateBam("normal"); - consolidateBam("tumor"); -} - - -1; - -__END__ - diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/libexec/countFastaBases Binary file strelka2/libexec/countFastaBases has changed diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/libexec/filterSomaticVariants.pl --- a/strelka2/libexec/filterSomaticVariants.pl Thu Sep 25 12:02:14 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,435 +0,0 @@ -#!/usr/bin/env perl - -=head1 LICENSE - -Strelka Workflow Software -Copyright (c) 2009-2013 Illumina, Inc. - -This software is provided under the terms and conditions of the -Illumina Open Source Software License 1. - -You should have received a copy of the Illumina Open Source -Software License 1 along with this program. If not, see -. - -=head1 SYNOPSIS - -filterSomaticVariants.pl [options] | --help - -=head2 SUMMARY - -Aggregate and filter the variant caller results by chromosome - -=cut - -use warnings FATAL => 'all'; -use strict; - -use Carp; -$SIG{__DIE__} = \&Carp::confess; - -use File::Spec; -use Getopt::Long; -use Pod::Usage; - -my $baseDir; -my $libDir; -#my $vcftDir; -BEGIN { - my $thisDir=(File::Spec->splitpath($0))[1]; - $baseDir=File::Spec->catdir($thisDir,File::Spec->updir()); - $libDir=File::Spec->catdir($baseDir,'lib'); - #my $optDir=File::Spec->catdir($baseDir,'opt'); - #$vcftDir=File::Spec->catdir($optDir,'vcftools','lib','perl5','site_perl'); -} -use lib $libDir; -use Utils; -#use lib $vcftDir; -#use Vcf; - - -my $scriptName=(File::Spec->splitpath($0))[2]; -my $argCount=scalar(@ARGV); -my $cmdline = join(' ',$0,@ARGV); - - -my ($chrom, $configFile); -my $help; - -GetOptions( "chrom=s" => \$chrom, - "config=s" => \$configFile, - "help|h" => \$help) or pod2usage(2); - -pod2usage(2) if($help); -pod2usage(2) unless(defined($chrom)); -pod2usage(2) unless(defined($configFile)); - - - -# -# read config and validate values -# -checkFile($configFile,"configuration ini"); -my $config = parseConfigIni($configFile); - -my $chromSizeKey = "chrom_" . $chrom . "_size"; -my $chromKnownSizeKey = "chrom_" . $chrom . "_knownSize"; - -for (("outDir", $chromSizeKey, $chromKnownSizeKey)) { - errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_})); -} -for (qw(binSize ssnvQuality_LowerBound sindelQuality_LowerBound - isSkipDepthFilters depthFilterMultiple - snvMaxFilteredBasecallFrac snvMaxSpanningDeletionFrac - indelMaxRefRepeat indelMaxWindowFilteredBasecallFrac - indelMaxIntHpolLength)) { - errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_})); -} - -my $outDir = $config->{derived}{outDir}; -my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom); -checkDir($outDir,"output"); -checkDir($chromDir,"output chromosome"); - -my $userconfig = $config->{user}; - -my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize}); -for my $binId (@$binList) { - my $dir = File::Spec->catdir($chromDir,'bins',$binId); - checkDir($dir,"input bin"); -} - -# -# parameters from user config file: -# - -# minimum passed ssnv_nt Q-score: -my $minQSSNT=$userconfig->{ssnvQuality_LowerBound}; -# minimum passed sindel_nt Q-score: -my $minQSINT=$userconfig->{sindelQuality_LowerBound}; - -# -# filtration parameters from user config file: -# - -# skip depth filters for targeted resequencing: -my $isUseDepthFilter=(! $userconfig->{isSkipDepthFilters}); -# multiple of the normal mean coverage to filter snvs and indels -my $depthFilterMultiple=$userconfig->{depthFilterMultiple}; -# max filtered basecall fraction for any sample -my $snvMaxFilteredBasecallFrac=$userconfig->{snvMaxFilteredBasecallFrac}; -# max snv spanning-deletion fraction for any sample -my $snvMaxSpanningDeletionFrac=$userconfig->{snvMaxSpanningDeletionFrac}; -# max indel reference repeat length -my $indelMaxRefRepeat=$userconfig->{indelMaxRefRepeat}; -# max indel window filter fraction -my $indelMaxWindowFilteredBasecallFrac=$userconfig->{indelMaxWindowFilteredBasecallFrac}; -# max indel interupted hompolymer length: -my $indelMaxIntHpolLength=$userconfig->{indelMaxIntHpolLength}; - - -# first we want the normal sample mean chromosome depth: -# -my $filterCoverage; -if($isUseDepthFilter) { - my $totalCoverage = 0; - for my $binId (@$binList) { - my $sFile = File::Spec->catfile($chromDir,'bins',$binId,'strelka.stats'); - checkFile($sFile,"strelka bin stats"); - open(my $sFH, '<', $sFile) - || errorX("Can't open file: '$sFile' $!"); - - my $is_found=0; - while(<$sFH>) { - next unless(/^NORMAL_NO_REF_N_COVERAGE\s/); - my $is_bad = 0; - if(not /sample_size:\s*(\d+)\s+/) { $is_bad=1; } - my $ss=$1; - # leave the regex for mean fairly loose to pick up scientific notation, etc.. - if(not /mean:\s*(\d[^\s]*|nan)\s+/) { $is_bad=1; } - my $mean=$1; - errorX("Unexpected format in file: '$sFile'") if($is_bad); - - my $coverage = ( $ss==0 ? 0 : int($ss*$mean) ); - - $totalCoverage += $coverage; - $is_found=1; - last; - } - close($sFH); - errorX("Unexpected format in file: '$sFile'") unless($is_found); - } - - my $chromKnownSize = $config->{derived}{$chromKnownSizeKey}; - my $normalMeanCoverage = ($totalCoverage/$chromKnownSize); - $filterCoverage = $normalMeanCoverage*$depthFilterMultiple; -} - - -# add filter description to vcf header unless it already exists -# return 1 if filter id already exists, client can decide if this is an error -# -sub add_vcf_filter($$$) { - my ($vcf,$id,$desc) = @_; - return 1 if(scalar(@{$vcf->get_header_line(key=>'FILTER', ID=>$id)})); - $vcf->add_header_line({key=>'FILTER', ID=>$id,Description=>$desc}); - return 0; -} - - -sub check_vcf_for_sample($$$) { - my ($vcf,$sample,$file) = @_; - my $is_found=0; - for ($vcf->get_samples()) { - $is_found=1 if($_ eq $sample); - } - errorX("Failed to find sample '$sample' in vcf file '$file'") unless($is_found); -} - - - -my $depthFiltId="DP"; - - -# Runs all post-call vcf filters: -# -sub filterSnvFileList(\@$$$) { - my ($ifiles,$depthFilterVal,$acceptFileName,$isUseDepthFilter) = @_; - - my $baseFiltId="BCNoise"; - my $spanFiltId="SpanDel"; - my $qFiltId="QSS_ref"; - - open(my $aFH,'>',$acceptFileName) - or errorX("Failed to open file: '$acceptFileName'"); - - my $is_first=1; - for my $ifile (@$ifiles) { - open(my $iFH,'<',"$ifile") - or errorX("Failed to open file: '$ifile'"); - my $vcf = Vcf->new(fh=>$iFH); - - # run some simple header validation on each vcf: - $vcf->parse_header(); - check_vcf_for_sample($vcf,'NORMAL',$ifile); - check_vcf_for_sample($vcf,'TUMOR',$ifile); - - if($is_first) { - # TODO: update vcf meta-data for chromosome-level filtered files - # - $vcf->remove_header_line(key=>"cmdline"); - $vcf->add_header_line({key=>"cmdline",value=>$cmdline}); - if($isUseDepthFilter) { - $vcf->add_header_line({key=>"maxDepth_$chrom",value=>$depthFilterVal}); - add_vcf_filter($vcf,$depthFiltId,"Greater than ${depthFilterMultiple}x chromosomal mean depth in Normal sample"); - } - add_vcf_filter($vcf,$baseFiltId,"Fraction of basecalls filtered at this site in either sample is at or above $snvMaxFilteredBasecallFrac"); - add_vcf_filter($vcf,$spanFiltId,"Fraction of reads crossing site with spanning deletions in either sample exceeeds $snvMaxSpanningDeletionFrac"); - add_vcf_filter($vcf,$qFiltId,"Normal sample is not homozygous ref or ssnv Q-score < $minQSSNT, ie calls with NT!=ref or QSS_NT < $minQSSNT"); - print $aFH $vcf->format_header(); - $is_first=0; - } - - while(my $x=$vcf->next_data_hash()) { - my $norm=$x->{gtypes}->{NORMAL}; - my $tumr=$x->{gtypes}->{TUMOR}; - - my %filters; - - # normal depth filter: - my $normalDP=$norm->{DP}; - if($isUseDepthFilter) { - $filters{$depthFiltId} = ($normalDP > $depthFilterVal); - } - - # filtered basecall fraction: - my $normal_filt=($normalDP>0 ? $norm->{FDP}/$normalDP : 0); - - my $tumorDP=$tumr->{DP}; - my $tumor_filt=($tumorDP>0 ? $tumr->{FDP}/$tumorDP : 0); - - $filters{$baseFiltId}=(($normal_filt >= $snvMaxFilteredBasecallFrac) or - ($tumor_filt >= $snvMaxFilteredBasecallFrac)); - - # spanning deletion fraction: - my $normalSDP=$norm->{SDP}; - my $normalSpanTot=($normalDP+$normalSDP); - my $normalSpanDelFrac=($normalSpanTot>0 ? ($normalSDP/$normalSpanTot) : 0); - - my $tumorSDP=$tumr->{SDP}; - my $tumorSpanTot=($tumorDP+$tumorSDP); - my $tumorSpanDelFrac=($tumorSpanTot>0 ? ($tumorSDP/$tumorSpanTot) : 0); - - $filters{$spanFiltId}=(($normalSpanDelFrac > $snvMaxSpanningDeletionFrac) or - ($tumorSpanDelFrac > $snvMaxSpanningDeletionFrac)); - - # Q-val filter: - $filters{$qFiltId}=(($x->{INFO}->{NT} ne "ref") or - ($x->{INFO}->{QSS_NT} < $minQSSNT)); - - $x->{FILTER} = $vcf->add_filter($x->{FILTER},%filters); - - print $aFH $vcf->format_line($x); - } - - $vcf->close(); - close($iFH); - } - - close($aFH); -} - - - -sub updateA2(\@$) { - my ($a2,$FH) = @_; - my $line=<$FH>; - unless(defined($line)) { errorX("Unexpected end of somatic indel window file"); } - chomp $line; - @$a2 = split("\t",$line); - unless(scalar(@$a2)) { errorX("Unexpected format in somatic indel window file"); } -} - - - -sub filterIndelFileList(\@$$$) { - my ($ifiles,$depthFilterVal,$acceptFileName,$isUseDepthFilter) = @_; - - my $repeatFiltId="Repeat"; - my $iHpolFiltId="iHpol"; - my $baseFiltId="BCNoise"; - my $qFiltId="QSI_ref"; - - open(my $aFH,'>',$acceptFileName) - or errorX("Failed to open file: '$acceptFileName'"); - - my $is_first=1; - for my $ifile (@$ifiles) { - open(my $iFH,'<',"$ifile") - or errorX("Failed to open somatic indel file: '$ifile'"); - - my $iwfile = $ifile . ".window"; - open(my $iwFH,'<',"$iwfile") - or errorX("Failed to open somatic indel window file: '$iwfile'"); - - my @a2; # hold window file data for one line in case we overstep... - - my $vcf = Vcf->new(fh=>$iFH); - - # run some simple header validation on each vcf: - $vcf->parse_header(); - check_vcf_for_sample($vcf,'NORMAL',$ifile); - check_vcf_for_sample($vcf,'TUMOR',$ifile); - - if($is_first) { - # TODO: update all vcf meta-data for chromosome-level filtered files - # - $vcf->remove_header_line(key=>"cmdline"); - $vcf->add_header_line({key=>"cmdline",value=>$cmdline}); - if($isUseDepthFilter) { - $vcf->add_header_line({key=>"maxDepth_$chrom",value=>$depthFilterVal}); - } - $vcf->add_header_line({key=>'FORMAT', ID=>'DP50',Number=>1,Type=>'Float',Description=>'Average tier1 read depth within 50 bases'}); - $vcf->add_header_line({key=>'FORMAT', ID=>'FDP50',Number=>1,Type=>'Float',Description=>'Average tier1 number of basecalls filtered from original read depth within 50 bases'}); - $vcf->add_header_line({key=>'FORMAT', ID=>'SUBDP50',Number=>1,Type=>'Float',Description=>'Average number of reads below tier1 mapping quality threshold aligned across sites within 50 bases'}); - if($isUseDepthFilter) { - add_vcf_filter($vcf,$depthFiltId,"Greater than ${depthFilterMultiple}x chromosomal mean depth in Normal sample"); - } - add_vcf_filter($vcf,$repeatFiltId,"Sequence repeat of more than ${indelMaxRefRepeat}x in the reference sequence"); - add_vcf_filter($vcf,$iHpolFiltId,"Indel overlaps an interupted homopolymer longer than ${indelMaxIntHpolLength}x in the reference sequence"); - add_vcf_filter($vcf,$baseFiltId,"Average fraction of filtered basecalls within 50 bases of the indel exceeds $indelMaxWindowFilteredBasecallFrac"); - add_vcf_filter($vcf,$qFiltId,"Normal sample is not homozygous ref or sindel Q-score < $minQSINT, ie calls with NT!=ref or QSI_NT < $minQSINT"); - print $aFH $vcf->format_header(); - $is_first=0; - } - - while(my $x=$vcf->next_data_hash()) { - $vcf->add_format_field($x,'DP50'); - $vcf->add_format_field($x,'FDP50'); - $vcf->add_format_field($x,'SUBDP50'); - - my $norm=$x->{gtypes}->{NORMAL}; - my $tumr=$x->{gtypes}->{TUMOR}; - - my $chrom=$x->{CHROM}; - my $pos=int($x->{POS}); - - # get matching line from window file: - while((scalar(@a2)<2) or - (($a2[0] le $chrom) and (int($a2[1]) < $pos))) { - updateA2(@a2,$iwFH); - } - unless(scalar(@a2) and ($a2[0] eq $chrom) and (int($a2[1]) == $pos)) - { errorX("Can't find somatic indel window position.\nIndel line: " . $vcf->format_line($x) ); } - - # add window data to vcf record: - # - $norm->{DP50} = $a2[2]+$a2[3]; - $norm->{FDP50} = $a2[3]; - $norm->{SUBDP50} = $a2[4]; - $tumr->{DP50} = $a2[5]+$a2[6]; - $tumr->{FDP50} = $a2[6]; - $tumr->{SUBDP50} = $a2[7]; - - my %filters; - - # normal depth filter: - my $normalDP=$norm->{DP}; - if($isUseDepthFilter) { - $filters{$depthFiltId}=($normalDP > $depthFilterVal); - } - - # ref repeat - my $refRep=$x->{INFO}->{RC}; - $filters{$repeatFiltId}=(defined($refRep) and - ($refRep > $indelMaxRefRepeat)); - - # ihpol - my $iHpol=$x->{INFO}->{IHP}; - $filters{$iHpolFiltId}=(defined($iHpol) and - ($iHpol > $indelMaxIntHpolLength)); - - # base filt: - my $normWinFrac=( $norm->{DP50} ? $norm->{FDP50}/$norm->{DP50} : 0 ); - my $tumrWinFrac=( $tumr->{DP50} ? $tumr->{FDP50}/$tumr->{DP50} : 0 ); - $filters{$baseFiltId}=( ($normWinFrac >= $indelMaxWindowFilteredBasecallFrac) or - ($tumrWinFrac >= $indelMaxWindowFilteredBasecallFrac) ); - - # Q-val filter: - $filters{$qFiltId}=(($x->{INFO}->{NT} ne "ref") or - ($x->{INFO}->{QSI_NT} < $minQSINT)); - - $x->{FILTER} = $vcf->add_filter($x->{FILTER},%filters); - - print $aFH $vcf->format_line($x); - } - - $vcf->close(); - close($iFH); - close($iwFH); - } - - close($aFH); -} - - - -my @ssnvFiles; -my @sindelFiles; -for my $binId (@$binList) { - my $ssnvFile = File::Spec->catfile($chromDir,'bins',$binId,'somatic.snvs.unfiltered.vcf'); - my $sindelFile = File::Spec->catfile($chromDir,'bins',$binId,'somatic.indels.unfiltered.vcf'); - checkFile($ssnvFile,"bin snv file"); - checkFile($sindelFile,"bin indel file"); - push @ssnvFiles,$ssnvFile; - push @sindelFiles,$sindelFile; -} - -my $ssnvOutFile = File::Spec->catfile($chromDir,"somatic.snvs.vcf"); -filterSnvFileList(@ssnvFiles,$filterCoverage,$ssnvOutFile,$isUseDepthFilter); - -my $sindelOutFile = File::Spec->catfile($chromDir,"somatic.indels.vcf"); -filterIndelFileList(@sindelFiles,$filterCoverage,$sindelOutFile,$isUseDepthFilter); - - -1; diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/libexec/strelka2 Binary file strelka2/libexec/strelka2 has changed diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/strelka2.xml --- a/strelka2/strelka2.xml Thu Sep 25 12:02:14 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,126 +0,0 @@ - - - Strelka good interface but no dependencies - strelka_wrapper.py --tumorBam $tumorBam --normalBam $normalBam --refFile $refFile - #if $configuration.configuration_switch == 'Default': - --configFile Default - #else if $configuration.configuration_switch == 'Path': - --configFile $configuration.configFile - #else: - --configFile Custom - --depthFilterMultiple $configuration.depthFilterMultiple - --snvMaxFilteredBasecallFrac $configuration.snvMaxFilteredBasecallFrac - --snvMaxSpanningDeletionFrac $configuration.snvMaxSpanningDeletionFrac - --indelMaxRefRepeat $configuration.indelMaxRefRepeat - --indelMaxWindowFilteredBasecallFrac $configuration.indelMaxWindowFilteredBasecallFrac - --indelMaxIntHpolLength $configuration.indelMaxIntHpolLength - --ssnvPrior $configuration.ssnvPrior - --sindelPrior $configuration.sindelPrior - --ssnvNoise $configuration.ssnvNoise - --sindelNoise $configuration.sindelNoise - --ssnvNoiseStrandBiasFrac $configuration.ssnvNoiseStrandBiasFrac - --minTier1Mapq $configuration.minTier1Mapq - --minTier2Mapq $configuration.minTier2Mapq - --ssnvQuality_LowerBound $configuration.ssnvQuality_LowerBound - --sindelQuality_LowerBound $configuration.sindelQuality_LowerBound - --isWriteRealignedBam $configuration.isWriteRealignedBam - --binSize $configuration.binSize - --isSkipDepthFilters $configuration.isSkipDepthFilters - --maxInputDepth $configuration.maxInputDepth - #if $configuration.extra_arguments.extra_arguments_switch == 'Yes': - --extraStrelkaArguments $configuration.extra_arguments.extraStrelkaArguments - #end if - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - conf_file.conf_file_switch == "Yes" - - - - - - - - - - - - - - - - - -Strelka, a method for somatic SNV and small indel detectipon from sequencing data of matched tumor-normal samples. - - - diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/strelka_config.sample --- a/strelka2/strelka_config.sample Thu Sep 25 12:02:14 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,139 +0,0 @@ - -; -; User configuration options for Strelka somatic small-variant caller -; workflow: -; - -[user] - -; -; isSkipDepthFilters should be set to 1 to skip depth filtration for -; whole exome or other targeted sequencing data -; -isSkipDepthFilters = 1 - -; -; strelka will not accept input reads above this depth (they will be skipped -; until the depth drops below this value). Set this value <= 0 to disable -; this feature. Using this filter will bound memory usage given extremely high -; depth input, but may be problematic in high-depth targeted sequencing -; applications. -; -maxInputDepth = 10000 - -; -; If the depth filter is not skipped, all variants which occur at a -; depth greater than depthFilterMultiple*chromosome mean depth will be -; filtered out. -; -depthFilterMultiple = 3.0 - -; -; Somatic SNV calls are filtered at sites where greater than this -; fraction of basecalls have been removed by the mismatch density -; filter in either sample. -; -snvMaxFilteredBasecallFrac = 0.4 - -; -; Somatic SNV calls are filtered at sites where greater than this -; fraction of overlapping reads contain deletions which span the SNV -; call site. -; -snvMaxSpanningDeletionFrac = 0.75 - -; -; Somatic indel calls are filtered if they represent an expansion or -; contraction of a repeated pattern with a repeat count greater than -; indelMaxRefRepeat in the reference (ie. if indelMaxRefRepeat is 8, -; then the indel is filtered when it is an expansion/contraction of a -; homopolymer longer than 8 bases, a dinucleotide repeat longer than -; 16 bases, etc.) -; -indelMaxRefRepeat = 8 - -; -; Somatic indel calls are filtered if greater than this fraction of -; basecalls in a window extending 50 bases to each side of an indel's -; call position have been removed by the mismatch density filter. -; -indelMaxWindowFilteredBasecallFrac = 0.3 - -; -; Somatic indels are filtered if they overlap ’interrupted -; homopolymers’ greater than this length. The term 'interrupted -; homopolymer' is used to indicate the longest homopolymer which can -; be found intersecting or adjacent to the called indel when a single -; non-homopolymer base is allowed. -; -indelMaxIntHpolLength = 14 - -; -; prior probability of a somatic snv or indel -; -ssnvPrior = 0.000001 -sindelPrior = 0.000001 - -; -; probability of an snv or indel noise allele -; -; NB: in the calling model a noise allele is shared in tumor and -; normal samples, but occurs at any frequency. -; -ssnvNoise = 0.0000005 -sindelNoise = 0.000001 - -; -; Fraction of snv noise attributed to strand-bias. -; -; It is not recommended to change this setting. However, if it is -; essential to turn the strand bias penalization off, the following is -; recommended: -; Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5, -; (1) set ssnvNoiseStrandBiasFrac = 0 -; (2) divide the current ssnvNoise value by 2 -; -ssnvNoiseStrandBiasFrac = 0.5 - -; -; minimum MAPQ score for PE reads at tier1: -; -minTier1Mapq = 20 - -; -; minimum MAPQ score for PE and SE reads at tier2: -; -minTier2Mapq = 5 - -; -; Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are -; marked as filtered: -; -ssnvQuality_LowerBound = 15 - -; -; Somatic quality score (QSI_NT, NT=ref) below which somatic indels -; are marked as filtered: -; -sindelQuality_LowerBound = 30 - -; -; Optionally write out read alignments which were altered during the -; realignment step. At the completion of the workflow run, the -; realigned reads can be found in: -; -; ${ANALYSIS_DIR}/realigned/{normal,tumor}.realigned.bam -; -isWriteRealignedBam = 0 - -; -; Jobs are parallelized over segments of the reference genome no larger -; than this size: -; -binSize = 25000000 - -; -; Additional arguments passed to strelka. -; -extraStrelkaArguments = - diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/strelka_wrapper.py --- a/strelka2/strelka_wrapper.py Thu Sep 25 12:02:14 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,201 +0,0 @@ -#!/usr/bin/env python -#Dan Blankenberg - -""" -A wrapper script for running the GenomeAnalysisTK.jar commands. -""" - -from __future__ import print_function -import sys, argparse, os, tempfile, subprocess, shutil -from binascii import unhexlify -from string import Template -from galaxy import eggs -#import pkg_resources; pkg_resources.require( "bx-python" ) - -#GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is -#GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed -#DEFAULT_GATK_PREFIX = "gatk_file" -#CHUNK_SIZE = 2**20 #1mb -# -# -def cleanup_before_exit( tmp_dir ): - if tmp_dir and os.path.exists( tmp_dir ): - shutil.rmtree( tmp_dir ) - -def _create_config(args, config_path): - conf_file = open(config_path, "w") - conf_file.write("[user]\n") - for option in args: - if not option in ["tumorBam", "normalBam", "refFile", "configFile"] and args[option]!=None: - conf_file.write("%s=%s\n" % (option, args[option])) - conf_file.close() - -def my_Popen(cmd, prefix_for_stderr_name, tmp_dir, msg_error): - stderr_name = tempfile.NamedTemporaryFile( prefix = prefix_for_stderr_name ).name - proc = subprocess.Popen( args=cmd, shell=True, stderr=open( stderr_name, 'wb' ) ) - return_code = proc.wait() - if return_code: - for line in open( stderr_name ): - print(line, file=sys.stderr) - os.unlink( stderr_name ) #clean up - cleanup_before_exit( tmp_dir ) - raise Exception( msg_error ) - else: - os.unlink( stderr_name ) - -def index_bam_files( bam_filenames, tmp_dir ): - for bam_filename in bam_filenames: - bam_index_filename = "%s.bai" % bam_filename - print("bam_filename is: " + bam_filename + " bam_index_filename is: " + bam_index_filename + " test is: %s" % os.path.exists(bam_index_filename)) - if not os.path.exists( bam_index_filename ): - #need to index this bam file - command = 'samtools index %s %s' % ( bam_filename, bam_index_filename ) - my_Popen( command, "bam_index_stderr", tmp_dir, "Error during indexation of fasta file :" + bam_filename) - -def index_fasta_files( fasta_filenames, tmp_dir ): - for fasta_filename in fasta_filenames: - fasta_index_filename = "%s.fai" % fasta_filename - print("fasta_filename is: " + fasta_filename + " fasta_index_filename is: " + fasta_index_filename + " test is: %s" % os.path.exists(fasta_index_filename)) - if not os.path.exists( fasta_index_filename ): - #need to index this bam file - command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename ) - my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename) - -def __main__(): - #Parse Command Line OPTPARSE DEPRECIATED USE ARGPARSE INSTEAD - #MKTEMP DEPRECIATED USE MKDTlizations#EMP INSTEAD - - root_dir= "/home/galaxyusr/data/galaxy_dist/tools/strelka2" - expected_dir="for_tests" - job_dir=os.getcwd() - analysis_dir=job_dir + "/StrelkaAnalysis" - config_script=root_dir + "/configureStrelkaWorkflow.pl" - tmp_dir = "tmp" #tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' ) - config_ini = "%s/config.ini" % (tmp_dir) - - print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" + config_ini) - - #manage parsing - parser = argparse.ArgumentParser() - parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False ) - parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False ) - parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False ) - parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False ) - parser.add_argument( '--depthFilterMultiple', help='path to tumor bam file', required = False ) - parser.add_argument( '--snvMaxFilteredBasecallFrac', help='path to tumor bam file', required = False ) - parser.add_argument( '--snvMaxSpanningDeletionFrac', help='path to tumor bam file', required = False ) - parser.add_argument( '--indelMaxRefRepeat', help='path to tumor bam file', required = False ) - parser.add_argument( '--indelMaxWindowFilteredBasecallFrac', help='path to tumor bam file', required = False ) - parser.add_argument( '--indelMaxIntHpolLength', help='path to tumor bam file', required = False ) - parser.add_argument( '--ssnvPrior', help='path to tumor bam file', required = False ) - parser.add_argument( '--sindelPrior', help='path to tumor bam file', required = False ) - parser.add_argument( '--ssnvNoise', help='path to tumor bam file', required = False ) - parser.add_argument( '--sindelNoise', help='path to tumor bam file', required = False ) - parser.add_argument( '--ssnvNoiseStrandBiasFrac', help='path to tumor bam file', required = False ) - parser.add_argument( '--minTier1Mapq', help='path to tumor bam file', required = False ) - parser.add_argument( '--minTier2Mapq', help='path to tumor bam file', required = False ) - parser.add_argument( '--ssnvQuality_LowerBound', help='path to tumor bam file', required = False ) - parser.add_argument( '--sindelQuality_LowerBound', help='path to tumor bam file', required = False ) - parser.add_argument( '--isWriteRealignedBam', help='path to tumor bam file', required = False ) - parser.add_argument( '--binSize', help='path to tumor bam file', required = False ) - parser.add_argument( '--extraStrelkaArguments', help='path to tumor bam file', required = False ) - parser.add_argument( '--isSkipDepthFilters', help='path to tumor bam file', required = False ) - parser.add_argument( '--maxInputDepth', help='path to tumor bam file', required = False ) - args = parser.parse_args() - - #verifying eveything's ok - if not os.path.isfile(config_script): - sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'") - print("configuring...", file=sys.stdout) - if os.path.exists(analysis_dir): - sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen") - - - # creating index if needed - os.environ['PATH']= root_dir + "/opt/samtools:" + os.environ['PATH'] - bam_filenames = [ args.tumorBam, args.normalBam ] - index_bam_files( bam_filenames, tmp_dir ) - fasta_files = [ args.refFile ] - index_fasta_files( fasta_files, tmp_dir ) - - #creating config file if needed - if args.configFile == "Custom": - _create_config(vars(args), config_ini) - elif args.configFile == "Default": - cmdbash="cp %s %s" % (root_dir + "/strelka_config.sample", config_ini) - my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") - else: - if not os.path.exists(args.configFile): - print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr) - cmdbash="cp %s %s" % (args.configFile, config_ini) - my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") - - - - - #configuration of workflow - cmd="%s --tumor=%s --normal=%s --ref=%s --config=%s --output-dir=%s" % (config_script, args.tumorBam, args.normalBam, args.refFile, config_ini, analysis_dir) - print( "**** Starting configuration.") - print( "**** Configuration cmd: '" + cmd + "'") - my_Popen( cmd, "cinfugation_stderr", tmp_dir, "Error during configuration !") - print("completed configuration") - - #run the workflow ! - cmd="make -C " + analysis_dir - print("**** starting workflow.") - print("**** workflow cmd: '" + cmd + "'") - my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !") - print("**** completed workflow execution") - - - - - - - - - - - - - - -#bam_filenames = [] -# if options.datasets: -# for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets: -# gatk_filename = filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )#return the link to the dataset that has been created in the function -# if dataset_arg: -# cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename ) -# if galaxy_ext == "bam": -# bam_filenames.append( gatk_filename ) -# #set up stdout and stderr output options -# stdout = open_file_from_option( options.stdout, mode = 'wb' ) -# stderr = open_file_from_option( options.stderr, mode = 'wb' ) -# #if no stderr file is specified, we'll use our own -# if stderr is None: -# stderr = tempfile.NamedTemporaryFile( prefix="strelka-stderr-", dir=tmp_dir ) -# -# proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir ) -# return_code = proc.wait() -# -# if return_code: -# stderr_target = sys.stderr -# else: -# stderr_target = sys.stdout -# stderr.flush() -# stderr.seek(0) -# while True: -# chunk = stderr.read( CHUNK_SIZE ) -# if chunk: -# stderr_target.write( chunk ) -# else: -# break -# stderr.close() -# #generate html reports -# if options.html_report_from_directory: -# for ( html_filename, html_dir ) in options.html_report_from_directory: -# html_report_from_directory( open( html_filename, 'wb' ), html_dir ) -# -# cleanup_before_exit( tmp_dir ) - -if __name__=="__main__": __main__() diff -r f48854499d41 -r 7c35d3efd1e7 strelka2/tool_dependencies.xml --- a/strelka2/tool_dependencies.xml Thu Sep 25 12:02:14 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ - - - - - - - - -