changeset 0:7a9f20ca4ad5

Uploaded
author mini
date Thu, 25 Sep 2014 11:59:08 -0400
parents
children f48854499d41
files strelka2/.configureStrelkaWorkflow.pl.swp strelka2/.strelka.xml.swp strelka2/.strelka_wrapper.bash.swo strelka2/.strelka_wrapper.py.swp strelka2/configureStrelkaWorkflow.pl strelka2/lib/.Utils.pm.swp strelka2/lib/Utils.pm strelka2/libexec/callSomaticVariants.pl strelka2/libexec/consolidateResults.pl strelka2/libexec/countFastaBases strelka2/libexec/filterSomaticVariants.pl strelka2/libexec/strelka2 strelka2/strelka.xml strelka2/strelka2.xml strelka2/strelka_config.sample strelka2/strelka_wrapper.py strelka2/tool_dependencies.xml
diffstat 17 files changed, 2198 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file strelka2/.configureStrelkaWorkflow.pl.swp has changed
Binary file strelka2/.strelka.xml.swp has changed
Binary file strelka2/.strelka_wrapper.bash.swo has changed
Binary file strelka2/.strelka_wrapper.py.swp has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka2/configureStrelkaWorkflow.pl	Thu Sep 25 11:59:08 2014 -0400
@@ -0,0 +1,564 @@
+#!/usr/bin/env perl
+
+=head1 LICENSE
+
+Strelka Workflow Software
+Copyright (c) 2009-2013 Illumina, Inc.
+
+This software is provided under the terms and conditions of the
+Illumina Open Source Software License 1.
+
+You should have received a copy of the Illumina Open Source
+Software License 1 along with this program. If not, see
+<https://github.com/downloads/sequencing/licenses/>.
+
+=head1 SYNOPSIS
+
+configureStrelkaWorkflow.pl --tumor FILE --normal FILE --ref FILE --config FILE [options]
+
+This script configures the strelka workflow for somatic variant
+calling on matched tumor-normal BAM files. The configuration process
+will produce an analysis makefile and directory structure. The
+makefile can be used to run the analysis on a workstation or compute
+cluster via make/qmake or other makefile compatible process.
+
+=head1 ARGUMENTS
+
+=over 4
+
+=item --tumor FILE
+
+Path to tumor sample BAM file (required)
+
+=item --normal FILE
+
+Path to normal sample BAM file (required)
+
+=item --ref FILE
+
+Path to reference genome fasta (required)
+
+=item --config FILE
+
+Strelka configuration file. Default config files can be found in
+${STRELKA_INSTALL_ROOT}/etc/ for both ELAND and BWA alignments. (required)
+
+=back
+
+=head1 OPTIONS
+
+=over 4
+
+=item --output-dir DIRECTORY
+
+Root of the analysis directory. This script will place all
+configuration files in the analysis directory, and after configuration
+all results and intermediate files will be written within the analysis
+directory during a run. This directory must not already
+exist. (default: ./strelkaAnalysis)
+
+=back
+
+=cut
+
+use warnings FATAL => 'all';
+use strict;
+
+use Carp;
+$SIG{__DIE__} = \&Carp::confess;
+
+use Cwd qw(getcwd);
+use File::Spec;
+use Getopt::Long;
+use Pod::Usage;
+
+my $baseDir;
+my $libDir;
+BEGIN {
+    my $thisDir=(File::Spec->splitpath($0))[1];
+    $baseDir=$thisDir;#$baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
+    $libDir=File::Spec->catdir($baseDir,'lib');
+}
+use lib $libDir;
+use Utils;
+
+if(getAbsPath($baseDir)) {
+    errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
+}
+my $libexecDir=File::Spec->catdir($baseDir,'libexec');
+#my $optDir=File::Spec->catdir($baseDir,'opt');
+
+
+my $scriptName=(File::Spec->splitpath($0))[2];
+my $argCount=scalar(@ARGV);
+my $cmdline = join(' ',$0,@ARGV);
+
+
+sub usage() { pod2usage(-verbose => 1,
+                        -exitval => 2); }
+
+#
+# user configuration:
+#
+
+my ($tumorBam, $normalBam, $refFile, $configFile, $outDir);
+my $help;
+
+GetOptions( "tumor=s" => \$tumorBam,
+            "normal=s" => \$normalBam,
+            "ref=s" => \$refFile,
+            "config=s" => \$configFile,
+            "output-dir=s" => \$outDir,
+            "help|h" => \$help) || usage();
+
+usage() if($help);
+usage() unless($argCount);
+
+
+#
+# Validate input conditions:
+#
+
+sub checkFileArg($$) {
+   my ($file,$label) = @_;
+
+   errorX("Must specify $label file") unless(defined($file)); #raise an error if file not defined
+   checkFile($file,$label);
+}
+
+checkFileArg($tumorBam,"tumor BAM");
+checkFileArg($normalBam,"normal BAM");
+checkFileArg($refFile,"reference fasta");
+checkFileArg($configFile,"configuration ini");
+
+sub makeAbsoluteFilePaths(\$) {
+    my ($filePathRef) = @_;
+
+    my ($v,$fileDir,$fileName) = File::Spec->splitpath($$filePathRef);
+    if(getAbsPath($fileDir)) {
+        errorX("Can't resolve directory path for '$fileDir' from input file argument: '$$filePathRef'");
+    }
+    $$filePathRef = File::Spec->catfile($fileDir,$fileName);
+}
+
+makeAbsoluteFilePaths($tumorBam);
+makeAbsoluteFilePaths($normalBam);
+makeAbsoluteFilePaths($refFile);
+makeAbsoluteFilePaths($configFile);
+
+# also check for BAM index files:
+sub checkBamIndex($) {
+    my ($file) = @_;
+    my $ifile = $file . ".bai";
+    if(! -f $ifile) {
+        errorX("Can't find index for BAM file '$file'");
+    }
+}
+
+checkBamIndex($tumorBam);
+checkBamIndex($normalBam);
+
+
+sub checkFaIndex($) {
+    my ($file) = @_;
+    my $ifile = $file . ".fai";
+    if(! -f $ifile) {
+        errorX("Can't find index for fasta file '$file'");
+    }
+    # check that fai file isn't improperly formatted (a la the GATK bundle NCBI 37 fai files)
+    open(my $FH,"< $ifile") || errorX("Can't open fai file '$ifile'");
+    my $lineno=1;
+    while(<$FH>) {
+          chomp;
+          my @F=split();
+          if(scalar(@F) != 5) {
+              errorX("Unexpected format for line number '$lineno' of fasta index file: '$ifile'\n\tRe-running fasta indexing may fix the issue. To do so, run: \"samtools faidx $file\"");
+          }
+          $lineno++;
+    }
+    close($FH);
+}
+
+checkFaIndex($refFile);
+
+
+if(defined($outDir)) {
+    if(getAbsPath($outDir)) {
+        errorX("Can't resolve path for ouput directory: '$outDir'");
+    }
+} else {
+    $outDir=File::Spec->catdir(Cwd::getcwd(),'strelkaAnalysis');
+}
+
+if(-e $outDir) {
+    errorX("Output path already exists: '$outDir'");
+}
+
+if(getAbsPath($baseDir)) {
+    errorX("Can't resolve path for strelka install directory: '$baseDir'");
+}
+
+		#my $samtoolsDir = File::Spec->catdir($optDir,'samtools');
+checkDir($libexecDir,"strelka libexec");
+		#checkDir($samtoolsDir,"samtools");
+
+my $callScriptName = "callSomaticVariants.pl";
+my $filterScriptName = "filterSomaticVariants.pl";
+my $finishScriptName = "consolidateResults.pl";
+my $callScript = File::Spec->catfile($libexecDir,$callScriptName);
+my $filterScript = File::Spec->catfile($libexecDir,$filterScriptName);
+my $finishScript = File::Spec->catfile($libexecDir,$finishScriptName);
+my $countFasta = File::Spec->catfile($libexecDir,"countFastaBases");
+		#my $samtoolsBin = File::Spec->catfile($samtoolsDir,"samtools");
+checkFile($callScript,"somatic variant call script");
+checkFile($filterScript,"somatic variant filter script");
+checkFile($finishScript,"result consolidation script");
+checkFile($countFasta,"fasta scanner");
+#checkFile($samtoolsBin,"samtools");
+
+#
+# Configure bin runs:
+#
+checkMakeDir($outDir);
+
+#
+# Configure bin runs: open and validate config ini
+#
+my $config = parseConfigIni($configFile);
+
+sub checkConfigKeys($) {
+    my ($keyref) = @_;
+    for (@$keyref) {
+        errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
+    }
+}
+
+# these are the keys we need at configuration time:
+my @config_keys = qw(binSize);
+
+# these are additional keys we will need to run the workflow:
+# (note we don't check for (maxInputDepth,minTier2Mapq) for back compatibility with older config files)
+my @workflow_keys = qw(
+minTier1Mapq isWriteRealignedBam ssnvPrior sindelPrior ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac
+ssnvQuality_LowerBound sindelQuality_LowerBound isSkipDepthFilters depthFilterMultiple
+snvMaxFilteredBasecallFrac snvMaxSpanningDeletionFrac indelMaxRefRepeat
+indelMaxWindowFilteredBasecallFrac indelMaxIntHpolLength);
+
+checkConfigKeys(\@config_keys);
+checkConfigKeys(\@workflow_keys);
+
+
+my $binSize = int($config->{user}{binSize});
+
+$config->{derived}{configurationCmdline} = $cmdline;
+$config->{derived}{normalBam} = $normalBam;
+$config->{derived}{tumorBam} = $tumorBam;
+$config->{derived}{refFile} = $refFile;
+$config->{derived}{outDir} = $outDir;
+
+#
+# Configure bin runs: check for consistent chrom info between BAMs and reference
+#
+sub getBamChromInfo($) {
+    my $file = shift;
+    my $cmd = "samtools view -H $file |";
+    open(my $FH,$cmd) || errorX("Can't open process $cmd");
+
+    my %info;
+    my $n=0;
+    while(<$FH>) {
+        next unless(/^\@SQ/);
+        chomp;
+        my @F = split(/\t/);
+        scalar(@F) >= 3 || errorX("Unexpected bam header for file '$file'");
+
+        my %h = ();
+        foreach (@F) {
+            my @vals = split(':');
+            $h{$vals[0]} = $vals[1];
+        }
+        $F[1] = $h{'SN'};
+        $F[2] = $h{'LN'};
+
+        my $size = int($F[2]);
+        ($size > 0) || errorX("Unexpected chromosome size '$size' in bam header for file '$file'");
+        $info{$F[1]}{size} = $size;
+        $info{$F[1]}{order} = $n;
+        $n++;
+    }
+    close($FH) || errorX("Can't close process $cmd");
+    return %info;
+}
+
+
+my %chromInfo = getBamChromInfo($normalBam);
+my @chroms = sort { $chromInfo{$a}{order} <=> $chromInfo{$b}{order} } (keys(%chromInfo));
+
+{
+    #consistency check:
+    my %tumorChromInfo = getBamChromInfo($tumorBam);
+    for my $chrom (@chroms) {
+        my $ln = $chromInfo{$chrom}{size};
+        my $tln = $tumorChromInfo{$chrom}{size};
+        my $order = $chromInfo{$chrom}{order};
+        my $torder = $tumorChromInfo{$chrom}{order};
+        unless(defined($tln) && ($tln==$ln) && ($torder==$order)) {
+            errorX("Tumor and normal BAM file headers disagree on chromosome: '$chrom'");
+        }
+        delete $tumorChromInfo{$chrom};
+    }
+    for my $chrom (keys(%tumorChromInfo)) {
+        errorX("Tumor and normal BAM file headers disagree on chromosome: '$chrom'");
+    }
+}
+
+
+my %refChromInfo;
+logX("Scanning reference genome");
+{
+    my $knownGenomeSize=0;
+    my $cmd="$countFasta $refFile |";
+    open(my $FFH,$cmd) || errorX("Failed to open process '$cmd'");
+
+    while(<$FFH>) {
+        chomp;
+        my @F = split(/\t/);
+        scalar(@F) == 4 || errorX("Unexpected value from '$cmd'");
+        $knownGenomeSize += int($F[2]);
+        $refChromInfo{$F[1]}{knownSize} = int($F[2]);
+        $refChromInfo{$F[1]}{size} = int($F[3]);
+    }
+    close($FFH) || errorX("Failed to close process '$cmd'");
+
+    #consistency check:
+    for my $chrom (@chroms) {
+        my $ln = $chromInfo{$chrom}{size};
+        my $rln = $refChromInfo{$chrom}{size};
+        unless(defined($rln) && ($rln==$ln)) {
+            errorX("BAM headers and reference fasta disagree on chromosome: '$chrom'");
+        }
+        $config->{derived}{"chrom_${chrom}_size"} = $rln;
+        $config->{derived}{"chrom_${chrom}_knownSize"} = $refChromInfo{$chrom}{knownSize};
+    }
+    $config->{derived}{chromOrder} = join("\t",@chroms);
+
+    $config->{derived}{knownGenomeSize} = $knownGenomeSize;
+}
+logX("Scanning reference genome complete");
+
+
+
+#
+# Configure bin runs: create directory structure
+#
+my $resultsDir = File::Spec->catdir($outDir,'results');
+checkMakeDir($resultsDir);
+if($config->{user}{isWriteRealignedBam}) {
+    my $bamDir = File::Spec->catdir($outDir,'realigned');
+    checkMakeDir($bamDir);
+}
+my $chromRootDir = File::Spec->catdir($outDir,'chromosomes');
+checkMakeDir($chromRootDir);
+for my $chrom (@chroms) {
+    my $chromDir = File::Spec->catdir($chromRootDir,$chrom);
+    checkMakeDir($chromDir);
+
+    my $chromRef = $chromInfo{$chrom};
+    $chromRef->{dir} = $chromDir;
+    $chromRef->{binList} = getBinList($chromRef->{size},$binSize);
+
+    my $binRootDir = File::Spec->catdir($chromDir,'bins');
+    checkMakeDir($binRootDir);
+
+    for my $binId ( @{$chromRef->{binList}} ) {
+        my $binDir = File::Spec->catdir($binRootDir,$binId);
+        checkMakeDir($binDir);
+    }
+}
+
+
+
+#
+# write run config file:
+#
+my $runConfigFile;
+{
+    my $cstr = <<END;
+;
+; Strelka workflow configuration file
+;
+; This is an automatically generated file, you probably don't want to edit it. If starting a new run,
+; input configuration templates (with comments) can be found in the Strelka etc/ directory.
+;
+END
+
+    $cstr .= writeConfigIni($config);
+
+    my $configDir = File::Spec->catdir($outDir,'config');
+    checkMakeDir($configDir);
+    $runConfigFile = File::Spec->catdir($configDir,'run.config.ini');
+    open(my $FH,"> $runConfigFile") || errorX("Can't open file '$runConfigFile'");
+    print $FH $cstr;
+    close($FH);
+}
+
+
+
+#
+# create makefile
+#
+my $makeFile = File::Spec->catfile($outDir,"Makefile");
+open(my $MAKEFH, "> $makeFile") || errorX("Can't open file: '$makeFile'");
+
+my $completeFile = "task.complete";
+
+print $MAKEFH <<ENDE;
+# This makefile was automatically generated by $scriptName
+#
+# Please do not edit.
+
+script_dir := $libexecDir
+call_script := \$(script_dir)/$callScriptName
+filter_script := \$(script_dir)/$filterScriptName
+finish_script := \$(script_dir)/$finishScriptName
+
+config_file := $runConfigFile
+
+analysis_dir := $outDir
+results_dir := \$(analysis_dir)/results
+
+ENDE
+
+print $MAKEFH <<'ENDE';
+
+complete_tag := task.complete
+
+finish_task := $(analysis_dir)/$(complete_tag)
+
+get_chrom_dir = $(analysis_dir)/chromosomes/$1
+get_chrom_task = $(call get_chrom_dir,$1)/$(complete_tag)
+get_bin_task = $(call get_chrom_dir,$1)/bins/$2/$(complete_tag)
+
+
+
+all: $(finish_task)
+	@$(print_success)
+
+
+define print_success
+echo;\
+echo Analysis complete. Final somatic calls can be found in $(results_dir);\
+echo
+endef
+
+
+# top level results target:
+#
+$(finish_task):
+	$(finish_script) --config=$(config_file) && touch $@
+
+
+# chromosome targets:
+#
+ENDE
+
+for my $chrom (@chroms) {
+
+    print $MAKEFH <<ENDE;
+chrom_${chrom}_task := \$(call get_chrom_task,$chrom)
+\$(finish_task): \$(chrom_${chrom}_task)
+\$(chrom_${chrom}_task):
+	\$(filter_script) --config=\$(config_file) --chrom=$chrom && touch \$@
+
+ENDE
+
+}
+
+print $MAKEFH <<ENDE;
+
+# chromosome bin targets:
+#
+ENDE
+
+for my $chrom (@chroms) {
+    for my $bin (@{$chromInfo{$chrom}{binList}}) {
+
+print $MAKEFH <<ENDE;
+chrom_${chrom}_bin_${bin}_task := \$(call get_bin_task,$chrom,$bin)
+\$(chrom_${chrom}_task): \$(chrom_${chrom}_bin_${bin}_task)
+\$(chrom_${chrom}_bin_${bin}_task):
+	\$(call_script) --config=\$(config_file) --chrom=$chrom --bin=$bin && touch \$@
+
+ENDE
+
+    }
+}
+
+
+# If the eval function is available, this is the way we could finish
+# the makefile without being so verbose but it doesn't look like qmake
+# understands this function.
+
+=cut
+
+print $MAKEFH <<ENDE;
+
+chroms := @chroms
+
+ENDE
+
+for my $chrom (@chroms) {
+    print $MAKEFH "${chrom}_bins := " . join(" ",@{$chromInfo{$chrom}{binList}}) . "\n";
+}
+
+print $MAKEFH <<'ENDE';
+
+define chrom_task_template
+chrom_$1_task := $(call get_chrom_task,$1)
+$(finish_task): $$(chrom_$1_task)
+$$(chrom_$1_task):
+	$$(filter_script) --config=$$(config_file) --chrom=$1 && touch $$@
+endef
+
+$(foreach c,$(chroms),$(eval $(call chrom_task_template,$c)))
+
+
+# chromosome bin targets:
+#
+define chrom_bin_task_template
+chrom_$1_bin_$2_task := $(call get_bin_task,$1,$2)
+$$(chrom_$1_task): $$(chrom_$1_bin_$2_task)
+$$(chrom_$1_bin_$2_task):
+	$$(call_script) --config=$$(config_file) --chrom=$1 --bin=$2 && touch $$@
+endef
+
+$(foreach c,$(chroms), \
+    $(foreach b,$($c_bins),$(eval $(call chrom_bin_task_template,$c,$b))) \
+ )
+
+ENDE
+
+=cut
+
+
+
+print <<END;
+
+
+Successfully configured analysis and created makefile '$makeFile'.
+
+To run the analysis locally using make, run:
+
+make -C $outDir
+
+...or:
+
+cd $outDir
+make
+
+END
+
+1;
+
+__END__
+
Binary file strelka2/lib/.Utils.pm.swp has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka2/lib/Utils.pm	Thu Sep 25 11:59:08 2014 -0400
@@ -0,0 +1,172 @@
+
+=head1 LICENSE
+
+Strelka Workflow Software
+Copyright (c) 2009-2013 Illumina, Inc.
+
+This software is provided under the terms and conditions of the
+Illumina Open Source Software License 1.
+
+You should have received a copy of the Illumina Open Source
+Software License 1 along with this program. If not, see
+<https://github.com/downloads/sequencing/licenses/>.
+
+=cut
+
+
+package Utils;
+
+use base 'Exporter';
+
+our @EXPORT = qw(
+        errorX logX executeCmd checkFile checkDir checkMove
+        getAbsPath checkMakeDir getBinList
+        parseConfigIni writeConfigIni
+    );
+
+use warnings FATAL => 'all';
+use strict;
+
+use Carp;
+use Cwd qw(realpath);
+use File::Copy qw(move);
+use File::Path qw(mkpath);
+
+
+sub errorX($) {
+    confess "\nERROR: " . $_[0] . "\n\n";
+}
+
+sub logX($) {
+    print STDERR "INFO: " . $_[0] . "\n";
+}
+
+
+sub executeCmd($;$) {
+    my $cmd = shift;
+    my $isVerbose = shift;
+
+    logX("Running: '$cmd'") if(defined($isVerbose) and $isVerbose);
+    system($cmd) == 0
+      or errorX("Failed system call: '$cmd'");
+}
+#return an error if file does not exist 
+sub checkFile($;$) {
+    my $file = shift;
+    return if(-f $file);
+    my $label = shift;
+    errorX("Can't find" . (defined($label) ? " $label" : "") . " file: '$file'");
+}
+#return an error if file does not Exist
+sub checkDir($;$) {
+    my $dir = shift;
+    return if(-d $dir);
+    my $label = shift;
+    errorX("Can't find" . (defined($label) ? " $label" : "") . " directory: '$dir'");
+}
+
+sub checkMove($$) {
+    my ($old,$new) = @_;
+    move($old,$new) || errorX("File move failed: $!\n\tAttempting to move '$old' to '$new'");
+}
+
+
+
+
+=item getAbsPath($path)
+
+This procedure attempts to convert a path provided by the user on the
+command line into an absolute path. It should be able to handle "~"
+paths and conventional relative paths using ".." or ".". Resolution of
+links should follow the convention of "Cwd::realpath".
+
+B<Parameters:>
+
+    $dirRef         - path (converted to absolute path in place)
+
+B<Returns:>
+
+    returns zero if successful, non-zero otherwise.
+
+=cut
+sub getAbsPath(\$) {
+    my ($dirRef) = @_;
+    my @tmp=glob($$dirRef);
+    return 1 if(scalar(@tmp) != 1);
+    my $ret = Cwd::realpath($tmp[0]);
+    return 1 if !$ret && !($ret = File::Spec->rel2abs($tmp[0]));
+    $$dirRef = $ret;
+    return 0;
+}
+
+
+#verify path is not a file, then create a directory with this name if does not exist
+sub checkMakeDir($) {
+    my $dir = shift;
+    unless (-e $dir) {
+        File::Path::mkpath($dir) || errorX("Can't create directory '$dir'");
+    } else {
+        errorX("Path is not a directory '$dir'\n") unless(-d $dir);
+    }
+}
+
+
+
+sub getBinList($$) {
+    my ($chromSize,$binSize) = @_;
+
+    my $nm1 = (($chromSize-1) / $binSize);
+    return [ map {sprintf("%04i",$_)} (0..$nm1) ];
+}
+
+
+
+sub parseConfigError($$) {
+    my ($file,$line) = @_;
+    errorX("Config file '$file' contains unexpected line '$line'\n");
+}
+
+#lis le fichier de config, si la ligne est de type : some space + [ some character ] + some space then register ther character in $section. (   [user]   , then $section=user).   
+sub parseConfigIni($) {
+    my $file = shift;
+    my %config;
+    open(my $FH,"< $file") || errorX("Can't open config file '$file'");
+    my $section = "noSection";
+    while(<$FH>) {
+        next if(/^[;#]/);
+        next if(/^\s*$/);
+        chomp;
+        my $line=$_;
+        my @ncl = split(/[;#]/);
+        next unless(scalar(@ncl));
+        my $nc = $ncl[0];
+        if($nc =~ /^\s*\[([^\]]*)\]\s*$/) {
+            $section = $1;
+            next;
+        }
+        my ($key,$val) = map { s/^\s+//; s/\s+$//; $_ } split(/=/,$nc,2);
+        unless(defined($key) && defined($val) && ($key ne "")) { parseConfigError($file,$line); }
+
+        $config{$section}{$key} = $val;
+    }
+    close($FH);
+    return \%config;
+}
+
+
+# minimal ini stringifier:
+#
+sub writeConfigIni($) {
+    my $config = shift;
+    my $val = "";
+    for my $section (sort(keys(%$config))) {
+        $val .= "\n[$section]\n";
+        for my $key (sort(keys(%{$config->{$section}}))) {
+            $val .= "$key = " . $config->{$section}{$key} . "\n";
+        }
+    }
+    return $val;
+}
+
+
+1;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka2/libexec/callSomaticVariants.pl	Thu Sep 25 11:59:08 2014 -0400
@@ -0,0 +1,241 @@
+#!/usr/bin/env perl
+print "\n";
+print @INC;
+print "\n";
+=head1 LICENSE
+
+Strelka Workflow Software
+Copyright (c) 2009-2013 Illumina, Inc.
+
+This software is provided under the terms and conditions of the
+Illumina Open Source Software License 1.
+
+You should have received a copy of the Illumina Open Source
+Software License 1 along with this program. If not, see
+<https://github.com/downloads/sequencing/licenses/>.
+
+=head1 SYNOPSIS
+
+callSomaticVariants.pl [options] | --help
+
+=head2 SUMMARY
+
+Run the somatic variant caller for snvs and indels on a single
+chromosome bin.
+
+=cut
+
+use warnings FATAL => 'all';
+use strict;
+
+use Carp;
+$SIG{__DIE__} = \&Carp::confess;
+
+use File::Spec;
+use Getopt::Long;
+use Pod::Usage;
+
+my $baseDir;
+my $libDir;
+BEGIN {
+print "\n";
+print @INC;
+print "\n";
+
+    my $thisDir=(File::Spec->splitpath($0))[1];
+    $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
+    $libDir=File::Spec->catdir($baseDir,'lib');
+}
+use lib $libDir;
+use Utils;
+
+if(getAbsPath($baseDir)) {
+    errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
+}
+my $libexecDir=File::Spec->catdir($baseDir,'libexec');
+#my $optDir=File::Spec->catdir($baseDir,'opt');
+
+
+my $scriptName=(File::Spec->splitpath($0))[2];
+my $argCount=scalar(@ARGV);
+my $cmdline = join(' ',$0,@ARGV);
+
+
+my ($chrom, $binId, $configFile);
+my $help;
+
+GetOptions(
+            "chrom=s" => \$chrom,
+            "bin=s" => \$binId,
+            "config=s" => \$configFile,
+            "help|h" => \$help) or pod2usage(2);
+
+pod2usage(2) if($help);
+pod2usage(2) unless(defined($chrom));
+pod2usage(2) unless(defined($binId));
+pod2usage(2) unless(defined($configFile));
+
+
+
+#
+# check all fixed paths (not based on commandline arguments):
+#
+checkDir($baseDir);
+checkDir($libexecDir);
+#checkDir($optDir);
+
+my $strelkaBin=File::Spec->catdir($libexecDir,'strelka2');
+checkFile($strelkaBin,"strelka binary");
+#my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools');
+#checkFile($samtoolsBin,"samtools binary");
+
+
+
+#
+# read config and validate values
+#
+checkFile($configFile,"configuration ini");
+my $config  = parseConfigIni($configFile);
+
+for (qw(knownGenomeSize tumorBam normalBam refFile outDir)) {
+    errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
+}
+
+# we skip maxInputDepth,minTier2Mapq here to allow older config files
+for (qw(minTier1Mapq isWriteRealignedBam binSize ssnvPrior sindelPrior
+        ssnvNoise sindelNoise ssnvNoiseStrandBiasFrac)) {
+    errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
+}
+
+my $outDir = $config->{derived}{outDir};
+my $binDir = File::Spec->catdir($outDir,'chromosomes',$chrom,'bins',$binId);
+checkDir($outDir,"output");
+checkDir($binDir,"output bin");
+
+
+my $tumorBam = $config->{derived}{tumorBam};
+my $normalBam = $config->{derived}{normalBam};
+my $refFile = $config->{derived}{refFile};
+checkFile($tumorBam,"tumor BAM");
+checkFile($normalBam,"normal BAM");
+checkFile($refFile,"reference");
+
+
+# pull out some config options for convenience:
+my $binSize=$config->{user}{binSize};
+my $isWriteRealignedBam=$config->{user}{isWriteRealignedBam};
+my $knownGenomeSize = $config->{derived}{knownGenomeSize};
+
+
+my $begin = (int($binId)*$binSize)+1;
+my $end = ((int($binId)+1)*$binSize);
+#my $end = $begin+100000;  #debug mode
+
+my $useroptions = $config->{user};
+
+# set previous default value if an older config file is being used:
+if(! defined($useroptions->{minTier2Mapq})) {
+    $useroptions->{minTier2Mapq} = 5;
+}
+
+
+#
+# setup the strelka command-line:
+#
+my $strelka_base_opts= "-clobber" .
+" -filter-unanchored" .
+" -min-paired-align-score " . $useroptions->{minTier1Mapq} .
+" -min-single-align-score 10" .
+" -min-qscore 0" .
+" -report-range-begin $begin -report-range-end $end" .
+" -samtools-reference '$refFile'" .
+" -max-window-mismatch 3 20 -print-used-allele-counts" .
+" -bam-seq-name '" . $chrom . "'" .
+" -genome-size $knownGenomeSize" .
+" -max-indel-size 50" .
+" -indel-nonsite-match-prob 0.5" .
+" --min-contig-open-end-support 35" .
+" --somatic-snv-rate " . $useroptions->{ssnvPrior} .
+" --shared-site-error-rate " . $useroptions->{ssnvNoise} .
+" --shared-site-error-strand-bias-fraction " . $useroptions->{ssnvNoiseStrandBiasFrac} .
+" --somatic-indel-rate " . $useroptions->{sindelPrior} .
+" --shared-indel-error-rate " . $useroptions->{sindelNoise} .
+" --tier2-min-single-align-score " . $useroptions->{minTier2Mapq} .
+" --tier2-min-paired-align-score " . $useroptions->{minTier2Mapq} .
+" --tier2-single-align-score-rescue-mode" .
+" --tier2-mismatch-density-filter-count 10" .
+" --tier2-no-filter-unanchored" .
+" --tier2-indel-nonsite-match-prob 0.25" .
+" --tier2-include-singleton" .
+" --tier2-include-anomalous";
+
+
+my $somSnvFile='somatic.snvs.unfiltered.vcf';
+my $somIndelFile='somatic.indels.unfiltered.vcf';
+
+my $cmd =  "$strelkaBin $strelka_base_opts" .
+" -bam-file " . $normalBam .
+" --tumor-bam-file " . $tumorBam .
+" --somatic-snv-file " . File::Spec->catfile($binDir,$somSnvFile) .
+" --somatic-indel-file " . File::Spec->catfile($binDir,$somIndelFile) .
+" --variant-window-flank-file 50 " . File::Spec->catfile($binDir,$somIndelFile . '.window');
+
+
+sub ualignFile($) {
+    return File::Spec->catfile($binDir,$_[0] . ".unsorted.realigned.bam");
+}
+sub alignFile($) {
+    return File::Spec->catfile($binDir,$_[0] . ".realigned");
+}
+
+
+if(exists($useroptions->{maxInputDepth}) && ($useroptions->{maxInputDepth} > 0)) {
+    $cmd .= " --max-input-depth " . $useroptions->{maxInputDepth};
+}
+
+
+if($isWriteRealignedBam) {
+    $cmd .= " -realigned-read-file " . ualignFile("normal") .
+            " --tumor-realigned-read-file " . ualignFile("tumor");
+}
+
+if(defined($useroptions->{extraStrelkaArguments})){
+    my $arg=$useroptions->{extraStrelkaArguments};
+    if($arg !~ /^\s*$/) {
+        $cmd .= " " . $arg;
+    }
+}
+
+# this file contains site stats that used to be printed on stdout:
+#
+$cmd .=
+" --report-file " . File::Spec->catfile($binDir,'strelka.stats');
+
+$cmd .=
+" >| " . File::Spec->catfile($binDir,'strelka.stdout') .
+" 2>| " . File::Spec->catfile($binDir,'strelka.stderr');
+
+
+executeCmd($cmd,0);
+
+
+if($isWriteRealignedBam) {
+    for my $label (qw(normal tumor)) {
+        my $ufile = ualignFile($label);
+        if( -f $ufile ) {
+            my $afile = alignFile($label);
+            my $cmd = "samtools sort " . $ufile .  " " . $afile;
+            executeCmd($cmd,0);
+            unlink($ufile);
+        } else {
+            logX("Can't find unsorted realigned BAM file: '$ufile'");
+        }
+    }
+}
+
+
+1;
+
+__END__
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka2/libexec/consolidateResults.pl	Thu Sep 25 11:59:08 2014 -0400
@@ -0,0 +1,276 @@
+#!/usr/bin/env perl
+
+=head1 LICENSE
+
+Strelka Workflow Software
+Copyright (c) 2009-2013 Illumina, Inc.
+
+This software is provided under the terms and conditions of the
+Illumina Open Source Software License 1.
+
+You should have received a copy of the Illumina Open Source
+Software License 1 along with this program. If not, see
+<https://github.com/downloads/sequencing/licenses/>.
+
+=head1 SYNOPSIS
+
+consolidateSomaticVariants.pl [options] | --help
+
+=head2 SUMMARY
+
+Aggregate final results from all chromosomes
+
+=cut
+
+use warnings FATAL => 'all';
+use strict;
+
+use Carp;
+$SIG{__DIE__} = \&Carp::confess;
+
+use File::Spec;
+use File::Temp;
+use Getopt::Long;
+use Pod::Usage;
+
+my $baseDir;
+my $libDir;
+#my $optDir;
+#my $vcftDir;
+BEGIN {
+    my $thisDir=(File::Spec->splitpath($0))[1];
+    $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
+    $libDir=File::Spec->catdir($baseDir,'lib');
+    #$optDir=File::Spec->catdir($baseDir,'opt');
+    #$vcftDir=File::Spec->catdir($optDir,'vcftools','lib','perl5','site_perl');
+}
+use lib $libDir;
+use Utils;
+#use lib $vcftDir;
+use Vcf;
+
+if(getAbsPath($baseDir)) {
+    errorX("Can't resolve path for strelka_workflow install directory: '$baseDir'");
+}
+#$optDir=File::Spec->catdir($baseDir,'opt');
+
+
+my $scriptName=(File::Spec->splitpath($0))[2];
+my $argCount=scalar(@ARGV);
+my $cmdline=join(' ',$0,@ARGV);
+
+
+my $configFile;
+my $help;
+
+GetOptions( "config=s" => \$configFile,
+            "help|h" => \$help) or pod2usage(2);
+
+pod2usage(2) if($help);
+pod2usage(2) unless(defined($configFile));
+
+#
+# check fixed paths
+#
+#my $samtoolsBin = File::Spec->catfile($optDir,'samtools','samtools');
+#checkFile($samtoolsBin,"samtools binary");
+
+
+#
+# read config and validate values
+#
+checkFile($configFile,"configuration ini");
+my $config  = parseConfigIni($configFile);
+
+
+for (qw(outDir chromOrder)) {
+    errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
+}
+for (qw(isWriteRealignedBam binSize)) {
+    errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
+}
+
+my $userconfig = $config->{user};
+
+my @chromOrder = split(/\t/,$config->{derived}{chromOrder});
+for my $chrom (@chromOrder) {
+    my $chromSizeKey = "chrom_" . $chrom . "_size";
+    errorX("Undefined configuration option: '$_'") unless(defined($chromSizeKey));
+}
+
+my $outDir = $config->{derived}{outDir};
+checkDir($outDir,"output");
+
+
+my $isWriteRealignedBam = $userconfig->{isWriteRealignedBam};
+
+for my $chrom (@chromOrder) {
+    my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
+    checkDir($chromDir,"input chromosome");
+
+    next unless($isWriteRealignedBam);
+    my $chromSizeKey = "chrom_" . $chrom . "_size";
+    my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize});
+    for my $binId (@$binList) {
+        my $dir = File::Spec->catdir($chromDir,'bins',$binId);
+        checkDir($dir,"input bin");
+    }
+}
+
+
+
+# suffix used for large result file intermediates:
+my $itag = ".incomplete";
+
+
+#
+# concatenate vcfs:
+#
+sub concatenateVcfs($) {
+    my $fileName = shift;
+
+    my $is_first = 1;
+
+    my $allFileName = "all." . $fileName;
+    my $allFile = File::Spec->catfile($outDir,'results',$allFileName . $itag);
+    open(my $aFH,'>',"$allFile")
+          || errorX("Failed to open file: '$allFile'");
+
+    # loop over all chroms once to create the header, and one more time for all the data:
+    my $headervcf;
+    for my $chrom (@chromOrder) {
+        my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
+        my $iFile = File::Spec->catfile($chromDir,$fileName);
+        checkFile($iFile);
+
+        my $depthKey="maxDepth_${chrom}";
+
+        if($is_first) {
+            open(my $iFH,'<',"$iFile")
+                || errorX("Failed to open file: '$iFile'");
+            $headervcf = Vcf->new(fh=>$iFH);
+            $headervcf->parse_header();
+            $headervcf->remove_header_line(key=>"cmdline");
+            $headervcf->add_header_line({key=>"cmdline",value=>$cmdline});
+            $headervcf->remove_header_line(key=>"$depthKey");
+            close($iFH);
+            $is_first=0;
+        }
+
+        {
+            open(my $iFH,'<',"$iFile")
+                || errorX("Failed to open file: '$iFile'");
+            my $vcf = Vcf->new(fh=>$iFH);
+            $vcf->parse_header();
+            for my $line (@{$vcf->get_header_line(key=>"$depthKey")}) {
+                # $line seems to be returned as a length 1 array ref to a hash --  ??!?!??!!
+                $headervcf->add_header_line($line->[0]);
+            }
+            $vcf->close();
+            close($iFH);
+         }
+    }
+    print $aFH $headervcf->format_header();
+    $headervcf->close();
+
+    for my $chrom (@chromOrder) {
+        my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
+        my $iFile = File::Spec->catfile($chromDir,$fileName);
+
+        open(my $iFH,'<',"$iFile")
+            || errorX("Failed to open file: '$iFile'");
+
+        my $vcf = Vcf->new(fh=>$iFH);
+        $vcf->parse_header();
+        print $aFH $_ while(<$iFH>);
+    }
+
+    close($aFH);
+
+    # make a second set of files with only the passed variants:
+    my $passedFileName = "passed." . $fileName;
+    my $passedFile = File::Spec->catfile($outDir,'results',$passedFileName . $itag);
+    open(my $pFH,'>',"$passedFile")
+          || errorX("Failed to open file: '$passedFile'");
+
+    open(my $arFH,'<',"$allFile")
+          || errorX("Failed to open file: '$allFile'");
+
+    while(<$arFH>) {
+        chomp;
+        unless(/^#/) {
+            my @F = split(/\t/);
+            next if((scalar(@F)>=7) && ($F[6] ne "PASS"));
+        }
+        print $pFH "$_\n";
+    }
+
+    close($arFH);
+    close($pFH);
+
+    my $allFileFinished = File::Spec->catfile($outDir,'results',$allFileName);
+    checkMove($allFile,$allFileFinished);
+
+    my $passedFileFinished = File::Spec->catfile($outDir,'results',$passedFileName);
+    checkMove($passedFile,$passedFileFinished);
+}
+
+concatenateVcfs("somatic.snvs.vcf");
+concatenateVcfs("somatic.indels.vcf");
+
+
+my $bamSuffix = ".realigned.bam";
+
+sub consolidateBam($) {
+    my $label = shift;
+
+    my $fileName = $label . $bamSuffix;
+
+    my $reDir = File::Spec->catdir($outDir,'realigned');
+    checkMakeDir($reDir);
+
+    my @bamList;
+    for my $chrom (@chromOrder) {
+        my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
+
+        my $chromSizeKey = "chrom_" . $chrom . "_size";
+        my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize});
+        for my $binId (@$binList) {
+            my $binDir = File::Spec->catdir($chromDir,'bins',$binId);
+            my $rbamFile = File::Spec->catfile($binDir,$fileName);
+            checkFile($rbamFile,"bin realigned bam file");
+
+            push @bamList,$rbamFile;
+        }
+    }
+
+    return unless(scalar(@bamList));
+
+    my $headerFH = File::Temp->new();
+    my $getHeaderCmd = "bash -c '$samtoolsBin view -H ".$bamList[0]." > $headerFH'";
+    executeCmd($getHeaderCmd);
+
+    my $allFile = File::Spec->catfile($reDir,$fileName . $itag);
+    my $cmd="$samtoolsBin merge -h $headerFH $allFile ". join(" ",@bamList);
+    executeCmd($cmd);
+
+    my $allFileFinished = File::Spec->catfile($reDir,$fileName);
+    checkMove($allFile,$allFileFinished);
+
+    my $indexCmd="$samtoolsBin index $allFileFinished";
+    executeCmd($indexCmd);
+
+    # for now don't remove all the bin realignments...
+    # unlink(@bamList);
+}
+
+if($isWriteRealignedBam) {
+    consolidateBam("normal");
+    consolidateBam("tumor");
+}
+
+
+1;
+
+__END__
+
Binary file strelka2/libexec/countFastaBases has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka2/libexec/filterSomaticVariants.pl	Thu Sep 25 11:59:08 2014 -0400
@@ -0,0 +1,435 @@
+#!/usr/bin/env perl
+
+=head1 LICENSE
+
+Strelka Workflow Software
+Copyright (c) 2009-2013 Illumina, Inc.
+
+This software is provided under the terms and conditions of the
+Illumina Open Source Software License 1.
+
+You should have received a copy of the Illumina Open Source
+Software License 1 along with this program. If not, see
+<https://github.com/downloads/sequencing/licenses/>.
+
+=head1 SYNOPSIS
+
+filterSomaticVariants.pl [options] | --help
+
+=head2 SUMMARY
+
+Aggregate and filter the variant caller results by chromosome
+
+=cut
+
+use warnings FATAL => 'all';
+use strict;
+
+use Carp;
+$SIG{__DIE__} = \&Carp::confess;
+
+use File::Spec;
+use Getopt::Long;
+use Pod::Usage;
+
+my $baseDir;
+my $libDir;
+#my $vcftDir;
+BEGIN {
+    my $thisDir=(File::Spec->splitpath($0))[1];
+    $baseDir=File::Spec->catdir($thisDir,File::Spec->updir());
+    $libDir=File::Spec->catdir($baseDir,'lib');
+    #my $optDir=File::Spec->catdir($baseDir,'opt');
+    #$vcftDir=File::Spec->catdir($optDir,'vcftools','lib','perl5','site_perl');
+}
+use lib $libDir;
+use Utils;
+#use lib $vcftDir;
+#use Vcf;
+
+
+my $scriptName=(File::Spec->splitpath($0))[2];
+my $argCount=scalar(@ARGV);
+my $cmdline = join(' ',$0,@ARGV);
+
+
+my ($chrom, $configFile);
+my $help;
+
+GetOptions( "chrom=s" => \$chrom,
+            "config=s" => \$configFile,
+            "help|h" => \$help) or pod2usage(2);
+
+pod2usage(2) if($help);
+pod2usage(2) unless(defined($chrom));
+pod2usage(2) unless(defined($configFile));
+
+
+
+#
+# read config and validate values
+#
+checkFile($configFile,"configuration ini");
+my $config  = parseConfigIni($configFile);
+
+my $chromSizeKey = "chrom_" . $chrom . "_size";
+my $chromKnownSizeKey = "chrom_" . $chrom . "_knownSize";
+
+for (("outDir", $chromSizeKey, $chromKnownSizeKey)) {
+    errorX("Undefined configuration option: '$_'") unless(defined($config->{derived}{$_}));
+}
+for (qw(binSize ssnvQuality_LowerBound sindelQuality_LowerBound
+        isSkipDepthFilters depthFilterMultiple
+        snvMaxFilteredBasecallFrac snvMaxSpanningDeletionFrac
+        indelMaxRefRepeat indelMaxWindowFilteredBasecallFrac
+        indelMaxIntHpolLength)) {
+    errorX("Undefined configuration option: '$_'") unless(defined($config->{user}{$_}));
+}
+
+my $outDir = $config->{derived}{outDir};
+my $chromDir = File::Spec->catdir($outDir,'chromosomes',$chrom);
+checkDir($outDir,"output");
+checkDir($chromDir,"output chromosome");
+
+my $userconfig = $config->{user};
+
+my $binList = getBinList($config->{derived}{$chromSizeKey},$userconfig->{binSize});
+for my $binId (@$binList) {
+    my $dir = File::Spec->catdir($chromDir,'bins',$binId);
+    checkDir($dir,"input bin");
+}
+
+#
+# parameters from user config file:
+#
+
+# minimum passed ssnv_nt Q-score:
+my $minQSSNT=$userconfig->{ssnvQuality_LowerBound};
+# minimum passed sindel_nt Q-score:
+my $minQSINT=$userconfig->{sindelQuality_LowerBound};
+
+#
+# filtration parameters from user config file:
+#
+
+# skip depth filters for targeted resequencing:
+my $isUseDepthFilter=(! $userconfig->{isSkipDepthFilters});
+# multiple of the normal mean coverage to filter snvs and indels
+my $depthFilterMultiple=$userconfig->{depthFilterMultiple};
+# max filtered basecall fraction for any sample
+my $snvMaxFilteredBasecallFrac=$userconfig->{snvMaxFilteredBasecallFrac};
+# max snv spanning-deletion fraction for any sample
+my $snvMaxSpanningDeletionFrac=$userconfig->{snvMaxSpanningDeletionFrac};
+# max indel reference repeat length
+my $indelMaxRefRepeat=$userconfig->{indelMaxRefRepeat};
+# max indel window filter fraction
+my $indelMaxWindowFilteredBasecallFrac=$userconfig->{indelMaxWindowFilteredBasecallFrac};
+# max indel interupted hompolymer length:
+my $indelMaxIntHpolLength=$userconfig->{indelMaxIntHpolLength};
+
+
+# first we want the normal sample mean chromosome depth:
+#
+my $filterCoverage;
+if($isUseDepthFilter) {
+    my $totalCoverage = 0;
+    for my $binId (@$binList) {
+        my $sFile = File::Spec->catfile($chromDir,'bins',$binId,'strelka.stats');
+        checkFile($sFile,"strelka bin stats");
+        open(my $sFH, '<', $sFile)
+          || errorX("Can't open file: '$sFile' $!");
+
+        my $is_found=0;
+        while(<$sFH>) {
+            next unless(/^NORMAL_NO_REF_N_COVERAGE\s/);
+            my $is_bad = 0;
+            if(not /sample_size:\s*(\d+)\s+/) { $is_bad=1; }
+            my $ss=$1;
+            # leave the regex for mean fairly loose to pick up scientific notation, etc..
+            if(not /mean:\s*(\d[^\s]*|nan)\s+/) { $is_bad=1; }
+            my $mean=$1;
+            errorX("Unexpected format in file: '$sFile'") if($is_bad);
+
+            my $coverage = ( $ss==0 ? 0 : int($ss*$mean) );
+
+            $totalCoverage += $coverage;
+            $is_found=1;
+            last;
+        }
+        close($sFH);
+        errorX("Unexpected format in file: '$sFile'") unless($is_found);
+    }
+
+    my $chromKnownSize = $config->{derived}{$chromKnownSizeKey};
+    my $normalMeanCoverage = ($totalCoverage/$chromKnownSize);
+    $filterCoverage = $normalMeanCoverage*$depthFilterMultiple;
+}
+
+
+# add filter description to vcf header unless it already exists
+# return 1 if filter id already exists, client can decide if this is an error
+#
+sub add_vcf_filter($$$) {
+    my ($vcf,$id,$desc) = @_;
+    return 1 if(scalar(@{$vcf->get_header_line(key=>'FILTER', ID=>$id)}));
+    $vcf->add_header_line({key=>'FILTER', ID=>$id,Description=>$desc});
+    return 0;
+}
+
+
+sub check_vcf_for_sample($$$) {
+    my ($vcf,$sample,$file) = @_;
+    my $is_found=0;
+    for ($vcf->get_samples()) {
+        $is_found=1 if($_ eq $sample);
+    }
+    errorX("Failed to find sample '$sample' in vcf file '$file'") unless($is_found);
+}
+
+
+
+my $depthFiltId="DP";
+
+
+# Runs all post-call vcf filters:
+#
+sub filterSnvFileList(\@$$$) {
+    my ($ifiles,$depthFilterVal,$acceptFileName,$isUseDepthFilter) = @_;
+
+    my $baseFiltId="BCNoise";
+    my $spanFiltId="SpanDel";
+    my $qFiltId="QSS_ref";
+
+    open(my $aFH,'>',$acceptFileName)
+      or errorX("Failed to open file: '$acceptFileName'");
+
+    my $is_first=1;
+    for my $ifile (@$ifiles) {
+        open(my $iFH,'<',"$ifile")
+          or errorX("Failed to open file: '$ifile'");
+        my $vcf = Vcf->new(fh=>$iFH);
+
+        # run some simple header validation on each vcf:
+        $vcf->parse_header();
+        check_vcf_for_sample($vcf,'NORMAL',$ifile);
+        check_vcf_for_sample($vcf,'TUMOR',$ifile);
+
+        if($is_first) {
+            # TODO: update vcf meta-data for chromosome-level filtered files
+            #
+            $vcf->remove_header_line(key=>"cmdline");
+            $vcf->add_header_line({key=>"cmdline",value=>$cmdline});
+            if($isUseDepthFilter) {
+                $vcf->add_header_line({key=>"maxDepth_$chrom",value=>$depthFilterVal});
+                add_vcf_filter($vcf,$depthFiltId,"Greater than ${depthFilterMultiple}x chromosomal mean depth in Normal sample");
+            }
+            add_vcf_filter($vcf,$baseFiltId,"Fraction of basecalls filtered at this site in either sample is at or above $snvMaxFilteredBasecallFrac");
+            add_vcf_filter($vcf,$spanFiltId,"Fraction of reads crossing site with spanning deletions in either sample exceeeds $snvMaxSpanningDeletionFrac");
+            add_vcf_filter($vcf,$qFiltId,"Normal sample is not homozygous ref or ssnv Q-score < $minQSSNT, ie calls with NT!=ref or QSS_NT < $minQSSNT");
+            print $aFH $vcf->format_header();
+            $is_first=0;
+        }
+
+        while(my $x=$vcf->next_data_hash()) {
+            my $norm=$x->{gtypes}->{NORMAL};
+            my $tumr=$x->{gtypes}->{TUMOR};
+
+            my %filters;
+
+            # normal depth filter:
+            my $normalDP=$norm->{DP};
+            if($isUseDepthFilter) {
+                $filters{$depthFiltId} = ($normalDP > $depthFilterVal);
+            }
+
+            # filtered basecall fraction:
+            my $normal_filt=($normalDP>0 ? $norm->{FDP}/$normalDP : 0);
+
+            my $tumorDP=$tumr->{DP};
+            my $tumor_filt=($tumorDP>0 ? $tumr->{FDP}/$tumorDP : 0);
+
+            $filters{$baseFiltId}=(($normal_filt >= $snvMaxFilteredBasecallFrac) or
+                                   ($tumor_filt >= $snvMaxFilteredBasecallFrac));
+
+            # spanning deletion fraction:
+            my $normalSDP=$norm->{SDP};
+            my $normalSpanTot=($normalDP+$normalSDP);
+            my $normalSpanDelFrac=($normalSpanTot>0 ? ($normalSDP/$normalSpanTot) : 0);
+
+            my $tumorSDP=$tumr->{SDP};
+            my $tumorSpanTot=($tumorDP+$tumorSDP);
+            my $tumorSpanDelFrac=($tumorSpanTot>0 ? ($tumorSDP/$tumorSpanTot) : 0);
+
+            $filters{$spanFiltId}=(($normalSpanDelFrac > $snvMaxSpanningDeletionFrac) or
+                                   ($tumorSpanDelFrac > $snvMaxSpanningDeletionFrac));
+
+            # Q-val filter:
+            $filters{$qFiltId}=(($x->{INFO}->{NT} ne "ref") or
+                                ($x->{INFO}->{QSS_NT} < $minQSSNT));
+
+            $x->{FILTER} = $vcf->add_filter($x->{FILTER},%filters);
+
+            print $aFH $vcf->format_line($x);
+        }
+
+        $vcf->close();
+        close($iFH);
+    }
+
+    close($aFH);
+}
+
+
+
+sub updateA2(\@$) {
+    my ($a2,$FH) = @_;
+    my $line=<$FH>;
+    unless(defined($line)) { errorX("Unexpected end of somatic indel window file"); }
+    chomp $line;
+    @$a2 = split("\t",$line);
+    unless(scalar(@$a2)) { errorX("Unexpected format in somatic indel window file"); }
+}
+
+
+
+sub filterIndelFileList(\@$$$) {
+    my ($ifiles,$depthFilterVal,$acceptFileName,$isUseDepthFilter) = @_;
+
+    my $repeatFiltId="Repeat";
+    my $iHpolFiltId="iHpol";
+    my $baseFiltId="BCNoise";
+    my $qFiltId="QSI_ref";
+
+    open(my $aFH,'>',$acceptFileName)
+      or errorX("Failed to open file: '$acceptFileName'");
+
+    my $is_first=1;
+    for my $ifile (@$ifiles) {
+        open(my $iFH,'<',"$ifile")
+          or errorX("Failed to open somatic indel file: '$ifile'");
+
+        my $iwfile = $ifile . ".window";
+        open(my $iwFH,'<',"$iwfile")
+          or errorX("Failed to open somatic indel window file: '$iwfile'");
+
+        my @a2; # hold window file data for one line in case we overstep...
+
+        my $vcf = Vcf->new(fh=>$iFH);
+
+        # run some simple header validation on each vcf:
+        $vcf->parse_header();
+        check_vcf_for_sample($vcf,'NORMAL',$ifile);
+        check_vcf_for_sample($vcf,'TUMOR',$ifile);
+
+        if($is_first) {
+            # TODO: update all vcf meta-data for chromosome-level filtered files
+            #
+            $vcf->remove_header_line(key=>"cmdline");
+            $vcf->add_header_line({key=>"cmdline",value=>$cmdline});
+            if($isUseDepthFilter) {
+                $vcf->add_header_line({key=>"maxDepth_$chrom",value=>$depthFilterVal});
+            }
+            $vcf->add_header_line({key=>'FORMAT', ID=>'DP50',Number=>1,Type=>'Float',Description=>'Average tier1 read depth within 50 bases'});
+            $vcf->add_header_line({key=>'FORMAT', ID=>'FDP50',Number=>1,Type=>'Float',Description=>'Average tier1 number of basecalls filtered from original read depth within 50 bases'});
+            $vcf->add_header_line({key=>'FORMAT', ID=>'SUBDP50',Number=>1,Type=>'Float',Description=>'Average number of reads below tier1 mapping quality threshold aligned across sites within 50 bases'});
+            if($isUseDepthFilter) {
+                add_vcf_filter($vcf,$depthFiltId,"Greater than ${depthFilterMultiple}x chromosomal mean depth in Normal sample");
+            }
+            add_vcf_filter($vcf,$repeatFiltId,"Sequence repeat of more than ${indelMaxRefRepeat}x in the reference sequence");
+            add_vcf_filter($vcf,$iHpolFiltId,"Indel overlaps an interupted homopolymer longer than ${indelMaxIntHpolLength}x in the reference sequence");
+            add_vcf_filter($vcf,$baseFiltId,"Average fraction of filtered basecalls within 50 bases of the indel exceeds $indelMaxWindowFilteredBasecallFrac");
+            add_vcf_filter($vcf,$qFiltId,"Normal sample is not homozygous ref or sindel Q-score < $minQSINT, ie calls with NT!=ref or QSI_NT < $minQSINT");
+            print $aFH $vcf->format_header();
+            $is_first=0;
+        }
+
+        while(my $x=$vcf->next_data_hash()) {
+            $vcf->add_format_field($x,'DP50');
+            $vcf->add_format_field($x,'FDP50');
+            $vcf->add_format_field($x,'SUBDP50');
+
+            my $norm=$x->{gtypes}->{NORMAL};
+            my $tumr=$x->{gtypes}->{TUMOR};
+
+            my $chrom=$x->{CHROM};
+            my $pos=int($x->{POS});
+
+            # get matching line from window file:
+            while((scalar(@a2)<2) or
+                  (($a2[0] le $chrom) and (int($a2[1]) < $pos))) {
+                updateA2(@a2,$iwFH);
+            }
+            unless(scalar(@a2) and ($a2[0] eq $chrom) and (int($a2[1]) == $pos))
+                { errorX("Can't find somatic indel window position.\nIndel line: " . $vcf->format_line($x) ); }
+
+            # add window data to vcf record:
+            #
+            $norm->{DP50} = $a2[2]+$a2[3];
+            $norm->{FDP50} = $a2[3];
+            $norm->{SUBDP50} = $a2[4];
+            $tumr->{DP50} = $a2[5]+$a2[6];
+            $tumr->{FDP50} = $a2[6];
+            $tumr->{SUBDP50} = $a2[7];
+
+            my %filters;
+
+            # normal depth filter:
+            my $normalDP=$norm->{DP};
+            if($isUseDepthFilter) {
+                $filters{$depthFiltId}=($normalDP > $depthFilterVal);
+            }
+
+            # ref repeat
+            my $refRep=$x->{INFO}->{RC};
+            $filters{$repeatFiltId}=(defined($refRep) and
+                                     ($refRep > $indelMaxRefRepeat));
+
+            # ihpol
+            my $iHpol=$x->{INFO}->{IHP};
+            $filters{$iHpolFiltId}=(defined($iHpol) and
+                                    ($iHpol > $indelMaxIntHpolLength));
+
+            # base filt:
+            my $normWinFrac=( $norm->{DP50} ? $norm->{FDP50}/$norm->{DP50} : 0 );
+            my $tumrWinFrac=( $tumr->{DP50} ? $tumr->{FDP50}/$tumr->{DP50} : 0 );
+            $filters{$baseFiltId}=( ($normWinFrac >= $indelMaxWindowFilteredBasecallFrac) or
+                                    ($tumrWinFrac >= $indelMaxWindowFilteredBasecallFrac) );
+
+            # Q-val filter:
+            $filters{$qFiltId}=(($x->{INFO}->{NT} ne "ref") or
+                                ($x->{INFO}->{QSI_NT} < $minQSINT));
+
+            $x->{FILTER} = $vcf->add_filter($x->{FILTER},%filters);
+
+            print $aFH $vcf->format_line($x);
+        }
+
+        $vcf->close();
+        close($iFH);
+        close($iwFH);
+    }
+
+    close($aFH);
+}
+
+
+
+my @ssnvFiles;
+my @sindelFiles;
+for my $binId (@$binList) {
+    my $ssnvFile = File::Spec->catfile($chromDir,'bins',$binId,'somatic.snvs.unfiltered.vcf');
+    my $sindelFile = File::Spec->catfile($chromDir,'bins',$binId,'somatic.indels.unfiltered.vcf');
+    checkFile($ssnvFile,"bin snv file");
+    checkFile($sindelFile,"bin indel file");
+    push @ssnvFiles,$ssnvFile;
+    push @sindelFiles,$sindelFile;
+}
+
+my $ssnvOutFile = File::Spec->catfile($chromDir,"somatic.snvs.vcf");
+filterSnvFileList(@ssnvFiles,$filterCoverage,$ssnvOutFile,$isUseDepthFilter);
+
+my $sindelOutFile = File::Spec->catfile($chromDir,"somatic.indels.vcf");
+filterIndelFileList(@sindelFiles,$filterCoverage,$sindelOutFile,$isUseDepthFilter);
+
+
+1;
Binary file strelka2/libexec/strelka2 has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka2/strelka.xml	Thu Sep 25 11:59:08 2014 -0400
@@ -0,0 +1,35 @@
+<tool id="strelka2" name="Strelka 2" version="1.0.14">
+  <requirements>
+    <requirement type="package" version="0.1.18">samtools</requirement>
+    <requirement type="package" version="0.1.11">vcftools</requirement>
+  </requirements>
+  <description>Strelka without dependencie built-in</description>
+  <command interpreter="python">
+   strelka_wrapper.py
+    --tumorBam $tumorBam
+    --normalBam $normalBam
+    --refFile $refFile
+    --configFile $configFile
+  </command>
+
+  <inputs>
+    <param format="bam" name="tumorBam" type="data" label="Tumor bam file"/>
+    <param format="bam" name="normalBam" type="data" label="Normal bam file"/>
+    <param format="fasta" name="refFile" type="data" label="ref fasta file"/>
+    <param format="ini" name="configFile" type="data" label="config file"/>
+  </inputs>
+
+  <outputs>
+    <data format="vcf" name="output_vcf_1" label="${tool.name} on ${on_string} (passed.somatic.snvs.vcf)" from_work_dir="StrelkaAnalysis/results/passed.somatic.snvs.vcf" />
+    <data format="vcf" name="output_vcf_2" label="${tool.name} on ${on_string} (passed.somatic.indels.vcf)" from_work_dir="StrelkaAnalysis/results/passed.somatic.indels.vcf" />
+    <data format="vcf" name="output_vcf_3" label="${tool.name} on ${on_string} (all.somatic.snvs.vcf)" from_work_dir="StrelkaAnalysis/results/all.somatic.snvs.vcf"  />
+    <data format="vcf" name="output_vcf_4" label="${tool.name} on ${on_string} (all.somatic.indels.vcf)" from_work_dir="StrelkaAnalysis/results/all.somatic.indels.vcf"  />
+  </outputs>
+  <trackster_conf/>
+
+
+  <help>
+Strelka, a method for somatic SNV and small indel detectipon from sequencing data of matched tumor-normal samples.
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka2/strelka2.xml	Thu Sep 25 11:59:08 2014 -0400
@@ -0,0 +1,126 @@
+<tool id="strelka88" name="Strelka good interface but no dependencies">
+
+  <description>Strelka good interface but no dependencies</description>
+  <command interpreter="python">strelka_wrapper.py --tumorBam $tumorBam --normalBam $normalBam --refFile $refFile
+   #if $configuration.configuration_switch == 'Default':
+	--configFile 				Default
+   #else if $configuration.configuration_switch == 'Path':
+        --configFile				$configuration.configFile
+   #else:
+        --configFile				Custom
+        --depthFilterMultiple 			$configuration.depthFilterMultiple
+        --snvMaxFilteredBasecallFrac		$configuration.snvMaxFilteredBasecallFrac
+        --snvMaxSpanningDeletionFrac 		$configuration.snvMaxSpanningDeletionFrac
+        --indelMaxRefRepeat			$configuration.indelMaxRefRepeat
+        --indelMaxWindowFilteredBasecallFrac	$configuration.indelMaxWindowFilteredBasecallFrac
+        --indelMaxIntHpolLength			$configuration.indelMaxIntHpolLength
+        --ssnvPrior				$configuration.ssnvPrior
+        --sindelPrior				$configuration.sindelPrior
+        --ssnvNoise				$configuration.ssnvNoise
+        --sindelNoise				$configuration.sindelNoise
+        --ssnvNoiseStrandBiasFrac		$configuration.ssnvNoiseStrandBiasFrac
+        --minTier1Mapq				$configuration.minTier1Mapq
+        --minTier2Mapq				$configuration.minTier2Mapq
+        --ssnvQuality_LowerBound		$configuration.ssnvQuality_LowerBound
+        --sindelQuality_LowerBound		$configuration.sindelQuality_LowerBound
+        --isWriteRealignedBam			$configuration.isWriteRealignedBam
+        --binSize 				$configuration.binSize
+        --isSkipDepthFilters			$configuration.isSkipDepthFilters
+        --maxInputDepth 			$configuration.maxInputDepth
+        #if $configuration.extra_arguments.extra_arguments_switch == 'Yes':
+	      --extraStrelkaArguments 		    $configuration.extra_arguments.extraStrelkaArguments
+        #end if
+   #end if
+   </command>
+
+   <inputs>
+    <param format="bam" name="tumorBam" type="data" label="Tumor bam file"/>
+    <param format="bam" name="normalBam" type="data" label="Normal bam file"/>
+    <param format="fasta" name="refFile" type="data" label="ref fasta file"/>
+
+    <conditional name="configuration">
+      <param name="configuration_switch" type="select" label="how do you want to configure strelka">
+        <option value="Default" selected="true">Default</option>
+	<option value="Path">Use a config file</option>
+        <option value="Custom">Custom</option>
+      </param>
+      <when value="Default">
+        <!-- do nothing -->
+      </when>
+      <when value="Path">
+        <param format="ini" name="configFile" type="data" label="config file"/>
+      </when>
+      <when value="Custom">
+        <param name="binSize" type="integer" value="25000000" label="binSize" />
+        <param name="minTier1Mapq" type="integer" value="20" min="0" max="40" help="between 0 and 40" label="minTier1Mapq" />
+        <param name="minTier2Mapq" type="integer" value="5" min="0" max="5" help="between 0 and 5" label="minTier2Mapq" />
+        <param name="isWriteRealignedBam" type="integer" value="0" label="isWriteRealignedBam" />
+        <param name="ssnvPrior" type="float" value="0.000001" label="ssnvPrior" />
+        <param name="sindelPrior" type="float" value="0.000001" label="sindelPrior" />
+        <param name="ssnvNoise" type="float" value="0.0000005" label="ssnvNoise" />
+        <param name="sindelNoise" type="float" value="0.000001" label="sindelNoise" />
+        <param name="ssnvNoiseStrandBiasFrac" type="float" value="0.5" label="ssnvNoiseStrandBiasFrac" />
+        <param name="ssnvQuality_LowerBound" type="integer" value="15" label="ssnvQuality_LowerBound" />
+        <param name="sindelQuality_LowerBound" type="integer" value="30" label="sindelQuality_LowerBound" />
+        <param name="isSkipDepthFilters" type="integer" value="1" label="isSkipDepthFilters" />
+        <param name="depthFilterMultiple" type="float" value="3.0" label="depthFilterMultiple" />
+        <param name="snvMaxFilteredBasecallFrac" type="float" value="0.4" label="snvMaxFilteredBasecallFrac" />
+        <param name="snvMaxSpanningDeletionFrac" type="float" value="0.75" label="snvMaxSpanningDeletionFrac" />
+        <param name="indelMaxRefRepeat" type="integer" value="8" label="indelMaxRefRepeat" />
+        <param name="indelMaxWindowFilteredBasecallFrac" type="float" value="0.3" label="indelMaxWindowFilteredBasecallFrac" />
+        <param name="indelMaxIntHpolLength" type="integer" value="14" label="indelMaxIntHpolLength" />
+        <param name="maxInputDepth" type="integer" value="10000" label="maxInputDepth" />
+        <conditional name="extra_arguments">
+	  <param name="extra_arguments_switch" type="select" label="Do you Want to add extraStrelkaArguments?">
+            <option value="No" selected="true">No</option>
+            <option value="Yes">Yes</option>
+          </param>
+          <when value="No">
+            <!-- do nothing -->
+          </when>
+          <when value="Yes"> 
+	    <param name="extraStrelkaArguments" type="text" value="" label="extraStrelkaArguments" />
+          </when>
+        </conditional>
+      </when>
+    </conditional>  
+    <conditional name="conf_file">
+      <param name="conf_file_switch" type="select" label="output conf_file ?">
+        <option value="No" selected="true">No</option>
+        <option value="Yes">Yes</option>
+      </param>
+    </conditional>
+
+
+
+
+  </inputs>
+
+  <outputs>
+    <data format="vcf" name="output1_vcf" label="${tool.name} on ${on_string}(passed.somatic.snvs.vcf)" from_work_dir="StrelkaAnalysis/results/passed.somatic.snvs.vcf"/>
+    <data format="vcf" name="output2_vcf" label="${tool.name} on ${on_string}(passed.somatic.indels.vcf)" from_work_dir="StrelkaAnalysis/results/passed.somatic.indels.vcf"/>
+    <data format="vcf" name="output3_vcf" label="${tool.name} on ${on_string}(all.somatic.snvs.vcf)" from_work_dir="StrelkaAnalysis/results/all.somatic.snvs.vcf"/>
+    <data format="vcf" name="output4_vcf" label="${tool.name} on ${on_string}(all.somatic.indels.vcf)" from_work_dir="StrelkaAnalysis/results/all.somatic.indels.vcf"/>
+    <data name="conf_file.ini" label="conf_file.ini" from_work_dir="StrelkaAnalysis/tmp/config.ini">
+      <filter>conf_file.conf_file_switch == "Yes"</filter>
+    </data>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="normalBam" ftype="bam" value="NA12891_dupmark_chr20_region.bam"/>
+      <param name="tumorBam" ftype="bam" value="NA12892_dupmark_chr20_region.bam"/>
+      <param name="refFile" ftype="fasta" value="chr20_860k_only.fa"/>
+      <param name="configuration_switch" value="Default"/>
+      <output name="output1_vcf" file="passed.somatic.snvs.vcf"/>
+      <output name="output2_vcf" file="passed.somatic.indels.vcf"/>
+      <output name="output3_vcf" file="all.somatic.snvs.vcf"/>
+      <output name="output4_vcf" file="all.somatic.indels.vcf"/>
+    </test>
+  </tests>
+
+  <help>
+Strelka, a method for somatic SNV and small indel detectipon from sequencing data of matched tumor-normal samples.
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka2/strelka_config.sample	Thu Sep 25 11:59:08 2014 -0400
@@ -0,0 +1,139 @@
+
+;
+; User configuration options for Strelka somatic small-variant caller
+; workflow:
+;
+
+[user]
+
+;
+; isSkipDepthFilters should be set to 1 to skip depth filtration for
+; whole exome or other targeted sequencing data
+;
+isSkipDepthFilters = 1
+
+;
+; strelka will not accept input reads above this depth (they will be skipped
+; until the depth drops below this value). Set this value <= 0 to disable
+; this feature. Using this filter will bound memory usage given extremely high
+; depth input, but may be problematic in high-depth targeted sequencing
+; applications.
+;
+maxInputDepth = 10000
+
+;
+; If the depth filter is not skipped, all variants which occur at a
+; depth greater than depthFilterMultiple*chromosome mean depth will be
+; filtered out.
+;
+depthFilterMultiple = 3.0
+
+;
+; Somatic SNV calls are filtered at sites where greater than this
+; fraction of basecalls have been removed by the mismatch density
+; filter in either sample.
+;
+snvMaxFilteredBasecallFrac = 0.4
+
+;
+; Somatic SNV calls are filtered at sites where greater than this
+; fraction of overlapping reads contain deletions which span the SNV
+; call site.
+;
+snvMaxSpanningDeletionFrac = 0.75
+
+;
+; Somatic indel calls are filtered if they represent an expansion or
+; contraction of a repeated pattern with a repeat count greater than
+; indelMaxRefRepeat in the reference (ie. if indelMaxRefRepeat is 8,
+; then the indel is filtered when it is an expansion/contraction of a
+; homopolymer longer than 8 bases, a dinucleotide repeat longer than
+; 16 bases, etc.)
+;
+indelMaxRefRepeat = 8
+
+;
+; Somatic indel calls are filtered if greater than this fraction of
+; basecalls in a window extending 50 bases to each side of an indel's
+; call position have been removed by the mismatch density filter.
+;
+indelMaxWindowFilteredBasecallFrac = 0.3
+
+;
+; Somatic indels are filtered if they overlap ’interrupted
+; homopolymers’ greater than this length. The term 'interrupted
+; homopolymer' is used to indicate the longest homopolymer which can
+; be found intersecting or adjacent to the called indel when a single
+; non-homopolymer base is allowed.
+;
+indelMaxIntHpolLength = 14
+
+;
+; prior probability of a somatic snv or indel
+;
+ssnvPrior = 0.000001
+sindelPrior = 0.000001
+
+;
+; probability of an snv or indel noise allele 
+;
+; NB: in the calling model a noise allele is shared in tumor and
+; normal samples, but occurs at any frequency.
+;
+ssnvNoise = 0.0000005
+sindelNoise = 0.000001
+
+;
+; Fraction of snv noise attributed to strand-bias.
+;
+; It is not recommended to change this setting. However, if it is
+; essential to turn the strand bias penalization off, the following is
+; recommended:
+; Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5,
+; (1) set ssnvNoiseStrandBiasFrac = 0
+; (2) divide the current ssnvNoise value by 2
+;
+ssnvNoiseStrandBiasFrac = 0.5
+
+;
+; minimum MAPQ score for PE reads at tier1:
+;
+minTier1Mapq = 20
+
+;
+; minimum MAPQ score for PE and SE reads at tier2:
+;
+minTier2Mapq = 5
+
+;
+; Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are
+; marked as filtered:
+;
+ssnvQuality_LowerBound = 15
+
+;
+; Somatic quality score (QSI_NT, NT=ref) below which somatic indels
+; are marked as filtered:
+;
+sindelQuality_LowerBound = 30
+
+;
+; Optionally write out read alignments which were altered during the
+; realignment step. At the completion of the workflow run, the
+; realigned reads can be found in:
+;
+; ${ANALYSIS_DIR}/realigned/{normal,tumor}.realigned.bam
+;
+isWriteRealignedBam = 0
+
+;
+; Jobs are parallelized over segments of the reference genome no larger
+; than this size:
+;
+binSize = 25000000
+
+;
+; Additional arguments passed to strelka.
+;
+extraStrelkaArguments =
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka2/strelka_wrapper.py	Thu Sep 25 11:59:08 2014 -0400
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+#Dan Blankenberg
+
+"""
+A wrapper script for running the GenomeAnalysisTK.jar commands.
+"""
+
+from __future__ import print_function
+import sys, argparse, os, tempfile, subprocess, shutil
+from binascii import unhexlify
+from string import Template
+from galaxy import eggs
+#import pkg_resources; pkg_resources.require( "bx-python" )
+
+#GALAXY_EXT_TO_GATK_EXT = { 'gatk_interval':'intervals', 'bam_index':'bam.bai', 'gatk_dbsnp':'dbSNP', 'picard_interval_list':'interval_list' } #items not listed here will use the galaxy extension as-is
+#GALAXY_EXT_TO_GATK_FILE_TYPE = GALAXY_EXT_TO_GATK_EXT #for now, these are the same, but could be different if needed
+#DEFAULT_GATK_PREFIX = "gatk_file"
+#CHUNK_SIZE = 2**20 #1mb
+#
+#
+def cleanup_before_exit( tmp_dir ):
+    if tmp_dir and os.path.exists( tmp_dir ):
+        shutil.rmtree( tmp_dir )
+
+def _create_config(args, config_path):
+    conf_file = open(config_path, "w")
+    conf_file.write("[user]\n")
+    for option in args:
+        if not option in ["tumorBam", "normalBam", "refFile", "configFile"] and args[option]!=None:
+    	    conf_file.write("%s=%s\n" % (option, args[option]))
+    conf_file.close()
+
+def my_Popen(cmd, prefix_for_stderr_name, tmp_dir, msg_error):
+    stderr_name = tempfile.NamedTemporaryFile( prefix = prefix_for_stderr_name ).name
+    proc = subprocess.Popen( args=cmd, shell=True, stderr=open( stderr_name, 'wb' ) )
+    return_code = proc.wait()                          
+    if return_code:
+	for line in open( stderr_name ):
+           print(line, file=sys.stderr)
+	os.unlink( stderr_name ) #clean up
+ 	cleanup_before_exit( tmp_dir )
+ 	raise Exception( msg_error )
+    else:
+        os.unlink( stderr_name )
+
+def index_bam_files( bam_filenames, tmp_dir ):
+    for bam_filename in bam_filenames:
+        bam_index_filename = "%s.bai" % bam_filename
+        print("bam_filename is: " + bam_filename + " bam_index_filename is: " + bam_index_filename + " test is: %s" % os.path.exists(bam_index_filename))
+        if not os.path.exists( bam_index_filename ):
+            #need to index this bam file
+            command = 'samtools index %s %s' % ( bam_filename, bam_index_filename )
+            my_Popen( command, "bam_index_stderr", tmp_dir, "Error during indexation of fasta file :" + bam_filename)
+
+def index_fasta_files( fasta_filenames, tmp_dir ):
+    for fasta_filename in fasta_filenames:
+        fasta_index_filename = "%s.fai" % fasta_filename
+        print("fasta_filename is: " + fasta_filename + " fasta_index_filename is: " + fasta_index_filename + " test is: %s" % os.path.exists(fasta_index_filename))
+        if not os.path.exists( fasta_index_filename ):
+            #need to index this bam file
+            command = 'samtools faidx %s %s' % ( fasta_filename, fasta_index_filename )
+            my_Popen( command, "fasta_index_stderr", tmp_dir, "Error during indexation of fasta file :" + fasta_filename)
+
+def __main__():
+    #Parse Command Line  OPTPARSE DEPRECIATED USE ARGPARSE INSTEAD
+    #MKTEMP DEPRECIATED USE MKDTlizations#EMP INSTEAD
+        
+    root_dir= "/home/galaxyusr/data/galaxy_dist/tools/strelka2"
+    expected_dir="for_tests"
+    job_dir=os.getcwd()
+    analysis_dir=job_dir + "/StrelkaAnalysis"
+    config_script=root_dir + "/configureStrelkaWorkflow.pl"
+    tmp_dir = "tmp"  #tempfile.mkdtemp( prefix='tmp-strelkaAnalysis-' )
+    config_ini = "%s/config.ini" % (tmp_dir)
+
+    print("root_dir: " + root_dir + "\njob_dir :" + job_dir + "\nanalysis_dir :" + analysis_dir + "\nconfig_script :" + config_script + "\ntmp_dir :" + tmp_dir + "\nconfig_ini :" +  config_ini)
+   
+    #manage parsing
+    parser = argparse.ArgumentParser()                                             
+    parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False )
+    parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False )   
+    parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False )
+    parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False )
+    parser.add_argument( '--depthFilterMultiple', help='path to tumor bam file', required = False )
+    parser.add_argument( '--snvMaxFilteredBasecallFrac', help='path to tumor bam file', required = False )
+    parser.add_argument( '--snvMaxSpanningDeletionFrac', help='path to tumor bam file', required = False )
+    parser.add_argument( '--indelMaxRefRepeat', help='path to tumor bam file', required = False )
+    parser.add_argument( '--indelMaxWindowFilteredBasecallFrac', help='path to tumor bam file', required = False )
+    parser.add_argument( '--indelMaxIntHpolLength', help='path to tumor bam file', required = False )
+    parser.add_argument( '--ssnvPrior', help='path to tumor bam file', required = False )
+    parser.add_argument( '--sindelPrior', help='path to tumor bam file', required = False )
+    parser.add_argument( '--ssnvNoise', help='path to tumor bam file', required = False )
+    parser.add_argument( '--sindelNoise', help='path to tumor bam file', required = False )
+    parser.add_argument( '--ssnvNoiseStrandBiasFrac', help='path to tumor bam file', required = False )
+    parser.add_argument( '--minTier1Mapq', help='path to tumor bam file', required = False )
+    parser.add_argument( '--minTier2Mapq', help='path to tumor bam file', required = False )
+    parser.add_argument( '--ssnvQuality_LowerBound', help='path to tumor bam file', required = False )
+    parser.add_argument( '--sindelQuality_LowerBound', help='path to tumor bam file', required = False )
+    parser.add_argument( '--isWriteRealignedBam', help='path to tumor bam file', required = False )
+    parser.add_argument( '--binSize', help='path to tumor bam file', required = False )
+    parser.add_argument( '--extraStrelkaArguments', help='path to tumor bam file', required = False )
+    parser.add_argument( '--isSkipDepthFilters', help='path to tumor bam file', required = False )
+    parser.add_argument( '--maxInputDepth', help='path to tumor bam file', required = False )
+    args = parser.parse_args()
+
+    #verifying eveything's ok
+    if not os.path.isfile(config_script):
+    	sys.exit("ERROR: The strelka workflow must be built prior to running. See installation instructions in '$root_dir/README'")
+    print("configuring...", file=sys.stdout)
+    if os.path.exists(analysis_dir):
+	sys.exit("'" + analysis_dir + "' already exist, if you are executing this tool from galaxy it should not happen")
+    
+
+    # creating index if needed
+    os.environ['PATH']= root_dir + "/opt/samtools:" + os.environ['PATH']
+    bam_filenames = [ args.tumorBam, args.normalBam ]
+    index_bam_files( bam_filenames, tmp_dir )
+    fasta_files = [ args.refFile ]
+    index_fasta_files( fasta_files, tmp_dir )
+    
+    #creating config file if needed
+    if args.configFile == "Custom":
+    	_create_config(vars(args), config_ini)
+    elif args.configFile == "Default":
+        cmdbash="cp %s %s" % (root_dir + "/strelka_config.sample", config_ini)
+        my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
+    else:
+    	if not os.path.exists(args.configFile):
+	     print( "The path to your configuration File seems to be wrong, use another one or custom option", file=sys.stderr)
+    	cmdbash="cp %s %s" % (args.configFile, config_ini)
+        my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
+
+
+
+
+    #configuration of workflow
+    cmd="%s --tumor=%s --normal=%s --ref=%s --config=%s --output-dir=%s" % (config_script, args.tumorBam, args.normalBam, args.refFile, config_ini, analysis_dir)
+    print( "**** Starting configuration.")
+    print( "**** Configuration cmd: '" + cmd + "'")
+    my_Popen( cmd, "cinfugation_stderr", tmp_dir, "Error during configuration !")
+    print("completed configuration")
+    
+    #run the workflow !
+    cmd="make -C " + analysis_dir
+    print("**** starting workflow.")
+    print("**** workflow cmd: '" + cmd + "'")
+    my_Popen( cmd, "workflow_stderr", tmp_dir, "Error during workflow execution !")   
+    print("**** completed workflow execution")
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#bam_filenames = []
+#    if options.datasets:
+#        for ( dataset_arg, filename, galaxy_ext, prefix ) in options.datasets:
+#            gatk_filename = filename_from_galaxy( filename, galaxy_ext, target_dir = tmp_dir, prefix = prefix )#return the link to the dataset that has been created in the function
+#            if dataset_arg:
+#                cmd = '%s %s "%s"' % ( cmd, gatk_filetype_argument_substitution( dataset_arg, galaxy_ext ), gatk_filename )
+#            if galaxy_ext == "bam":
+#                bam_filenames.append( gatk_filename )
+#    #set up stdout and stderr output options
+#    stdout = open_file_from_option( options.stdout, mode = 'wb' )
+#    stderr = open_file_from_option( options.stderr, mode = 'wb' )
+#    #if no stderr file is specified, we'll use our own
+#    if stderr is None:
+#        stderr = tempfile.NamedTemporaryFile( prefix="strelka-stderr-", dir=tmp_dir )
+#    
+#    proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir )
+#    return_code = proc.wait()
+#    
+#    if return_code:
+#        stderr_target = sys.stderr
+#    else:
+#        stderr_target = sys.stdout
+#    stderr.flush()
+#    stderr.seek(0)
+#    while True:
+#        chunk = stderr.read( CHUNK_SIZE )
+#        if chunk:
+#            stderr_target.write( chunk )
+#        else:
+#            break
+#    stderr.close()
+#    #generate html reports
+#    if options.html_report_from_directory:
+#        for ( html_filename, html_dir ) in options.html_report_from_directory:
+#            html_report_from_directory( open( html_filename, 'wb' ), html_dir )
+#    
+#    cleanup_before_exit( tmp_dir )
+
+if __name__=="__main__": __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/strelka2/tool_dependencies.xml	Thu Sep 25 11:59:08 2014 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="samtools" version="0.1.18">
+    <repository changeset_revision="c0f72bdba484" name="package_samtools_0_1_18" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
+  </package>
+  <package name="vcftools" version="0.1.11">
+    <repository changeset_revision="710efaae2ff8" name="package_vcftools_0_1_11" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
+  </package>
+</tool_dependency>