# HG changeset patch
# User mini
# Date 1413376992 -7200
# Node ID 3c10d88b55ad12c30396cd1fa7bb7e0ec0d66fa4
# Parent 8260a4188b0843daa65a4df67bcdaa7a26df1d15
improved user interface
diff -r 8260a4188b08 -r 3c10d88b55ad lib/strelka_config_bwa_default.ini
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/strelka_config_bwa_default.ini Wed Oct 15 14:43:12 2014 +0200
@@ -0,0 +1,139 @@
+
+;
+; User configuration options for Strelka somatic small-variant caller
+; workflow:
+;
+
+[user]
+
+;
+; isSkipDepthFilters should be set to 1 to skip depth filtration for
+; whole exome or other targeted sequencing data
+;
+isSkipDepthFilters = 0
+
+;
+; strelka will not accept input reads above this depth (they will be skipped
+; until the depth drops below this value). Set this value <= 0 to disable
+; this feature. Using this filter will bound memory usage given extremely high
+; depth input, but may be problematic in high-depth targeted sequencing
+; applications.
+;
+maxInputDepth = 10000
+
+;
+; If the depth filter is not skipped, all variants which occur at a
+; depth greater than depthFilterMultiple*chromosome mean depth will be
+; filtered out.
+;
+depthFilterMultiple = 3.0
+
+;
+; Somatic SNV calls are filtered at sites where greater than this
+; fraction of basecalls have been removed by the mismatch density
+; filter in either sample.
+;
+snvMaxFilteredBasecallFrac = 0.4
+
+;
+; Somatic SNV calls are filtered at sites where greater than this
+; fraction of overlapping reads contain deletions which span the SNV
+; call site.
+;
+snvMaxSpanningDeletionFrac = 0.75
+
+;
+; Somatic indel calls are filtered if they represent an expansion or
+; contraction of a repeated pattern with a repeat count greater than
+; indelMaxRefRepeat in the reference (e.g., if indelMaxRefRepeat is 8,
+; then the indel is filtered when it is an expansion/contraction of a
+; homopolymer longer than 8 bases, a dinucleotide repeat longer than
+; 16 bases, etc.)
+;
+indelMaxRefRepeat = 8
+
+;
+; Somatic indel calls are filtered if greater than this fraction of
+; basecalls in a window extending 50 bases to each side of an indel's
+; call position have been removed by the mismatch density filter.
+;
+indelMaxWindowFilteredBasecallFrac = 0.3
+
+;
+; Somatic indels are filtered if they overlap 'interrupted
+; homopolymers' greater than this length. The term 'interrupted
+; homopolymer' is used to indicate the longest homopolymer which can
+; be found intersecting or adjacent to the called indel when a single
+; non-homopolymer base is allowed.
+;
+indelMaxIntHpolLength = 14
+
+;
+; prior probability of a somatic snv or indel
+;
+ssnvPrior = 0.000001
+sindelPrior = 0.000001
+
+;
+; probability of an snv or indel noise allele
+;
+; NB: in the calling model a noise allele is shared in tumor and
+; normal samples, but occurs at any frequency.
+;
+ssnvNoise = 0.0000005
+sindelNoise = 0.000001
+
+;
+; Fraction of snv noise attributed to strand-bias.
+;
+; It is not recommended to change this setting. However, if it is
+; essential to turn the strand bias penalization off, the following is
+; recommended:
+; Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5,
+; (1) set ssnvNoiseStrandBiasFrac = 0
+; (2) divide the current ssnvNoise value by 2
+;
+ssnvNoiseStrandBiasFrac = 0.5
+
+;
+; minimum MAPQ score for PE reads at tier1:
+;
+minTier1Mapq = 20
+
+;
+; minimum MAPQ score for PE and SE reads at tier2:
+;
+minTier2Mapq = 5
+
+;
+; Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are
+; marked as filtered:
+;
+ssnvQuality_LowerBound = 15
+
+;
+; Somatic quality score (QSI_NT, NT=ref) below which somatic indels
+; are marked as filtered:
+;
+sindelQuality_LowerBound = 30
+
+;
+; Optionally write out read alignments which were altered during the
+; realignment step. At the completion of the workflow run, the
+; realigned reads can be found in:
+;
+; ${ANALYSIS_DIR}/realigned/{normal,tumor}.realigned.bam
+;
+isWriteRealignedBam = 0
+
+;
+; Jobs are parallelized over segments of the reference genome no larger
+; than this size:
+;
+binSize = 25000000
+
+;
+; Additional arguments passed to strelka.
+;
+extraStrelkaArguments =
+
diff -r 8260a4188b08 -r 3c10d88b55ad lib/strelka_config_eland_default.ini
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/strelka_config_eland_default.ini Wed Oct 15 14:43:12 2014 +0200
@@ -0,0 +1,140 @@
+
+;
+; User configuration options for Strelka somatic small-variant caller
+; workflow:
+;
+
+[user]
+
+;
+; isSkipDepthFilters should be set to 1 to skip depth filtration for
+; whole exome or other targeted sequencing data
+;
+isSkipDepthFilters = 0
+
+;
+; strelka will not accept input reads above this depth (they will be skipped
+; until the depth drops below this value). Set this value <= 0 to disable
+; this feature. Using this filter will bound memory usage given extremely high
+; depth input, but may be problematic in high-depth targeted sequencing
+; applications.
+;
+maxInputDepth = 10000
+
+;
+; If the depth filter is not skipped, all variants which occur at a
+; depth greater than depthFilterMultiple*chromosome mean depth will be
+; filtered out.
+;
+depthFilterMultiple = 3.0
+
+;
+; Somatic SNV calls are filtered at sites where greater than this
+; fraction of basecalls have been removed by the mismatch density
+; filter in either sample.
+;
+snvMaxFilteredBasecallFrac = 0.4
+
+;
+; Somatic SNV calls are filtered at sites where greater than this
+; fraction of overlapping reads contain deletions which span the SNV
+; call site.
+;
+snvMaxSpanningDeletionFrac = 0.75
+
+;
+; Somatic indel calls are filtered if they represent an expansion or
+; contraction of a repeated pattern with a repeat count greater than
+; indelMaxRefRepeat in the reference (e.g., if indelMaxRefRepeat is 8,
+; then the indel is filtered when it is an expansion/contraction of a
+; homopolymer longer than 8 bases, a dinucleotide repeat longer than
+; 16 bases, etc.)
+;
+indelMaxRefRepeat = 8
+
+;
+; Somatic indel calls are filtered if greater than this fraction of
+; basecalls in a window extending 50 bases to each side of an indel's
+; call position have been removed by the mismatch density filter.
+;
+indelMaxWindowFilteredBasecallFrac = 0.3
+
+;
+; Somatic indels are filtered if they overlap 'interrupted
+; homopolymers' greater than this length. The term 'interrupted
+; homopolymer' is used to indicate the longest homopolymer which can
+; be found intersecting or adjacent to the called indel when a single
+; non-homopolymer base is allowed.
+;
+indelMaxIntHpolLength = 14
+
+;
+; prior probability of a somatic snv or indel
+;
+ssnvPrior = 0.000001
+sindelPrior = 0.000001
+
+;
+; probability of an snv or indel noise allele
+;
+; NB: in the calling model a noise allele is shared in tumor and
+; normal samples, but occurs at any frequency.
+;
+ssnvNoise = 0.0000005
+sindelNoise = 0.0000001
+
+;
+; Fraction of snv noise attributed to strand-bias.
+;
+; It is not recommended to change this setting. However, if it is
+; essential to turn the strand bias penalization off, the following is
+; recommended:
+; Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5,
+; (1) set ssnvNoiseStrandBiasFrac = 0
+; (2) divide the current ssnvNoise value by 2
+;
+ssnvNoiseStrandBiasFrac = 0.5
+
+;
+; minimum MAPQ score for PE reads at tier1:
+;
+minTier1Mapq = 40
+
+;
+; minimum MAPQ score for PE and SE reads at tier2:
+;
+minTier2Mapq = 5
+
+
+;
+; Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are
+; marked as filtered:
+;
+ssnvQuality_LowerBound = 15
+
+;
+; Somatic quality score (QSI_NT, NT=ref) below which somatic indels
+; are marked as filtered:
+;
+sindelQuality_LowerBound = 30
+
+;
+; Optionally write out read alignments which were altered during the
+; realignment step. At the completion of the workflow run, the
+; realigned reads can be found in:
+;
+; ${ANALYSIS_DIR}/realigned/{normal,tumor}.realigned.bam
+;
+isWriteRealignedBam = 0
+
+;
+; Jobs are parallelized over segments of the reference genome no larger
+; than this size:
+;
+binSize = 25000000
+
+;
+; Additional arguments passed to strelka.
+;
+extraStrelkaArguments = --eland-compatibility
+
diff -r 8260a4188b08 -r 3c10d88b55ad lib/strelka_config_isaac_default.ini
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/strelka_config_isaac_default.ini Wed Oct 15 14:43:12 2014 +0200
@@ -0,0 +1,139 @@
+
+;
+; User configuration options for Strelka somatic small-variant caller
+; workflow:
+;
+
+[user]
+
+;
+; isSkipDepthFilters should be set to 1 to skip depth filtration for
+; whole exome or other targeted sequencing data
+;
+isSkipDepthFilters = 0
+
+;
+; strelka will not accept input reads above this depth (they will be skipped
+; until the depth drops below this value). Set this value <= 0 to disable
+; this feature. Using this filter will bound memory usage given extremely high
+; depth input, but may be problematic in high-depth targeted sequencing
+; applications.
+;
+maxInputDepth = 10000
+
+;
+; If the depth filter is not skipped, all variants which occur at a
+; depth greater than depthFilterMultiple*chromosome mean depth will be
+; filtered out.
+;
+depthFilterMultiple = 3.0
+
+;
+; Somatic SNV calls are filtered at sites where greater than this
+; fraction of basecalls have been removed by the mismatch density
+; filter in either sample.
+;
+snvMaxFilteredBasecallFrac = 0.4
+
+;
+; Somatic SNV calls are filtered at sites where greater than this
+; fraction of overlapping reads contain deletions which span the SNV
+; call site.
+;
+snvMaxSpanningDeletionFrac = 0.75
+
+;
+; Somatic indel calls are filtered if they represent an expansion or
+; contraction of a repeated pattern with a repeat count greater than
+; indelMaxRefRepeat in the reference (e.g., if indelMaxRefRepeat is 8,
+; then the indel is filtered when it is an expansion/contraction of a
+; homopolymer longer than 8 bases, a dinucleotide repeat longer than
+; 16 bases, etc.)
+;
+indelMaxRefRepeat = 8
+
+;
+; Somatic indel calls are filtered if greater than this fraction of
+; basecalls in a window extending 50 bases to each side of an indel's
+; call position have been removed by the mismatch density filter.
+;
+indelMaxWindowFilteredBasecallFrac = 0.3
+
+;
+; Somatic indels are filtered if they overlap 'interrupted
+; homopolymers' greater than this length. The term 'interrupted
+; homopolymer' is used to indicate the longest homopolymer which can
+; be found intersecting or adjacent to the called indel when a single
+; non-homopolymer base is allowed.
+;
+indelMaxIntHpolLength = 14
+
+;
+; prior probability of a somatic snv or indel
+;
+ssnvPrior = 0.000001
+sindelPrior = 0.000001
+
+;
+; probability of an snv or indel noise allele
+;
+; NB: in the calling model a noise allele is shared in tumor and
+; normal samples, but occurs at any frequency.
+;
+ssnvNoise = 0.0000005
+sindelNoise = 0.000001
+
+;
+; Fraction of snv noise attributed to strand-bias.
+;
+; It is not recommended to change this setting. However, if it is
+; essential to turn the strand bias penalization off, the following is
+; recommended:
+; Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5,
+; (1) set ssnvNoiseStrandBiasFrac = 0
+; (2) divide the current ssnvNoise value by 2
+;
+ssnvNoiseStrandBiasFrac = 0.5
+
+;
+; minimum MAPQ score for PE reads at tier1:
+;
+minTier1Mapq = 20
+
+;
+; minimum MAPQ score for PE and SE reads at tier2:
+;
+minTier2Mapq = 0
+
+;
+; Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are
+; marked as filtered:
+;
+ssnvQuality_LowerBound = 15
+
+;
+; Somatic quality score (QSI_NT, NT=ref) below which somatic indels
+; are marked as filtered:
+;
+sindelQuality_LowerBound = 30
+
+;
+; Optionally write out read alignments which were altered during the
+; realignment step. At the completion of the workflow run, the
+; realigned reads can be found in:
+;
+; ${ANALYSIS_DIR}/realigned/{normal,tumor}.realigned.bam
+;
+isWriteRealignedBam = 0
+
+;
+; Jobs are parallelized over segments of the reference genome no larger
+; than this size:
+;
+binSize = 25000000
+
+;
+; Additional arguments passed to strelka.
+;
+extraStrelkaArguments = --remap-input-softclip
+
diff -r 8260a4188b08 -r 3c10d88b55ad strelka.xml
--- a/strelka.xml Wed Oct 01 13:45:33 2014 +0200
+++ b/strelka.xml Wed Oct 15 14:43:12 2014 +0200
@@ -6,9 +6,19 @@
samtoolsvcftools
- strelka_wrapper.py --tumorBam $tumorBam --normalBam $normalBam --refFile $refFile
- #if $configuration.configuration_switch == 'Default':
- --configFile Default
+ strelka_wrapper.py --tumorBam $tumorBam --normalBam $normalBam
+ #if $genomeSource.refGenomeSource == "history":
+ --refFile "${genomeSource.ownFile}"
+ #else:
+ --refFile "${genomeSource.index.fields.path}"
+ #end if
+
+ #if $configuration.configuration_switch == 'Default for Bwa':
+ --configFile strelka_config_bwa_default.ini
+ #else if $configuration.configuration_switch == 'Default for Isaac':
+ --configFile strelka_config_isaac_default.ini
+ #else if $configuration.configuration_switch == 'Default for Eland':
+ --configFile strelka_config_eland_default.ini
#else if $configuration.configuration_switch == 'Path':
--configFile $configuration.configFile
#else:
@@ -40,13 +50,28 @@
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
@@ -57,25 +82,27 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -85,18 +112,18 @@
-
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
@@ -139,7 +166,7 @@
Strelka, a method for somatic SNV and small indel detection from sequencing data of matched tumor-normal samples.
-You can see more information at : https://sites.google.com/site/strelkasomaticvariantcaller
+You can see more information at: https://sites.google.com/site/strelkasomaticvariantcaller.
diff -r 8260a4188b08 -r 3c10d88b55ad strelka_wrapper.py
--- a/strelka_wrapper.py Wed Oct 01 13:45:33 2014 +0200
+++ b/strelka_wrapper.py Wed Oct 15 14:43:12 2014 +0200
@@ -61,30 +61,30 @@
print(os.environ['PATH'])
parser = argparse.ArgumentParser()
parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False )
- parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False )
- parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False )
- parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False )
- parser.add_argument( '--depthFilterMultiple', help='path to tumor bam file', required = False )
- parser.add_argument( '--snvMaxFilteredBasecallFrac', help='path to tumor bam file', required = False )
- parser.add_argument( '--snvMaxSpanningDeletionFrac', help='path to tumor bam file', required = False )
- parser.add_argument( '--indelMaxRefRepeat', help='path to tumor bam file', required = False )
- parser.add_argument( '--indelMaxWindowFilteredBasecallFrac', help='path to tumor bam file', required = False )
- parser.add_argument( '--indelMaxIntHpolLength', help='path to tumor bam file', required = False )
- parser.add_argument( '--ssnvPrior', help='path to tumor bam file', required = False )
- parser.add_argument( '--sindelPrior', help='path to tumor bam file', required = False )
- parser.add_argument( '--ssnvNoise', help='path to tumor bam file', required = False )
- parser.add_argument( '--sindelNoise', help='path to tumor bam file', required = False )
- parser.add_argument( '--ssnvNoiseStrandBiasFrac', help='path to tumor bam file', required = False )
- parser.add_argument( '--minTier1Mapq', help='path to tumor bam file', required = False )
- parser.add_argument( '--minTier2Mapq', help='path to tumor bam file', required = False )
- parser.add_argument( '--ssnvQuality_LowerBound', help='path to tumor bam file', required = False )
- parser.add_argument( '--sindelQuality_LowerBound', help='path to tumor bam file', required = False )
- parser.add_argument( '--isWriteRealignedBam', help='path to tumor bam file', required = False )
+ parser.add_argument( '-n', '--normalBam', help='', required = False )
+ parser.add_argument( '-r', '--refFile', help='', required = False )
+ parser.add_argument( '-c', '--configFile', help='', required = False )
+ parser.add_argument( '--depthFilterMultiple', help='', required = False )
+ parser.add_argument( '--snvMaxFilteredBasecallFrac', help='', required = False )
+ parser.add_argument( '--snvMaxSpanningDeletionFrac', help='', required = False )
+ parser.add_argument( '--indelMaxRefRepeat', help='', required = False )
+ parser.add_argument( '--indelMaxWindowFilteredBasecallFrac', help='', required = False )
+ parser.add_argument( '--indelMaxIntHpolLength', help='', required = False )
+ parser.add_argument( '--ssnvPrior', help='', required = False )
+ parser.add_argument( '--sindelPrior', help='', required = False )
+ parser.add_argument( '--ssnvNoise', help='', required = False )
+ parser.add_argument( '--sindelNoise', help='', required = False )
+ parser.add_argument( '--ssnvNoiseStrandBiasFrac', help='', required = False )
+ parser.add_argument( '--minTier1Mapq', help='', required = False )
+ parser.add_argument( '--minTier2Mapq', help='', required = False )
+ parser.add_argument( '--ssnvQuality_LowerBound', help='', required = False )
+ parser.add_argument( '--sindelQuality_LowerBound', help='', required = False )
+ parser.add_argument( '--isWriteRealignedBam', help='', required = False )
parser.add_argument( '--binSize', help='path to tumor bam file', required = False )
- parser.add_argument( '--extraStrelkaArguments', help='path to tumor bam file', required = False )
- parser.add_argument( '--isSkipDepthFilters', help='path to tumor bam file', required = False )
- parser.add_argument( '--maxInputDepth', help='path to tumor bam file', required = False )
- parser.add_argument( '--scriptPath', help='path to tumor bam file', required = False )
+ parser.add_argument( '--extraStrelkaArguments', help='', required = False )
+ parser.add_argument( '--isSkipDepthFilters', help='', required = False )
+ parser.add_argument( '--maxInputDepth', help='', required = False )
+ parser.add_argument( '--scriptPath', help='', required = False )
args = parser.parse_args()
root_dir= args.scriptPath
@@ -115,8 +115,8 @@
#creating config file if needed
if args.configFile == "Custom":
_create_config(vars(args), config_ini)
- elif args.configFile == "Default":
- cmdbash="cp %s %s" % (root_dir + "/strelka_config.sample", config_ini)
+ elif args.configFile in ["strelka_config_bwa_default.ini", "strelka_config_isaac_default.ini", "strelka_config_eland_default.ini"]:
+ cmdbash="cp %s %s" % (root_dir + "/lib/" + args.configFile, config_ini)
my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed")
else:
if not os.path.exists(args.configFile):