# HG changeset patch # User mini # Date 1413376992 -7200 # Node ID 3c10d88b55ad12c30396cd1fa7bb7e0ec0d66fa4 # Parent 8260a4188b0843daa65a4df67bcdaa7a26df1d15 improved user interface diff -r 8260a4188b08 -r 3c10d88b55ad lib/strelka_config_bwa_default.ini --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/strelka_config_bwa_default.ini Wed Oct 15 14:43:12 2014 +0200 @@ -0,0 +1,139 @@ + +; +; User configuration options for Strelka somatic small-variant caller +; workflow: +; + +[user] + +; +; isSkipDepthFilters should be set to 1 to skip depth filtration for +; whole exome or other targeted sequencing data +; +isSkipDepthFilters = 0 + +; +; strelka will not accept input reads above this depth (they will be skipped +; until the depth drops below this value). Set this value <= 0 to disable +; this feature. Using this filter will bound memory usage given extremely high +; depth input, but may be problematic in high-depth targeted sequencing +; applications. +; +maxInputDepth = 10000 + +; +; If the depth filter is not skipped, all variants which occur at a +; depth greater than depthFilterMultiple*chromosome mean depth will be +; filtered out. +; +depthFilterMultiple = 3.0 + +; +; Somatic SNV calls are filtered at sites where greater than this +; fraction of basecalls have been removed by the mismatch density +; filter in either sample. +; +snvMaxFilteredBasecallFrac = 0.4 + +; +; Somatic SNV calls are filtered at sites where greater than this +; fraction of overlapping reads contain deletions which span the SNV +; call site. +; +snvMaxSpanningDeletionFrac = 0.75 + +; +; Somatic indel calls are filtered if they represent an expansion or +; contraction of a repeated pattern with a repeat count greater than +; indelMaxRefRepeat in the reference (i.e. if indelMaxRefRepeat is 8, +; then the indel is filtered when it is an expansion/contraction of a +; homopolymer longer than 8 bases, a dinucleotide repeat longer than +; 16 bases, etc.) 
+; +indelMaxRefRepeat = 8 + +; +; Somatic indel calls are filtered if greater than this fraction of +; basecalls in a window extending 50 bases to each side of an indel's +; call position have been removed by the mismatch density filter. +; +indelMaxWindowFilteredBasecallFrac = 0.3 + +; +; Somatic indels are filtered if they overlap 'interrupted +; homopolymers' greater than this length. The term 'interrupted +; homopolymer' is used to indicate the longest homopolymer which can +; be found intersecting or adjacent to the called indel when a single +; non-homopolymer base is allowed. +; +indelMaxIntHpolLength = 14 + +; +; prior probability of a somatic snv or indel +; +ssnvPrior = 0.000001 +sindelPrior = 0.000001 + +; +; probability of an snv or indel noise allele +; +; NB: in the calling model a noise allele is shared in tumor and +; normal samples, but occurs at any frequency. +; +ssnvNoise = 0.0000005 +sindelNoise = 0.000001 + +; +; Fraction of snv noise attributed to strand-bias. +; +; It is not recommended to change this setting. However, if it is +; essential to turn the strand bias penalization off, the following is +; recommended: +; Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5, +; (1) set ssnvNoiseStrandBiasFrac = 0 +; (2) divide the current ssnvNoise value by 2 +; +ssnvNoiseStrandBiasFrac = 0.5 + +; +; minimum MAPQ score for PE reads at tier1: +; +minTier1Mapq = 20 + +; +; minimum MAPQ score for PE and SE reads at tier2: +; +minTier2Mapq = 5 + +; +; Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are +; marked as filtered: +; +ssnvQuality_LowerBound = 15 + +; +; Somatic quality score (QSI_NT, NT=ref) below which somatic indels +; are marked as filtered: +; +sindelQuality_LowerBound = 30 + +; +; Optionally write out read alignments which were altered during the +; realignment step. 
At the completion of the workflow run, the +; realigned reads can be found in: +; +; ${ANALYSIS_DIR}/realigned/{normal,tumor}.realigned.bam +; +isWriteRealignedBam = 0 + +; +; Jobs are parallelized over segments of the reference genome no larger +; than this size: +; +binSize = 25000000 + +; +; Additional arguments passed to strelka. +; +extraStrelkaArguments = + diff -r 8260a4188b08 -r 3c10d88b55ad lib/strelka_config_eland_default.ini --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/strelka_config_eland_default.ini Wed Oct 15 14:43:12 2014 +0200 @@ -0,0 +1,140 @@ + +; +; User configuration options for Strelka somatic small-variant caller +; workflow: +; + +[user] + +; +; isSkipDepthFilters should be set to 1 to skip depth filtration for +; whole exome or other targeted sequencing data +; +isSkipDepthFilters = 0 + +; +; strelka will not accept input reads above this depth (they will be skipped +; until the depth drops below this value). Set this value <= 0 to disable +; this feature. Using this filter will bound memory usage given extremely high +; depth input, but may be problematic in high-depth targeted sequencing +; applications. +; +maxInputDepth = 10000 + +; +; If the depth filter is not skipped, all variants which occur at a +; depth greater than depthFilterMultiple*chromosome mean depth will be +; filtered out. +; +depthFilterMultiple = 3.0 + +; +; Somatic SNV calls are filtered at sites where greater than this +; fraction of basecalls have been removed by the mismatch density +; filter in either sample. +; +snvMaxFilteredBasecallFrac = 0.4 + +; +; Somatic SNV calls are filtered at sites where greater than this +; fraction of overlapping reads contain deletions which span the SNV +; call site. +; +snvMaxSpanningDeletionFrac = 0.75 + +; +; Somatic indel calls are filtered if they represent an expansion or +; contraction of a repeated pattern with a repeat count greater than +; indelMaxRefRepeat in the reference (i.e. 
if indelMaxRefRepeat is 8, +; then the indel is filtered when it is an expansion/contraction of a +; homopolymer longer than 8 bases, a dinucleotide repeat longer than +; 16 bases, etc.) +; +indelMaxRefRepeat = 8 + +; +; Somatic indel calls are filtered if greater than this fraction of +; basecalls in a window extending 50 bases to each side of an indel's +; call position have been removed by the mismatch density filter. +; +indelMaxWindowFilteredBasecallFrac = 0.3 + +; +; Somatic indels are filtered if they overlap 'interrupted +; homopolymers' greater than this length. The term 'interrupted +; homopolymer' is used to indicate the longest homopolymer which can +; be found intersecting or adjacent to the called indel when a single +; non-homopolymer base is allowed. +; +indelMaxIntHpolLength = 14 + +; +; prior probability of a somatic snv or indel +; +ssnvPrior = 0.000001 +sindelPrior = 0.000001 + +; +; probability of an snv or indel noise allele +; +; NB: in the calling model a noise allele is shared in tumor and +; normal samples, but occurs at any frequency. +; +ssnvNoise = 0.0000005 +sindelNoise = 0.0000001 + +; +; Fraction of snv noise attributed to strand-bias. +; +; It is not recommended to change this setting. 
However, if it is +; essential to turn the strand bias penalization off, the following is +; recommended: +; Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5, +; (1) set ssnvNoiseStrandBiasFrac = 0 +; (2) divide the current ssnvNoise value by 2 +; +ssnvNoiseStrandBiasFrac = 0.5 + +; +; minimum MAPQ score for PE reads at tier1: +; +minTier1Mapq = 40 + +; +; minimum MAPQ score for PE and SE reads at tier2: +; +minTier2Mapq = 5 + + +; +; Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are +; marked as filtered: +; +ssnvQuality_LowerBound = 15 + +; +; Somatic quality score (QSI_NT, NT=ref) below which somatic indels +; are marked as filtered: +; +sindelQuality_LowerBound = 30 + +; +; Optionally write out read alignments which were altered during the +; realignment step. At the completion of the workflow run, the +; realigned reads can be found in: +; +; ${ANALYSIS_DIR}/realigned/{normal,tumor}.realigned.bam +; +isWriteRealignedBam = 0 + +; +; Jobs are parallelized over segments of the reference genome no larger +; than this size: +; +binSize = 25000000 + +; +; Additional arguments passed to strelka. +; +extraStrelkaArguments = --eland-compatibility + diff -r 8260a4188b08 -r 3c10d88b55ad lib/strelka_config_isaac_default.ini --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/strelka_config_isaac_default.ini Wed Oct 15 14:43:12 2014 +0200 @@ -0,0 +1,139 @@ + +; +; User configuration options for Strelka somatic small-variant caller +; workflow: +; + +[user] + +; +; isSkipDepthFilters should be set to 1 to skip depth filtration for +; whole exome or other targeted sequencing data +; +isSkipDepthFilters = 0 + +; +; strelka will not accept input reads above this depth (they will be skipped +; until the depth drops below this value). Set this value <= 0 to disable +; this feature. Using this filter will bound memory usage given extremely high +; depth input, but may be problematic in high-depth targeted sequencing +; applications. 
+; +maxInputDepth = 10000 + +; +; If the depth filter is not skipped, all variants which occur at a +; depth greater than depthFilterMultiple*chromosome mean depth will be +; filtered out. +; +depthFilterMultiple = 3.0 + +; +; Somatic SNV calls are filtered at sites where greater than this +; fraction of basecalls have been removed by the mismatch density +; filter in either sample. +; +snvMaxFilteredBasecallFrac = 0.4 + +; +; Somatic SNV calls are filtered at sites where greater than this +; fraction of overlapping reads contain deletions which span the SNV +; call site. +; +snvMaxSpanningDeletionFrac = 0.75 + +; +; Somatic indel calls are filtered if they represent an expansion or +; contraction of a repeated pattern with a repeat count greater than +; indelMaxRefRepeat in the reference (i.e. if indelMaxRefRepeat is 8, +; then the indel is filtered when it is an expansion/contraction of a +; homopolymer longer than 8 bases, a dinucleotide repeat longer than +; 16 bases, etc.) +; +indelMaxRefRepeat = 8 + +; +; Somatic indel calls are filtered if greater than this fraction of +; basecalls in a window extending 50 bases to each side of an indel's +; call position have been removed by the mismatch density filter. +; +indelMaxWindowFilteredBasecallFrac = 0.3 + +; +; Somatic indels are filtered if they overlap 'interrupted +; homopolymers' greater than this length. The term 'interrupted +; homopolymer' is used to indicate the longest homopolymer which can +; be found intersecting or adjacent to the called indel when a single +; non-homopolymer base is allowed. +; +indelMaxIntHpolLength = 14 + +; +; prior probability of a somatic snv or indel +; +ssnvPrior = 0.000001 +sindelPrior = 0.000001 + +; +; probability of an snv or indel noise allele +; +; NB: in the calling model a noise allele is shared in tumor and +; normal samples, but occurs at any frequency. +; +ssnvNoise = 0.0000005 +sindelNoise = 0.000001 + +; +; Fraction of snv noise attributed to strand-bias. 
+; +; It is not recommended to change this setting. However, if it is +; essential to turn the strand bias penalization off, the following is +; recommended: +; Assuming the current value of ssnvNoiseStrandBiasFrac is 0.5, +; (1) set ssnvNoiseStrandBiasFrac = 0 +; (2) divide the current ssnvNoise value by 2 +; +ssnvNoiseStrandBiasFrac = 0.5 + +; +; minimum MAPQ score for PE reads at tier1: +; +minTier1Mapq = 20 + +; +; minimum MAPQ score for PE and SE reads at tier2: +; +minTier2Mapq = 0 + +; +; Somatic quality score (QSS_NT, NT=ref) below which somatic SNVs are +; marked as filtered: +; +ssnvQuality_LowerBound = 15 + +; +; Somatic quality score (QSI_NT, NT=ref) below which somatic indels +; are marked as filtered: +; +sindelQuality_LowerBound = 30 + +; +; Optionally write out read alignments which were altered during the +; realignment step. At the completion of the workflow run, the +; realigned reads can be found in: +; +; ${ANALYSIS_DIR}/realigned/{normal,tumor}.realigned.bam +; +isWriteRealignedBam = 0 + +; +; Jobs are parallelized over segments of the reference genome no larger +; than this size: +; +binSize = 25000000 + +; +; Additional arguments passed to strelka. 
+; +extraStrelkaArguments = --remap-input-softclip + diff -r 8260a4188b08 -r 3c10d88b55ad strelka.xml --- a/strelka.xml Wed Oct 01 13:45:33 2014 +0200 +++ b/strelka.xml Wed Oct 15 14:43:12 2014 +0200 @@ -6,9 +6,19 @@ samtools vcftools - strelka_wrapper.py --tumorBam $tumorBam --normalBam $normalBam --refFile $refFile - #if $configuration.configuration_switch == 'Default': - --configFile Default + strelka_wrapper.py --tumorBam $tumorBam --normalBam $normalBam + #if $genomeSource.refGenomeSource == "history": + --refFile "${genomeSource.ownFile}" + #else: + --refFile "${genomeSource.index.fields.path}" + #end if + + #if $configuration.configuration_switch == 'Default for Bwa': + --configFile strelka_config_bwa_default.ini + #else if $configuration.configuration_switch == 'Default for Isaac': + --configFile strelka_config_isaac_default.ini + #else if $configuration.configuration_switch == 'Default for Eland': + --configFile strelka_config_eland_default.ini #else if $configuration.configuration_switch == 'Path': --configFile $configuration.configFile #else: @@ -40,13 +50,28 @@ - - - + + + + + + + + + + + + + + + + - + + + @@ -57,25 +82,27 @@ - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + @@ -85,18 +112,18 @@ - + - - - - - - - + + + + + + + @@ -139,7 +166,7 @@ Strelka, a method for somatic SNV and small indel detection from sequencing data of matched tumor-normal samples. -You can see more information at : https://sites.google.com/site/strelkasomaticvariantcaller +You can see more information at : https://sites.google.com/site/strelkasomaticvariantcaller. 
diff -r 8260a4188b08 -r 3c10d88b55ad strelka_wrapper.py --- a/strelka_wrapper.py Wed Oct 01 13:45:33 2014 +0200 +++ b/strelka_wrapper.py Wed Oct 15 14:43:12 2014 +0200 @@ -61,30 +61,30 @@ print(os.environ['PATH']) parser = argparse.ArgumentParser() parser.add_argument( '-t', '--tumorBam', help='path to tumor bam file', required = False ) - parser.add_argument( '-n', '--normalBam', help='path to tumor bam file', required = False ) - parser.add_argument( '-r', '--refFile', help='path to tumor bam file', required = False ) - parser.add_argument( '-c', '--configFile', help='path to tumor bam file', required = False ) - parser.add_argument( '--depthFilterMultiple', help='path to tumor bam file', required = False ) - parser.add_argument( '--snvMaxFilteredBasecallFrac', help='path to tumor bam file', required = False ) - parser.add_argument( '--snvMaxSpanningDeletionFrac', help='path to tumor bam file', required = False ) - parser.add_argument( '--indelMaxRefRepeat', help='path to tumor bam file', required = False ) - parser.add_argument( '--indelMaxWindowFilteredBasecallFrac', help='path to tumor bam file', required = False ) - parser.add_argument( '--indelMaxIntHpolLength', help='path to tumor bam file', required = False ) - parser.add_argument( '--ssnvPrior', help='path to tumor bam file', required = False ) - parser.add_argument( '--sindelPrior', help='path to tumor bam file', required = False ) - parser.add_argument( '--ssnvNoise', help='path to tumor bam file', required = False ) - parser.add_argument( '--sindelNoise', help='path to tumor bam file', required = False ) - parser.add_argument( '--ssnvNoiseStrandBiasFrac', help='path to tumor bam file', required = False ) - parser.add_argument( '--minTier1Mapq', help='path to tumor bam file', required = False ) - parser.add_argument( '--minTier2Mapq', help='path to tumor bam file', required = False ) - parser.add_argument( '--ssnvQuality_LowerBound', help='path to tumor bam file', required = False ) - 
parser.add_argument( '--sindelQuality_LowerBound', help='path to tumor bam file', required = False ) - parser.add_argument( '--isWriteRealignedBam', help='path to tumor bam file', required = False ) + parser.add_argument( '-n', '--normalBam', help='', required = False ) + parser.add_argument( '-r', '--refFile', help='', required = False ) + parser.add_argument( '-c', '--configFile', help='', required = False ) + parser.add_argument( '--depthFilterMultiple', help='', required = False ) + parser.add_argument( '--snvMaxFilteredBasecallFrac', help='', required = False ) + parser.add_argument( '--snvMaxSpanningDeletionFrac', help='', required = False ) + parser.add_argument( '--indelMaxRefRepeat', help='', required = False ) + parser.add_argument( '--indelMaxWindowFilteredBasecallFrac', help='', required = False ) + parser.add_argument( '--indelMaxIntHpolLength', help='', required = False ) + parser.add_argument( '--ssnvPrior', help='', required = False ) + parser.add_argument( '--sindelPrior', help='', required = False ) + parser.add_argument( '--ssnvNoise', help='', required = False ) + parser.add_argument( '--sindelNoise', help='', required = False ) + parser.add_argument( '--ssnvNoiseStrandBiasFrac', help='', required = False ) + parser.add_argument( '--minTier1Mapq', help='', required = False ) + parser.add_argument( '--minTier2Mapq', help='', required = False ) + parser.add_argument( '--ssnvQuality_LowerBound', help='', required = False ) + parser.add_argument( '--sindelQuality_LowerBound', help='', required = False ) + parser.add_argument( '--isWriteRealignedBam', help='', required = False ) parser.add_argument( '--binSize', help='path to tumor bam file', required = False ) - parser.add_argument( '--extraStrelkaArguments', help='path to tumor bam file', required = False ) - parser.add_argument( '--isSkipDepthFilters', help='path to tumor bam file', required = False ) - parser.add_argument( '--maxInputDepth', help='path to tumor bam file', required = False ) - 
parser.add_argument( '--scriptPath', help='path to tumor bam file', required = False ) + parser.add_argument( '--extraStrelkaArguments', help='', required = False ) + parser.add_argument( '--isSkipDepthFilters', help='', required = False ) + parser.add_argument( '--maxInputDepth', help='', required = False ) + parser.add_argument( '--scriptPath', help='', required = False ) args = parser.parse_args() root_dir= args.scriptPath @@ -115,8 +115,8 @@ #creating config file if needed if args.configFile == "Custom": _create_config(vars(args), config_ini) - elif args.configFile == "Default": - cmdbash="cp %s %s" % (root_dir + "/strelka_config.sample", config_ini) + elif args.configFile in ["strelka_config_bwa_default.ini", "strelka_config_isaac_default.ini", "strelka_config_eland_default.ini"]: + cmdbash="cp %s %s" % (root_dir + "/lib/" + args.configFile, config_ini) my_Popen(cmdbash, "copy_default_file_err", tmp_dir, "Error during the copy of default config file, maybe it was removed") else: if not os.path.exists(args.configFile):