changeset 2:2fe9ebb98aad draft

planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit 30867f1f022bed18ba1c3b8dc9c54226890b3a9c
author iuc
date Tue, 04 Dec 2018 05:15:50 -0500
parents 31a38ce7e8ae
children 7903598ccbaf
files macros.xml test-data/control_chrM.bam test-data/hg19_chrM.fa test-data/tumor_chrM.bam test-data/varscan_somatic_indel_result1.vcf test-data/varscan_somatic_indel_result2.vcf test-data/varscan_somatic_snp_result1.vcf test-data/varscan_somatic_snp_result2.vcf varscan.py varscan_somatic.xml
diffstat 10 files changed, 2030 insertions(+), 195 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Sun Jul 15 09:19:25 2018 -0400
+++ b/macros.xml	Tue Dec 04 05:15:50 2018 -0500
@@ -2,7 +2,6 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@VERSION@">varscan</requirement>
-            <requirement type="package" version="4.2.1">gawk</requirement>
             <yield/>
         </requirements>
     </xml>
@@ -20,6 +19,7 @@
     <xml name="citations">
         <citations>
             <citation type="doi">10.1101/gr.129684.111</citation>
+            <citation type="doi">10.1002/0471250953.bi1504s44</citation>
         </citations>
     </xml>
 
@@ -52,29 +52,34 @@
         
     </token>
 
-    <xml name="min_coverage">
+    <xml name="min_coverage" token_help="Minimum depth at a position to make a call">
         <param argument="--min-coverage" name="min_coverage" type="integer" value="8" min="1" max="200"
-            label="Minimum read depth" help="Minimum depth at a position to make a call"/>
+            label="Minimum coverage" help="@HELP@"/>
     </xml>
     <xml name="min_reads2">
         <param argument="--min-reads2" name="min_reads2" type="integer" value="2" min="1" max="200"
-            label="Minimum supporting reads" help="Minimum supporting reads at a position to make a call"/>
+            label="Minimum supporting reads" help="Minimum number of variant-supporting reads at a position required to make a call"/>
     </xml>
     <xml name="min_avg_qual">
         <param argument="--min-avg-qual" name="min_avg_qual" type="integer" value="15" min="1" max="50"
-            label="Minimum base quality at a position to count a read"/>
+            label="Minimum base quality"
+            help="The minimum base quality at the variant position required to use a read for calling" />
     </xml>
     <xml name="min_var_freq" token_value="0.01">
         <param argument="--min-var-freq" name="min_var_freq" type="float" value="@VALUE@" min="0" max="1"
-            label="Minimum variant allele frequency threshold"/>
+            label="Minimum variant allele frequency"
+            help="Minimum variant allele frequency required for calling a variant"/>
     </xml>
     <xml name="min_freq_for_hom">
         <param argument="--min-freq-for-hom" name="min_freq_for_hom" type="float" value="0.75" min="0" max="1"
-            label="Minimum frequency to call homozygote"/>
+            label="Minimum homozygous variant allele frequency"
+            help="Minimum variant allele frequency required for calling a homozygous genotype" />
     </xml>
-    <xml name="p_value" token_label="p-value threshold for calling variants" token_value="0.01">
-        <param argument="--p-value" name="p_value" type="float" value="@VALUE@" min="0.0" max="1.0"
-            label="@LABEL@"/>
+    <xml name="p_value" token_value="0.01"
+    token_label="p-value threshold for calling variants"
+    token_help="">
+        <param argument="--p-value" name="p_value" type="float" value="@VALUE@" min="0" max="1"
+            label="@LABEL@" help="@HELP@"/>
     </xml>
     <xml name="strand_filter">
         <param name="strand_filter" type="select" label="Ignore variants with >90% support on one strand">
@@ -83,4 +88,12 @@
         </param>
     </xml>
 
+    <token name="@HELP_HEADER@"><![CDATA[
+**VarScan Overview**
+
+VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data. Full documentation of the command line package is available here_.
+
+.. _VarScan: http://dkoboldt.github.io/varscan/
+.. _here: http://dkoboldt.github.io/varscan/using-varscan.html
+   ]]></token>
 </macros>
Binary file test-data/control_chrM.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_chrM.fa	Tue Dec 04 05:15:50 2018 -0500
@@ -0,0 +1,333 @@
+>chrM
+GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT
+TTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTG
+GAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATT
+CTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACCTACTA
+AAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAAT
+GTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA
+AACCCCCCCCTCCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGC
+CAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAAT
+TTTATCTTTAGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACA
+TTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATACAACCCCC
+GCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAAC
+CAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCA
+AAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAA
+ATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGC
+AAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAG
+GGACAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCC
+ACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAAACGAAAGT
+TTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACC
+GCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTT
+TAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAA
+CTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAAC
+ACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGC
+CCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGA
+GCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGA
+GGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGC
+TCAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGT
+AAGCGCAAGTACCCACGTAAAGACGTTAGGTCAAGGTGTAGCCCATGAGG
+TGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTT
+ATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGT
+GCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCC
+TCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATA
+GAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACG
+AACCAGAGTGTAGCTTAACACAAAGCACCCAACTTACACTTAGGAGATTT
+CAACTTAACTTGACCGCTCTGAGCTAAACCTAGCCCCAAACCCACTCCAC
+CTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGG
+CGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGA
+TGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTT
+CTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAA
+GACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCC
+GTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCT
+ACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTT
+TAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAG
+TCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGA
+GTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGA
+AAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAACATATAACT
+GAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAA
+TGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCA
+GATCAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCA
+ACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAG
+GAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAACCTTACCCCGCCTGTT
+TACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCC
+CAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAaaggtagca
+taatcacttgttccttaaatagggacctgtatgaatggctccacgagggt
+tcagctgtctcttacttttaaccagtgaaattgacctgcccgtgaagagg
+cgggcatgacacagcaagacgagaagaccctatggagctttaatttaTTA
+ATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCA
+TTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCA
+GTACATGCTAAGACTTCACCAGTCAAAGCGAACTACTATACTCAATTGAT
+CCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATC
+CTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGAT
+CAGGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGA
+TTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTT
+CTATCTACTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCC
+TACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTAT
+TATACCCACACCCACCCAAGAACAGGGTTTgttaagatggcagagcccgg
+taatcgcataaaacttaaaactttacagtcagaggttcaattcctcttct
+taacaacaTACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAA
+TCGCAATGGCATTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATA
+CAACTACGCAAAGGCCCCAACGTTGTAGGCCCCTACGGGCTACTACAACC
+CTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCA
+CATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATC
+GCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCT
+CAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACT
+CAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGC
+GCACTGCGAGCAGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCCAT
+CATTCTACTATCAACATTACTAATAAGTGGCTCCTTTAACCTCTCCACCC
+TTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCATGACCCTTG
+GCCATAATATGATTTATCTCCACACTAGCAGAGACCAACCGAACCCCCTT
+CGACCTTGCCGAAGGGGAGTCCGAACTAGTCTCAGGCTTCAACATCGAAT
+ACGCCGCAGGCCCCTTCGCCCTATTCTTCATAGCCGAATACACAAACATT
+ATTATAATAAACACCCTCACCACTACAATCTTCCTAGGAACAACATATGA
+CGCACTCTCCCCTGAACTCTACACAACATATTTTGTCACCAAGACCCTAC
+TTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGC
+TACGACCAACTCATACACCTCCTATGAAAAAACTTCCTACCACTCACCCT
+AGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTC
+CCCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTTACTTTGATAGAGT
+AAATAATAGGAGCTTAAACCCCCTTATTTctaggactatgagaatcgaac
+ccatccctgagaatccaaaattctccgtgccacctatcacaccccatcct
+aAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTG
+GTTATACCCTTCCCGTACTAATTAATCCCCTGGCCCAACCCGTCATCTAC
+TCTACCATCTTTGCAGGCACACTCATCACAGCGCTAAGCTCGCACTGATT
+TTTTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTC
+TAACCAAAAAAATAAACCCTCGTTCCACAGAAGCTGCCATCAAGTATTTC
+CTCACGCAAGCAACCGCATCCATAATCCTTCTAATAGCTATCCTCTTCAA
+CAATATACTCTCCGGACAATGAACCATAACCAATACTACCAATCAATACT
+CATCATTAATAATCATAATGGCTATAGCAATAAAACTAGGAATAGCCCCC
+TTTCACTTCTGAGTCCCAGAGGTTACCCAAGGCACCCCTCTGACATCCGG
+CCTGCTTCTTCTCACATGACAAAAACTAGCCCCCATCTCAATCATATACC
+AAATCTCTCCCTCACTAAACGTAAGCCTTCTCCTCACTCTCTCAATCTTA
+TCCATCATAGCAGGCAGTTGAGGTGGATTAAACCAAACCCAGCTACGCAA
+AATCTTAGCATACTCCTCAATTACCCACATAGGATGAATAATAGCAGTTC
+TACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATC
+CTAACTACTACCGCATTCCTACTACTCAACTTAAACTCCAGCACCACGAC
+CCTACTACTATCTCGCACCTGAAACAAGCTAACATGACTAACACCCTTAA
+TTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTT
+TTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCAT
+CATCCCCACCATCATAGCCACCATCACCCTCCTTAACCTCTACTTCTACC
+TACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAAC
+GTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCC
+CACACTCATCGCCCTTACCACGCTACTCCTACCTATCTCCCCTTTTATAC
+TAATAATCTTATAGAAATTTAGGTTAAATACAGACCAAGAGCCTTCAAAG
+CCCTCAGTAAGTTGCAATACTTAATTTCTGCAACAGCTAAGGACTGCAAA
+ACCCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTAATTAAGCTAA
+GCCCTTACTAGACCAATGGGACTTAAACCCACAAACACTTAGTTAACAGC
+TAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCGCCGGGAAA
+AAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCA
+ATTCAATATGAAAATCACCTCGGAGCTGGTAAAAAGAGGCCTAACCCCTG
+TCTTTAGATTTACAGTCCAATGCTTCACTCAGCCATTTTACCTCACCCCC
+ACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTG
+GAACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCT
+CTAAGCCTCCTTATTCGAGCCGAGCTGGGCCAGCCAGGCAACCTTCTAGG
+TAACGACCACATCTACAACGTTATCGTCACAGCCCATGCATTTGTAATAA
+TCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACTGACTA
+GTTCCCCTAATAATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAA
+CATAAGCTTCTGACTCTTACCTCCCTCTCTCCTACTCCTGCTCGCATCTG
+CTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTA
+GCAGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTC
+CTTACACCTAGCAGGTGTCTCCTCTATCTTAGGGGCCATCAATTTCATCA
+CAACAATTATCAATATAAAACCCCCTGCCATAACCCAATACCAAACGCCC
+CTCTTCGTCTGATCCGTCCTAATCACAGCAGTCCTACTTCTCCTATCTCT
+CCCAGTCCTAGCTGCTGGCATCACTATACTACTAACAGACCGCAACCTCA
+ACACCACCTTCTTCGACCCCGCCGGAGGAGGAGACCCCATTCTATACCAA
+CACCTATTCTGATTTTTCGGTCACCCTGAAGTTTATATTCTTATCCTACC
+AGGCTTCGGAATAATCTCCCATATTGTAACTTACTACTCCGGAAAAAAAG
+AACCATTTGGATACATAGGTATGGTCTGAGCTATGATATCAATTGGCTTC
+CTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGGAATAGACGT
+AGACACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCCA
+CCGGCGTCAAAGTATTTAGCTGACTCGCCACACTCCACGGAAGCAATATG
+AAATGATCTGCTGCAGTGCTCTGAGCCCTAGGATTCATCTTTCTTTTCAC
+CGTAGGTGGCCTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCG
+TACTACACGACACGTACTACGTTGTAGCTCACTTCCACTATGTCCTATCA
+ATAGGAGCTGTATTTGCCATCATAGGAGGCTTCATTCACTGATTTCCCCT
+ATTCTCAGGCTACACCCTAGACCAAACCTACGCCAAAATCCATTTCACTA
+TCATATTCATCGGCGTAAATCTAACTTTCTTCCCACAACACTTTCTCGGC
+CTATCCGGAATGCCCCGACGTTACTCGGACTACCCCGATGCATACACCAC
+ATGAAACATCCTATCATCTGTAGGCTCATTCATTTCTCTAACAGCAGTAA
+TATTAATAATTTTCATGATTTGAGAAGCCTTCGCTTCGAAGCGAAAAGTC
+CTAATAGTAGAAGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCC
+CCCACCCTACCACACATTCGAAGAACCCGTATACATAAAATCTAGACAaa
+aaaggaaggaatcgaaccccccaaagctggtttcaagccaaccccatggc
+ctccatgactttttcAAAAAGGTATTAGAAAAACCATTTCATAACTTTGT
+CAAAGTTAAATTATAGGCTAAATCCTATATATCTTAATGGCACATGCAGC
+GCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTTATCA
+CCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTC
+CTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACAT
+CTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCA
+TCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGAC
+GAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTA
+CTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCCTACA
+TACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTT
+GACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTAC
+ATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAA
+CAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGA
+CCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAG
+TTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAG
+GGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCTCTAGAGCCCACTG
+TAAAGCTAACTTAGCATTAACCTTTTAAGTTAAAGATTAAGAGAACCAAC
+ACCTCTTTACAGTGAAATGCCCCAACTAAATACTACCGTATGGCCCACCA
+TAATTACCCCCATACTCCTTACACTATTCCTCATCACCCAACTAAAAATA
+TTAAACACAAACTACCACCTACCTCCCTCACCAAAGCCCATAAAAATAAA
+AAATTATAACAAACCCTGAGAACCAAAATGAACGAAAATCTGTTCGCTTC
+ATTCATTGCCCCCACAATCCTAGGCCTACCCGCCGCAGTACTGATCATTC
+TATTTCCCCCTCTATTGATCCCCACCTCCAAATATCTCATCAACAACCGA
+CTAATCACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGAT
+AGCCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCT
+TAATCATTTTTATTGCCACAACTAACCTCCTCGGACTCCTGCCTCACTCA
+TTTACACCAACCACCCAACTATCTATAAACCTAGCCATGGCCATCCCCTT
+ATGAGCGGGCGCAGTGATTATAGGCTTTCGCTCTAAGATTAAAAATGCCC
+TAGCCCACTTCTTACCACAAGGCACACCTACACCCCTTATCCCCATACTA
+GTTATTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGT
+ACGCCTAACCGCTAACATTACTGCAGGCCACCTACTCATGCACCTAATTG
+GAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATC
+ATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTT
+AATCCAAGCCTACGTTTTCACACTTCTAGTAAGCCTCTACCTGCACGACA
+ACACATAATGACCCACCAATCACATGCCTATCATATAGTAAAACCCAGCC
+CATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTCCGGCCTA
+GCCATGTGATTTCACTTCCACTCCATAACGCTCCTCATACTAGGCCTACT
+AACCAACACACTAACCATATACCAATGGTGGCGCGATGTAACACGAGAAA
+GCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATAC
+GGGATAATCCTATTTATTACCTCAGAAGTTTTTTTCTTCGCAGGATTTTT
+CTGAGCCTTTTACCACTCCAGCCTAGCCCCTACCCCCCAACTAGGAGGGC
+ACTGGCCCCCAACAGGCATCACCCCGCTAAATCCCCTAGAAGTCCCACTC
+CTAAACACATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCA
+CCATAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTA
+TTACAATTTTACTGGGTCTCTATTTTACCCTCCTACAAGCCTCAGAGTAC
+TTCGAGTCTCCCTTCACCATTTCCGACGGCATCTACGGCTCAACATTTTT
+TGTAGCCACAGGCTTCCACGGACTTCACGTCATTATTGGCTCAACTTTCC
+TCACTATCTGCTTCATCCGCCAACTAATATTTCACTTTACATCCAAACAT
+CACTTTGGCTTCGAAGCCGCCGCCTGATACTGGCATTTTGTAGATGTGGT
+TTGACTATTTCTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTTTA
+GTATAAATAGTACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCAA
+AAAAGAGTAATAAACTTCGCCTTAATTTTAATAATCAACACCCTCCTAGC
+CTTACTACTAATAATTATTACATTTTGACTACCACAACTCAACGGCTACA
+TAGAAAAATCCACCCCTTACGAGTGCGGCTTCGACCCTATATCCCCCGCC
+CGCGTCCCTTTCTCCATAAAATTCTTCTTAGTAGCTATTACCTTCTTATT
+ATTTGATCTAGAAATTGCCCTCCTTTTACCCCTACCATGAGCCCTACAAA
+CAACTAACCTGCCACTAATAGTTATGTCATCCCTCTTATTAATCATCATC
+CTAGCCCTAAGTCTGGCCTATGAGTGACTACAAAAAGGATTAGACTGAGC
+CGAATTGGTATATAGTTTAAACAAAACGAATGATTTCGACTCATTAAATT
+ATGATAATCATATTTACCAAATGCCCCTCATTTACATAAATATTATACTA
+GCATTTACCATCTCACTTCTAGGAATACTAGTATATCGCTCACACCTCAT
+ATCCTCCCTACTATGCCTAGAAGGAATAATACTATCGCTGTTCATTATAG
+CTACTCTCATAACCCTCAACACCCACTCCCTCTTAGCCAATATTGTGCCT
+ATTGCCATACTAGTCTTTGCCGCCTGCGAAGCAGCGGTGGGCCTAGCCCT
+ACTAGTCTCAATCTCCAACACATATGGCCTAGACTACGTACATAACCTAA
+ACCTACTCCAATGCTAAAACTAATCGTCCCAACAATTATATTACTACCAC
+TGACATGACTTTCCAAAAAGCACATAATTTGAATCAACACAACCACCCAC
+AGCCTAATTATTAGCATCATCCCCCTACTATTTTTTAACCAAATCAACAA
+CAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTAACAACCC
+CCCTCCTAATACTAACTACCTGACTCCTACCCCTCACAATCATGGCAAGC
+CAACGCCACTTATCCAGCGAACCACTATCACGAAAAAAACTCTACCTCTC
+TATACTAATCTCCCTACAAATCTCCTTAATTATAACATTCACAGCCACAG
+AACTAATCATATTTTATATCTTCTTCGAAACCACACTTATCCCCACCTTG
+GCTATCATCACCCGATGAGGCAACCAGCCAGAACGCCTGAACGCAGGCAC
+ATACTTCCTATTCTACACCCTAGTAGGCTCCCTTCCCCTACTCATCGCAC
+TAATTTACACTCACAACACCCTAGGCTCACTAAACATTCTACTACTCACT
+CTCACTGCCCAAGAACTATCAAACTCCTGAGCCAACAACTTAATATGACT
+AGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGACTCCACT
+TATGACTCCCTAAAGCCCATGTCGAAGCCCCCATCGCTGGGTCAATAGTA
+CTTGCCGCAGTACTCTTAAAACTAGGCGGCTATGGTATAATACGCCTCAC
+ACTCATTCTCAACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTAC
+TATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACA
+GACCTAAAATCGCTCATTGCATACTCTTCAATCAGCCACATAGCCCTCGT
+AGTAACAGCCATTCTCATCCAAACCCCCTGAAGCTTCACCGGCGCAGTCA
+TTCTCATAATCGCCCACGGACTCACATCCTCATTACTATTCTGCCTAGCA
+AACTCAAACTACGAACGCACTCACAGTCGCATCATAATCCTCTCTCAAGG
+ACTTCAAACTCTACTCCCACTAATAGCTTTTTGATGACTTCTAGCAAGCC
+TCGCTAACCTCGCCTTACCCCCCACTATTAACCTACTGGGAGAACTCTCT
+GTGCTAGTAACCACGTTCTCCTGATCAAATATCACTCTCCTACTTACAGG
+ACTCAACATACTAGTCACAGCCCTATACTCCCTCTACATATTTACCACAA
+CACAATGGGGCTCACTCACCCACCACATTAACAACATAAAACCCTCATTC
+ACACGAGAAAACACCCTCATGTTCATACACCTATCCCCCATTCTCCTCCT
+ATCCCTCAACCCCGACATCATTACCGGGTTTTCCTCTTGTAAATATAGTT
+TAACCAAAACATCAGATTGTGAATCTGACAACAGAGGCTTACGACCCCTT
+ATTTACCGAGAAAGCTCACAAGAACTGCTAACTCATGCCCCCATGTCTAA
+CAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTA
+GGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCATGCACA
+CTACTATAACCACCCTAACCCTGACTTCCCTAATTCCCCCCATCCTTACC
+ACCCTCGTTAACCCTAACAAAAAAAACTCATACCCCCATTATGTAAAATC
+CATTGTCGCATCCACCTTTATTATCAGTCTCTTCCCCACAACAATATTCA
+TGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTGAGCCACAACC
+CAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAAT
+ATTCATCCCTGTAGCATTGTTCGTTACATGGTCCATCATAGAATTCTCAC
+TGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTA
+CTCATTTTCCTAATTACCATACTAATCTTAGTTACCGCTAACAACCTATT
+CCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCA
+TCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCA
+GTCCTATACAACCGTATCGGCGATATCGGTTTCATCCTCGCCTTAGCATG
+ATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAA
+ACGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCA
+GGCAAATCAGCCCAATTAGGTCTCCACCCCTGACTCCCCTCAGCCATAGA
+AGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTATAGTTGTAG
+CAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCA
+CTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGC
+AGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCT
+CCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAA
+CCACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCAT
+ACTATTTATGTGCTCCGGGTCCATCATCCACAACCTTAACAATGAACAAG
+ATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTCACTTCAACC
+TCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGG
+TTTCTACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAA
+ACGCCTGAGCCCTATCTATTACTCTCATCGCTACCTCCCTGACAAGCGCC
+TATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCC
+CACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTA
+AACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCATTACTAACAAC
+ATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACT
+CACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCA
+ACTACCTAACCAACAAACTTAAAATAAAATCCCCACTATGCACATTTTAT
+TTCTCCAACATACTCGGATTCTACCCTAGCATCACACACCGCACAATCCC
+CTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACC
+TAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATC
+TCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTT
+CCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAAC
+CTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGT
+TCAACCAGTAACCACTACTAATCAACGCCCATAATCATACAAAGCCCCCG
+CACCAATAGGATCCTCCCGAATCAACCCTGACCCCTCTCCTTCATAAATT
+ATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATA
+CTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAA
+CACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCA
+ATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATA
+AATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAA
+TAACACACCCGACCACACCGCTAACAATCAGTACTAAACCCCCATAAATA
+GGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACT
+CAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGA
+CCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATG
+ACCCCAATACGCAAAATTAACCCCCTAATAAAATTAATTAACCACTCATT
+CATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCT
+CACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTA
+GCCATACACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCA
+CATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCA
+ATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGC
+CTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTAT
+CCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGT
+GAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCC
+GCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTA
+CTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCT
+TACCCTTCATTATTGCAGCCCTAGCAGCACTCCACCTCCTATTCTTGCAC
+GAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAAT
+CACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCT
+TCCTTCTCTCCTTAATGACATTAACACTATTCTCACCAGACCTCCTAGGC
+GACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACAT
+CAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCC
+CTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTA
+GCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCG
+CCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTC
+TAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGA
+CAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCTAATACCAAC
+TATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTA
+TAAACTAATACACCAGTCTTGTAAACCGGAGACGAAAACCTTTTTCCAAG
+GACAAATCAGAGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAA
+GATTCTAATTTAAACTATTCTCTGTTCTTTCATGGGGAAGCAGATTTGGG
+TACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTAC
+ATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCA
+CCTGTAGTACATAAAAACCCAACCCACATCAAACCCCCCCCCCCCATGCT
+TACAAGCAAGTACAGCAATCAACCTTCAACTATCACACATCAACTGCAAC
+TCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTT
+AACAGTACATAGTACATAAAGTCATTTACCGTACATAGCACATTACAGTC
+AAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCCTTG
+ACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCT
+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGAC
+ATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGTTCCC
+CTTAAATAAGACATCACGATG
Binary file test-data/tumor_chrM.bam has changed
--- a/test-data/varscan_somatic_indel_result1.vcf	Sun Jul 15 09:19:25 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NORMAL	TUMOR
--- a/test-data/varscan_somatic_indel_result2.vcf	Sun Jul 15 09:19:25 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NORMAL	TUMOR
--- a/test-data/varscan_somatic_snp_result1.vcf	Sun Jul 15 09:19:25 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NORMAL	TUMOR
-chr1	51436072	.	C	A	.	PASS	DP=47;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.4681E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:26:26:0:0%:23,3,0,0	0/1:.:21:20:1:4.76%:19,1,1,0
-chr1	51436311	.	T	C	.	PASS	DP=16;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.375E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:9:9:0:0%:1,8,0,0	0/1:.:7:6:1:14.29%:0,6,0,1
-chr1	51436320	.	G	A	.	PASS	DP=19;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2632E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:9:9:0:0%:1,8,0,0	0/1:.:10:9:1:10%:0,9,0,1
-chr1	51439628	.	T	C	.	str10	DP=237;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.8101E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:123:123:0:0%:77,46,0,0	0/1:.:114:113:1:0.88%:74,39,0,1
-chr1	51439638	.	G	A	.	str10	DP=234;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.9145E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:119:119:0:0%:72,47,0,0	0/1:.:115:114:1:0.87%:75,39,0,1
-chr1	51439665	.	C	T	.	PASS	DP=226;SOMATIC;SS=2;SSC=9;GPV=1E0;SPV=1.2006E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:114:114:0:0%:56,58,0,0	0/1:.:112:109:3:2.68%:63,46,2,1
-chr1	51439671	.	G	A	.	str10	DP=222;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.045E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:110:110:0:0%:53,57,0,0	0/1:.:112:111:1:0.89%:59,52,1,0
-chr1	51439684	.	G	T	.	str10	DP=210;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.9524E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:106:106:0:0%:51,55,0,0	0/1:.:104:103:1:0.96%:53,50,1,0
-chr1	51439703	.	C	T	.	str10	DP=202;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.099E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:99:99:0:0%:46,53,0,0	0/1:.:103:102:1:0.97%:48,54,0,1
-chr1	51439705	.	G	T	.	str10	DP=204;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1961E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:98:98:0:0%:42,56,0,0	0/1:.:106:105:1:0.94%:48,57,0,1
-chr1	51439706	.	G	T	.	str10	DP=201;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1741E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:97:97:0:0%:41,56,0,0	0/1:.:104:103:1:0.96%:46,57,1,0
-chr1	51439726	.	C	G	.	str10	DP=187;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1872E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:90:90:0:0%:37,53,0,0	0/1:.:97:96:1:1.03%:44,52,1,0
-chr1	51439751	.	C	G	.	str10	DP=168;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.3293E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:79:78:0:0%:28,50,0,0	0/1:.:89:88:1:1.12%:35,53,0,1
-chr1	51439763	.	G	A	.	PASS	DP=159;SOMATIC;SS=2;SSC=5;GPV=1E0;SPV=2.7092E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:76:76:0:0%:34,42,0,0	0/1:.:83:81:2:2.41%:32,49,1,1
-chr1	51439766	.	G	T	.	str10	DP=154;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1299E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:75:75:0:0%:34,41,0,0	0/1:.:79:78:1:1.27%:30,48,0,1
-chr1	51439788	.	T	C	.	str10	DP=136;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1471E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:66:66:0:0%:21,45,0,0	0/1:.:70:69:1:1.43%:24,45,1,0
-chr1	51439828	.	G	A	.	str10	DP=122;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.7377E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:52:52:0:0%:14,38,0,0	0/1:.:70:69:1:1.43%:27,42,0,1
-chr1	51439832	.	C	G	.	str10	DP=125;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.52E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:56:56:0:0%:14,42,0,0	0/1:.:69:68:1:1.45%:25,43,0,1
-chr1	51439876	.	T	G	.	str10	DP=105;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.619E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:46:46:0:0%:10,36,0,0	0/1:.:59:58:1:1.69%:23,35,0,1
-chr1	51439882	.	G	T	.	str10	DP=105;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2381E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:50:50:0:0%:13,37,0,0	0/1:.:55:54:1:1.82%:22,32,0,1
-chr1	51439889	.	G	T	.	str10	DP=97;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.1546E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:47:47:0:0%:14,33,0,0	0/1:.:50:49:1:2%:21,28,0,1
-chr1	51439953	.	G	T	.	str10	DP=59;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.9322E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:24:24:0:0%:5,19,0,0	0/1:.:35:34:1:2.86%:7,27,0,1
-chr1	51440035	.	G	T	.	PASS	DP=21;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=6.1905E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:8:8:0:0%:1,7,0,0	0/1:.:13:12:1:7.69%:1,11,0,1
--- a/test-data/varscan_somatic_snp_result2.vcf	Sun Jul 15 09:19:25 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-##fileformat=VCFv4.1
-##source=VarScan2
-##INFO=<ID=DP,Number=1,Type=Integer,Description="Total depth of quality bases">
-##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description="Indicates if record is a somatic mutation">
-##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status of variant (0=Reference,1=Germline,2=Somatic,3=LOH, or 5=Unknown)">
-##INFO=<ID=SSC,Number=1,Type=String,Description="Somatic score in Phred scale (0-255) derived from somatic p-value">
-##INFO=<ID=GPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor+normal versus no variant for Germline calls">
-##INFO=<ID=SPV,Number=1,Type=Float,Description="Fisher's Exact Test P-value of tumor versus normal for Somatic/LOH calls">
-##FILTER=<ID=str10,Description="Less than 10% or more than 90% of variant supporting reads on one strand">
-##FILTER=<ID=indelError,Description="Likely artifact due to indel reads at this position">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Depth of reference-supporting bases (reads1)">
-##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Depth of variant-supporting bases (reads2)">
-##FORMAT=<ID=FREQ,Number=1,Type=String,Description="Variant allele frequency">
-##FORMAT=<ID=DP4,Number=1,Type=String,Description="Strand read counts: ref/fwd, ref/rev, var/fwd, var/rev">
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NORMAL	TUMOR
-chr1	51436072	.	C	A	.	PASS	DP=47;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.4681E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:26:26:0:0%:23,3,0,0	0/1:.:21:20:1:4.76%:19,1,1,0
-chr1	51436311	.	T	C	.	PASS	DP=16;SOMATIC;SS=2;SSC=3;GPV=1E0;SPV=4.375E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:9:9:0:0%:1,8,0,0	0/1:.:7:6:1:14.29%:0,6,0,1
-chr1	51436320	.	G	A	.	PASS	DP=19;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=5.2632E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:9:9:0:0%:1,8,0,0	0/1:.:10:9:1:10%:0,9,0,1
-chr1	51439665	.	C	T	.	PASS	DP=226;SOMATIC;SS=2;SSC=9;GPV=1E0;SPV=1.2006E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:114:114:0:0%:56,58,0,0	0/1:.:112:109:3:2.68%:63,46,2,1
-chr1	51439763	.	G	A	.	PASS	DP=159;SOMATIC;SS=2;SSC=5;GPV=1E0;SPV=2.7092E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:76:76:0:0%:34,42,0,0	0/1:.:83:81:2:2.41%:32,49,1,1
-chr1	51440025	.	A	C	.	PASS	DP=27;SOMATIC;SS=2;SSC=1;GPV=1E0;SPV=6.6667E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:9:9:0:0%:1,8,0,0	0/1:.:18:17:1:5.56%:1,16,0,1
-chr1	51440035	.	G	T	.	PASS	DP=21;SOMATIC;SS=2;SSC=2;GPV=1E0;SPV=6.1905E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:8:8:0:0%:1,7,0,0	0/1:.:13:12:1:7.69%:1,11,0,1
-chr1	51440056	.	T	G	.	PASS	DP=22;SOMATIC;SS=2;SSC=1;GPV=1E0;SPV=6.3636E-1	GT:GQ:DP:RD:AD:FREQ:DP4	0/0:.:8:8:0:0%:1,7,0,0	0/1:.:14:13:1:7.14%:1,12,0,1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varscan.py	Tue Dec 04 05:15:50 2018 -0500
@@ -0,0 +1,1178 @@
+#!/usr/bin/env python3
+from __future__ import print_function
+
+import argparse
+import io
+import os
+import subprocess
+import sys
+import tempfile
+import time
+from contextlib import ExitStack
+from functools import partial
+from threading import Thread
+
+import pysam
+
+
+class VariantCallingError (RuntimeError):
+    """Exception class for issues with samtools and varscan subprocesses."""
+
+    def __init__(self, message=None, call='', error=''):
+        self.message = message
+        self.call = call.strip()
+        self.error = error.strip()
+
+    def __str__(self):
+        if self.message is None:
+            return ''
+        if self.error:
+            msg_header = '"{0}" failed with:\n{1}\n\n'.format(
+                self.call, self.error
+            )
+        else:
+            msg_header = '{0} failed.\n'
+            'No further information about this error is available.\n\n'.format(
+                self.call
+            )
+        return msg_header + self.message
+
+
+class VarScanCaller (object):
+    def __init__(self, ref_genome, bam_input_files,
+                 max_depth=None,
+                 min_mapqual=None, min_basequal=None,
+                 threads=1, verbose=False, quiet=True
+                 ):
+        self.ref_genome = ref_genome
+        self.bam_input_files = bam_input_files
+        self.max_depth = max_depth
+        self.min_mapqual = min_mapqual
+        self.min_basequal = min_basequal
+        self.threads = threads
+        self.verbose = verbose
+        self.quiet = quiet
+
+        with pysam.FastaFile(ref_genome) as ref_fa:
+            self.ref_contigs = ref_fa.references
+            self.ref_lengths = ref_fa.lengths
+
+        self.pileup_engine = ['samtools', 'mpileup']
+        self.varcall_engine = ['varscan', 'somatic']
+        self.requires_stdout_redirect = False
+        self.TemporaryContigVCF = partial(
+            tempfile.NamedTemporaryFile,
+            mode='wb', suffix='', delete=False, dir=os.getcwd()
+        )
+        self.tmpfiles = []
+
+    def _get_pysam_pileup_args(self):
+        param_dict = {}
+        if self.max_depth is not None:
+            param_dict['max_depth'] = self.max_depth
+        if self.min_mapqual is not None:
+            param_dict['min_mapping_quality'] = self.min_mapqual
+        if self.min_basequal is not None:
+            param_dict['min_base_quality'] = self.min_basequal
+        param_dict['compute_baq'] = False
+        param_dict['stepper'] = 'samtools'
+        return param_dict
+
+    def varcall_parallel(self, normal_purity=None, tumor_purity=None,
+                         min_coverage=None,
+                         min_var_count=None,
+                         min_var_freq=None, min_hom_freq=None,
+                         p_value=None, somatic_p_value=None,
+                         threads=None, verbose=None, quiet=None
+                         ):
+        if not threads:
+            threads = self.threads
+        if verbose is None:
+            verbose = self.verbose
+        if quiet is None:
+            quiet = self.quiet
+        # mapping of method parameters to varcall engine command line options
+        varcall_engine_option_mapping = [
+            ('--normal-purity', normal_purity),
+            ('--tumor-purity', tumor_purity),
+            ('--min-coverage', min_coverage),
+            ('--min-reads2', min_var_count),
+            ('--min-var-freq', min_var_freq),
+            ('--min-freq-for-hom', min_hom_freq),
+            ('--p-value', p_value),
+            ('--somatic-p-value', somatic_p_value),
+            ('--min-avg-qual', self.min_basequal)
+        ]
+        varcall_engine_options = []
+        for option, value in varcall_engine_option_mapping:
+            if value is not None:
+                varcall_engine_options += [option, str(value)]
+        pileup_engine_options = ['-B']
+        if self.max_depth is not None:
+            pileup_engine_options += ['-d', str(self.max_depth)]
+        if self.min_mapqual is not None:
+            pileup_engine_options += ['-q', str(self.min_mapqual)]
+        if self.min_basequal is not None:
+            pileup_engine_options += ['-Q', str(self.min_basequal)]
+
+        # Create a tuple of calls to samtools mpileup and varscan for
+        # each contig. The contig name is stored as the third element of
+        # that tuple.
+        # The calls are stored in the reverse order of the contig list so
+        # that they can be popped off later in the original order
+        calls = [(
+            self.pileup_engine + pileup_engine_options + [
+                '-r', contig + ':',
+                '-f', self.ref_genome
+            ] + self.bam_input_files,
+            self.varcall_engine + [
+                '-', '{out}', '--output-vcf', '1', '--mpileup', '1'
+            ] + varcall_engine_options,
+            contig
+        ) for contig in self.ref_contigs[::-1]]
+
+        if verbose:
+            print('Starting variant calling ..')
+
+        # launch subprocesses and monitor their status
+        subprocesses = []
+        error_table = {}
+        tmp_io_started = []
+        tmp_io_finished = []
+        self.tmpfiles = []
+
+        def enqueue_stderr_output(out, stderr_buffer):
+            for line in iter(out.readline, b''):
+                # Eventually we are going to print the contents of
+                # the stderr_buffer to sys.stderr so we can
+                # decode things here using its encoding.
+                # We do a 'backslashreplace' just to be on the safe side.
+                stderr_buffer.write(line.decode(sys.stderr.encoding,
+                                                'backslashreplace'))
+            out.close()
+
+        try:
+            while subprocesses or calls:
+                while calls and len(subprocesses) < threads:
+                    # There are still calls waiting for execution and we
+                    # have unoccupied threads so we launch a new combined
+                    # call to samtools mpileup and the variant caller.
+
+                    # pop the call arguments from our call stack
+                    call = calls.pop()
+                    # get the name of the contig that this call is going
+                    # to work on
+                    contig = call[2]
+                    # Based on the contig name, generate a readable and
+                    # file system-compatible prefix and use it to create
+                    # a named temporary file, to which the call output
+                    # will be redirected.
+                    # At the moment we create the output file we add it to
+                    # the list of all temporary output files so that we can
+                    # remove it eventually during cleanup.
+                    call_out = self.TemporaryContigVCF(
+                        prefix=''.join(
+                            c if c.isalnum() else '_' for c in contig
+                        ) + '_',
+                    )
+                    # maintain a list of variant call outputs
+                    # in the order the subprocesses got launched
+                    tmp_io_started.append(call_out.name)
+                    if self.requires_stdout_redirect:
+                        # redirect stdout to the temporary file just created
+                        stdout_p2 = call_out
+                        stderr_p2 = subprocess.PIPE
+                    else:
+                        # variant caller wants to write output to file directly
+                        stdout_p2 = subprocess.PIPE
+                        stderr_p2 = subprocess.STDOUT
+                        call[1][call[1].index('{out}')] = call_out.name
+                        call_out.close()
+                    # for reporting purposes, join the arguments for the
+                    # samtools and the variant caller calls into readable
+                    # strings
+                    c_str = (' '.join(call[0]), ' '.join(call[1]))
+                    error_table[c_str] = [io.StringIO(), io.StringIO()]
+                    # start the subprocesses
+                    p1 = subprocess.Popen(
+                        call[0],
+                        stdout=subprocess.PIPE, stderr=subprocess.PIPE
+                    )
+                    p2 = subprocess.Popen(
+                        call[1],
+                        stdin=p1.stdout,
+                        stdout=stdout_p2,
+                        stderr=stderr_p2
+                    )
+                    # subprocess bookkeeping
+                    subprocesses.append((c_str, p1, p2, call_out, contig))
+                    # make sure our newly launched call does not block
+                    # because its buffers fill up
+                    p1.stdout.close()
+                    t1 = Thread(
+                        target=enqueue_stderr_output,
+                        args=(p1.stderr, error_table[c_str][0])
+                    )
+                    t2 = Thread(
+                        target=enqueue_stderr_output,
+                        args=(
+                            p2.stderr
+                            if self.requires_stdout_redirect else
+                            p2.stdout,
+                            error_table[c_str][1]
+                        )
+                    )
+                    t1.daemon = t2.daemon = True
+                    t1.start()
+                    t2.start()
+
+                    if verbose:
+                        print(
+                            'Calling variants for contig: {0}'
+                            .format(call[2])
+                        )
+
+                # monitor all running calls to see if any of them are done
+                for call, p1, p2, ofo, contig in subprocesses:
+                    p1_stat = p1.poll()
+                    p2_stat = p2.poll()
+                    if p1_stat is not None or p2_stat is not None:
+                        # There is an outcome for this process!
+                        # Lets see what it is
+                        if p1_stat or p2_stat:
+                            print()
+                            print(
+                                error_table[call][0].getvalue(),
+                                error_table[call][1].getvalue(),
+                                file=sys.stderr
+                            )
+                            raise VariantCallingError(
+                                'Variant Calling for contig {0} failed.'
+                                .format(contig),
+                                call='{0} | {1}'.format(call[0], call[1])
+                            )
+                        if p1_stat == 0 and p2_stat is None:
+                            # VarScan is not handling the no output from
+                            # samtools mpileup situation correctly so maybe
+                            # that's the issue here
+                            last_words = error_table[call][1].getvalue(
+                            ).splitlines()[-4:]
+                            if len(last_words) < 4 or any(
+                                not msg.startswith('Input stream not ready')
+                                for msg in last_words
+                            ):
+                                break
+                                # lets give this process a bit more time
+                            # VarScan is waiting for input it will never
+                            # get, stop it.
+                            p2.terminate()
+                            subprocesses.remove((call, p1, p2, ofo, contig))
+                            ofo.close()
+                            break
+                        if p2_stat == 0:
+                            # Things went fine.
+                            # maintain a list of variant call outputs
+                            # that finished successfully (in the order
+                            # they finished)
+                            tmp_io_finished.append(ofo.name)
+                            if verbose:
+                                print()
+                                print('Contig {0} finished.'.format(contig))
+                            if not quiet:
+                                print()
+                                print(
+                                    'stderr output from samtools mpileup/'
+                                    'bcftools:'.upper(),
+                                    file=sys.stderr
+                                )
+                                print(
+                                    error_table[call][0].getvalue(),
+                                    error_table[call][1].getvalue(),
+                                    file=sys.stderr
+                                )
+                            # Discard the collected stderr output from
+                            # the call, remove the call from the list of
+                            # running calls and close its output file.
+                            del error_table[call]
+                            subprocesses.remove((call, p1, p2, ofo, contig))
+                            # Closing the output file is important or we
+                            # may hit the file system limit for open files
+                            # if there are lots of contigs.
+                            ofo.close()
+                            break
+                # wait a bit in between monitoring cycles
+                time.sleep(2)
+        finally:
+            for call, p1, p2, ofo, contig in subprocesses:
+                # make sure we do not leave running subprocesses behind
+                for proc in (p1, p2):
+                    try:
+                        proc.terminate()
+                    except Exception:
+                        pass
+                # close currently open files
+                ofo.close()
+            # store the files with finished content in the order that
+            # the corresponding jobs were launched
+            self.tmpfiles = [f for f in tmp_io_started if f in tmp_io_finished]
+            # Make sure remaining buffered stderr output of
+            # subprocesses does not get lost.
+            # Currently, we don't screen this output for real errors,
+            # but simply rewrite everything.
+            if not quiet and error_table:
+                print()
+                print(
+                    'stderr output from samtools mpileup/bcftools:'.upper(),
+                    file=sys.stderr
+                )
+                for call, errors in error_table.items():
+                    print(' | '.join(call), ':', file=sys.stderr)
+                    print('-' * 20, file=sys.stderr)
+                    print('samtools mpileup output:', file=sys.stderr)
+                    print(errors[0].getvalue(), file=sys.stderr)
+                    print('varscan somatic output:', file=sys.stderr)
+                    print(errors[1].getvalue(), file=sys.stderr)
+
+    def _add_ref_contigs_to_header(self, header):
+        for chrom, length in zip(self.ref_contigs, self.ref_lengths):
+            header.add_meta(
+                'contig',
+                items=[('ID', chrom), ('length', length)]
+            )
+
+    def _add_filters_to_header(self, header):
+        varscan_fpfilters = {
+            'VarCount': 'Fewer than {min_var_count2} variant-supporting reads',
+            'VarFreq': 'Variant allele frequency below {min_var_freq2}',
+            'VarAvgRL':
+                'Average clipped length of variant-supporting reads < '
+                '{min_var_len}',
+            'VarReadPos': 'Relative average read position < {min_var_readpos}',
+            'VarDist3':
+                'Average distance to effective 3\' end < {min_var_dist3}',
+            'VarMMQS':
+                'Average mismatch quality sum for variant reads > '
+                '{max_var_mmqs}',
+            'VarMapQual':
+                'Average mapping quality of variant reads < {min_var_mapqual}',
+            'VarBaseQual':
+                'Average base quality of variant reads < {min_var_basequal}',
+            'Strand':
+                'Strand representation of variant reads < {min_strandedness}',
+            'RefAvgRL':
+                'Average clipped length of ref-supporting reads < '
+                '{min_ref_len}',
+            'RefReadPos':
+                'Relative average read position < {min_ref_readpos}',
+            'RefDist3':
+                'Average distance to effective 3\' end < {min_ref_dist3}',
+            'RefMapQual':
+                'Average mapping quality of reference reads < '
+                '{min_ref_mapqual}',
+            'RefBaseQual':
+                'Average base quality of ref-supporting reads < '
+                '{min_ref_basequal}',
+            'RefMMQS':
+                'Average mismatch quality sum for ref-supporting reads > '
+                '{max_ref_mmqs}',
+            'MMQSdiff':
+                'Mismatch quality sum difference (var - ref) > '
+                '{max_mmqs_diff}',
+            'MinMMQSdiff':
+                'Mismatch quality sum difference (var - ref) < '
+                '{max_mmqs_diff}',
+            'MapQualDiff':
+                'Mapping quality difference (ref - var) > {max_mapqual_diff}',
+            'MaxBAQdiff':
+                'Average base quality difference (ref - var) > '
+                '{max_basequal_diff}',
+            'ReadLenDiff':
+                'Average supporting read length difference (ref - var) > '
+                '{max_relative_len_diff}',
+        }
+        for filter_id, description in varscan_fpfilters.items():
+            header.filters.add(filter_id, None, None, description)
+
+    def _add_indel_info_flag_to_header(self, header):
+        header.info.add(
+            'INDEL', 0, 'Flag', 'Indicates that the variant is an INDEL'
+        )
+
+    def _compile_common_header(self, varcall_template, no_filters=False):
+        # fix the header generated by VarScan
+        # by adding reference and contig information
+        common_header = pysam.VariantHeader()
+        common_header.add_meta('reference', value=self.ref_genome)
+        self._add_ref_contigs_to_header(common_header)
+        if not no_filters:
+            # add filter info
+            self._add_filters_to_header(common_header)
+        # change the source information
+        common_header.add_meta('source', value='varscan.py')
+        # declare an INDEL flag for record INFO fields
+        self._add_indel_info_flag_to_header(common_header)
+        # take the remaining metadata from the template header produced by
+        # VarScan
+        with pysam.VariantFile(varcall_template, 'r') as original_data:
+            varscan_header = original_data.header
+        for sample in varscan_header.samples:
+            common_header.samples.add(sample)
+        common_header.merge(varscan_header)
+        return common_header
+
+    def pileup_masker(self, mask):
+        def apply_mask_on_pileup(piled_items):
+            for item, status in zip(piled_items, mask):
+                if status:
+                    yield item
+        return apply_mask_on_pileup
+
+    def get_allele_specific_pileup_column_stats(
+        self, allele, pile_column, ref_fetch
+    ):
+        # number of reads supporting the given allele on
+        # forward and reverse strand, and in total
+        var_reads_plus = var_reads_minus = 0
+        var_supp_read_mask = []
+        for base in pile_column.get_query_sequences():
+            if base == allele:
+                # allele supporting read on + strand
+                var_reads_plus += 1
+                var_supp_read_mask.append(True)
+            elif base.upper() == allele:
+                # allele supporting read on - strand
+                var_reads_minus += 1
+                var_supp_read_mask.append(True)
+            else:
+                var_supp_read_mask.append(False)
+        var_reads_total = var_reads_plus + var_reads_minus
+
+        if var_reads_total == 0:
+            # No stats without reads!
+            return None
+
+        var_supp_only = self.pileup_masker(var_supp_read_mask)
+
+        # average mapping quality of the reads supporting the
+        # given allele
+        avg_mapping_quality = sum(
+            mq for mq in var_supp_only(
+                pile_column.get_mapping_qualities()
+            )
+        ) / var_reads_total
+
+        # for the remaining stats we need access to complete
+        # read information
+        piled_reads = [
+            p for p in var_supp_only(pile_column.pileups)
+        ]
+        assert len(piled_reads) == var_reads_total
+        sum_avg_base_qualities = 0
+        sum_dist_from_center = 0
+        sum_dist_from_3prime = 0
+        sum_clipped_length = 0
+        sum_unclipped_length = 0
+        sum_num_mismatches_as_fraction = 0
+        sum_mismatch_qualities = 0
+
+        for p in piled_reads:
+            sum_avg_base_qualities += sum(
+                p.alignment.query_qualities
+            ) / p.alignment.infer_query_length()
+            sum_clipped_length += p.alignment.query_alignment_length
+            unclipped_length = p.alignment.infer_read_length()
+            sum_unclipped_length += unclipped_length
+            read_center = p.alignment.query_alignment_length / 2
+            sum_dist_from_center += 1 - abs(
+                p.query_position - read_center
+            ) / read_center
+            if p.alignment.is_reverse:
+                sum_dist_from_3prime += p.query_position / unclipped_length
+            else:
+                sum_dist_from_3prime += 1 - p.query_position / unclipped_length
+
+            sum_num_mismatches = 0
+            for qpos, rpos in p.alignment.get_aligned_pairs():
+                if qpos is not None and rpos is not None:
+                    if p.alignment.query_sequence[qpos] != ref_fetch(
+                        rpos, rpos + 1
+                    ).upper():  # ref bases can be lowercase!
+                        sum_num_mismatches += 1
+                        sum_mismatch_qualities += p.alignment.query_qualities[
+                            qpos
+                        ]
+            sum_num_mismatches_as_fraction += (
+                sum_num_mismatches / p.alignment.query_alignment_length
+            )
+        avg_basequality = sum_avg_base_qualities / var_reads_total
+        avg_pos_as_fraction = sum_dist_from_center / var_reads_total
+        avg_num_mismatches_as_fraction = (
+            sum_num_mismatches_as_fraction / var_reads_total
+        )
+        avg_sum_mismatch_qualities = sum_mismatch_qualities / var_reads_total
+        avg_clipped_length = sum_clipped_length / var_reads_total
+        avg_distance_to_effective_3p_end = (
+            sum_dist_from_3prime / var_reads_total
+        )
+
+        return (
+            avg_mapping_quality,
+            avg_basequality,
+            var_reads_plus,
+            var_reads_minus,
+            avg_pos_as_fraction,
+            avg_num_mismatches_as_fraction,
+            avg_sum_mismatch_qualities,
+            avg_clipped_length,
+            avg_distance_to_effective_3p_end
+        )
+
+    def _postprocess_variant_records(self, invcf,
+                                     min_var_count2, min_var_count2_lc,
+                                     min_var_freq2, max_somatic_p,
+                                     max_somatic_p_depth,
+                                     min_ref_readpos, min_var_readpos,
+                                     min_ref_dist3, min_var_dist3,
+                                     min_ref_len, min_var_len,
+                                     max_relative_len_diff,
+                                     min_strandedness, min_strand_reads,
+                                     min_ref_basequal, min_var_basequal,
+                                     max_basequal_diff,
+                                     min_ref_mapqual, min_var_mapqual,
+                                     max_mapqual_diff,
+                                     max_ref_mmqs, max_var_mmqs,
+                                     min_mmqs_diff, max_mmqs_diff,
+                                     **args):
+        # set FILTER field according to Varscan criteria
+        # multiple FILTER entries must be separated by semicolons
+        # No filters applied should be indicated with MISSING
+
+        # since posterior filters are always applied to just one sample,
+        # a better place to store the info is in the FT genotype field:
+        # can be PASS, '.' to indicate that filters have not been applied,
+        # or a semicolon-separated list of filters that failed
+        # unfortunately, gemini does not support this field
+
+        with ExitStack() as io_stack:
+            normal_reads, tumor_reads = (
+                io_stack.enter_context(
+                    pysam.Samfile(fn, 'rb')) for fn in self.bam_input_files
+            )
+            refseq = io_stack.enter_context(pysam.FastaFile(self.ref_genome))
+            pileup_args = self._get_pysam_pileup_args()
+            for record in invcf:
+                if any(len(allele) > 1 for allele in record.alleles):
+                    # skip indel postprocessing for the moment
+                    yield record
+                    continue
+                # get pileup for genomic region affected by this variant
+                if record.info['SS'] == '2':
+                    # a somatic variant => generate pileup from tumor data
+                    pile = tumor_reads.pileup(
+                        record.chrom, record.start, record.stop,
+                        **pileup_args
+                    )
+                    sample_of_interest = 'TUMOR'
+                elif record.info['SS'] in ['1', '3']:
+                    # a germline or LOH variant => pileup from normal data
+                    pile = normal_reads.pileup(
+                        record.chrom, record.start, record.stop,
+                        **pileup_args
+                    )
+                    sample_of_interest = 'NORMAL'
+                else:
+                    # TO DO: figure out if there is anything interesting to do
+                    # for SS status codes 0 (reference) and 5 (unknown)
+                    yield record
+                    continue
+
+                # apply false-positive filtering a la varscan fpfilter
+                # find the variant site in the pileup columns
+                for pile_column in pile:
+                    if pile_column.reference_pos == record.start:
+                        break
+                # extract required information
+                # overall read depth at the site
+                read_depth = pile_column.get_num_aligned()
+                assert read_depth > 0
+                # no multiallelic sites in varscan
+                assert len(record.alleles) == 2
+                if record.samples[sample_of_interest]['RD'] > 0:
+                    ref_stats, alt_stats = [
+                        self.get_allele_specific_pileup_column_stats(
+                            allele,
+                            pile_column,
+                            partial(
+                                pysam.FastaFile.fetch, refseq, record.chrom
+                            )
+                        )
+                        for allele in record.alleles
+                    ]
+                else:
+                    ref_stats = None
+                    alt_stats = self.get_allele_specific_pileup_column_stats(
+                        record.alleles[1],
+                        pile_column,
+                        partial(pysam.FastaFile.fetch, refseq, record.chrom)
+                    )
+                ref_count = 0
+                if ref_stats:
+                    ref_count = ref_stats[2] + ref_stats[3]
+                    if ref_stats[1] < min_ref_basequal:
+                        record.filter.add('RefBaseQual')
+                    if ref_count >= 2:
+                        if ref_stats[0] < min_ref_mapqual:
+                            record.filter.add('RefMapQual')
+                        if ref_stats[4] < min_ref_readpos:
+                            record.filter.add('RefReadPos')
+                        # ref_stats[5] (avg_num_mismatches_as_fraction
+                        # is not a filter criterion in VarScan fpfilter
+                        if ref_stats[6] > max_ref_mmqs:
+                            record.filter.add('RefMMQS')
+                        if ref_stats[7] < min_ref_len:
+                            # VarScan fpfilter does not apply this filter
+                            # for indels, but there is no reason
+                            # not to do it.
+                            record.filter.add('RefAvgRL')
+                        if ref_stats[8] < min_ref_dist3:
+                            record.filter.add('RefDist3')
+                if alt_stats:
+                    alt_count = alt_stats[2] + alt_stats[3]
+                    if (
+                        alt_count < min_var_count2_lc
+                    ) or (
+                        read_depth >= max_somatic_p_depth and
+                        alt_count < min_var_count2
+                    ):
+                        record.filter.add('VarCount')
+                    if alt_count / read_depth < min_var_freq2:
+                        record.filter.add('VarFreq')
+                    if alt_stats[1] < min_var_basequal:
+                        record.filter.add('VarBaseQual')
+                    if alt_count > min_strand_reads:
+                        if (
+                            alt_stats[2] / alt_count < min_strandedness
+                        ) or (
+                            alt_stats[3] / alt_count < min_strandedness
+                        ):
+                            record.filter.add('Strand')
+                    if alt_stats[2] + alt_stats[3] >= 2:
+                        if alt_stats[0] < min_var_mapqual:
+                            record.filter.add('VarMapQual')
+                        if alt_stats[4] < min_var_readpos:
+                            record.filter.add('VarReadPos')
+                        # alt_stats[5] (avg_num_mismatches_as_fraction
+                        # is not a filter criterion in VarScan fpfilter
+                        if alt_stats[6] > max_var_mmqs:
+                            record.filter.add('VarMMQS')
+                        if alt_stats[7] < min_var_len:
+                            # VarScan fpfilter does not apply this filter
+                            # for indels, but there is no reason
+                            # not to do it.
+                            record.filter.add('VarAvgRL')
+                        if alt_stats[8] < min_var_dist3:
+                            record.filter.add('VarDist3')
+                    if ref_count >= 2 and alt_count >= 2:
+                        if (ref_stats[0] - alt_stats[0]) > max_mapqual_diff:
+                            record.filter.add('MapQualDiff')
+                        if (ref_stats[1] - alt_stats[1]) > max_basequal_diff:
+                            record.filter.add('MaxBAQdiff')
+                        mmqs_diff = alt_stats[6] - ref_stats[6]
+                        if mmqs_diff < min_mmqs_diff:
+                            record.filter.add('MinMMQSdiff')
+                        if mmqs_diff > max_mmqs_diff:
+                            record.filter.add('MMQSdiff')
+                        if (
+                            1 - alt_stats[7] / ref_stats[7]
+                        ) > max_relative_len_diff:
+                            record.filter.add('ReadLenDiff')
+                else:
+                    # No variant-supporting reads for this record!
+                    # This can happen in rare cases because of
+                    # samtools mpileup issues, but indicates a
+                    # rather unreliable variant call.
+                    record.filter.add('VarCount')
+                    record.filter.add('VarFreq')
+                yield record
+
+    def _indel_flagged_records(self, vcf):
+        for record in vcf:
+            record.info['INDEL'] = True
+            yield record
+
+    def _merge_generator(self, vcf1, vcf2):
+        try:
+            record1 = next(vcf1)
+        except StopIteration:
+            for record2 in vcf2:
+                yield record2
+            return
+        try:
+            record2 = next(vcf2)
+        except StopIteration:
+            yield record1
+            for record1 in vcf1:
+                yield record1
+            return
+        while True:
+            if (record1.start, record1.stop) < (record2.start, record2.stop):
+                yield record1
+                try:
+                    record1 = next(vcf1)
+                except StopIteration:
+                    yield record2
+                    for record2 in vcf2:
+                        yield record2
+                    return
+            else:
+                yield record2
+                try:
+                    record2 = next(vcf2)
+                except StopIteration:
+                    yield record1
+                    for record1 in vcf1:
+                        yield record1
+                    return
+
+    def merge_and_postprocess(self, snps_out, indels_out=None,
+                              no_filters=False, **filter_args):
+        temporary_data = self.tmpfiles
+        self.tmpfiles = []
+        temporary_snp_files = [f + '.snp.vcf' for f in temporary_data]
+        temporary_indel_files = [f + '.indel.vcf' for f in temporary_data]
+
+        for f in temporary_data:
+            try:
+                os.remove(f)
+            except Exception:
+                pass
+
+        def noop_gen(data, **kwargs):
+            for d in data:
+                yield d
+
+        if no_filters:
+            apply_filters = noop_gen
+        else:
+            apply_filters = self._postprocess_variant_records
+
+        output_header = self._compile_common_header(
+            temporary_snp_files[0],
+            no_filters
+        )
+        if indels_out is None:
+            with open(snps_out, 'w') as o:
+                o.write(str(output_header).format(**filter_args))
+                for snp_f, indel_f in zip(
+                    temporary_snp_files, temporary_indel_files
+                ):
+                    with pysam.VariantFile(snp_f, 'r') as snp_invcf:
+                        # fix the input header on the fly
+                        # to avoid Warnings from htslib about missing contig
+                        # info
+                        self._add_ref_contigs_to_header(snp_invcf.header)
+                        self._add_filters_to_header(snp_invcf.header)
+                        self._add_indel_info_flag_to_header(snp_invcf.header)
+                        with pysam.VariantFile(indel_f, 'r') as indel_invcf:
+                            # fix the input header on the fly
+                            # to avoid Warnings from htslib about missing
+                            # contig info
+                            self._add_ref_contigs_to_header(indel_invcf.header)
+                            self._add_filters_to_header(indel_invcf.header)
+                            self._add_indel_info_flag_to_header(
+                                indel_invcf.header
+                            )
+                            for record in apply_filters(
+                                self._merge_generator(
+                                    snp_invcf,
+                                    self._indel_flagged_records(indel_invcf)
+                                ),
+                                **filter_args
+                            ):
+                                o.write(str(record))
+                    try:
+                        os.remove(snp_f)
+                    except Exception:
+                        pass
+                    try:
+                        os.remove(indel_f)
+                    except Exception:
+                        pass
+
+        else:
+            with open(snps_out, 'w') as o:
+                o.write(str(output_header).format(**filter_args))
+                for f in temporary_snp_files:
+                    with pysam.VariantFile(f, 'r') as invcf:
+                        # fix the input header on the fly
+                        # to avoid Warnings from htslib about missing
+                        # contig info and errors because of undeclared
+                        # filters
+                        self._add_ref_contigs_to_header(invcf.header)
+                        self._add_filters_to_header(invcf.header)
+                        for record in apply_filters(
+                            invcf, **filter_args
+                        ):
+                            o.write(str(record))
+                    try:
+                        os.remove(f)
+                    except Exception:
+                        pass
+            with open(indels_out, 'w') as o:
+                o.write(str(output_header))
+                for f in temporary_indel_files:
+                    with pysam.VariantFile(f, 'r') as invcf:
+                        # fix the input header on the fly
+                        # to avoid Warnings from htslib about missing
+                        # contig info and errors because of undeclared
+                        # filters
+                        self._add_ref_contigs_to_header(invcf.header)
+                        self._add_filters_to_header(invcf.header)
+                        self._add_indel_info_flag_to_header(invcf.header)
+                        for record in apply_filters(
+                            self._indel_flagged_records(invcf), **filter_args
+                        ):
+                            o.write(str(record))
+                    try:
+                        os.remove(f)
+                    except Exception:
+                        pass
+
+
+def varscan_call(ref_genome, normal, tumor, output_path, **args):
+    """Preparse arguments and orchestrate calling and postprocessing."""
+
+    if args.pop('split_output'):
+        if '%T' in output_path:
+            out = (
+                output_path.replace('%T', 'snp'),
+                output_path.replace('%T', 'indel')
+            )
+        else:
+            out = (
+                output_path + '.snp',
+                output_path + '.indel'
+            )
+    else:
+        out = (output_path, None)
+
+    instance_args = {
+        k: args.pop(k) for k in [
+            'max_depth',
+            'min_mapqual',
+            'min_basequal',
+            'threads',
+            'verbose',
+            'quiet'
+        ]
+    }
+    varscan_somatic_args = {
+        k: args.pop(k) for k in [
+            'normal_purity',
+            'tumor_purity',
+            'min_coverage',
+            'min_var_count',
+            'min_var_freq',
+            'min_hom_freq',
+            'somatic_p_value',
+            'p_value'
+        ]
+    }
+
+    v = VarScanCaller(ref_genome, [normal, tumor], **instance_args)
+    v.varcall_parallel(**varscan_somatic_args)
+    v.merge_and_postprocess(*out, **args)
+
+
+if __name__ == '__main__':
+    p = argparse.ArgumentParser()
+    p.add_argument(
+        'ref_genome',
+        metavar='reference_genome',
+        help='the reference genome (in fasta format)'
+    )
+    p.add_argument(
+        '--normal',
+        metavar='BAM_file', required=True,
+        help='the BAM input file of aligned reads from the normal sample'
+    )
+    p.add_argument(
+        '--tumor',
+        metavar='BAM_file', required=True,
+        help='the BAM input file of aligned reads from the tumor sample'
+    )
+    p.add_argument(
+        '-o', '--ofile', required=True,
+        metavar='OFILE', dest='output_path',
+        help='Name of the variant output file. With --split-output, the name '
+             'may use the %%T replacement token or will be used as the '
+             'basename for the two output files to be generated (see '
+             '-s|--split-output below).'
+    )
+    p.add_argument(
+        '-s', '--split-output',
+        dest='split_output', action='store_true', default=False,
+        help='indicate that separate output files for SNPs and indels '
+             'should be generated (original VarScan behavior). If specified, '
+             '%%T in the --ofile file name will be replaced with "snp" and '
+             '"indel" to generate the names of the SNP and indel output '
+             'files, respectively. If %%T is not found in the file name, it '
+             'will get interpreted as a basename to which ".snp"/".indel" '
+             'will be appended.'
+    )
+    p.add_argument(
+        '-t', '--threads',
+        type=int, default=1,
+        help='level of parallelism'
+    )
+    p.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help='be verbose about progress'
+    )
+    p.add_argument(
+        '-q', '--quiet',
+        action='store_true',
+        help='suppress output from wrapped tools'
+    )
+    call_group = p.add_argument_group('Variant calling parameters')
+    call_group.add_argument(
+        '--normal-purity',
+        dest='normal_purity', type=float,
+        default=1.0,
+        help='Estimated purity of the normal sample (default: 1.0)'
+    )
+    call_group.add_argument(
+        '--tumor-purity',
+        dest='tumor_purity', type=float,
+        default=1.0,
+        help='Estimated purity of the tumor sample (default: 1.0)'
+    )
+    call_group.add_argument(
+        '--max-pileup-depth',
+        dest='max_depth', type=int, default=8000,
+        help='Maximum depth of generated pileups (samtools mpileup -d option; '
+             'default: 8000)'
+    )
+    call_group.add_argument(
+        '--min-basequal',
+        dest='min_basequal', type=int,
+        default=13,
+        help='Minimum base quality at the variant position to use a read '
+             '(default: 13)'
+    )
+    call_group.add_argument(
+        '--min-mapqual',
+        dest='min_mapqual', type=int,
+        default=0,
+        help='Minimum mapping quality required to use a read '
+             '(default: 0)'
+    )
+    call_group.add_argument(
+        '--min-coverage',
+        dest='min_coverage', type=int,
+        default=8,
+        help='Minimum site coverage required in the normal and in the tumor '
+             'sample to call a variant (default: 8)'
+    )
+    call_group.add_argument(
+        '--min-var-count',
+        dest='min_var_count', type=int,
+        default=2,
+        help='Minimum number of variant-supporting reads required to call a '
+             'variant (default: 2)'
+    )
+    call_group.add_argument(
+        '--min-var-freq',
+        dest='min_var_freq', type=float,
+        default=0.1,
+        help='Minimum variant allele frequency for calling (default: 0.1)'
+    )
+    call_group.add_argument(
+        '--min-hom-freq',
+        dest='min_hom_freq', type=float,
+        default=0.75,
+        help='Minimum variant allele frequency for homozygous call '
+             '(default: 0.75)'
+    )
+    call_group.add_argument(
+        '--p-value',
+        dest='p_value', type=float,
+        default=0.99,
+        help='P-value threshold for heterozygous call (default: 0.99)'
+    )
+    call_group.add_argument(
+        '--somatic-p-value',
+        dest='somatic_p_value', type=float,
+        default=0.05,
+        help='P-value threshold for somatic call (default: 0.05)'
+    )
+    filter_group = p.add_argument_group('Posterior variant filter parameters')
+    filter_group.add_argument(
+        '--no-filters',
+        dest='no_filters', action='store_true',
+        help='Disable all posterior variant filters. '
+             'If specified, all following options will be ignored'
+    )
+    filter_group.add_argument(
+        '--min-var-count2',
+        dest='min_var_count2', type=int,
+        default=4,
+        help='Minimum number of variant-supporting reads (default: 4)'
+    )
+    filter_group.add_argument(
+        '--min-var-count2-lc',
+        dest='min_var_count2_lc', type=int,
+        default=2,
+        help='Minimum number of variant-supporting reads when depth below '
+             '--somatic-p-depth (default: 2)'
+    )
+    filter_group.add_argument(
+        '--min-var-freq2',
+        dest='min_var_freq2', type=float,
+        default=0.05,
+        help='Minimum variant allele frequency (default: 0.05)'
+    )
+    filter_group.add_argument(
+        '--max-somatic-p',
+        dest='max_somatic_p', type=float,
+        default=0.05,
+        help='Maximum somatic p-value (default: 0.05)'
+    )
+    filter_group.add_argument(
+        '--max-somatic-p-depth',
+        dest='max_somatic_p_depth', type=int,
+        default=10,
+        help='Depth required to run --max-somatic-p filter (default: 10)'
+    )
+    filter_group.add_argument(
+        '--min-ref-readpos',
+        dest='min_ref_readpos', type=float,
+        default=0.1,
+        help='Minimum average relative distance of site from the ends of '
+             'ref-supporting reads (default: 0.1)'
+    )
+    filter_group.add_argument(
+        '--min-var-readpos',
+        dest='min_var_readpos', type=float,
+        default=0.1,
+        help='Minimum average relative distance of site from the ends of '
+             'variant-supporting reads (default: 0.1)'
+    )
+    filter_group.add_argument(
+        '--min-ref-dist3',
+        dest='min_ref_dist3', type=float,
+        default=0.1,
+        help='Minimum average relative distance of site from the effective '
+             '3\'end of ref-supporting reads (default: 0.1)'
+    )
+    filter_group.add_argument(
+        '--min-var-dist3',
+        dest='min_var_dist3', type=float,
+        default=0.1,
+        help='Minimum average relative distance of site from the effective '
+             '3\'end of variant-supporting reads (default: 0.1)'
+    )
+    filter_group.add_argument(
+        '--min-ref-len',
+        dest='min_ref_len', type=int,
+        default=90,
+        help='Minimum average trimmed length of reads supporting the ref '
+             'allele (default: 90)'
+    )
+    filter_group.add_argument(
+        '--min-var-len',
+        dest='min_var_len', type=int,
+        default=90,
+        help='Minimum average trimmed length of reads supporting the variant '
+             'allele (default: 90)'
+    )
+    filter_group.add_argument(
+        '--max-len-diff',
+        dest='max_relative_len_diff', type=float,
+        default=0.25,
+        help='Maximum average relative read length difference (ref - var; '
+             'default: 0.25)'
+    )
+    filter_group.add_argument(
+        '--min-strandedness',
+        dest='min_strandedness', type=float,
+        default=0.01,
+        help='Minimum fraction of variant reads from each strand '
+             '(default: 0.01)'
+    )
+    filter_group.add_argument(
+        '--min-strand-reads',
+        dest='min_strand_reads', type=int,
+        default=5,
+        help='Minimum allele depth required to run --min-strandedness filter '
+             '(default: 5)'
+    )
+    filter_group.add_argument(
+        '--min-ref-basequal',
+        dest='min_ref_basequal', type=int,
+        default=15,
+        help='Minimum average base quality for the ref allele (default: 15)'
+    )
+    filter_group.add_argument(
+        '--min-var-basequal',
+        dest='min_var_basequal', type=int,
+        default=15,
+        help='Minimum average base quality for the variant allele '
+             '(default: 15)'
+    )
+    filter_group.add_argument(
+        '--max-basequal-diff',
+        dest='max_basequal_diff', type=int,
+        default=50,
+        help='Maximum average base quality diff (ref - var; default: 50)'
+    )
+    filter_group.add_argument(
+        '--min-ref-mapqual',
+        dest='min_ref_mapqual', type=int,
+        default=15,
+        help='Minimum average mapping quality of reads supporting the ref '
+             'allele (default: 15)'
+    )
+    filter_group.add_argument(
+        '--min-var-mapqual',
+        dest='min_var_mapqual', type=int,
+        default=15,
+        help='Minimum average mapping quality of reads supporting the variant '
+             'allele (default: 15)'
+    )
+    filter_group.add_argument(
+        '--max-mapqual-diff',
+        dest='max_mapqual_diff', type=int,
+        default=50,
+        help='Maximum average mapping quality difference (ref - var; '
+             'default: 50)'
+    )
+    filter_group.add_argument(
+        '--max-ref-mmqs',
+        dest='max_ref_mmqs', type=int,
+        default=100,
+        help='Maximum mismatch quality sum of reads supporting the ref '
+             'allele (default: 100)'
+    )
+    filter_group.add_argument(
+        '--max-var-mmqs',
+        dest='max_var_mmqs', type=int,
+        default=100,
+        help='Maximum mismatch quality sum of reads supporting the variant '
+             'allele (default: 100)'
+    )
+    filter_group.add_argument(
+        '--min-mmqs-diff',
+        dest='min_mmqs_diff', type=int,
+        default=0,
+        help='Minimum mismatch quality sum difference (var - ref; default: 0)'
+    )
+    filter_group.add_argument(
+        '--max-mmqs-diff',
+        dest='max_mmqs_diff', type=int,
+        default=50,
+        help='Maximum mismatch quality sum difference (var - ref; default: 50)'
+    )
+    args = vars(p.parse_args())
+    varscan_call(**args)
--- a/varscan_somatic.xml	Sun Jul 15 09:19:25 2018 -0400
+++ b/varscan_somatic.xml	Tue Dec 04 05:15:50 2018 -0500
@@ -1,120 +1,534 @@
 <tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.1">
-    <description>Call germline/somatic variants from tumor-normal pileups</description>
+    <description>Call germline/somatic and LOH variants from tumor-normal sample pairs</description>
     <macros>
         <import>macros.xml</import>
+        <macro name="test_mentions_contig">
+            <assert_contents>
+                <has_line_matching
+                expression="##contig=.ID=chrM,length=16571." />
+            </assert_contents>
+        </macro>
+        <macro name="test_mentions_filters">
+            <assert_contents>
+                <has_line_matching
+                expression="##FILTER=.ID=VarCount,Description=.+" />
+                <has_line_matching
+                expression="##FILTER=.ID=ReadLenDiff,Description=.+" />
+                <has_line_matching
+                expression="##FILTER=.ID=RefDist3,Description=.+" />
+            </assert_contents>
+        </macro>
+        <macro name="test_not_mentions_filters">
+            <assert_contents>
+                <not_has_text
+                text="##FILTER=&lt;ID=VarCount,Description=" />
+                <not_has_text
+                text="##FILTER=&lt;ID=ReadLenDiff,Description=" />
+                <not_has_text
+                text="##FILTER=&lt;ID=RefDist3,Description=" />
+            </assert_contents>
+        </macro>
     </macros>
-    <expand macro="requirements" />
-    <expand macro="stdio" />
+    <expand macro="requirements">
+        <requirement type="package" version="3.6.7">python</requirement>
+        <requirement type="package" version="0.15.1">pysam</requirement>
+    </expand>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
     <command><![CDATA[
-        varscan somatic
-            @INPUT_PILEUPS@
-            --min-coverage ${min_coverage}
-            --min-reads2 ${min_reads2}
-            --min-avg-qual ${min_avg_qual}
-            --min-var-freq ${min_var_freq}
-            --min-freq-for-hom ${min_freq_for_hom}
+        #if str($reference.source) == "history":
+            #set ref_genome = 'ref.fa'
+            ln -s -f '$reference.genome' $ref_genome &&
+        #else:
+            #set ref_genome = '$reference.genome.fields.path'
+        #end if
+        #set normal_data = 'normal.bam'
+        #set tumor_data = 'tumor.bam'
+        ln -s -f '$normal_bam' $normal_data &&
+        ln -s -f '$tumor_bam' $tumor_data &&
+        ln -s -f '${normal_bam.metadata.bam_index}' ${normal_data}.bai &&
+        ln -s -f '${tumor_bam.metadata.bam_index}' ${tumor_data}.bai &&
+        python3 $__tool_directory__/varscan.py
+            --normal '$normal_data'
+            --tumor '$tumor_data'
             --normal-purity ${normal_purity}
             --tumor-purity ${tumor_purity}
-            --tumor-purity ${tumor_purity}
-            --min-coverage-normal ${min_coverage_normal}
-            --somatic-p-value ${somatic_p_value}
-            --p-value ${p_value}
-            #if str($strand_filter) == 'yes':
-              --strand-filter 1
+            #if str($split_output):
+                --ofile variants_out
+                $split_output
+            #else:
+                --ofile '$output'
+            #end if
+            --threads \${GALAXY_SLOTS:-2}
+            #if str($call_params.settings) == "custom":
+                ## samtools mpileup parameters
+                --min-basequal ${call_params.min_avg_qual}
+                --min-mapqual ${call_params.min_mapqual}
+                ## VarScan parameters
+                --min-coverage ${call_params.min_coverage}
+                --min-var-count ${call_params.min_reads2}
+                --min-var-freq ${call_params.min_var_freq}
+                --min-hom-freq ${call_params.min_freq_for_hom}
+                --p-value ${call_params.p_value}
+                --somatic-p-value ${call_params.somatic_p_value}
             #end if
-
-            --output-vcf 1
+            #if str($filter_params.settings) == "no_filter":
+                --no-filters
+            #elif str($filter_params.settings) == "dream3_settings":
+                --min-var-count2 3
+                --min-var-count2-lc 1
+                --min-var-freq2 0.05
+                --max-somatic-p 0.05
+                --max-somatic-p-depth 10
+                --min-ref-readpos 0.2
+                --min-var-readpos 0.15
+                --min-ref-dist3 0.2
+                --min-var-dist3 0.15
+                --min-ref-len 90
+                --min-var-len 90
+                --max-len-diff 0.05
+                --min-strandedness 0
+                --min-strand-reads 5
+                --min-ref-basequal 15
+                --min-var-basequal 30
+                --max-basequal-diff 50
+                --min-ref-mapqual 20
+                --min-var-mapqual 30
+                --max-mapqual-diff 10
+                --max-ref-mmqs 50
+                --max-var-mmqs 100
+                --min-mmqs-diff 0
+                --max-mmqs-diff 50
+            #elif str($filter_params.settings) == "custom":
+                --min-var-count2 ${filter_params.min_var_count}
+                --min-var-count2-lc ${filter_params.min_var_count_lc}
+                --min-var-freq2 ${filter_params.min_var_freq2}
+                --max-somatic-p ${filter_params.max_somatic_p}
+                --max-somatic-p-depth ${filter_params.max_somatic_p_depth}
+                --min-ref-readpos ${filter_params.min_ref_readpos}
+                --min-var-readpos ${filter_params.min_var_readpos}
+                --min-ref-dist3 ${filter_params.min_ref_dist3}
+                --min-var-dist3 ${filter_params.min_var_dist3}
+                --min-ref-len ${filter_params.min_ref_len}
+                --min-var-len ${filter_params.min_var_len}
+                --max-len-diff ${filter_params.max_len_diff}
+                --min-strandedness ${filter_params.min_strandedness}
+                --min-strand-reads ${filter_params.min_strand_reads}
+                --min-ref-basequal ${filter_params.min_ref_basequal}
+                --min-var-basequal ${filter_params.min_var_basequal}
+                --max-basequal-diff ${filter_params.max_basequal_diff}
+                --min-ref-mapqual ${filter_params.min_ref_mapqual}
+                --min-var-mapqual ${filter_params.min_var_mapqual}
+                --max-mapqual-diff ${filter_params.max_mapqual_diff}
+                --max-ref-mmqs ${filter_params.max_ref_mmqs}
+                --max-var-mmqs ${filter_params.max_var_mmqs}
+                --min-mmqs-diff ${filter_params.min_mmqs_diff}
+                --max-mmqs-diff ${filter_params.max_mmqs_diff}
+            #end if
+            --verbose
+            $ref_genome
     ]]></command>
 
     <inputs>
-
-        <expand macro="input_pileups"/>
-
-        <expand macro="min_coverage" />
-        <param argument="--min-coverage-normal" name="min_coverage_normal" type="integer" value="8" min="1" max="200"
-            label="Minimum read depth from the normal sample" help="Minimum depth at a position to make a call" />
-        <param argument="--min-coverage-tumor" name="min_coverage_tumor" type="integer" value="6" min="1" max="200"
-            label="Minimum read depth from the tumor sample" help="Minimum depth at a position to make a call" />
-        <expand macro="min_reads2" />
-        <expand macro="min_avg_qual" />
-        <expand macro="min_var_freq" value="0.10" />
-        <expand macro="min_freq_for_hom" />
-        <param argument="--normal-purity" name="normal_purity" type="float" value="1.00" min="0" max="1.00"
-            label="Estimated purity (non-tumor content) of normal sample"/>
-        <param argument="--tumor-purity" name="tumor_purity" type="float" value="1.00" min="0" max="1.00"
-            label="Estimated purity (tumor content) of tumor sample"/>
-        <expand macro="p_value" label="P-value threshold to call a heterozygote" value="0.99"/>
-        <param argument="--somatic-p-value" name="somatic_p_value" type="float" value="0.05" min="0" max="1"
-            label="p-value threshold for calling somatic sites"/>
-        <expand macro="strand_filter" />
+        <conditional name="reference">
+            <param name="source" type="select"
+            label="Will you select a reference genome from your history or use a built-in genome?">
+                <option value="cached">Use a built-in genome</option>
+                <option value="history">Use a genome from my history</option>
+            </param>
+            <when value="cached">
+                <param name="genome" type="select"
+                label="reference genome"
+                help="The fasta reference genome that variants should be called against.">
+                    <options from_data_table="fasta_indexes" />
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome" type="data" format="fasta"
+                label="reference genome"
+                help="The fasta reference genome that variants should be called against."/>
+            </when>
+        </conditional>
+        <param name="normal_bam" type="data" format="bam"
+        label="aligned reads from normal sample" />
+        <param name="tumor_bam" type="data" format="bam"
+        label="aligned reads from tumor sample" />
+        <param argument="--normal-purity" name="normal_purity" type="float" value="1.0" min="0" max="1.0"
+        label="Estimated purity (non-tumor content) of normal sample"/>
+        <param argument="--tumor-purity" name="tumor_purity" type="float" value="1.0" min="0" max="1.0"
+        label="Estimated purity (tumor content) of tumor sample"/>
+        <param name="split_output" type="boolean" truevalue="--split-output" falsevalue="" checked="false"
+        label="Generate separate output datasets for SNP and indel calls?" />
+        <conditional name="call_params">
+            <param name="settings" label="Settings for Variant Calling" type="select">
+                <option value="varscan_defaults" selected="true">Use default values</option>
+                <option value="custom">Customize settings</option>
+            </param>
+            <when value="custom">
+                <param argument="samtools mpileup -Q" name="min_avg_qual" type="integer" value="13" min="0" max="50"
+                label="Minimum base quality"
+                help="The minimum base quality at the variant position required to use a read for calling" />
+                <param argument="samtools mpileup -q" name="min_mapqual" type="integer" value="0" min="0" max="60"
+                label="Minimum mapping quality"
+                help="The minimum mapping quality required for a read to be considered in variant calling" />
+                <expand macro="min_coverage"
+                help="Minimum site coverage required in the normal and in the tumor sample to call a variant. This threshold gets applied after eliminating reads with low base and mapping qualitiy as defined above." />
+                <expand macro="min_reads2" />
+                <expand macro="min_var_freq" value="0.1" />
+                <expand macro="min_freq_for_hom" />
+                <expand macro="p_value" value="0.99"
+                help="The p-value threshold used to determine if a variant should be called for either sample" />
+                <param argument="--somatic-p-value" name="somatic_p_value" type="float" value="0.05" min="0" max="1"
+                label="P-value threshold for calling somatic variants and LOH events"
+                help="The p-value threshold used to determine if read count differences between the normal and the tumor sample justify classification of a variant as somatic or as an LOH event" />
+            </when>
+            <when value="varscan_defaults" />
+        </conditional>
+        <conditional name="filter_params">
+            <param name="settings" label="Settings for Posterior Variant Filtering" type="select">
+                <option value="varscan_defaults" selected="true">Use default values</option>
+                <option value="dream3_settings">Use settings optimized for DREAM-3</option>
+                <option value="no_filter">Do not perform posterior filtering</option>
+                <option value="custom">Customize settings</option>
+            </param>
+            <when value="varscan_defaults" />
+            <when value="dream3_settings" />
+            <when value="no_filter" />
+            <when value="custom">
+                <param argument="--min-var-count" name="min_var_count" type="integer" value="4" min="1" max="200"
+                label="Minimum number of variant-supporting reads"
+                help="" />
+                <param argument="--min-var-count-lc" name="min_var_count_lc" type="integer" value="2" min="1" max="200"
+                label="Low coverage minimum number of variant-supporting reads"
+                help="Will be applied instead of the --min-var-count limit for sites with poor overall (less than --max-somatic-p-depth) coverage" />
+                <param argument="--min-var-freq" name="min_var_freq2" type="float" value="0.05" min="0" max="1"
+                label="Minimum variant allele frequency"
+                help="" />
+                <param argument="--max-somatic-p" name="max_somatic_p" type="float" value="0.05" min="0" max="1"
+                label="Maximum somatic p-value allowed for a somatic call"
+                help="" />
+                <param argument="--max-somatic-p-depth" name="max_somatic_p_depth" type="integer" value="10" min="2" max="200"
+                label="Depth required at variant site to run --max-somatic-p filter"
+                help="" />
+                <param argument="--min-ref-readpos" name="min_ref_readpos" type="float" value="0.1" min="0" max="1"
+                label="Minimum relative variant position in ref-supporting reads"
+                help="The minimum average relative distance from the ends of ref-supporting reads required for variant sites" />
+                <param argument="--min-var-readpos" name="min_var_readpos" type="float" value="0.1" min="0" max="1"
+                label="Minimum relative variant position in variant-supporting reads"
+                help="The minimum average relative distance from the ends of variant-supporting reads required for variant sites" />
+                <param argument="--min-ref-dist3" name="min_ref_dist3" type="float" value="0.1" min="0" max="1"
+                label="Minimum distance of variant site from 3'-end of ref-supporting reads"
+                help="The minimum average relative distance from the effective 3'end of ref-supporting reads required for variant sites" />
+                <param argument="--min-var-dist3" name="min_var_dist3" type="float" value="0.1" min="0" max="1"
+                label="Minimum distance of variant site from 3'-end of variant-supporting reads"
+                help="The minimum average relative distance from the effective 3'end of variant-supporting reads required for variant sites" />
+                <param argument="--min-ref-avgrl" name="min_ref_len" type="integer" value="90" min="0" max="200"
+                label="Minimum length of ref-supporting reads"
+                help="The minimum average trimmed length required for reads supporting the reference allele" />
+                <param argument="--min-var-avgrl" name="min_var_len" type="integer" value="90" min="0" max="200"
+                label="Minimum length of variant-supporting reads"
+                help="The minimum average trimmed length required for reads supporting the variant allele" />
+                <param argument="--max-rl-diff" name="max_len_diff" type="float" value="0.25" min="0" max="1"
+                label="Maximum relative read length difference"
+                help="The maximum allowed average relative read length difference (ref - var) between reads supporting the reference and the variant allele" />
+                <param argument="--min-strandedness" name="min_strandedness" type="float" value="0.01" min="0" max="0.5"
+                label="Minimum fraction of variant reads from each strand"
+                help="The minimum fraction of variant reads that are required to come from the forward and from the reverse strand" />
+                <param argument="--min-strand-reads" name="min_strand_reads" type="integer" value="5" min="2" max="200"
+                label="Minimum variant allele depth required to apply the --min-strandedness filter"
+                help="" />
+                <param argument="--min-ref-basequal" name="min_ref_basequal" type="integer" value="15" min="1" max="50"
+                label="Minimum average base quality for the ref allele"
+                help="The minimum average base quality required at the variant site for reads supporting the reference allele" />
+                <param argument="--min-var-basequal" name="min_var_basequal" type="integer" value="15" min="1" max="50"
+                label="Minimum average base quality for the variant allele"
+                help="The minimum average base quality required at the variant site for reads supporting the variant allele" />
+                <param argument="max-basequal-diff" name="max_basequal_diff" type="integer" value="50" min="0" max="50"
+                label="Maximum base quality difference between ref- and variant-supporting reads"
+                help="The maximum average base quality difference (ref - var) allowed between the variant site positions of reads supporting the reference and the variant allele" />
+                <param argument="--min-ref-mapqual" name="min_ref_mapqual" type="integer" value="15" min="1" max="60"
+                label="Minimum average mapping quality of ref-supporting reads"
+                help="The minimum average mapping quality required for reads supporting the reference allele" />
+                <param argument="--min-var-mapqual" name="min_var_mapqual" type="integer" value="15" min="1" max="60"
+                label="Minimum average mapping quality of variant-supporting reads"
+                help="The minimum average mapping quality required for reads supporting the variant allele" />
+                <param argument="--max-mapqual-diff" name="max_mapqual_diff" type="integer" value="50" min="0" max="60"
+                label="Maximum mapping quality difference between ref- and variant-supporting reads"
+                help="The maximum average mapping quality difference (ref - var) allowed between reads supporting the reference and the variant allele" />
+                <param argument="--max-ref-mmqs" name="max_ref_mmqs" type="integer" value="100" min="0"
+                label="Maximum mismatch base quality sum of ref-supporting reads"
+                help="The maximum mismatch base quality sum allowed for reads supporting the reference allele" />
+                <param argument="--max-var-mmqs" name="max_var_mmqs" type="integer" value="100" min="0"
+                label="Maximum mismatch base quality sum of var-supporting reads"
+                help="The maximum mismatch base quality sum allowed for reads supporting the variant allele" />
+                <param argument="--min-mmqs-diff" name="min_mmqs_diff" type="integer" value="0" min="0"
+                label="Minimum difference between mismatch base quality sums of variant- and ref-supporting reads"
+                help="The minimum difference in the mismatch base quality sums (var - ref) required between reads supporting the variant and the reference allele" />
+                <param argument="--max-mmqs-diff" name="max_mmqs_diff" type="integer" value="50" min="1"
+                label="Maximum difference between mismatch base quality sums of variant- and ref-supporting reads"
+                help="The maximum difference in the mismatch base quality sums (var - ref) allowed between reads supporting the variant and the reference allele" />
+            </when>
+        </conditional>
     </inputs>
     <outputs>
-        <data name="output_indel" from_work_dir="galaxy_out.indel.vcf" format="vcf"/>
-        <data name="output_snp" from_work_dir="galaxy_out.snp.vcf" format="vcf"/>
+        <data name="output" format="vcf">
+            <filter>not split_output</filter>
+        </data>
+        <data name="output_snp" from_work_dir="variants_out.snp" format="vcf"
+        label="Varscan somatic SNP calls on ${on_string}">
+            <filter>split_output</filter>
+        </data>
+        <data name="output_indel" from_work_dir="variants_out.indel" format="vcf"
+        label="Varscan somatic indel calls on ${on_string}">
+            <filter>split_output</filter>
+        </data>
     </outputs>
     <tests>
-        <test>
-            <conditional name="pileup">
-                <param name="pileup_select" value="separated" />
-                <param name="normal_pileup" value="N_Region_Chr1_CDKN2C.pileup.gz" />
-                <param name="tumor_pileup" value="T_Region_Chr1_CDKN2C.pileup.gz" />
+        <test expect_num_outputs="1">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="false" />
+            <conditional name="call_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <conditional name="filter_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <output name="output">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="true" />
+            <conditional name="call_params">
+                <param name="settings" value="varscan_defaults" />
             </conditional>
-            <param name="min_coverage" value="2" />
-            <param name="min_coverage_normal" value="2" />
-            <param name="min_coverage_tumor" value="2" />
-            <param name="min_reads2" value="1" />
-            <param name="min_avg_qual" value="5" />
-            <param name="min_var_freq" value="0.01" />
-            <param name="min_freq_for_hom" value="0.75" />
+            <conditional name="filter_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <output name="output_indel">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
+            <output name="output_snp">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="false" />
+            <conditional name="call_params">
+                <param name="settings" value="custom" />
+                <param name="min_coverage" value="2" />
+                <param name="min_reads2" value="1" />
+                <param name="min_avg_qual" value="5" />
+                <param name="min_var_freq" value="0.01" />
+                <param name="min_freq_for_hom" value="0.66" />
+                <param name="p_value" value="0.97" />
+                <param name="somatic_p_value" value="0.09" />
+            </conditional>
             <param name="normal_purity" value="0.6" />
             <param name="tumor_purity" value="0.6" />
-            <param name="p_value" value="0.99" />
-            <output name="output_indel" file="varscan_somatic_indel_result1.vcf" lines_diff="0" />
-            <output name="output_snp" file="varscan_somatic_snp_result1.vcf" lines_diff="0" />
+            <conditional name="filter_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <assert_stderr>
+                <has_line_matching
+                expression=" Min coverage:&#09;2x for Normal, 2x for Tumor" />
+                <has_line_matching
+                expression="Min reads2:&#09;1" />
+                <has_line_matching
+                expression="Min var freq:&#09;0.01" />
+                <has_line_matching
+                expression="Min freq for hom:&#09;0.66" />
+                <has_line_matching
+                expression="Normal purity:&#09;0.6" />
+                <has_line_matching
+                expression="Tumor purity:&#09;0.6" />
+                <has_line_matching
+                expression="Min avg qual:&#09;5" />
+                <has_line_matching
+                expression="P-value thresh:&#09;0.97" />
+                <has_line_matching
+                expression="Somatic p-value:&#09;0.09" />
+            </assert_stderr>
+            <output name="output">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
         </test>
-        <test>
-            <conditional name="pileup">
-                <param name="pileup_select" value="combined" />
-                <param name="combined_pileup" value="NT.pileup.gz" />
+        <test expect_num_outputs="1">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="false" />
+            <conditional name="call_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <conditional name="filter_params">
+                <param name="settings" value="dream3_settings" />
             </conditional>
-            <param name="min_coverage" value="2" />
-            <param name="min_coverage_normal" value="2" />
-            <param name="min_coverage_tumor" value="2" />
-            <param name="min_reads2" value="1" />
-            <param name="min_avg_qual" value="5" />
-            <param name="min_var_freq" value="0.01" />
-            <param name="min_freq_for_hom" value="0.75" />
-            <param name="normal_purity" value="0.6" />
-            <param name="tumor_purity" value="0.6" />
-            <param name="p_value" value="0.99" />
-            <output name="output_indel" file="varscan_somatic_indel_result2.vcf" lines_diff="0" />
-            <output name="output_snp" file="varscan_somatic_snp_result2.vcf" lines_diff="0" />
+            <output name="output">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="false" />
+            <conditional name="call_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <conditional name="filter_params">
+                <param name="settings" value="no_filter" />
+            </conditional>
+            <output name="output">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_not_mentions_filters" />
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="false" />
+            <conditional name="call_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <conditional name="filter_params">
+                <param name="settings" value="custom" />
+                <param name="min_ref_basequal" value="28" />
+                <param name="min_var_basequal" value="28" />
+            </conditional>
+            <output name="output">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
         </test>
     </tests>
 
     <help>
-**VarScan Overview**
+@HELP_HEADER@
+
+**The Varscan Somatic tool for Galaxy**
+
+This tool wraps the functionality of the ``varscan somatic`` and the
+``varscan fpfilter`` command line tools.
 
-VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data.
-It calls variants from a mpileup dataset and produces a VCF 4.1. Full documentation is available online_.
+.. class:: infomark
+
+   The wrapper aims at providing the same functionality as the
+   ``varscan fpfilter`` tool, but implements it using ``pysam`` internally.
+   Note that, as one limitation compared to the original ``varscan`` tool,
+   the current version does not apply filters to indels!
 
-This tool calls germline/somatic variants from tumor-normal pileups.
+The tool is designed to detect genetic variants in a **pair of samples**
+representing normal and tumor tissue from the same individual. It classifies
+the variants, according to their most likely origin, as **somatic** (variant is
+found in the tumor, but not in the normal sample, *i.e.*, is the consequence of
+a somatic mutation event), **germline** (variant is found in both samples =>
+germline mutation event) and **LOH** (variant is found in both samples, but
+only the tumor sample appears to be homozygous for it => loss of heterozygosity
+event).
+This classification is encoded in the variant ``INFO`` fields of the VCF output
+produced by the tool in the form of a status code ``SS`` (somatic status),
+where:
+
+- ``SS=1`` signifies a likely germline variant,
+- ``SS=2`` a somatic variant
+- ``SS=3`` a LOH variant
 
-.. _VarScan: http://dkoboldt.github.io/varscan/
-.. _online: http://dkoboldt.github.io/varscan/using-varscan.html
+In addition, ``SS=0`` indicates a possible variant, but with insufficient
+evidence for an, at least, heterozygous state in either individual sample, and
+``SS=5`` is used for variants of unexplained origin (*e.g.*, variants found in
+the normal, but not in the tumor tissue sample).
+
+In a second step, following variant calling, the tool can try to detect likely
+false-positive calls by re-inspecting the data at the variant sites more
+carefully and looking for signs that may indicate problems with the
+sequencing data or its mapping. If a called variant is deemed a possible
+false-positive at this step, this gets indicated in the ``FILTER`` field of the
+variant record in the VCF output. For high confidence variants passing all
+posterior (applied after variant calling) filters the value of the field will
+be ``PASS``, for variants failing any of the posterior filters the value will
+be a ``;``-separated list of the problematic filters.
+
 
 **Input**
 
-::
-
-  mpileup file - The SAMtools mpileup files for the normal and tumor tissue
- 
+The tool takes as input a reference genome (in fasta format) and a pair of
+aligned reads datasets (bam format).
 
 **Output**
 
-VarScan produces a VCF 4.1 dataset as output.
+A VCF dataset of called variants. When asked to *Generate separate output
+datasets for SNP and indel calls*, the tool will behave like the
+``varscan somatic`` command line tool and produce two VCF datasets - one with
+just the single nucleotide variants, while the other one will store
+insertion/deletion variants.
+
+**Options**
+
+*Estimated purity of normal sample / of tumor sample*
+
+Since, in practice, it is often impossible to isolate tissue samples without
+contamination from surrounding tissue or from invading cells, these two fields
+let you indicate your estimate of the purity of the two samples (as fractions
+between 0 and 1, where 1 would indicate a contamination-free sample and 0.5 a
+sample to which the desired tissue contributes only 50%, while the other 50%
+consist of cells from the other tissue type).
+
+*Settings for Variant Calling*
 
+Settings in this section will affect the steps of variant calling and
+classification. You can accept VarScan's default values for the corresponding
+parameters or customize them according to your needs.
 
+*Settings for Posterior Variant Filtering*
+
+Use the parameters in this section to configure the false-positive filtering
+step that follows variant calling and classification. These settings will not
+influence the number of variants detected nor their classification, but may
+change the ``FILTER`` field of variant records to indicate which variants
+failed to pass certain filters. You can use this information with downstream
+tools to exclude certain variants from further analysis steps or include only
+high confidence variants that passed all filters (those with ``PASS`` as their
+``INFO`` field value. You can accept the orignal filter defaults of the
+``varscan fpfilter`` command line tool, use the settings established for the
+tool in the `DREAM3 challenge`_, or choose to customize the settings.
+Alternatively, you can also choose to skip posterior filtering entirely, in
+which case all variants will have their ``INFO`` field set to ``PASS``.
+
+.. _DREAM3 challenge: https://www.synapse.org/#!Synapse:syn312572/wiki/58893
     </help>
     <expand macro="citations" />
 </tool>