diff varscan_somatic.xml @ 2:2fe9ebb98aad draft

planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit 30867f1f022bed18ba1c3b8dc9c54226890b3a9c
author iuc
date Tue, 04 Dec 2018 05:15:50 -0500
parents 31a38ce7e8ae
children d37adcc2ec03
line wrap: on
line diff
--- a/varscan_somatic.xml	Sun Jul 15 09:19:25 2018 -0400
+++ b/varscan_somatic.xml	Tue Dec 04 05:15:50 2018 -0500
@@ -1,120 +1,534 @@
 <tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.1">
-    <description>Call germline/somatic variants from tumor-normal pileups</description>
+    <description>Call germline/somatic and LOH variants from tumor-normal sample pairs</description>
     <macros>
         <import>macros.xml</import>
+        <macro name="test_mentions_contig">
+            <assert_contents>
+                <has_line_matching
+                expression="##contig=.ID=chrM,length=16571." />
+            </assert_contents>
+        </macro>
+        <macro name="test_mentions_filters">
+            <assert_contents>
+                <has_line_matching
+                expression="##FILTER=.ID=VarCount,Description=.+" />
+                <has_line_matching
+                expression="##FILTER=.ID=ReadLenDiff,Description=.+" />
+                <has_line_matching
+                expression="##FILTER=.ID=RefDist3,Description=.+" />
+            </assert_contents>
+        </macro>
+        <macro name="test_not_mentions_filters">
+            <assert_contents>
+                <not_has_text
+                text="##FILTER=&lt;ID=VarCount,Description=" />
+                <not_has_text
+                text="##FILTER=&lt;ID=ReadLenDiff,Description=" />
+                <not_has_text
+                text="##FILTER=&lt;ID=RefDist3,Description=" />
+            </assert_contents>
+        </macro>
     </macros>
-    <expand macro="requirements" />
-    <expand macro="stdio" />
+    <expand macro="requirements">
+        <requirement type="package" version="3.6.7">python</requirement>
+        <requirement type="package" version="0.15.1">pysam</requirement>
+    </expand>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
     <command><![CDATA[
-        varscan somatic
-            @INPUT_PILEUPS@
-            --min-coverage ${min_coverage}
-            --min-reads2 ${min_reads2}
-            --min-avg-qual ${min_avg_qual}
-            --min-var-freq ${min_var_freq}
-            --min-freq-for-hom ${min_freq_for_hom}
+        #if str($reference.source) == "history":
+            #set ref_genome = 'ref.fa'
+            ln -s -f '$reference.genome' $ref_genome &&
+        #else:
+            #set ref_genome = '$reference.genome.fields.path'
+        #end if
+        #set normal_data = 'normal.bam'
+        #set tumor_data = 'tumor.bam'
+        ln -s -f '$normal_bam' $normal_data &&
+        ln -s -f '$tumor_bam' $tumor_data &&
+        ln -s -f '${normal_bam.metadata.bam_index}' ${normal_data}.bai &&
+        ln -s -f '${tumor_bam.metadata.bam_index}' ${tumor_data}.bai &&
+        python3 $__tool_directory__/varscan.py
+            --normal '$normal_data'
+            --tumor '$tumor_data'
             --normal-purity ${normal_purity}
             --tumor-purity ${tumor_purity}
-            --tumor-purity ${tumor_purity}
-            --min-coverage-normal ${min_coverage_normal}
-            --somatic-p-value ${somatic_p_value}
-            --p-value ${p_value}
-            #if str($strand_filter) == 'yes':
-              --strand-filter 1
+            #if str($split_output):
+                --ofile variants_out
+                $split_output
+            #else:
+                --ofile '$output'
+            #end if
+            --threads \${GALAXY_SLOTS:-2}
+            #if str($call_params.settings) == "custom":
+                ## samtools mpileup parameters
+                --min-basequal ${call_params.min_avg_qual}
+                --min-mapqual ${call_params.min_mapqual}
+                ## VarScan parameters
+                --min-coverage ${call_params.min_coverage}
+                --min-var-count ${call_params.min_reads2}
+                --min-var-freq ${call_params.min_var_freq}
+                --min-hom-freq ${call_params.min_freq_for_hom}
+                --p-value ${call_params.p_value}
+                --somatic-p-value ${call_params.somatic_p_value}
             #end if
-
-            --output-vcf 1
+            #if str($filter_params.settings) == "no_filter":
+                --no-filters
+            #elif str($filter_params.settings) == "dream3_settings":
+                --min-var-count2 3
+                --min-var-count2-lc 1
+                --min-var-freq2 0.05
+                --max-somatic-p 0.05
+                --max-somatic-p-depth 10
+                --min-ref-readpos 0.2
+                --min-var-readpos 0.15
+                --min-ref-dist3 0.2
+                --min-var-dist3 0.15
+                --min-ref-len 90
+                --min-var-len 90
+                --max-len-diff 0.05
+                --min-strandedness 0
+                --min-strand-reads 5
+                --min-ref-basequal 15
+                --min-var-basequal 30
+                --max-basequal-diff 50
+                --min-ref-mapqual 20
+                --min-var-mapqual 30
+                --max-mapqual-diff 10
+                --max-ref-mmqs 50
+                --max-var-mmqs 100
+                --min-mmqs-diff 0
+                --max-mmqs-diff 50
+            #elif str($filter_params.settings) == "custom":
+                --min-var-count2 ${filter_params.min_var_count}
+                --min-var-count2-lc ${filter_params.min_var_count_lc}
+                --min-var-freq2 ${filter_params.min_var_freq2}
+                --max-somatic-p ${filter_params.max_somatic_p}
+                --max-somatic-p-depth ${filter_params.max_somatic_p_depth}
+                --min-ref-readpos ${filter_params.min_ref_readpos}
+                --min-var-readpos ${filter_params.min_var_readpos}
+                --min-ref-dist3 ${filter_params.min_ref_dist3}
+                --min-var-dist3 ${filter_params.min_var_dist3}
+                --min-ref-len ${filter_params.min_ref_len}
+                --min-var-len ${filter_params.min_var_len}
+                --max-len-diff ${filter_params.max_len_diff}
+                --min-strandedness ${filter_params.min_strandedness}
+                --min-strand-reads ${filter_params.min_strand_reads}
+                --min-ref-basequal ${filter_params.min_ref_basequal}
+                --min-var-basequal ${filter_params.min_var_basequal}
+                --max-basequal-diff ${filter_params.max_basequal_diff}
+                --min-ref-mapqual ${filter_params.min_ref_mapqual}
+                --min-var-mapqual ${filter_params.min_var_mapqual}
+                --max-mapqual-diff ${filter_params.max_mapqual_diff}
+                --max-ref-mmqs ${filter_params.max_ref_mmqs}
+                --max-var-mmqs ${filter_params.max_var_mmqs}
+                --min-mmqs-diff ${filter_params.min_mmqs_diff}
+                --max-mmqs-diff ${filter_params.max_mmqs_diff}
+            #end if
+            --verbose
+            $ref_genome
     ]]></command>
 
     <inputs>
-
-        <expand macro="input_pileups"/>
-
-        <expand macro="min_coverage" />
-        <param argument="--min-coverage-normal" name="min_coverage_normal" type="integer" value="8" min="1" max="200"
-            label="Minimum read depth from the normal sample" help="Minimum depth at a position to make a call" />
-        <param argument="--min-coverage-tumor" name="min_coverage_tumor" type="integer" value="6" min="1" max="200"
-            label="Minimum read depth from the tumor sample" help="Minimum depth at a position to make a call" />
-        <expand macro="min_reads2" />
-        <expand macro="min_avg_qual" />
-        <expand macro="min_var_freq" value="0.10" />
-        <expand macro="min_freq_for_hom" />
-        <param argument="--normal-purity" name="normal_purity" type="float" value="1.00" min="0" max="1.00"
-            label="Estimated purity (non-tumor content) of normal sample"/>
-        <param argument="--tumor-purity" name="tumor_purity" type="float" value="1.00" min="0" max="1.00"
-            label="Estimated purity (tumor content) of tumor sample"/>
-        <expand macro="p_value" label="P-value threshold to call a heterozygote" value="0.99"/>
-        <param argument="--somatic-p-value" name="somatic_p_value" type="float" value="0.05" min="0" max="1"
-            label="p-value threshold for calling somatic sites"/>
-        <expand macro="strand_filter" />
+        <conditional name="reference">
+            <param name="source" type="select"
+            label="Will you select a reference genome from your history or use a built-in genome?">
+                <option value="cached">Use a built-in genome</option>
+                <option value="history">Use a genome from my history</option>
+            </param>
+            <when value="cached">
+                <param name="genome" type="select"
+                label="reference genome"
+                help="The fasta reference genome that variants should be called against.">
+                    <options from_data_table="fasta_indexes" />
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome" type="data" format="fasta"
+                label="reference genome"
+                help="The fasta reference genome that variants should be called against."/>
+            </when>
+        </conditional>
+        <param name="normal_bam" type="data" format="bam"
+        label="aligned reads from normal sample" />
+        <param name="tumor_bam" type="data" format="bam"
+        label="aligned reads from tumor sample" />
+        <param argument="--normal-purity" name="normal_purity" type="float" value="1.0" min="0" max="1.0"
+        label="Estimated purity (non-tumor content) of normal sample"/>
+        <param argument="--tumor-purity" name="tumor_purity" type="float" value="1.0" min="0" max="1.0"
+        label="Estimated purity (tumor content) of tumor sample"/>
+        <param name="split_output" type="boolean" truevalue="--split-output" falsevalue="" checked="false"
+        label="Generate separate output datasets for SNP and indel calls?" />
+        <conditional name="call_params">
+            <param name="settings" label="Settings for Variant Calling" type="select">
+                <option value="varscan_defaults" selected="true">Use default values</option>
+                <option value="custom">Customize settings</option>
+            </param>
+            <when value="custom">
+                <param argument="samtools mpileup -Q" name="min_avg_qual" type="integer" value="13" min="0" max="50"
+                label="Minimum base quality"
+                help="The minimum base quality at the variant position required to use a read for calling" />
+                <param argument="samtools mpileup -q" name="min_mapqual" type="integer" value="0" min="0" max="60"
+                label="Minimum mapping quality"
+                help="The minimum mapping quality required for a read to be considered in variant calling" />
+                <expand macro="min_coverage"
+                help="Minimum site coverage required in the normal and in the tumor sample to call a variant. This threshold gets applied after eliminating reads with low base and mapping qualitiy as defined above." />
+                <expand macro="min_reads2" />
+                <expand macro="min_var_freq" value="0.1" />
+                <expand macro="min_freq_for_hom" />
+                <expand macro="p_value" value="0.99"
+                help="The p-value threshold used to determine if a variant should be called for either sample" />
+                <param argument="--somatic-p-value" name="somatic_p_value" type="float" value="0.05" min="0" max="1"
+                label="P-value threshold for calling somatic variants and LOH events"
+                help="The p-value threshold used to determine if read count differences between the normal and the tumor sample justify classification of a variant as somatic or as an LOH event" />
+            </when>
+            <when value="varscan_defaults" />
+        </conditional>
+        <conditional name="filter_params">
+            <param name="settings" label="Settings for Posterior Variant Filtering" type="select">
+                <option value="varscan_defaults" selected="true">Use default values</option>
+                <option value="dream3_settings">Use settings optimized for DREAM-3</option>
+                <option value="no_filter">Do not perform posterior filtering</option>
+                <option value="custom">Customize settings</option>
+            </param>
+            <when value="varscan_defaults" />
+            <when value="dream3_settings" />
+            <when value="no_filter" />
+            <when value="custom">
+                <param argument="--min-var-count" name="min_var_count" type="integer" value="4" min="1" max="200"
+                label="Minimum number of variant-supporting reads"
+                help="" />
+                <param argument="--min-var-count-lc" name="min_var_count_lc" type="integer" value="2" min="1" max="200"
+                label="Low coverage minimum number of variant-supporting reads"
+                help="Will be applied instead of the --min-var-count limit for sites with poor overall (less than --max-somatic-p-depth) coverage" />
+                <param argument="--min-var-freq" name="min_var_freq2" type="float" value="0.05" min="0" max="1"
+                label="Minimum variant allele frequency"
+                help="" />
+                <param argument="--max-somatic-p" name="max_somatic_p" type="float" value="0.05" min="0" max="1"
+                label="Maximum somatic p-value allowed for a somatic call"
+                help="" />
+                <param argument="--max-somatic-p-depth" name="max_somatic_p_depth" type="integer" value="10" min="2" max="200"
+                label="Depth required at variant site to run --max-somatic-p filter"
+                help="" />
+                <param argument="--min-ref-readpos" name="min_ref_readpos" type="float" value="0.1" min="0" max="1"
+                label="Minimum relative variant position in ref-supporting reads"
+                help="The minimum average relative distance from the ends of ref-supporting reads required for variant sites" />
+                <param argument="--min-var-readpos" name="min_var_readpos" type="float" value="0.1" min="0" max="1"
+                label="Minimum relative variant position in variant-supporting reads"
+                help="The minimum average relative distance from the ends of variant-supporting reads required for variant sites" />
+                <param argument="--min-ref-dist3" name="min_ref_dist3" type="float" value="0.1" min="0" max="1"
+                label="Minimum distance of variant site from 3'-end of ref-supporting reads"
+                help="The minimum average relative distance from the effective 3'end of ref-supporting reads required for variant sites" />
+                <param argument="--min-var-dist3" name="min_var_dist3" type="float" value="0.1" min="0" max="1"
+                label="Minimum distance of variant site from 3'-end of variant-supporting reads"
+                help="The minimum average relative distance from the effective 3'end of variant-supporting reads required for variant sites" />
+                <param argument="--min-ref-avgrl" name="min_ref_len" type="integer" value="90" min="0" max="200"
+                label="Minimum length of ref-supporting reads"
+                help="The minimum average trimmed length required for reads supporting the reference allele" />
+                <param argument="--min-var-avgrl" name="min_var_len" type="integer" value="90" min="0" max="200"
+                label="Minimum length of variant-supporting reads"
+                help="The minimum average trimmed length required for reads supporting the variant allele" />
+                <param argument="--max-rl-diff" name="max_len_diff" type="float" value="0.25" min="0" max="1"
+                label="Maximum relative read length difference"
+                help="The maximum allowed average relative read length difference (ref - var) between reads supporting the reference and the variant allele" />
+                <param argument="--min-strandedness" name="min_strandedness" type="float" value="0.01" min="0" max="0.5"
+                label="Minimum fraction of variant reads from each strand"
+                help="The minimum fraction of variant reads that are required to come from the forward and from the reverse strand" />
+                <param argument="--min-strand-reads" name="min_strand_reads" type="integer" value="5" min="2" max="200"
+                label="Minimum variant allele depth required to apply the --min-strandedness filter"
+                help="" />
+                <param argument="--min-ref-basequal" name="min_ref_basequal" type="integer" value="15" min="1" max="50"
+                label="Minimum average base quality for the ref allele"
+                help="The minimum average base quality required at the variant site for reads supporting the reference allele" />
+                <param argument="--min-var-basequal" name="min_var_basequal" type="integer" value="15" min="1" max="50"
+                label="Minimum average base quality for the variant allele"
+                help="The minimum average base quality required at the variant site for reads supporting the variant allele" />
+                <param argument="max-basequal-diff" name="max_basequal_diff" type="integer" value="50" min="0" max="50"
+                label="Maximum base quality difference between ref- and variant-supporting reads"
+                help="The maximum average base quality difference (ref - var) allowed between the variant site positions of reads supporting the reference and the variant allele" />
+                <param argument="--min-ref-mapqual" name="min_ref_mapqual" type="integer" value="15" min="1" max="60"
+                label="Minimum average mapping quality of ref-supporting reads"
+                help="The minimum average mapping quality required for reads supporting the reference allele" />
+                <param argument="--min-var-mapqual" name="min_var_mapqual" type="integer" value="15" min="1" max="60"
+                label="Minimum average mapping quality of variant-supporting reads"
+                help="The minimum average mapping quality required for reads supporting the variant allele" />
+                <param argument="--max-mapqual-diff" name="max_mapqual_diff" type="integer" value="50" min="0" max="60"
+                label="Maximum mapping quality difference between ref- and variant-supporting reads"
+                help="The maximum average mapping quality difference (ref - var) allowed between reads supporting the reference and the variant allele" />
+                <param argument="--max-ref-mmqs" name="max_ref_mmqs" type="integer" value="100" min="0"
+                label="Maximum mismatch base quality sum of ref-supporting reads"
+                help="The maximum mismatch base quality sum allowed for reads supporting the reference allele" />
+                <param argument="--max-var-mmqs" name="max_var_mmqs" type="integer" value="100" min="0"
+                label="Maximum mismatch base quality sum of var-supporting reads"
+                help="The maximum mismatch base quality sum allowed for reads supporting the variant allele" />
+                <param argument="--min-mmqs-diff" name="min_mmqs_diff" type="integer" value="0" min="0"
+                label="Minimum difference between mismatch base quality sums of variant- and ref-supporting reads"
+                help="The minimum difference in the mismatch base quality sums (var - ref) required between reads supporting the variant and the reference allele" />
+                <param argument="--max-mmqs-diff" name="max_mmqs_diff" type="integer" value="50" min="1"
+                label="Maximum difference between mismatch base quality sums of variant- and ref-supporting reads"
+                help="The maximum difference in the mismatch base quality sums (var - ref) allowed between reads supporting the variant and the reference allele" />
+            </when>
+        </conditional>
     </inputs>
     <outputs>
-        <data name="output_indel" from_work_dir="galaxy_out.indel.vcf" format="vcf"/>
-        <data name="output_snp" from_work_dir="galaxy_out.snp.vcf" format="vcf"/>
+        <data name="output" format="vcf">
+            <filter>not split_output</filter>
+        </data>
+        <data name="output_snp" from_work_dir="variants_out.snp" format="vcf"
+        label="Varscan somatic SNP calls on ${on_string}">
+            <filter>split_output</filter>
+        </data>
+        <data name="output_indel" from_work_dir="variants_out.indel" format="vcf"
+        label="Varscan somatic indel calls on ${on_string}">
+            <filter>split_output</filter>
+        </data>
     </outputs>
     <tests>
-        <test>
-            <conditional name="pileup">
-                <param name="pileup_select" value="separated" />
-                <param name="normal_pileup" value="N_Region_Chr1_CDKN2C.pileup.gz" />
-                <param name="tumor_pileup" value="T_Region_Chr1_CDKN2C.pileup.gz" />
+        <test expect_num_outputs="1">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="false" />
+            <conditional name="call_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <conditional name="filter_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <output name="output">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="true" />
+            <conditional name="call_params">
+                <param name="settings" value="varscan_defaults" />
             </conditional>
-            <param name="min_coverage" value="2" />
-            <param name="min_coverage_normal" value="2" />
-            <param name="min_coverage_tumor" value="2" />
-            <param name="min_reads2" value="1" />
-            <param name="min_avg_qual" value="5" />
-            <param name="min_var_freq" value="0.01" />
-            <param name="min_freq_for_hom" value="0.75" />
+            <conditional name="filter_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <output name="output_indel">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
+            <output name="output_snp">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="false" />
+            <conditional name="call_params">
+                <param name="settings" value="custom" />
+                <param name="min_coverage" value="2" />
+                <param name="min_reads2" value="1" />
+                <param name="min_avg_qual" value="5" />
+                <param name="min_var_freq" value="0.01" />
+                <param name="min_freq_for_hom" value="0.66" />
+                <param name="p_value" value="0.97" />
+                <param name="somatic_p_value" value="0.09" />
+            </conditional>
             <param name="normal_purity" value="0.6" />
             <param name="tumor_purity" value="0.6" />
-            <param name="p_value" value="0.99" />
-            <output name="output_indel" file="varscan_somatic_indel_result1.vcf" lines_diff="0" />
-            <output name="output_snp" file="varscan_somatic_snp_result1.vcf" lines_diff="0" />
+            <conditional name="filter_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <assert_stderr>
+                <has_line_matching
+                expression=" Min coverage:&#09;2x for Normal, 2x for Tumor" />
+                <has_line_matching
+                expression="Min reads2:&#09;1" />
+                <has_line_matching
+                expression="Min var freq:&#09;0.01" />
+                <has_line_matching
+                expression="Min freq for hom:&#09;0.66" />
+                <has_line_matching
+                expression="Normal purity:&#09;0.6" />
+                <has_line_matching
+                expression="Tumor purity:&#09;0.6" />
+                <has_line_matching
+                expression="Min avg qual:&#09;5" />
+                <has_line_matching
+                expression="P-value thresh:&#09;0.97" />
+                <has_line_matching
+                expression="Somatic p-value:&#09;0.09" />
+            </assert_stderr>
+            <output name="output">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
         </test>
-        <test>
-            <conditional name="pileup">
-                <param name="pileup_select" value="combined" />
-                <param name="combined_pileup" value="NT.pileup.gz" />
+        <test expect_num_outputs="1">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="false" />
+            <conditional name="call_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <conditional name="filter_params">
+                <param name="settings" value="dream3_settings" />
             </conditional>
-            <param name="min_coverage" value="2" />
-            <param name="min_coverage_normal" value="2" />
-            <param name="min_coverage_tumor" value="2" />
-            <param name="min_reads2" value="1" />
-            <param name="min_avg_qual" value="5" />
-            <param name="min_var_freq" value="0.01" />
-            <param name="min_freq_for_hom" value="0.75" />
-            <param name="normal_purity" value="0.6" />
-            <param name="tumor_purity" value="0.6" />
-            <param name="p_value" value="0.99" />
-            <output name="output_indel" file="varscan_somatic_indel_result2.vcf" lines_diff="0" />
-            <output name="output_snp" file="varscan_somatic_snp_result2.vcf" lines_diff="0" />
+            <output name="output">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="false" />
+            <conditional name="call_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <conditional name="filter_params">
+                <param name="settings" value="no_filter" />
+            </conditional>
+            <output name="output">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_not_mentions_filters" />
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="genome" value="hg19_chrM.fa" />
+            </conditional>
+            <param name="normal_bam" value="control_chrM.bam" />
+            <param name="tumor_bam" value="tumor_chrM.bam" />
+            <param name="split_output" value="false" />
+            <conditional name="call_params">
+                <param name="settings" value="varscan_defaults" />
+            </conditional>
+            <conditional name="filter_params">
+                <param name="settings" value="custom" />
+                <param name="min_ref_basequal" value="28" />
+                <param name="min_var_basequal" value="28" />
+            </conditional>
+            <output name="output">
+                <expand macro="test_mentions_contig" />
+                <expand macro="test_mentions_filters" />
+            </output>
         </test>
     </tests>
 
     <help>
-**VarScan Overview**
+@HELP_HEADER@
+
+**The Varscan Somatic tool for Galaxy**
+
+This tool wraps the functionality of the ``varscan somatic`` and the
+``varscan fpfilter`` command line tools.
 
-VarScan_ performs variant detection for massively parallel sequencing data, such as exome, WGS, and transcriptome data.
-It calls variants from a mpileup dataset and produces a VCF 4.1. Full documentation is available online_.
+.. class:: infomark
+
+   The wrapper aims at providing the same functionality as the
+   ``varscan fpfilter`` tool, but implements it using ``pysam`` internally.
+   Note that, as one limitation compared to the original ``varscan`` tool,
+   the current version does not apply filters to indels!
 
-This tool calls germline/somatic variants from tumor-normal pileups.
+The tool is designed to detect genetic variants in a **pair of samples**
+representing normal and tumor tissue from the same individual. It classifies
+the variants, according to their most likely origin, as **somatic** (variant is
+found in the tumor, but not in the normal sample, *i.e.*, is the consequence of
+a somatic mutation event), **germline** (variant is found in both samples =>
+germline mutation event) and **LOH** (variant is found in both samples, but
+only the tumor sample appears to be homozygous for it => loss of heterozygosity
+event).
+This classification is encoded in the variant ``INFO`` fields of the VCF output
+produced by the tool in the form of a status code ``SS`` (somatic status),
+where:
+
+- ``SS=1`` signifies a likely germline variant,
+- ``SS=2`` a somatic variant
+- ``SS=3`` a LOH variant
 
-.. _VarScan: http://dkoboldt.github.io/varscan/
-.. _online: http://dkoboldt.github.io/varscan/using-varscan.html
+In addition, ``SS=0`` indicates a possible variant, but with insufficient
+evidence for an, at least, heterozygous state in either individual sample, and
+``SS=5`` is used for variants of unexplained origin (*e.g.*, variants found in
+the normal, but not in the tumor tissue sample).
+
+In a second step, following variant calling, the tool can try to detect likely
+false-positive calls by re-inspecting the data at the variant sites more
+carefully and looking for signs that may indicate problems with the
+sequencing data or its mapping. If a called variant is deemed a possible
+false-positive at this step, this gets indicated in the ``FILTER`` field of the
+variant record in the VCF output. For high confidence variants passing all
+posterior (applied after variant calling) filters the value of the field will
+be ``PASS``, for variants failing any of the posterior filters the value will
+be a ``;``-separated list of the problematic filters.
+
 
 **Input**
 
-::
-
-  mpileup file - The SAMtools mpileup files for the normal and tumor tissue
- 
+The tool takes as input a reference genome (in fasta format) and a pair of
+aligned reads datasets (bam format).
 
 **Output**
 
-VarScan produces a VCF 4.1 dataset as output.
+A VCF dataset of called variants. When asked to *Generate separate output
+datasets for SNP and indel calls*, the tool will behave like the
+``varscan somatic`` command line tool and produce two VCF datasets - one with
+just the single nucleotide variants, while the other one will store
+insertion/deletion variants.
+
+**Options**
+
+*Estimated purity of normal sample / of tumor sample*
+
+Since, in practice, it is often impossible to isolate tissue samples without
+contamination from surrounding tissue or from invading cells, these two fields
+let you indicate your estimate of the purity of the two samples (as fractions
+between 0 and 1, where 1 would indicate a contamination-free sample and 0.5 a
+sample to which the desired tissue contributes only 50%, while the other 50%
+consist of cells from the other tissue type).
+
+*Settings for Variant Calling*
 
+Settings in this section will affect the steps of variant calling and
+classification. You can accept VarScan's default values for the corresponding
+parameters or customize them according to your needs.
 
+*Settings for Posterior Variant Filtering*
+
+Use the parameters in this section to configure the false-positive filtering
+step that follows variant calling and classification. These settings will not
+influence the number of variants detected nor their classification, but may
+change the ``FILTER`` field of variant records to indicate which variants
+failed to pass certain filters. You can use this information with downstream
+tools to exclude certain variants from further analysis steps or include only
+high confidence variants that passed all filters (those with ``PASS`` as their
+``INFO`` field value. You can accept the orignal filter defaults of the
+``varscan fpfilter`` command line tool, use the settings established for the
+tool in the `DREAM3 challenge`_, or choose to customize the settings.
+Alternatively, you can also choose to skip posterior filtering entirely, in
+which case all variants will have their ``INFO`` field set to ``PASS``.
+
+.. _DREAM3 challenge: https://www.synapse.org/#!Synapse:syn312572/wiki/58893
     </help>
     <expand macro="citations" />
 </tool>