view varscan_somatic.xml @ 9:4e97191a1ff7 draft

"planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/varscan commit fcf5ac14629c694f0f64773fab0428b1e78fe156"
author iuc
date Fri, 16 Aug 2019 15:49:54 -0400
parents b79bb8b09822
children a57606054bd7
line wrap: on
line source

<tool id="varscan_somatic" name="VarScan somatic" version="@VERSION@.5">
    <description>Call germline/somatic and LOH variants from tumor-normal sample pairs</description>
    <macros>
        <import>macros.xml</import>
        <!-- Test helper: the output must contain a contig header line for chrM
             with length 16571. The "." wildcards match the characters that
             surround the header value. -->
        <macro name="test_mentions_contig">
            <assert_contents>
                <has_line_matching expression="##contig=.ID=chrM,length=16571." />
            </assert_contents>
        </macro>
        <!-- Test helper: the output must declare the VarCount, ReadLenDiff and
             RefDist3 FILTER header lines, each with a non-empty description. -->
        <macro name="test_mentions_filters">
            <assert_contents>
                <has_line_matching expression="##FILTER=.ID=VarCount,Description=.+" />
                <has_line_matching expression="##FILTER=.ID=ReadLenDiff,Description=.+" />
                <has_line_matching expression="##FILTER=.ID=RefDist3,Description=.+" />
            </assert_contents>
        </macro>
        <!-- Test helper: counterpart of test_mentions_filters; none of the three
             FILTER header declarations may appear in the output. -->
        <macro name="test_not_mentions_filters">
            <assert_contents>
                <not_has_text text="##FILTER=&lt;ID=VarCount,Description=" />
                <not_has_text text="##FILTER=&lt;ID=ReadLenDiff,Description=" />
                <not_has_text text="##FILTER=&lt;ID=RefDist3,Description=" />
            </assert_contents>
        </macro>
        <!-- Collapsed section with a multi-select of extra flags. The option
             values are the literal command-line switches; the command section
             emits every selected value as is. -->
        <macro name="filter_compat_options">
            <section name="experts_only" title="Compatibility options for experts" expanded="false">
                <param name="compat_opts" type="select" multiple="true" display="checkboxes" optional="true" label="Compatibility Options for Posterior Variant Filtering" help="">
                    <option value="--ignore-md" selected="true">Always determine mismatch quality statistics from recalculated read to reference alignments. Ignore read MD tags if present.</option>
                    <option value="--detect-q2-runs" selected="false">Treat runs of base qualities of value 2 at the 3' end of reads as quality control indicator (Illumina 1.5 compatibility setting)</option>
                </param>
            </section>
        </macro>
    </macros>
    <!-- Expands the shared "requirements" macro and supplies two additional
         package requirements (python and pysam, pinned to the versions below)
         as its body. NOTE(review): the macro is not defined in this file,
         presumably it comes from the imported macros.xml; confirm that it
         yields these child elements. -->
    <expand macro="requirements">
        <requirement type="package" version="3.6.7">python</requirement>
        <requirement type="package" version="0.15.1">pysam</requirement>
    </expand>
    <!-- Error detection: any exit code of 1 or higher matches this rule. No
         level attribute is given, so the default severity for exit_code
         rules applies. -->
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <!-- Command template (Cheetah): stages the inputs under fixed names in the
         job working directory, then runs the bundled varscan.py script with
         options derived from the tool form. -->
    <command><![CDATA[
        ## Reference genome: a history dataset is symlinked as ref.fa, a built-in
        ## genome is used in place via the path field of its data table entry.
        #if str($reference.source) == "history":
            #set $ref_genome = 'ref.fa'
            ln -s -f '$reference.genome' $ref_genome &&
        #else:
            #set $ref_genome = str($reference.genome.fields.path)
        #end if
        ## Symlink both BAM inputs and their index files under fixed names so
        ## that each index sits next to its BAM as <name>.bai.
        #set $normal_data = 'normal.bam'
        #set $tumor_data = 'tumor.bam'
        ln -s -f '$normal_bam' $normal_data &&
        ln -s -f '$tumor_bam' $tumor_data &&
        ln -s -f '${normal_bam.metadata.bam_index}' ${normal_data}.bai &&
        ln -s -f '${tumor_bam.metadata.bam_index}' ${tumor_data}.bai &&
        ## The script path is quoted so that a tool directory containing spaces
        ## or shell metacharacters cannot split or alter the command line.
        python3 '$__tool_directory__/varscan.py'
            --normal '$normal_data'
            --tumor '$tumor_data'
            --normal-purity ${normal_purity}
            --tumor-purity ${tumor_purity}
            ## Split mode writes to the variants_out prefix; the outputs section
            ## collects variants_out.snp and variants_out.indel from the work dir.
            #if str($split_output):
                --ofile variants_out
                $split_output
            #else:
                --ofile '$output'
            #end if
            --threads \${GALAXY_SLOTS:-2}
            ## Variant calling options are only passed when customized; otherwise
            ## the defaults of the script apply.
            #if str($call_params.settings) == "custom":
                ## samtools mpileup parameters
                --min-basequal ${call_params.read_selection.min_basequal}
                --min-mapqual ${call_params.read_selection.min_mapqual}
                ${call_params.read_selection.count_orphans}
                ${call_params.read_selection.detect_overlaps}
                --max-pileup-depth ${call_params.read_selection.max_pileup_depth}
                ## VarScan parameters
                --min-coverage ${call_params.min_coverage}
                --min-var-count ${call_params.min_reads2}
                --min-var-freq ${call_params.min_var_freq}
                --min-hom-freq ${call_params.min_freq_for_hom}
                --p-value ${call_params.p_value}
                --somatic-p-value ${call_params.somatic_p_value}
            #end if
            ## Posterior filtering: disabled, DREAM-3 preset, custom thresholds,
            ## or (no options emitted) the defaults of the script.
            #if str($filter_params.settings) == "no_filter":
                --no-filters
            #else:
                #if str($filter_params.settings) == "dream3_settings":
                    --min-var-count2 3
                    --min-var-count2-lc 1
                    --min-var-count2-depth 10
                    --min-var-freq2 0.05
                    --min-ref-readpos 0.2
                    --min-var-readpos 0.15
                    --min-ref-dist3 0.2
                    --min-var-dist3 0.15
                    --min-ref-len 90
                    --min-var-len 90
                    --max-len-diff 0.05
                    --min-strandedness 0
                    --min-strand-reads 5
                    --min-ref-basequal 15
                    --min-var-basequal 30
                    --max-basequal-diff 50
                    --min-ref-mapqual 20
                    --min-var-mapqual 30
                    --max-mapqual-diff 10
                    --max-ref-mmqs 50
                    --max-var-mmqs 100
                    --min-mmqs-diff 0
                    --max-mmqs-diff 50
                #elif str($filter_params.settings) == "custom":
                    --min-var-count2 ${filter_params.min_var_count}
                    --min-var-count2-lc ${filter_params.min_var_count_lc}
                    --min-var-count2-depth ${filter_params.min_var_count_depth}
                    --min-var-freq2 ${filter_params.min_var_freq2}
                    --min-ref-readpos ${filter_params.min_ref_readpos}
                    --min-var-readpos ${filter_params.min_var_readpos}
                    --min-ref-dist3 ${filter_params.min_ref_dist3}
                    --min-var-dist3 ${filter_params.min_var_dist3}
                    --min-ref-len ${filter_params.min_ref_len}
                    --min-var-len ${filter_params.min_var_len}
                    --max-len-diff ${filter_params.max_len_diff}
                    --min-strandedness ${filter_params.min_strandedness}
                    --min-strand-reads ${filter_params.min_strand_reads}
                    --min-ref-basequal ${filter_params.min_ref_basequal}
                    --min-var-basequal ${filter_params.min_var_basequal}
                    --max-basequal-diff ${filter_params.max_basequal_diff}
                    --min-ref-mapqual ${filter_params.min_ref_mapqual}
                    --min-var-mapqual ${filter_params.min_var_mapqual}
                    --max-mapqual-diff ${filter_params.max_mapqual_diff}
                    --max-ref-mmqs ${filter_params.max_ref_mmqs}
                    --max-var-mmqs ${filter_params.max_var_mmqs}
                    --min-mmqs-diff ${filter_params.min_mmqs_diff}
                    --max-mmqs-diff ${filter_params.max_mmqs_diff}
                #end if
                ## Each selected compatibility option value is itself a
                ## command-line switch and is emitted verbatim.
                #if $filter_params.experts_only.compat_opts:
                    #for $opt in str($filter_params.experts_only.compat_opts).split(','):
                        $opt
                    #end for
                #end if
            #end if
            --verbose
            '$ref_genome'
    ]]></command>

    <inputs>
        <!-- Reference genome source: an entry of the fasta_indexes data table
             ("cached") or a fasta dataset from the history ("history"). -->
        <conditional name="reference">
            <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in genome?">
                <option value="cached">Use a built-in genome</option>
                <option value="history">Use a genome from my history</option>
            </param>
            <when value="cached">
                <param name="genome" type="select" label="reference genome" help="The fasta reference genome that variants should be called against.">
                    <options from_data_table="fasta_indexes" />
                </param>
            </when>
            <when value="history">
                <param name="genome" type="data" format="fasta" label="reference genome" help="The fasta reference genome that variants should be called against." />
            </when>
        </conditional>
        <!-- The two BAM datasets to compare. The command section symlinks them,
             together with their bam_index metadata files, as normal.bam and
             tumor.bam in the working directory. -->
        <param name="normal_bam" type="data" format="bam"
        label="aligned reads from normal sample" />
        <param name="tumor_bam" type="data" format="bam"
        label="aligned reads from tumor sample" />
        <!-- Purity estimates: fractions between 0 and 1 (default 1.0) that are
             always passed on the command line. -->
        <param argument="--normal-purity" name="normal_purity" type="float" value="1.0" min="0" max="1.0"
        label="Estimated purity (non-tumor content) of normal sample"/>
        <param argument="--tumor-purity" name="tumor_purity" type="float" value="1.0" min="0" max="1.0"
        label="Estimated purity (tumor content) of tumor sample"/>
        <!-- When checked, the truevalue flag is added to the command and the
             output filters select the separate SNP and indel datasets instead
             of the single combined one. -->
        <param name="split_output" type="boolean" truevalue="--split-output" falsevalue="" checked="false"
        label="Generate separate output datasets for SNP and indel calls?" />
        <!-- Variant calling settings. With "varscan_defaults" no calling options
             are emitted by the command section; with "custom" every parameter
             below is passed explicitly. -->
        <conditional name="call_params">
            <param name="settings" label="Settings for Variant Calling" type="select">
                <option value="varscan_defaults" selected="true">Use default values</option>
                <option value="custom">Customize settings</option>
            </param>
            <when value="custom">
                <!-- Read selection: the argument attributes name the corresponding
                     samtools mpileup switches for display purposes. -->
                <section name="read_selection" title="Read selection" expanded="true"
                help="The settings in this section control which mapped reads will be used for variant calling.">
                    <param argument="samtools mpileup -Q" name="min_basequal" type="integer" value="13" min="1" max="50"
                    label="Minimum base quality"
                    help="The minimum base quality (default: 13) at a given position required to use a read for calling variants at that site" />
                    <param argument="samtools mpileup -q" name="min_mapqual" type="integer" value="0" min="0" max="60"
                    label="Minimum mapping quality"
                    help="The minimum mapping quality (default: 0) required for a read to be considered in variant calling" />
                    <param argument="samtools mpileup -A" name="count_orphans" type="boolean" truevalue="--count-orphans" falsevalue="" checked="false"
                    label="Use reads from anomalously mapped pairs"
                    help="Applies to paired-end reads only. If set to true, reads from pairs that are flagged as non-proper pairs (SAM/BAM FLAG field 2) will be used in variant calling. The default is to ignore such reads." />
                    <!-- Inverted boolean: the checked state emits nothing, the
                         unchecked state emits the switch that turns overlap
                         detection off. -->
                    <param argument="samtools mpileup -x" name="detect_overlaps" type="boolean" truevalue="" falsevalue="--no-detect-overlaps" checked="true"
                    label="Try to correct for read-pair overlaps"
                    help="Applies to paired-end reads only. If the reads of a pair overlap on the reference, then with this option (on by default), the bases for which both reads provide evidence will be counted only once in variant calling (of the two sequenced bases in the reads, the base with the better base quality will be used)." />
                    <param argument="samtools mpileup -d" name="max_pileup_depth" type="integer" value="8000" min="4000"
                    label="Maximum number of reads per site"
                    help="Restrict the number of reads used for variant calling at a site to this maximum (default: 8000) for each sample. Helps protect against excessive memory usage (and slow tool runs) at sites of extraordinary high coverage." />
                </section>
                <!-- The following five parameters come from macros that are not
                     defined in this file. NOTE(review): presumably they live in
                     macros.xml and take the help/value attributes given here as
                     overrides; confirm against the macro definitions. -->
                <expand macro="min_coverage"
                help="Minimum site coverage (default: 8) required in the normal and in the tumor sample to call a variant. This threshold gets applied after eliminating reads based on the read selection criteria above." />
                <expand macro="min_reads2"
                help="Minimum number (default: 2) of variant-supporting reads (after read selection) at a position required to make a call"/>
                <expand macro="min_var_freq" value="0.1" />
                <expand macro="min_freq_for_hom" />
                <expand macro="p_value" value="0.99"
                help="The p-value threshold (default: 0.99) used to determine if a variant should be called for either sample" />
                <param argument="--somatic-p-value" name="somatic_p_value" type="float" value="0.05" min="0" max="1"
                label="P-value threshold for calling somatic variants and LOH events"
                help="The p-value threshold (default: 0.05) used to determine if read count differences between the normal and the tumor sample justify classification of a variant as somatic or as an LOH event" />
            </when>
            <when value="varscan_defaults" />
        </conditional>
        <!-- Posterior (post-calling) variant filtering. "no_filter" disables the
             filtering step, "varscan_defaults" and "dream3_settings" only expose
             the expert compatibility options, and "custom" exposes every
             threshold that the command section passes on. -->
        <conditional name="filter_params">
            <param name="settings" label="Settings for Posterior Variant Filtering" type="select">
                <option value="varscan_defaults" selected="true">Use default values</option>
                <option value="dream3_settings">Use settings optimized for DREAM-3</option>
                <option value="no_filter">Do not perform posterior filtering</option>
                <option value="custom">Customize settings</option>
            </param>
            <when value="no_filter" />
            <when value="varscan_defaults">
                <expand macro="filter_compat_options" />
            </when>
            <when value="dream3_settings">
                <expand macro="filter_compat_options" />
            </when>
            <when value="custom">
                <!-- Each parameter below is read by name in the "custom" branch
                     of the command template. The argument attribute documents
                     the option name shown to the user. -->
                <param argument="--min-var-count" name="min_var_count" type="integer" value="4" min="1" max="200"
                label="Minimum number of variant-supporting reads"
                help="(default: 4)" />
                <param argument="--min-var-count-lc" name="min_var_count_lc" type="integer" value="2" min="1" max="200"
                label="Low coverage minimum number of variant-supporting reads"
                help="This setting (default: 2) will be applied instead of the --min-var-count limit for sites with poor overall (see threshold below) coverage." />
                <param argument="--max-somatic-p-depth" name="min_var_count_depth" type="integer" value="10" min="2" max="200"
                label="Minimum variant allele count threshold"
                help="Combined depth (default: 10) of ref- and variant-supporting reads required at variant site to apply the (stricter) --min-var-count filter instead of --min-var-count-lc" />
                <param argument="--min-var-freq" name="min_var_freq2" type="float" value="0.05" min="0" max="1"
                label="Minimum variant allele frequency"
                help="(default: 0.05)" />
                <param argument="--min-ref-readpos" name="min_ref_readpos" type="float" value="0.1" min="0" max="1"
                label="Minimum relative variant position in ref-supporting reads"
                help="The minimum average relative distance from either end of ref-supporting reads (default: 0.1) required for variant sites" />
                <param argument="--min-var-readpos" name="min_var_readpos" type="float" value="0.1" min="0" max="1"
                label="Minimum relative variant position in variant-supporting reads"
                help="The minimum average relative distance from either end of variant-supporting reads (default: 0.1) required for variant sites" />
                <param argument="--min-ref-dist3" name="min_ref_dist3" type="float" value="0.1" min="0" max="1"
                label="Minimum distance of variant site from 3'-end of ref-supporting reads"
                help="The minimum average relative distance from the effective 3'end of ref-supporting reads (default: 0.1) required for variant sites. The effective 3'end is defined by the end of the alignment of the read to the reference (or, if the Illumina 1.5 compatibility setting is used, by the first base in 3'->5' direction with a base quality > 2)." />
                <param argument="--min-var-dist3" name="min_var_dist3" type="float" value="0.1" min="0" max="1"
                label="Minimum distance of variant site from 3'-end of variant-supporting reads"
                help="The minimum average relative distance from the effective 3'end of variant-supporting reads (default: 0.1) required for variant sites. The  effective 3'end is defined as above." />
                <param argument="--min-ref-avgrl" name="min_ref_len" type="integer" value="90" min="0" max="200"
                label="Minimum length of ref-supporting reads"
                help="The minimum average trimmed length (default: 90) required for reads supporting the reference allele" />
                <param argument="--min-var-avgrl" name="min_var_len" type="integer" value="90" min="0" max="200"
                label="Minimum length of variant-supporting reads"
                help="The minimum average trimmed length (default: 90) required for reads supporting the variant allele" />
                <param argument="--max-rl-diff" name="max_len_diff" type="float" value="0.25" min="0" max="1"
                label="Maximum relative read length difference"
                help="The maximum allowed difference (default: 0.25) in the average relative read length (ref - var) between reads supporting the reference and the variant allele" />
                <param argument="--min-strandedness" name="min_strandedness" type="float" value="0.01" min="0" max="0.5"
                label="Minimum fraction of variant reads from each strand"
                help="The minimum fraction (default: 0.01) of variant reads that are required to come from the forward and from the reverse strand" />
                <param argument="--min-strand-reads" name="min_strand_reads" type="integer" value="5" min="2" max="200"
                label="Minimum variant allele depth required to apply the --min-strandedness filter"
                help="(default: 5)" />
                <param argument="--min-ref-basequal" name="min_ref_basequal" type="integer" value="15" min="1" max="50"
                label="Minimum average base quality for the ref allele"
                help="The minimum average base quality (default: 15) required at the variant site for reads supporting the reference allele" />
                <param argument="--min-var-basequal" name="min_var_basequal" type="integer" value="15" min="1" max="50"
                label="Minimum average base quality for the variant allele"
                help="The minimum average base quality (default: 15) required at the variant site for reads supporting the variant allele" />
                <!-- The argument carries its leading dashes like every other
                     option in this list. -->
                <param argument="--max-basequal-diff" name="max_basequal_diff" type="integer" value="50" min="0" max="50"
                label="Maximum base quality difference between ref- and variant-supporting reads"
                help="The maximum difference (default: 50) in the average base quality (ref - var) allowed between the variant site positions of reads supporting the reference and the variant allele" />
                <param argument="--min-ref-mapqual" name="min_ref_mapqual" type="integer" value="15" min="1" max="60"
                label="Minimum average mapping quality of ref-supporting reads"
                help="The minimum average mapping quality (default: 15) required for reads supporting the reference allele" />
                <param argument="--min-var-mapqual" name="min_var_mapqual" type="integer" value="15" min="1" max="60"
                label="Minimum average mapping quality of variant-supporting reads"
                help="The minimum average mapping quality (default: 15) required for reads supporting the variant allele" />
                <param argument="--max-mapqual-diff" name="max_mapqual_diff" type="integer" value="50" min="0" max="60"
                label="Maximum mapping quality difference between ref- and variant-supporting reads"
                help="The maximum difference (default: 50) in the average mapping quality (ref - var) allowed between reads supporting the reference and the variant allele" />
                <param argument="--max-ref-mmqs" name="max_ref_mmqs" type="integer" value="100" min="0"
                label="Maximum mismatch base quality sum of ref-supporting reads"
                help="The maximum mismatch base quality sum (default: 100) allowed for reads supporting the reference allele" />
                <param argument="--max-var-mmqs" name="max_var_mmqs" type="integer" value="100" min="0"
                label="Maximum mismatch base quality sum of var-supporting reads"
                help="The maximum mismatch base quality sum (default: 100) allowed for reads supporting the variant allele" />
                <param argument="--min-mmqs-diff" name="min_mmqs_diff" type="integer" value="0" min="0"
                label="Minimum difference between mismatch base quality sums of variant- and ref-supporting reads"
                help="The minimum difference (default: 0) in the mismatch base quality sums (var - ref) required between reads supporting the variant and the reference allele" />
                <param argument="--max-mmqs-diff" name="max_mmqs_diff" type="integer" value="50" min="1"
                label="Maximum difference between mismatch base quality sums of variant- and ref-supporting reads"
                help="The maximum difference (default: 50) in the mismatch base quality sums (var - ref) allowed between reads supporting the variant and the reference allele" />
                <expand macro="filter_compat_options" />
            </when>
        </conditional>
    </inputs>
    <!-- Outputs: one combined VCF by default; when split_output is enabled the
         SNP and indel VCFs are collected from the working directory instead. -->
    <outputs>
        <data name="output" format="vcf">
            <filter>not split_output</filter>
        </data>
        <data name="output_snp" format="vcf" from_work_dir="variants_out.snp" label="Varscan somatic SNP calls on ${on_string}">
            <filter>split_output</filter>
        </data>
        <data name="output_indel" format="vcf" from_work_dir="variants_out.indel" label="Varscan somatic indel calls on ${on_string}">
            <filter>split_output</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- Baseline run: reference genome from the history, default
                 settings for variant calling and posterior filtering, single
                 combined output. The VCF must contain the chrM contig line
                 and the posterior filter declarations. -->
            <conditional name="reference">
                <param name="source" value="history" />
                <param name="genome" value="hg19_chrM.fa" />
            </conditional>
            <param name="normal_bam" value="control_chrM.bam" />
            <param name="tumor_bam" value="tumor_chrM.bam" />
            <param name="split_output" value="false" />
            <conditional name="call_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <conditional name="filter_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <output name="output">
                <expand macro="test_mentions_contig" />
                <expand macro="test_mentions_filters" />
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- Same as the baseline run, but the reference is the built-in
                 genome entry "hg19mito" and both BAM inputs are tagged with
                 dbkey hg19. Expectations on the output are unchanged. -->
            <conditional name="reference">
                <param name="source" value="cached" />
                <param name="genome" value="hg19mito" />
            </conditional>
            <param name="normal_bam" dbkey="hg19" value="control_chrM.bam" />
            <param name="tumor_bam" dbkey="hg19" value="tumor_chrM.bam" />
            <param name="split_output" value="false" />
            <conditional name="call_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <conditional name="filter_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <output name="output">
                <expand macro="test_mentions_contig" />
                <expand macro="test_mentions_filters" />
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- Default settings with split output enabled: two datasets are
                 expected (indel and SNP calls), and each must contain the
                 chrM contig line and the posterior filter declarations. -->
            <conditional name="reference">
                <param name="source" value="history" />
                <param name="genome" value="hg19_chrM.fa" />
            </conditional>
            <param name="normal_bam" value="control_chrM.bam" />
            <param name="tumor_bam" value="tumor_chrM.bam" />
            <param name="split_output" value="true" />
            <conditional name="call_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <conditional name="filter_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <output name="output_indel">
                <expand macro="test_mentions_contig" />
                <expand macro="test_mentions_filters" />
            </output>
            <output name="output_snp">
                <expand macro="test_mentions_contig" />
                <expand macro="test_mentions_filters" />
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- Custom variant calling parameters and non-default purity
                 values. The stderr assertions check that every customized
                 value (coverage, reads2, frequencies, purities, base quality
                 and both p-value thresholds) shows up in the logged settings. -->
            <conditional name="reference">
                <param name="source" value="history" />
                <param name="genome" value="hg19_chrM.fa" />
            </conditional>
            <param name="normal_bam" value="control_chrM.bam" />
            <param name="tumor_bam" value="tumor_chrM.bam" />
            <param name="split_output" value="false" />
            <conditional name="call_params">
                <section name="read_selection">
                    <param name="min_basequal" value="5" />
                </section>
                <param name="settings" value="custom" />
                <param name="min_coverage" value="2" />
                <param name="min_reads2" value="1" />
                <param name="min_var_freq" value="0.01" />
                <param name="min_freq_for_hom" value="0.66" />
                <param name="p_value" value="0.97" />
                <param name="somatic_p_value" value="0.09" />
            </conditional>
            <param name="normal_purity" value="0.6" />
            <param name="tumor_purity" value="0.6" />
            <conditional name="filter_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <!-- &#09; is a literal tab between the setting name and its value -->
            <assert_stderr>
                <has_line_matching
                expression=" Min coverage:&#09;2x for Normal, 2x for Tumor" />
                <has_line_matching
                expression="Min reads2:&#09;1" />
                <has_line_matching
                expression="Min var freq:&#09;0.01" />
                <has_line_matching
                expression="Min freq for hom:&#09;0.66" />
                <has_line_matching
                expression="Normal purity:&#09;0.6" />
                <has_line_matching
                expression="Tumor purity:&#09;0.6" />
                <has_line_matching
                expression="Min avg qual:&#09;5" />
                <has_line_matching
                expression="P-value thresh:&#09;0.97" />
                <has_line_matching
                expression="Somatic p-value:&#09;0.09" />
            </assert_stderr>
            <output name="output">
                <expand macro="test_mentions_contig" />
                <expand macro="test_mentions_filters" />
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- Posterior filtering with the preconfigured DREAM-3 preset;
                 variant calling uses defaults. The filter declarations must
                 still be present in the VCF header. -->
            <conditional name="reference">
                <param name="source" value="history" />
                <param name="genome" value="hg19_chrM.fa" />
            </conditional>
            <param name="normal_bam" value="control_chrM.bam" />
            <param name="tumor_bam" value="tumor_chrM.bam" />
            <param name="split_output" value="false" />
            <conditional name="call_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <conditional name="filter_params">
                <param name="settings" value="dream3_settings" />
            </conditional>
            <output name="output">
                <expand macro="test_mentions_contig" />
                <expand macro="test_mentions_filters" />
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- Posterior filtering switched off: the contig line must be
                 present, but none of the posterior filter declarations may
                 appear in the VCF header. -->
            <conditional name="reference">
                <param name="source" value="history" />
                <param name="genome" value="hg19_chrM.fa" />
            </conditional>
            <param name="normal_bam" value="control_chrM.bam" />
            <param name="tumor_bam" value="tumor_chrM.bam" />
            <param name="split_output" value="false" />
            <conditional name="call_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <conditional name="filter_params">
                <param name="settings" value="no_filter" />
            </conditional>
            <output name="output">
                <expand macro="test_mentions_contig" />
                <expand macro="test_not_mentions_filters" />
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- Custom posterior filter settings: only the two minimum average
                 base quality thresholds (ref and variant allele) are raised to
                 28; all other custom filter parameters keep their form
                 defaults. -->
            <conditional name="reference">
                <param name="source" value="history" />
                <param name="genome" value="hg19_chrM.fa" />
            </conditional>
            <param name="normal_bam" value="control_chrM.bam" />
            <param name="tumor_bam" value="tumor_chrM.bam" />
            <param name="split_output" value="false" />
            <conditional name="call_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <conditional name="filter_params">
                <param name="settings" value="custom" />
                <param name="min_ref_basequal" value="28" />
                <param name="min_var_basequal" value="28" />
            </conditional>
            <output name="output">
                <expand macro="test_mentions_contig" />
                <expand macro="test_mentions_filters" />
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- Regression test with an input BAM that has extremely high
                 coverage at a variant site, high enough to trigger an
                 AssertionError from
                 pysam.libcalignedsegment.get_query_sequences, to verify that
                 the tool handles this case. The same BAM is used as normal
                 and as tumor sample.
                 It also checks that no SNVs are reported against Ns in the
                 reference genome: that would yield more than one called
                 variant, so the output must have exactly one data line. -->
            <conditional name="reference">
                <param name="source" value="history" />
                <param name="genome" value="hg19_chrM.fa" />
            </conditional>
            <param name="normal_bam" value="high_cov_chrM.bam" />
            <param name="tumor_bam" value="high_cov_chrM.bam" />
            <param name="split_output" value="false" />
            <conditional name="call_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <conditional name="filter_params">
                <param name="settings" value="varscan_defaults" />
            </conditional>
            <output name="output">
                <expand macro="test_mentions_contig" />
                <expand macro="test_mentions_filters" />
                <metadata name="data_lines" value="1" />
            </output>
        </test>
    </tests>

    <help>
@HELP_HEADER@

**The VarScan Somatic tool for Galaxy**

This tool wraps the functionality of the ``varscan somatic`` and the
``varscan fpfilter`` command line tools.

The tool is designed to detect genetic variants in a **pair of samples**
representing normal and tumor tissue from the same individual. It classifies
the variants, according to their most likely origin, as **somatic** (variant is
found in the tumor, but not in the normal sample, *i.e.*, is the consequence of
a somatic mutation event), **germline** (variant is found in both samples =>
germline mutation event) and **LOH** (variant is found in both samples, but
only the tumor sample appears to be homozygous for it => loss of heterozygosity
event).
This classification is encoded in the variant ``INFO`` fields of the VCF output
produced by the tool in the form of a status code ``SS`` (somatic status),
where:

- ``SS=1`` signifies a likely germline variant,
- ``SS=2`` a somatic variant, and
- ``SS=3`` an LOH variant.

In addition, ``SS=0`` indicates a possible variant, but with insufficient
evidence for an, at least, heterozygous state in either individual sample, and
``SS=5`` is used for variants of unexplained origin (*e.g.*, variants found in
the normal, but not in the tumor tissue sample).

In a second step, following variant calling, the tool can try to detect likely
false-positive calls by re-inspecting the data at the variant sites more
carefully and looking for signs that may indicate problems with the
sequencing data or its mapping. If a called variant is deemed a possible
false-positive at this step, this gets indicated in the ``FILTER`` field of the
variant record in the VCF output. For high confidence variants passing all
posterior (applied after variant calling) filters the value of the field will
be ``PASS``, for variants failing any of the posterior filters the value will
be a ``;``-separated list of the problematic filters.


**Input**

The tool takes as input a reference genome (in fasta format) and a pair of
aligned reads datasets (bam format).

**Output**

A VCF dataset of called variants. When asked to *Generate separate output
datasets for SNP and indel calls*, the tool will behave like the
``varscan somatic`` command line tool and produce two VCF datasets - one with
just the single nucleotide variants, while the other one will store
insertion/deletion variants.

**Options**

*Estimated purity of normal sample / of tumor sample*

Since, in practice, it is often impossible to isolate tissue samples without
contamination from surrounding tissue or from invading cells, these two fields
let you indicate your estimate of the purity of the two samples (as fractions
between 0 and 1, where 1 would indicate a contamination-free sample and 0.5 a
sample to which the desired tissue contributes only 50%, while the other 50%
consist of cells from the other tissue type).

*Settings for Variant Calling*

Settings in this section will affect the steps of variant calling and
classification. You can accept VarScan's default values for the corresponding
parameters or customize them according to your needs.

*Settings for Posterior Variant Filtering*

Use the parameters in this section to configure the false-positive filtering
step that follows variant calling and classification. These settings will not
influence the number of variants detected nor their classification, but may
change the ``FILTER`` field of variant records to indicate which variants
failed to pass certain filters. You can use this information with downstream
tools to exclude certain variants from further analysis steps or include only
high confidence variants that passed all filters (those with ``PASS`` as their
``FILTER`` field value). You can accept the original filter defaults of the
``varscan fpfilter`` command line tool, use the settings established for the
tool in the `DREAM3 challenge`_, or choose to customize the settings.
Alternatively, you can also choose to skip posterior filtering entirely, in
which case all variants will have their ``FILTER`` field set to ``PASS``.

.. _DREAM3 challenge: https://www.synapse.org/#!Synapse:syn312572/wiki/58893
    </help>
    <expand macro="citations" />
</tool>