view lofreq_filter.xml @ 5:7dfca164d2e3 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lofreq commit c1fda24943cf04866d408133ffb6aabf4a90866b"
author iuc
date Mon, 05 Oct 2020 20:14:41 +0000
parents 950d1d49d678
children
line wrap: on
line source

<tool id="lofreq_filter" name="Lofreq filter" version="@TOOL_VERSION@+galaxy0" profile="18.01">
    <description>called variants posteriorly</description>
    <macros>
        <import>macros.xml</import>
        <!-- Macro: call-quality filter settings for SNVs. The user either
             disables the filter, sets a fixed minimum QUAL (-Q), or filters on
             a multiple-testing corrected p-value (-r, -q, -s). -->
        <xml name="snvqual_filter_config">
            <conditional name="snvqual_filter">
                <param name="snvqual" type="select" label="Filter SNVs based on call quality?">
                    <option value="no">No, don't apply call quality filter</option>
                    <option value="min-phred">Yes, filter on explicit QUAL threshold</option>
                    <option value="mtc">Yes, filter on multiple testing corrected p-value</option>
                </param>
                <when value="no" />
                <when value="min-phred">
                    <param name="snvqual_thresh" argument="-Q"
                           type="integer" min="0" value="0"
                           label="Minimum QUAL value"
                           help="Specify the minimum value of the QUAL field required to retain a variant" />
                </when>
                <when value="mtc">
                    <param name="snvqual_alpha" argument="-r"
                           type="float" min="0" max="1" value="1"
                           label="Multiple-testing corrected p-value threshold" />
                    <param name="snvqual_mtc" argument="-q"
                           type="select"
                           label="Multiple testing correction method">
                        <option value="bonf">Bonferroni</option>
                        <option value="holm">Holm-Sidak</option>
                        <option value="fdr">False-discovery rate</option>
                    </param>
                    <param name="snvqual_ntests" argument="-s"
                           type="integer" min="1" value="1"
                           label="Estimate of number of tests performed"
                           help="Ideally, this would be the number of SNVs considered during variant calling. The lofreq variant caller emits this number as part of its output. For other variant callers, all you will typically have is a lower bound estimate given by the number of SNV records in your VCF input." />
                </when>
            </conditional>
        </xml>
        <!-- Macro: call-quality filter settings for indels. The user either
             disables the filter, sets a fixed minimum QUAL (-K), or filters on
             a multiple-testing corrected p-value (-l, -k, -m). -->
        <xml name="indelqual_filter_config">
            <conditional name="indelqual_filter">
                <param name="indelqual" type="select" label="Filter indels based on call quality?">
                    <option value="no">No, don't apply call quality filter</option>
                    <option value="min-phred">Yes, filter on explicit QUAL threshold</option>
                    <option value="mtc">Yes, filter on multiple testing corrected p-value</option>
                </param>
                <when value="no" />
                <when value="min-phred">
                    <param name="indelqual_thresh" argument="-K"
                           type="integer" min="0" value="0"
                           label="Minimum QUAL value"
                           help="Specify the minimum value of the QUAL field required to retain a variant" />
                </when>
                <when value="mtc">
                    <param name="indelqual_alpha" argument="-l"
                           type="float" min="0" max="1" value="1"
                           label="Multiple-testing corrected p-value threshold" />
                    <param name="indelqual_mtc" argument="-k"
                           type="select"
                           label="Multiple testing correction method">
                        <option value="bonf">Bonferroni</option>
                        <option value="holm">Holm-Sidak</option>
                        <option value="fdr">False-discovery rate</option>
                    </param>
                    <param name="indelqual_ntests" argument="-m"
                           type="integer" min="1" value="1"
                           label="Estimate of number of tests performed"
                           help="Ideally, this would be the number of indels considered during variant calling. The lofreq variant caller emits this number as part of its output. For other variant callers, all you will typically have is a lower bound estimate given by the number of indel records in your VCF input." />
                </when>
            </conditional>
        </xml>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## Build the lofreq filter command line from the tool form.
        ## --no-defaults is always passed, so the filters applied are the ones
        ## emitted below from the form settings.
        ## The input path is single-quoted so the shell always sees one word.
        lofreq filter -i '$invcf' --no-defaults --verbose
        ## Empty (drop failing variants) or --print-all (keep and flag them)
        $flag_or_drop
        ## Empty (keep both types), --only-snvs or --only-indels
        ${filter_by_type.keep_only}
        ## SNV quality settings are only defined when SNVs are kept
        #if str($filter_by_type.keep_only) in ['', '--only-snvs']:
            #if str($filter_by_type.qual.snvqual_filter.snvqual) == 'min-phred':
                -Q ${filter_by_type.qual.snvqual_filter.snvqual_thresh}
            #elif str($filter_by_type.qual.snvqual_filter.snvqual) == 'mtc':
                -q ${filter_by_type.qual.snvqual_filter.snvqual_mtc}
                -r ${filter_by_type.qual.snvqual_filter.snvqual_alpha}
                -s ${filter_by_type.qual.snvqual_filter.snvqual_ntests}
            #end if
        #end if
        ## Indel quality settings are only defined when indels are kept
        #if str($filter_by_type.keep_only) in ['', '--only-indels']:
            #if str($filter_by_type.qual.indelqual_filter.indelqual) == 'min-phred':
                -K ${filter_by_type.qual.indelqual_filter.indelqual_thresh}
            #elif str($filter_by_type.qual.indelqual_filter.indelqual) == 'mtc':
                -k ${filter_by_type.qual.indelqual_filter.indelqual_mtc}
                -l ${filter_by_type.qual.indelqual_filter.indelqual_alpha}
                -m ${filter_by_type.qual.indelqual_filter.indelqual_ntests}
            #end if
        #end if
        ## Coverage and allele frequency ranges are always passed
        -v ${coverage.cov_min}
        -V ${coverage.cov_max}
        -a ${af.af_min}
        -A ${af.af_max}
        ## Strand bias: explicit threshold (-B) or corrected p-value (-b, -c)
        #if str($sb.sb_filter.strand_bias) == 'max-phred':
            -B ${sb.sb_filter.sb_thresh}
        #elif str($sb.sb_filter.strand_bias) == 'mtc':
            -b ${sb.sb_filter.sb_mtc}
            -c ${sb.sb_filter.sb_alpha}
        #end if
        ## Compound and indel switches are read for every active strand-bias mode
        #if str($sb.sb_filter.strand_bias) != 'no':
            ${sb.sb_filter.sb_compound}
            ${sb.sb_filter.sb_indels}
        #end if
        ## Fixed output name; collected by the outvcf output via from_work_dir
        -o filtered.vcf
    ]]></command>
    <inputs>
        <!-- Variant list to filter (plain or bgzip-compressed VCF) -->
        <param name="invcf" argument="-i"
               type="data" format="vcf,vcf_bgzip"
               label="List of variants to filter" />
        <!-- Variant types to keep. The selected value is inserted into the
             command line as is, and each choice exposes only the quality
             filter settings that are relevant for the retained types. -->
        <conditional name="filter_by_type">
            <param name="keep_only" type="select" label="Types of variants to keep">
                <option value="">SNVs and Indels</option>
                <option value="--only-snvs">SNVs only</option>
                <option value="--only-indels">Indels only</option>
            </param>
            <!-- both types kept: SNV and indel quality filters -->
            <when value="">
                <section name="qual" title="Quality-based filter options" expanded="True">
                    <expand macro="snvqual_filter_config" />
                    <expand macro="indelqual_filter_config" />
                </section>
            </when>
            <!-- SNVs only: SNV quality filter -->
            <when value="--only-snvs">
                <section name="qual" title="Quality-based filter options" expanded="True">
                    <expand macro="snvqual_filter_config" />
                </section>
            </when>
            <!-- indels only: indel quality filter -->
            <when value="--only-indels">
                <section name="qual" title="Quality-based filter options" expanded="True">
                    <expand macro="indelqual_filter_config" />
                </section>
            </when>
        </conditional>
        <!-- Accepted coverage range at variant sites (-v / -V); per the section
             help, a value of zero leaves that side of the range open -->
        <section name="coverage"
                 title="Coverage-based filter options"
                 expanded="True"
                 help="You can specify a range of acceptable coverage values at variant sites by setting minimum and maximum coverage. Set either value to zero to make the range unbounded on the corresponding side, or disable filtering based on coverage by setting both values to zero.">
            <param name="cov_min" argument="-v"
                   type="integer" min="0" value="10"
                   label="Minimum coverage"
                   help="The minimum coverage at a site required to keep variants" />
            <param name="cov_max" argument="-V"
                   type="integer" min="0" value="0"
                   label="Maximum coverage"
                   help="The maximum coverage at a site allowed to keep variants" />
        </section>
        <!-- Accepted allele frequency range (-a / -A); per the section help,
             a value of zero leaves that side of the range open -->
        <section name="af"
                 title="Allele frequency filter options"
                 expanded="True"
                 help="You can specify a range of acceptable allele frequencies at variant sites by setting minimum and maximum AF values. Set either value to zero to make the range unbounded on the corresponding side, or disable filtering based on allele frequency by setting both values to zero.">
            <param name="af_min" argument="-a"
                   type="float" min="0" max="1" value="0"
                   label="Minimum allele frequency"
                   help="To keep a variant its allele needs to be observed at this frequency at least." />
            <param name="af_max" argument="-A"
                   type="float" min="0" max="1" value="0"
                   label="Maximum allele frequency"
                   help="To keep a variant its allele is allowed to be observed at this frequency at most." />
        </section>
        <!-- Strand bias filter: disabled, an explicit threshold (-B), or a
             multiple-testing corrected p-value (-b, -c). -->
        <section name="sb" title="Strand bias filter options" expanded="True">
            <conditional name="sb_filter">
                <param name="strand_bias" type="select"
                label="Filter variants based on supporting strand bias?">
                    <option value="no">No, don't apply strand-bias filter</option>
                    <option value="max-phred">Yes, filter on explicit SB threshold</option>
                    <option value="mtc" selected="True">Yes, filter on multiple testing corrected strand-bias p-value (lofreq default)</option>
                </param>
                <when value="no">
                </when>
                <when value="max-phred">
                    <param argument="-B" name="sb_thresh" type="integer" min="0" value="0"
                    label="Maximum strand-bias value"
                    help="Specify the maximum Phred-scaled strand-bias value (SB) allowed to retain a variant" />
                    <!-- The command template reads sb_compound and sb_indels
                         whenever a strand-bias filter is active, so both
                         switches must be defined in this branch as well. -->
                    <param name="sb_compound" type="boolean" checked="True" truevalue="" falsevalue="--sb-no-compound"
                    label="Use compound strand-bias filter?"
                    help="With compound filtering a variant is filtered only if it fails the strand-bias filter configured above AND has 85% of its supporting reads mapped to one reference genome strand. This guards against filtering based on statistically significant, but minor strand bias effects at high-coverage sites." />
                    <param argument="--sb-incl-indels" name="sb_indels" type="boolean" checked="False" truevalue="--sb-incl-indels" falsevalue=""
                    label="Apply to indels?"
                    help="By default, indels are not filtered based on strand bias." />
                </when>
                <when value="mtc">
                    <param argument="-c" name="sb_alpha" type="float" min="0" max="1" value="0.001"
                    label="Multiple-testing corrected p-value threshold" />
                    <param argument="-b" name="sb_mtc" type="select"
                    label="Multiple testing correction method">
                        <option value="bonf">Bonferroni</option>
                        <option value="holm">Holm-Sidak</option>
                        <option value="fdr" selected="True">False-discovery rate</option>
                    </param>
                    <param name="sb_compound" type="boolean" checked="True" truevalue="" falsevalue="--sb-no-compound"
                    label="Use compound strand-bias filter?"
                    help="With compound filtering a variant is filtered only if it fails the strand-bias filter configured above AND has 85% of its supporting reads mapped to one reference genome strand. This guards against filtering based on statistically significant, but minor strand bias effects at high-coverage sites." />
                    <param argument="--sb-incl-indels" name="sb_indels" type="boolean" checked="False" truevalue="--sb-incl-indels" falsevalue=""
                    label="Apply to indels?"
                    help="By default, indels are not filtered based on strand bias." />
                </when>
            </conditional>
        </section>
        <!-- Handling of variants that fail a filter: the empty value drops
             them, --print-all keeps them and flags them in the FILTER column -->
        <param name="flag_or_drop" argument="--print-all"
               type="select" display="radio"
               label="Action to be taken for variants that do not pass the filters defined above">
            <option value="">Drop variants not passing one or more filters</option>
            <option value="--print-all">Keep variants, but indicate failed filters in output FILTER column</option>
        </param>
    </inputs>
    <outputs>
        <!-- Filtered variant list, taken from the filtered.vcf file that the
             command writes in the job working directory -->
        <data name="outvcf"
              format="vcf"
              from_work_dir="filtered.vcf" />
    </outputs>
    <tests>
        <test>
            <!-- Mimic the default filtering of lofreq call using explicit
                 QUAL thresholds for SNVs and indels -->
            <param name="invcf" value="call-out2.vcf" ftype="vcf" />
            <conditional name="filter_by_type">
                <section name="qual">
                    <conditional name="snvqual_filter">
                        <param name="snvqual" value="min-phred" />
                        <param name="snvqual_thresh" value="38" />
                    </conditional>
                    <conditional name="indelqual_filter">
                        <param name="indelqual" value="min-phred" />
                        <param name="indelqual_thresh" value="20" />
                    </conditional>
                </section>
            </conditional>
            <!-- lines_diff allows for header differences: lofreq call writes
                 duplicate ##FILTER declarations, lofreq filter does not -->
            <output name="outvcf" file="call-out1.vcf" lines_diff="6" />
        </test>
        <test>
            <!-- Reproduce the default filtering of lofreq call, this time
                 expressed as a multiple testing correction filter on SNVs -->
            <param name="invcf" value="call-out2.vcf" ftype="vcf" />
            <conditional name="filter_by_type">
                <param name="keep_only" value="--only-snvs" />
                <section name="qual">
                    <conditional name="snvqual_filter">
                        <param name="snvqual" value="mtc" />
                        <param name="snvqual_alpha" value="0.01" />
                        <param name="snvqual_mtc" value="bonf" />
                        <param name="snvqual_ntests" value="66" />
                    </conditional>
                </section>
            </conditional>
            <!-- lines_diff allows for the different ##FILTER declarations
                 written in this filter mode -->
            <output name="outvcf" file="call-out1.vcf" lines_diff="9" />
        </test>
        <test>
            <!-- Exercise the print-all option: no variant may be dropped, and
                 a variant failing a filter must carry that filter's name in
                 the FILTER column -->
            <param name="invcf" value="call-out2.vcf" ftype="vcf" />
            <param name="flag_or_drop" value="--print-all" />
            <conditional name="filter_by_type">
                <section name="qual">
                    <conditional name="snvqual_filter">
                        <param name="snvqual" value="min-phred" />
                        <param name="snvqual_thresh" value="38" />
                    </conditional>
                    <conditional name="indelqual_filter">
                        <param name="indelqual" value="min-phred" />
                        <param name="indelqual_thresh" value="20" />
                    </conditional>
                </section>
            </conditional>
            <output name="outvcf">
                <assert_contents>
                    <!-- a passing variant stays marked PASS -->
                    <has_line_matching expression="pBR322&#009;1134&#009;.&#009;C&#009;T&#009;49314&#009;PASS&#009;.+" />
                    <!-- a variant with QUAL 0 is retained but flagged -->
                    <has_line_matching expression="pBR322&#009;1193&#009;.&#009;G&#009;A&#009;0&#009;min_snvqual_38&#009;.+" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

**Lofreq filter** tries to eliminate false-positive calls from a list of
variants in VCF format.

To this end, it applies a variety of user-configurable filters to the input
variants, which all operate on variant attributes expected to be embedded in
the VCF input.

Specifically, certain filters expect:

- the ``QUAL`` field of the variant records to be set
- any of the following subfields of a variant's ``INFO`` field:

  * ``DP`` (required for coverage-based filtering)
  * ``AF`` (required for filtering based on variant allele frequency)
  * ``SB`` (required for filtering on strand bias)
  * ``DP4`` (required for the compound strand bias filter)

------

**Note**:

.. class:: warningmark

   This tool is optimized for posterior filtering of variants called with
   `Lofreq call`, which outputs all variant attributes required by the various
   configurable filters.

If you are using `Lofreq filter` to filter VCF variant lists produced with
other tools, be prepared for surprises.

In general, if any piece of variant information required for applying a
given filter is missing from the input data, the tool will try to disable
that filter. Watch out for corresponding warnings in the tool's standard
output.

In addition, any p-value based filtering on variant qualities may behave
incorrectly since different variant callers might use different QUAL scales.
    ]]></help>
    <expand macro="citations" />
</tool>