view tb_variant_filter.xml @ 4:e7aff4a85df5 draft

"planemo upload for repository https://github.com/COMBAT-TB/tb_variant_filter commit 94fac53d84d560d45ccc94c9143674edbf9213c8"
author iuc
date Sun, 29 Aug 2021 21:46:17 +0000
parents 40f0591deda0
children ee4a90760848
line wrap: on
line source

<tool id="tb_variant_filter" name="TB Variant Filter" version="@TOOL_VERSION@+galaxy2" profile="20.09">
    <description>M. tuberculosis H37Rv VCF filter</description>
    <!-- Single source of truth for the wrapped tb_variant_filter release: the
         token feeds both the tool version attribute and the package pin below. -->
    <macros>
        <token name="@TOOL_VERSION@">0.3.5</token>
    </macros>
    <!-- Package dependency, pinned to the release named by @TOOL_VERSION@. -->
    <requirements>
        <requirement version="@TOOL_VERSION@" type="package">tb_variant_filter</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## The chosen filter names and the options toggle are resolved once
        ## here and reused by every section below.
        #set $selected_filters = str($filters).split(',')
        #set $custom_options = str($filter_options.show_filter_options) == "yes"

        tb_variant_filter

        ## Region masking. When the filter options are hidden the region list
        ## is fixed to pe_ppe,uvp, mirroring the options that are pre-selected
        ## in the region_filter input.
        #if "region_filter" in $selected_filters
            #if $custom_options
                #set $regions = str($filter_options.region_filter)
            #else
                #set $regions = "pe_ppe,uvp"
            #end if
            --region_filter $regions
        #end if

        ## Masking around indels; the window size is passed only when the
        ## filter options are shown.
        #if "close_to_indel_filter" in $selected_filters
            --close_to_indel_filter
            #if $custom_options
                --indel_window_size $filter_options.indel_window_size
            #end if
        #end if

        ## Alternate allele percentage filter; the threshold is passed only
        ## when the filter options are shown.
        #if "min_percentage_alt_filter" in $selected_filters
            --min_percentage_alt_filter
            #if $custom_options
                --min_percentage_alt $filter_options.min_percentage_alt
            #end if
        #end if

        ## Read depth filter; the threshold is passed only when the filter
        ## options are shown.
        #if "min_depth_filter" in $selected_filters
            --min_depth_filter
            #if $custom_options
                --min_depth $filter_options.min_depth
            #end if
        #end if

        ## SNV-only filter takes no further options.
        #if "snv_only_filter" in $selected_filters
            --snv_only_filter
        #end if

        ## Positional arguments: input VCF, then output VCF.
        '$input1' '$output1'
    ]]></command>
    <inputs>
        <!-- The VCF whose variants will be filtered. -->
        <param type="data" name="input1" label="VCF file to be filtered" format="vcf"/>

        <!-- Which filters to run. The option values are matched by name in the
             command template, so renaming one here requires the same change there. -->
        <param type="select" name="filters" multiple="true" label="Filters to apply">
            <option value="region_filter" selected="true">Filter out variants by regions</option>
            <option value="close_to_indel_filter">Filter variants close to indels</option>
            <option value="min_percentage_alt_filter">Filter variants by percentage alt allele</option>
            <option value="min_depth_filter">Filter sites by read alignment depth</option>
            <option value="snv_only_filter">Only accept SNVs</option>
        </param>

        <!-- Per-filter settings. They are only passed on the command line when
             "Yes" is chosen; with "No" the command template supplies no
             thresholds and uses a fixed region list. -->
        <conditional name="filter_options">
            <param type="select" name="show_filter_options" label="Show options for the filters">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <!-- optional="false" forces at least one region to stay selected;
                     an empty multiple select would otherwise reach the command
                     line as the literal text None (assumed to be unusable by
                     the tool; confirm against tb_variant_filter). -->
                <param argument="--region_filter" type="select" multiple="true" optional="false" label="Region filters to enable">
                    <!-- If the pre-selected options change, the hard-coded region list in the command template must change too, so both defaults stay in line. -->
                    <option value="farhat_rlc">Refined Low Confidence regions from Farhat lab</option>
                    <option value="pe_ppe" selected="true">PE/PPE</option>
                    <option value="tbprofiler">TBProfiler antibiotic resistant genes</option>
                    <option value="mtbseq">MTBseq antibiotic resistant genes</option>
                    <option value="uvp" selected="true">UVP repeat / insertion sequence sites</option>
                </param>
                <!-- Bounds reject values that cannot be meaningful: negative
                     window sizes or depths, and percentages outside 0 to 100. -->
                <param argument="--indel_window_size" type="integer" min="0" value="5" label="Window to mask around indels"/>
                <param argument="--min_percentage_alt" type="float" min="0" max="100" value="90"
                    label="Minimum alternate allele percentage to accept"/>
                <param argument="--min_depth" type="integer" min="0" value="30" label="Minimum read depth (coverage)"/>
            </when>
            <when value="no"/>
        </conditional>
    </inputs>
    <outputs>
        <!-- VCF written by tb_variant_filter; passed as the second positional argument of the command. -->
        <data format="vcf" name="output1"/>
    </outputs>
    <tests>
        <!-- Region filter with an explicit region list (pe_ppe, tbprofiler, uvp). -->
        <test>
            <param name="input1" value="test_input1.vcf" ftype="vcf"/>
            <param name="filters" value="region_filter"/>
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes"/>
                <param name="region_filter" value="pe_ppe,tbprofiler,uvp"/>
            </conditional>
            <output name="output1" file="test_output1.vcf" ftype="vcf"/>
        </test>
        <!-- Close-to-indel filter with the window size given explicitly. -->
        <test>
            <param name="input1" value="test_input1.vcf" ftype="vcf"/>
            <param name="filters" value="close_to_indel_filter"/>
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes"/>
                <param name="indel_window_size" value="5"/>
            </conditional>
            <output name="output1" file="test_output2.vcf" ftype="vcf"/>
        </test>
        <!-- Alternate allele percentage filter, threshold 95, first input. -->
        <test>
            <param name="input1" value="test_input1.vcf" ftype="vcf"/>
            <param name="filters" value="min_percentage_alt_filter"/>
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes"/>
                <param name="min_percentage_alt" value="95.0"/>
            </conditional>
            <output name="output1" file="test_output3.vcf" ftype="vcf"/>
        </test>
        <!-- Alternate allele percentage filter, threshold 30, second input. -->
        <test>
            <param name="input1" value="test_input2.vcf" ftype="vcf"/>
            <param name="filters" value="min_percentage_alt_filter"/>
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes"/>
                <param name="min_percentage_alt" value="30.0"/>
            </conditional>
            <output name="output1" file="test_output4.vcf" ftype="vcf"/>
        </test>
        <!-- Read depth filter with the minimum depth given explicitly. -->
        <test>
            <param name="input1" value="test_input1.vcf" ftype="vcf"/>
            <param name="filters" value="min_depth_filter"/>
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes"/>
                <param name="min_depth" value="30"/>
            </conditional>
            <output name="output1" file="test_output5.vcf" ftype="vcf"/>
        </test>
        <!-- SNV-only filter; the filter options conditional is left at its default. -->
        <test>
            <param name="input1" value="test_input1.vcf" ftype="vcf"/>
            <param name="filters" value="snv_only_filter"/>
            <output name="output1" file="test_output6.vcf" ftype="vcf"/>
        </test>
        <!-- Region filter restricted to farhat_rlc; only the "masked: 2" message on stderr is checked, no output file is compared. -->
        <test>
            <param name="input1" value="test_input1.vcf" ftype="vcf"/>
            <param name="filters" value="region_filter"/>
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes"/>
                <param name="region_filter" value="farhat_rlc"/>
            </conditional>
            <assert_stderr>
                <has_text text="masked: 2"/>
            </assert_stderr>
        </test>
    </tests>
    <help><![CDATA[
This tool offers multiple options for filtering variants (in
VCF files, relative to M. tuberculosis H37Rv).

It currently has 5 main modes:

1. Filter by region. Mask out variants in certain regions. Region lists available as:

   1. Refined Low Confidence (RLC) regions from `Marin et al 2021 <https://www.biorxiv.org/content/10.1101/2021.04.08.438862v1.full>`_
   2. PE/PPE genes from `Fishbein et al 2015 <https://onlinelibrary.wiley.com/doi/full/10.1111/mmi.12981>`_
   3. `TBProfiler <http://tbdr.lshtm.ac.uk/>`_ list of antibiotic resistant genes
   4. `MTBseq <https://github.com/ngs-fzb/MTBseq_source>`_ list of antibiotic resistant genes
   5. `UVP <https://github.com/CPTR-ReSeqTB/UVP>`_ list of repetitive loci in M. tuberculosis genome

2. Filter by window around indels. Masks out variants within a certain distance (by default 5 bases) of an insertion or deletion site.
3. Filter by percentage of alternate allele bases. Mask out variants with less than a minimum percentage (by default 90%) alternative alleles.
4. Filter by depth of aligned reads.
5. Filter out all variants that are not SNV (single nucleotide variants).

For region filtering, the default choice is to use the PE/PPE and UVP regions to mask out variants. `Marin et al 2021 <https://www.biorxiv.org/content/10.1101/2021.04.08.438862v1.full>`_
from Prof Maha Farhat's lab make a persuasive argument that their smaller list of Refined Low Confidence (RLC) regions is a better choice, but this work has not yet been peer
reviewed, so it is included as an option that is not currently the default.

When used together the effects of the filters are added (i.e. a variant is masked out if it is masked by any of the filters).
    ]]></help>
    <citations>
        <!-- Cites the tool's GitHub repository, pinned to a specific commit, as a BibTeX @misc entry. -->
        <citation type="bibtex"><![CDATA[
@misc{vanHeusden2019,
  author = {van Heusden, P.},
  title = {tb_variant_filter},
  year = {2019},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/pvanheus/tb_variant_filter}},
  commit = {4a9b2a4a85ddbfbb0d713a02373c8aa0aa159a6c}
}
        ]]></citation>
    </citations>
</tool>