view tb_variant_filter.xml @ 5:ee4a90760848 draft

planemo upload for repository https://github.com/COMBAT-TB/tb_variant_filter commit e064fb07acad057d3df849a6f153ed6ef90837f1
author iuc
date Sun, 18 Dec 2022 17:39:30 +0000
parents e7aff4a85df5
children 32f14a2723ec
line wrap: on
line source

<tool id="tb_variant_filter" name="TB Variant Filter" version="@TOOL_VERSION@+galaxy0" profile="20.09">
    <description>M. tuberculosis H37Rv VCF filter</description>
    <macros>
        <!-- Single place to bump the wrapped version: this token is expanded in the tool's version attribute and in the requirement below. -->
        <token name="@TOOL_VERSION@">0.3.6</token>
    </macros>
    <requirements>
        <!-- Pinned to the same version token; the command section invokes tb_variant_filter by name. -->
        <requirement type="package" version="@TOOL_VERSION@">tb_variant_filter</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Evaluate the two form-level switches once and reuse them below:
        ##   active_filters - values ticked in the "filters" multi-select
        ##   custom_options - true when the per-filter options section is shown
        #set $active_filters = str($filters).split(',')
        #set $custom_options = str($filter_options.show_filter_options) == "yes"

        tb_variant_filter

        ## Region masking. The region list is always passed explicitly: either the
        ## user's selection or the same default that is pre-selected in the form.
        #if "region_filter" in $active_filters
            #if $custom_options
                --region_filter $filter_options.region_filter
            #else
                --region_filter farhat_rlc
            #end if
        #end if

        ## Masking around indels; the window size is only passed when options are shown.
        #if "close_to_indel_filter" in $active_filters
            --close_to_indel_filter
            #if $custom_options
                --indel_window_size $filter_options.indel_window_size
            #end if
        #end if

        ## Alternate allele percentage; the threshold is only passed when options are shown.
        #if "min_percentage_alt_filter" in $active_filters
            --min_percentage_alt_filter
            #if $custom_options
                --min_percentage_alt $filter_options.min_percentage_alt
            #end if
        #end if

        ## Read depth; the threshold is only passed when options are shown.
        #if "min_depth_filter" in $active_filters
            --min_depth_filter
            #if $custom_options
                --min_depth $filter_options.min_depth
            #end if
        #end if

        ## SNV-only filtering takes no extra options.
        #if "snv_only_filter" in $active_filters
            --snv_only_filter
        #end if

        '$input1' '$output1'
    ]]></command>
    <inputs>
        <!-- VCF to filter; handed to the command as its first positional argument. -->
        <param type="data" name="input1" label="VCF file to be filtered" format="vcf"/>

        <!-- Which filters run at all: each ticked value switches on the matching flag in the command section. -->
        <param type="select" name="filters" multiple="true" label="Filters to apply">
            <option value="region_filter" selected="true">Filter out variants by regions</option>
            <option value="close_to_indel_filter">Filter variants close to indels</option>
            <option value="min_percentage_alt_filter">Filter variants by percentage alt allele</option>
            <option value="min_depth_filter">Filter sites by read alignment depth</option>
            <option value="snv_only_filter">Only accept SNVs</option>
        </param>

        <!-- Tuning for the filters chosen above. With "No", the command section emits no tuning flags and pins the region filter to farhat_rlc. -->
        <conditional name="filter_options">
            <param type="select" name="show_filter_options" label="Show options for the filters">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <!-- optional="false" because multi-selects are optional by default; an empty selection would otherwise be passed to the region filter flag as an unusable value. -->
                <param argument="--region_filter" type="select" multiple="true" optional="false" label="Region filters to enable">
                    <!-- If the pre-selected option changes, update the hard-coded fallback in the command section so both defaults stay identical. -->
                    <option value="farhat_rlc" selected="true">Refined Low Confidence regions from Farhat lab</option>
                    <option value="farhat_rlc_lowmap">Refined Low Confidence and Low Mappability regions from Farhat lab (for &lt; 100bp or single ended reads)</option>
                    <option value="pe_ppe">PE/PPE</option>
                    <option value="tbprofiler">TBProfiler antibiotic resistant genes</option>
                    <option value="mtbseq">MTBseq antibiotic resistant genes</option>
                    <option value="uvp">UVP repeat / insertion sequence sites</option>
                </param>
                <param argument="--indel_window_size" type="integer" value="5" label="Window to mask around indels"/>
                <param argument="--min_percentage_alt" type="float" value="90"
                    label="Minimum alternate allele percentage to accept"/>
                <param argument="--min_depth" type="integer" value="30" label="Minimum read depth (coverage)"/>
            </when>
            <when value="no"/>
        </conditional>
    </inputs>
    <outputs>
        <!-- Filtered VCF; its path is handed to the command as the second positional argument. -->
        <data name="output1" format="vcf"/>
    </outputs>
    <tests>
        <!-- Region filter with an explicit multi-value selection. -->
        <test expect_num_outputs="1">
            <param name="input1" value="test_input1.vcf" ftype="vcf" />
            <param name="filters" value="region_filter" />
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes" />
                <param name="region_filter" value="pe_ppe,tbprofiler,uvp" />
            </conditional>
            <output name="output1" file="test_output1.vcf" ftype="vcf" />
        </test>
        <!-- Indel proximity filter with an explicit window size. -->
        <test expect_num_outputs="1">
            <param name="input1" value="test_input1.vcf" ftype="vcf" />
            <param name="filters" value="close_to_indel_filter" />
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes" />
                <param name="indel_window_size" value="5" />
            </conditional>
            <output name="output1" file="test_output2.vcf" ftype="vcf" />
        </test>
        <!-- Alternate allele percentage filter, high threshold. -->
        <test expect_num_outputs="1">
            <param name="input1" value="test_input1.vcf" ftype="vcf" />
            <param name="filters" value="min_percentage_alt_filter" />
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes" />
                <param name="min_percentage_alt" value="95.0" />
            </conditional>
            <output name="output1" file="test_output3.vcf" ftype="vcf" />
        </test>
        <!-- Alternate allele percentage filter, low threshold, on the second input file. -->
        <test expect_num_outputs="1">
            <param name="input1" value="test_input2.vcf" ftype="vcf" />
            <param name="filters" value="min_percentage_alt_filter" />
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes" />
                <param name="min_percentage_alt" value="30.0" />
            </conditional>
            <output name="output1" file="test_output4.vcf" ftype="vcf" />
        </test>
        <!-- Read depth filter with an explicit minimum depth. -->
        <test expect_num_outputs="1">
            <param name="input1" value="test_input1.vcf" ftype="vcf" />
            <param name="filters" value="min_depth_filter" />
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes" />
                <param name="min_depth" value="30" />
            </conditional>
            <output name="output1" file="test_output5.vcf" ftype="vcf" />
        </test>
        <!-- SNV-only filter; takes no options, so the options section is left at its default. -->
        <test expect_num_outputs="1">
            <param name="input1" value="test_input1.vcf" ftype="vcf" />
            <param name="filters" value="snv_only_filter" />
            <output name="output1" file="test_output6.vcf" ftype="vcf" />
        </test>
        <!-- Region filter with farhat_rlc chosen explicitly; checked through the tool's stderr summary. -->
        <test expect_num_outputs="1">
            <param name="input1" value="test_input1.vcf" ftype="vcf" />
            <param name="filters" value="region_filter" />
            <conditional name="filter_options">
                <param name="show_filter_options" value="yes" />
                <param name="region_filter" value="farhat_rlc" />
            </conditional>
            <assert_stderr>
                <has_text text="masked: 2" />
            </assert_stderr>
        </test>
        <!-- Region filter with options hidden: exercises the hard-coded farhat_rlc fallback in the command section, which yields the same command line as the previous test. -->
        <test expect_num_outputs="1">
            <param name="input1" value="test_input1.vcf" ftype="vcf" />
            <param name="filters" value="region_filter" />
            <conditional name="filter_options">
                <param name="show_filter_options" value="no" />
            </conditional>
            <assert_stderr>
                <has_text text="masked: 2" />
            </assert_stderr>
        </test>
    </tests>
    <help><![CDATA[
This tool offers multiple options for filtering variants (in
VCF files, relative to M. tuberculosis H37Rv).

It currently has 5 main modes:

1. Filter by region. Mask out variants in certain regions. Region lists available as:

   1. Refined Low Confidence (RLC) regions from `Marin et al 2022 <https://doi.org/10.1093/bioinformatics/btac023>`_
   2. Refined Low Confidence (RLC) and Low Mappability regions from `Marin et al 2022 <https://doi.org/10.1093/bioinformatics/btac023>`_
   3. PE/PPE genes from `Fishbein et al 2015 <https://onlinelibrary.wiley.com/doi/full/10.1111/mmi.12981>`_
   4. `TBProfiler <http://tbdr.lshtm.ac.uk/>`_ list of antibiotic resistant genes
   5. `MTBseq <https://github.com/ngs-fzb/MTBseq_source>`_ list of antibiotic resistant genes
   6. `UVP <https://github.com/CPTR-ReSeqTB/UVP>`_ list of repetitive loci in M. tuberculosis genome

2. Filter by window around indels. Masks out variants within a certain distance (by default 5 bases) of an insertion or deletion site.
3. Filter by percentage of alternate allele bases. Mask out variants with less than a minimum percentage (by default 90%) alternative alleles.
4. Filter by depth of aligned reads.
5. Filter out all variants that are not SNV (single nucleotide variants).

For region filtering, the default choice is to use the RLC regions. These are based on `Marin et al 2022 <https://doi.org/10.1093/bioinformatics/btac023>`_,
a study of regions of the M. tuberculosis H37Rv genome where Illumina reads don't map well. If you are using reads shorter than 100 base pairs
or single-ended reads, you should use the RLC and Low Mappability region filter. The PE/PPE and UVP region filters are retained for backward compatibility
but the aforementioned paper has shown that they exclude too much of the genome from analysis.

When used together the effects of the filters are added (i.e. a variant is masked out if it is masked by any of the filters).
    ]]></help>
    <citations>
        <!-- BibTeX entry citing the tb_variant_filter GitHub repository; the payload is kept verbatim inside CDATA. -->
        <citation type="bibtex"><![CDATA[
@misc{vanHeusden2019,
  author = {van Heusden, P.},
  title = {tb_variant_filter},
  year = {2019},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/pvanheus/tb_variant_filter}},
  commit = {4a9b2a4a85ddbfbb0d713a02373c8aa0aa159a6c}
}
        ]]></citation>
    </citations>
</tool>