view bcftools_norm.xml @ 11:67e2bd1c4579 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 7ea221c576832871e9e5876d6927887d567d4f98"
author iuc
date Thu, 28 Nov 2019 10:05:48 -0500
parents 76655fe2c70e
children a62e934346f5
line wrap: on
line source

<?xml version='1.0' encoding='utf-8'?>
<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy2">
    <description>Left-align and normalize indels; check if REF alleles match the reference; split multiallelic sites into multiple rows; recover multiallelics from multiple rows</description>
    <!-- Defines the @EXECUTABLE@ token used in the tool name and id, in the
         command template and in the help text, then pulls in the shared
         macro definitions from macros.xml. -->
    <macros>
        <token name="@EXECUTABLE@">norm</token>
        <import>macros.xml</import>
    </macros>
    <!-- Shared requirements macro, extended with samtools and gawk. -->
    <expand macro="requirements">
        <expand macro="samtools_requirement"/>
        <!-- gawk is only required for the current bcftools norm bug workaround.
        Remove once fixed (see the command section below). -->
        <requirement type="package" version="5.0.1">gawk</requirement>
    </expand>
    <expand macro="version_command" />
    <!-- Cheetah template that renders the shell command for bcftools norm.

         Layout of the template:
         1. Environment, input file and FASTA reference preparation tokens.
            $section is pointed at reference_source before the reference
            tokens and at sec_restrict before the regions/targets tokens;
            presumably the tokens read their parameters from $section
            (confirm in macros.xml).
         2. Workaround: awk writes a copy of the reference to ref_upper.fa in
            which every line that does not contain a '>' character is
            upper-cased, and $input_fa_ref is redirected to that copy before
            @FASTA_REF@ is expanded.
            NOTE(review): the awk pattern is not anchored to the start of the
            line, so any line containing '>' anywhere is left as-is, not only
            lines that begin with it.
         3. The bcftools call itself: check-ref mode, the normalize_indels
            flag text, and the optional rm-dup, multiallelics, strict filter
            and site-win arguments, each emitted only when the corresponding
            parameter is set.
         4. Standard output is redirected to $output_file. -->
    <command detect_errors="aggressive"><![CDATA[
@PREPARE_ENV@
@PREPARE_INPUT_FILE@
#set $section = $reference_source
@PREPARE_FASTA_REF@

## The next two lines are a workaround for a bug in bcftools norm up to version
## 1.9, which caused indel normalization to be case-sensitive
## see:
## https://github.com/samtools/bcftools/commit/65b211da1f5cdbf1c7c11eb610481a736fa88bda
## https://github.com/samtools/bcftools/commit/77b0a27a156803e382d60651ff94ea0bd95fad0f
## With the next release of bcftools, this should be fixed and, at that point,
## the time-consuming on-the-fly uppercasing should be removed again.
awk '{ if ($0 !~ />/) {print toupper($0)} else {print $0} }' '$input_fa_ref' > ref_upper.fa &&
#set $input_fa_ref = 'ref_upper.fa'

#set $section = $sec_restrict
@PREPARE_TARGETS_FILE@
@PREPARE_REGIONS_FILE@

bcftools @EXECUTABLE@

#set $section = $reference_source
@FASTA_REF@
--check-ref $check_ref
$normalize_indels
#if $rm_dup:
  --rm-dup "$rm_dup"
#end if
#if $multiallelics.mode:
  --multiallelics '${multiallelics.mode}${multiallelics.multiallelic_types}'
#end if
#if $multiallelics.mode == '+':
  ${multiallelics.strict_filter}
#end if
#if $sec_default.site_win:
  --site-win ${sec_default.site_win}
#end if

#set $section = $sec_restrict
@REGIONS@
@TARGETS@

@OUTPUT_TYPE@
@THREADS@

## Primary Input/Outputs
@INPUT_FILE@
> '$output_file'
]]>
    </command>
    <inputs>
        <expand macro="macro_input" />
        <expand macro="macro_fasta_ref" />
        <!-- Policy for REF alleles that disagree with the reference genome.
             The selected value is passed verbatim as the argument of the
             check-ref option in the command template. -->
        <param name="check_ref"
               type="select"
               display="radio"
               label="When any REF allele does not match the reference genome base"
               help="Warnings about REF mismatches will be emitted to the standard
        error (stderr) stream, and it is recommended to check there for
        problems if you choose not to exit with an error immediately upon
        encountering a mismatch.">
            <option value="w">ignore the problem (-w)</option>
            <option value="wx">exclude the variant record from the output (-wx)</option>
            <option value="ws">fix the variant record using the reference genome information (-ws)</option>
            <option value="e">exit with an error (-e)</option>
        </param>
        <!-- Inverted boolean: when unchecked (the default) the
             do-not-normalize flag is inserted into the command line; when
             checked nothing is inserted. -->
        <param name="normalize_indels"
               argument="--do-not-normalize"
               type="boolean"
               truevalue=""
               falsevalue="--do-not-normalize"
               checked="false"
               label="Left-align and normalize indels?" />
        <!-- Optional de-duplication of variant records. The first option has
             an empty value, in which case the command template omits the
             rm-dup option altogether; any other value is passed as its
             argument. -->
        <param name="rm_dup" type="select" display="radio"
        label="Perform deduplication for the following types of variant records">
            <option value="">do not deduplicate any records</option>
            <option value="snps">snps</option>
            <option value="indels">indels</option>
            <option value="both">both</option>
            <option value="any">any</option>
        </param>
        <!-- Handling of multiallelic sites. When a mode other than the empty
             one is chosen, the command template concatenates mode and
             multiallelic_types into the single argument of the multiallelics
             option. The strict filter flag is only emitted in join (+) mode. -->
        <conditional name="multiallelics">
            <param name="mode" type="select" label="~multiallelics">
                <option value="">preserve multiallelic/biallelic sites</option>
                <option value="-">split multiallelic sites into biallelic records (-)</option>
                <option value="+">join biallelic sites into multiallelic records (+)</option>
            </param>
            <!-- Preserve mode: no further parameters. -->
            <when value=""/>
            <!-- Split mode: choose which variant types get split. -->
            <when value="-">
                <param name="multiallelic_types"
                       type="select"
                       display="radio"
                       label="split the following variant types">
                    <option value="snps">SNPs</option>
                    <option value="indels">indels</option>
                    <option value="both" selected="true">both</option>
                </param>
            </when>
            <!-- Join mode: choose which variant types get merged and how
                 FILTER values are combined. -->
            <when value="+">
                <param name="multiallelic_types"
                       type="select"
                       display="radio"
                       label="merge the following variant types">
                    <option value="snps">SNPs</option>
                    <option value="indels">indels</option>
                    <option value="both" selected="true">SNPs and indels, but keep variants of the two types separate (both)</option>
                    <option value="any">SNPs and indels, and merge variant records of different types (any)</option>
                </param>
                <param name="strict_filter"
                       type="boolean"
                       truevalue="--strict-filter"
                       falsevalue=""
                       label="Strict Filter"
                       help="merged site is PASS only if all sites being merged PASS" />
            </when>
        </conditional>
        <!-- Collapsed section with the region and target restrictions. Both
             are produced by the macro_restrict macro; the target variant
             additionally nests the macro_invert_targets macro. -->
        <section name="sec_restrict" title="Restrict all operations to" expanded="false">
            <expand macro="macro_restrict" type="region" label_type="Region"/>
            <expand macro="macro_restrict" type="target" label_type="Target">
                <expand macro="macro_invert_targets"/>
            </expand>
        </section>

        <!-- Collapsed section for less common options. site_win is optional;
             the command template only emits the site-win option when the
             parameter evaluates as true. -->
        <section name="sec_default" title="Other Options" expanded="false">
            <param name="site_win"
                   type="integer"
                   value="1000"
                   optional="True"
                   label="Site Window"
                   help="(-w, --site-win) Buffer for sorting lines which changed position during realignment" />
        </section>
        <expand macro="macro_select_output_type" />
    </inputs>
    <!-- Output datasets come from the shared macro_vcf_output macro.
         Presumably it declares the output_file dataset that the command
         template redirects to and that the tests inspect; confirm in
         macros.xml. -->
    <outputs>
        <expand macro="macro_vcf_output"/>
    </outputs>
    <tests>
        <!-- Indel normalization enabled on norm.vcf, with norm.fa supplied
             through the test_using_reference macro. The output must contain
             the allele pair T,TAACCCTA and must no longer contain
             TAA,TAACCCTAAA. -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.vcf" />
            <expand macro="test_using_reference" ref="norm.fa" />
            <param name="normalize_indels" value="true" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="T,TAACCCTA" />
                    <not_has_text text="TAA,TAACCCTAAA" />
                </assert_contents>
            </output>
        </test>

        <!-- Same normalization check as the history-reference test, but the
             reference is requested with select_from="cached" and ref="norm",
             and the input carries dbkey "?". Presumably this exercises the
             built-in genome code path; confirm in macros.xml. -->
        <test>
            <param name="input_file" ftype="vcf" dbkey="?" value="norm.vcf" />
            <expand macro="test_using_reference" select_from="cached" ref="norm" />
            <param name="normalize_indels" value="true" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="T,TAACCCTA" />
                    <not_has_text text="TAA,TAACCCTAAA" />
                </assert_contents>
            </output>
        </test>

        <!-- Split mode (-) on norm.split.vcf with normalize_indels left at
             its default, so the do-not-normalize flag is passed. The
             multiallelic record must be gone and two separate records at
             position 105 of contig 1 must appear with unchanged REF. -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.split.vcf" />
            <expand macro="test_using_reference" ref="norm.fa" />
            <conditional name="multiallelics">
                <param name="mode" value="-" />
            </conditional>
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="TAA,TAACCCTAAA" />
                    <has_text_matching expression="1\t105\t.\tTAAACCCTAAA\tTAA\t"/>
                    <has_text_matching expression="1\t105\t.\tTAAACCCTAAA\tTAACCCTAAA\t"/>
                </assert_contents>
            </output>
        </test>

        <!-- Split mode (-) combined with indel normalization on
             norm.split.vcf. The multiallelic record must be gone and the two
             resulting records at position 105 of contig 1 must carry the
             shortened alleles asserted below. -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.split.vcf" />
            <expand macro="test_using_reference" ref="norm.fa" />
            <param name="normalize_indels" value="true" />
            <conditional name="multiallelics">
                <param name="mode" value="-" />
            </conditional>
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="TAA,TAACCCTAAA" />
                    <has_text_matching expression="1\t105\t.\tTAAACCCTA\tT\t"/>
                    <has_text_matching expression="1\t105\t.\tTA\tT\t"/>
                </assert_contents>
            </output>
        </test>

        <!-- Join mode (+) on norm.merge.vcf with strict_filter left at its
             default (flag not passed). The merged multiallelic record must be
             present and the merged record at position 114 of contig 2 is
             expected to have FILTER FAIL. -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.merge.vcf" />
            <expand macro="test_using_reference" ref="norm.fa" />
            <conditional name="multiallelics">
                <param name="mode" value="+" />
            </conditional>
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="TAA,TAACCCTAAA" />
                    <has_text_matching expression="2\t114\t.\tTC\tTTCC,TTC\t999\tFAIL"/>
                </assert_contents>
            </output>
        </test>
        <!-- Join mode (+) on norm.merge.vcf with strict_filter enabled. The
             merged record at position 114 of contig 2 is expected to have
             FILTER PASS.
             NOTE(review): the strict_filter help text says a merged site is
             PASS only if all merged sites PASS, yet this strict run expects
             PASS while the non-strict run on the same input expects FAIL.
             Confirm the expectations against the fixture and bcftools
             behaviour. -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.merge.vcf" />
            <expand macro="test_using_reference" ref="norm.fa" />
            <conditional name="multiallelics">
                <param name="mode" value="+" />
                <param name="strict_filter" value="true" />
            </conditional>
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="TAA,TAACCCTAAA" />
                    <has_text_matching expression="2\t114\t.\tTC\tTTCC,TTC\t999\tPASS"/>
                </assert_contents>
            </output>
        </test>

        <!-- check_ref set to "ws" (fix the record from the reference) on
             norm.setref.vcf, without normalization. The output must contain
             the two records at positions 101 and 105 of contig 2 with the REF
             alleles asserted below, including the symbolic DEL allele. -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.setref.vcf" />
            <expand macro="test_using_reference" ref="norm.fa" />
            <param name="check_ref" value="ws" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="2\t101\t.\tA\tc\t999\tPASS"/>
                    <has_text_matching expression="2\t105\t.\tT\t&lt;DEL&gt;\t999\tPASS"/>
                </assert_contents>
            </output>
        </test>

    </tests>
    <!-- Help text shown with the tool form. The title uses the @EXECUTABLE@
         token; the remaining @...@ tokens are presumably shared help
         fragments and links defined in macros.xml (confirm there). -->
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@
=====================================


Left-align and normalize indels; check if REF alleles match the reference; split multiallelic sites into multiple rows; recover multiallelics from multiple rows.

@REGIONS_HELP@
@TARGETS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
]]>
    </help>
    <expand macro="citations" />
</tool>