view bcftools_consensus.xml @ 21:1171446da5db draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit bfc4ff4956b94885638ae07a2560bac5f84fcca8
author iuc
date Tue, 16 Jul 2024 17:12:58 +0000
parents 95c6f3f4c77f
children
line wrap: on
line source

<?xml version='1.0' encoding='utf-8'?>
<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Create consensus sequence by applying VCF variants to a reference fasta file</description>
    <!-- Define the sub-command name once; the @EXECUTABLE@ token is reused in the
         tool name, the tool id, the command line and the help text. The other
         tokens and macros referenced by this tool are expected to come from
         macros.xml. -->
    <macros>
        <token name="@EXECUTABLE@">consensus</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools" />
    <!-- Extra packages on top of the shared requirements macro. gawk is listed
         because the optional FASTA renaming step in the command calls gensub(),
         which is a GNU awk extension. -->
    <expand macro="requirements">
        <expand macro="samtools_requirement"/>
        <requirement type="package" version="5.0.1">gawk</requirement>
    </expand>
    <expand macro="version_command" />
    <command detect_errors="aggressive"><![CDATA[
## Builds the `bcftools consensus` command line.
## Lines starting with two hashes are Cheetah comments and never reach the shell.
## $section is re-pointed before each group of macro tokens because the shared
## tokens (@PREPARE_FASTA_REF@, @SAMPLE@, @INCLUDE@, ...) read their values from it.
@PREPARE_ENV@
@PREPARE_INPUT_FILE@
#set $section = $reference_source
@PREPARE_FASTA_REF@

bcftools @EXECUTABLE@

@FASTA_REF@

## Default section
#set $section = $sec_default

${section.iupac_codes}

## Mask file with a user-chosen replacement ("Mask file option" conditional).
## This pair is emitted BEFORE the plain --mask below on purpose: bcftools applies
## a --mask-with to the --mask that precedes it, so keeping the pair first and
## adjacent guarantees the replacement only affects this file, while the plain
## mask keeps the default replacement (N).
#if str($section.conditional_mask.selector) == "enabled":
    --mask '${section.conditional_mask.mask}'
    #if $section.conditional_mask.mask_with
        --mask-with '${section.conditional_mask.mask_with}'
    #end if
#end if

## Plain mask: regions are replaced with the default character (N).
#if $section.mask:
  --mask '${section.mask}'
#end if

#if $section.mark_del
    --mark-del '$section.mark_del'
#end if

#if $section.mark_ins
    --mark-ins $section.mark_ins
#end if

#if $section.mark_snv
    --mark-snv $section.mark_snv
#end if

#if $section.select_haplotype:
  --haplotype '${section.select_haplotype}'
#end if
@SAMPLE@

#set $section = $sec_restrict
@INCLUDE@
@EXCLUDE@

#if $chain:
    --chain '$chain_file'
#end if

#if $absent
    --absent '$absent'
#end if

## Primary Input/Outputs
## Without renaming, bcftools writes the FASTA itself via --output.
## With renaming, bcftools writes to stdout and gawk rewrites every header line:
## the first record gets the dataset name, later records get "<name>-<n>".
#if str($rename) == "no"
    --output '$output_file'
#end if
@INPUT_FILE@
#if str($rename) == "yes":
    ## NOTE(review): the element identifier is user-controlled and is pasted
    ## unescaped into a single-quoted awk program; a name containing a single or
    ## double quote breaks the quoting. Consider sanitising it before use.
    #set basename=$input_file.element_identifier
    | awk 'BEGIN {i=1} {if (match($0, /^>/)) {if (i==1) {name="${basename}"} else {name=sprintf("%s-%d","${basename}",i);} print(gensub(/>[^ ]+( ?.*)/, ">" name "\\1", 1)); i=i+1;} else {print}}' > '$output_file'
#end if
]]>
    </command>
    <inputs>
        <!-- VCF/BCF input and reference FASTA come from the shared macros. -->
        <expand macro="macro_input" />
        <expand macro="macro_fasta_ref" />
        <section name="sec_default" expanded="true" title="Default Options">
            <!-- Plain mask file: no replacement character can be chosen here. -->
            <param name="mask" type="data" format="tabular" label="Mask" optional="True" help="Replace regions with N" />
            <param name="iupac_codes" type="boolean" truevalue="--iupac-codes" falsevalue="" label="Iupac Codes"
                   help="Output variants in the form of IUPAC ambiguity codes" />
            <expand macro="macro_sample" />
            <param name="select_haplotype" type="select" optional="true" label="Haplotype"
                   help="Choose which allele of the genotype to apply: 1 for the first allele, 2 for the second (--haplotype)">
                <option value="1">1</option>
                <option value="2">2</option>
            </param>
            <!-- The sanitizer strips everything except letters, digits and underscore
                 before the value reaches the command line. -->
            <param argument="--mark-del" type="text" value="" optional="true" label="Mark deletions" help="Instead of removing sequence, insert CHAR for deletions">
                <sanitizer invalid_char="">
                    <valid initial="string.letters,string.digits">
                        <add value="_" />
                    </valid>
                </sanitizer>
                <validator type="regex">[0-9a-zA-Z_]+</validator>
            </param>
            <param argument="--mark-ins" type="select" optional="true" label="Mark insertions" help="Highlight insertions in uppercase (uc) or lowercase (lc), leaving the rest as is">
                <option value="uc">Uppercase</option>
                <option value="lc">Lowercase</option>
            </param>
            <param argument="--mark-snv" type="select" optional="true" label="Mark substitutions" help="Highlight substitutions in uppercase (uc) or lowercase (lc), leaving the rest as is">
                <option value="uc">Uppercase</option>
                <option value="lc">Lowercase</option>
            </param>
            <!-- Mask file paired with a selectable replacement (character, uc or lc). -->
            <conditional name="conditional_mask">
                <param name="selector" type="select" label="Mask file option">
                    <option value="disabled">Disabled</option>
                    <option value="enabled">Enabled</option>
                </param>
                <when value="disabled"/>
                <when value="enabled">
                    <param argument="--mask" type="data" format="tabular" label="Mask (custom replacement)" help="Replace regions according to the 'Mask with' setting below" />
                    <param argument="--mask-with" type="text" value="N" optional="true" label="Mask with" help="Replace with CHAR (skips overlapping variants); change to uppercase (uc) or lowercase (lc)">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters,string.digits">
                                <add value="_" />
                            </valid>
                        </sanitizer>
                        <validator type="regex">[0-9a-zA-Z_]+</validator>
                    </param>
                </when>
            </conditional>
        </section>
        <!-- "chain" also gates the chain_file output through its filter. -->
        <param name="chain" type="boolean" truevalue="yes" falsevalue="no" label="Write a chain file for liftover" />
        <param name="rename" type="boolean" truevalue="yes" falsevalue="no" label="Set output FASTA ID from name of VCF" />
        <!-- Punctuation is allowed here, except "@" and the single quote; the value
             is single-quoted on the command line, so a quote must never get through. -->
        <param argument="--absent" type="text" value="" label="Absent" optional="true" help="It allows to set positions with no supporting evidence to N (or any other character)">
            <sanitizer invalid_char="">
                <valid initial="string.letters,string.digits,string.punctuation">
                    <remove value="@" />
                    <remove value="'" />
                </valid>
            </sanitizer>
        </param>
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_include" />
            <expand macro="macro_exclude" />
        </section>
    </inputs>
    <outputs>
        <!-- Consensus sequence; always produced. -->
        <data name="output_file"
              format="fasta"
              label="${tool.name} on ${on_string}: consensus fasta" />
        <!-- Liftover chain; only kept when the "chain" boolean input is set. -->
        <data name="chain_file"
              format="txt"
              label="${tool.name} on ${on_string}: chain">
            <filter>chain</filter>
        </data>
    </outputs>
    <tests>
        <!--Test mask and chain output with a reference from the history-->
        <test expect_num_outputs="2">
            <expand macro="test_using_reference" ref="consensus.fa" />
            <param name="input_file" ftype="vcf" value="consensus.vcf" />
            <param name="mask" ftype="tabular" value="consensus.tab" />
            <param name="chain" value="True" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="NNNNNNNNNNNNNNNNNNNNNNNNNN" />
                </assert_contents>
            </output>
            <output name="chain_file">
                <assert_contents>
                    <has_text text="chain 497 1 501 + 1 501 1 502 + 1 502 1" />
                </assert_contents>
            </output>
        </test>
        <!--Same as above, but with a cached reference-->
        <test expect_num_outputs="2">
            <expand macro="test_using_reference" select_from="cached" ref="consensus" />
            <param name="input_file" ftype="vcf" dbkey="?" value="consensus.vcf" />
            <param name="mask" ftype="tabular" value="consensus.tab" />
            <param name="chain" value="True" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="NNNNNNNNNNNNNNNNNNNNNNNNNN" />
                </assert_contents>
            </output>
            <output name="chain_file">
                <assert_contents>
                    <has_text text="chain 497 1 501 + 1 501 1 502 + 1 502 1" />
                </assert_contents>
            </output>
        </test>
        <!--Test rename option: first record takes the VCF name, later records get a numeric suffix-->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa" />
            <param name="input_file" ftype="vcf" value="consensus.vcf" />
            <param name="mask" ftype="tabular" value="consensus.tab" />
            <param name="chain" value="False" />
            <param name="rename" value="True" />
            <output name="output_file">
                <!-- Both checks live in a single assert_contents element so that
                     neither of them is skipped. -->
                <assert_contents>
                    <has_text text=">consensus.vcf" />
                    <has_text text=">consensus.vcf-2" />
                </assert_contents>
            </output>
        </test>
        <!--Test include expression-->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa" />
            <param name="input_file" ftype="vcf" value="consensus.vcf" />
            <section name="sec_restrict">
                <param name="include" value='TYPE="snp"' />
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="TACAAAATATGACATATCAAAAAGAACATAACCTACGTATCAACTAAAGTGGTTGTTTGA" />
                </assert_contents>
            </output>
        </test>
        <!--Test absent option-->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa" />
            <param name="input_file" ftype="vcf" value="consensus.vcf" />
            <section name="sec_restrict">
                <param name="include" value='TYPE="snp"' />
            </section>
            <param name="absent" value="W"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="WWWAWAWWAWWWWWWWWCWWWWWWWW" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--absent" />
            </assert_command>
        </test>
        <!--Test mark options (mark-del, mark-ins, mark-snv)-->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa" />
            <param name="input_file" ftype="vcf" value="consensus.vcf" />
            <section name="sec_restrict">
                <param name="include" value='TYPE="snp"' />
            </section>
            <section name="sec_default">
                <param name="mark_del" value="DEL"/>
                <param name="mark_ins" value="uc"/>
                <param name="mark_snv" value="uc"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="TACAAAATATGACATATCAAAAAGAACATAACCTACGTATCAACTAAAGTGGTTGTTTGA" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--mark-del" />
                <has_text text="--mark-ins" />
                <has_text text="--mark-snv" />
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@
=====================================


Create consensus sequence by applying VCF variants to a reference fasta file.

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@

The option to set the new consensus' FASTA ID from the name of the VCF is implemented by post-processing
the bcftools consensus output. It is primarily intended for use when the VCF comes from a list
collection whose elements are named meaningfully (e.g. after sample names). This is useful, for
example, when preparing consensus sequences for a multiple sequence alignment that will be fed to
a phylogeny program.
]]>
    </help>
    <expand macro="citations" />
</tool>