view bcftools_csq.xml @ 17:d237eaef451f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 784611c9caf2680d41414ca2880b93a69d719701
author iuc
date Sun, 18 Aug 2024 10:00:41 +0000
parents 6c790a091c7c
children
line wrap: on
line source

<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- One-line summary of what this wrapper does. -->
    <description>Haplotype aware consequence predictor</description>
    <!-- The EXECUTABLE token holds the bcftools subcommand name ("csq"). It is
         substituted into the tool name/id, the command line and the help title.
         All other tokens and every macro expanded in this file are imported
         from macros.xml, which is not part of this file. -->
    <macros>
        <token name="@EXECUTABLE@">csq</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools" />
    <!-- The shared requirements macro is extended with samtools_requirement.
         NOTE(review): presumably samtools is needed to index the FASTA
         reference; confirm against macros.xml. -->
    <expand macro="requirements">
        <expand macro="samtools_requirement"/>
    </expand>
    <expand macro="version_command" />
    <command detect_errors="aggressive"><![CDATA[
## Preparation steps emitted before the bcftools call. The AT-delimited names
## are macro tokens whose definitions live in macros.xml (not visible here).
## NOTE(review): $section is re-assigned before each token group, presumably
## because the token bodies read it; confirm against macros.xml.
@PREPARE_ENV@
@PREPARE_INPUT_FILE@
#set $section = $sec_required.reference_source
@PREPARE_FASTA_REF@

#set $section = $sec_restrict
@PREPARE_TARGETS_FILE@
@PREPARE_REGIONS_FILE@

bcftools @EXECUTABLE@

## csq required inputs section
#set $section = $sec_required.reference_source
@FASTA_REF@
--gff-annot '$sec_required.gff_annot'

## csq options section
## ncsq is a mandatory integer, so this option is always emitted.
#if str($sec_csq_opts.ncsq):
    --ncsq $sec_csq_opts.ncsq
#end if
## Boolean parameter: renders as --local-csq when checked, empty otherwise.
$sec_csq_opts.local_csq
## Optional select: skipped when no phase mode was chosen.
#if $sec_csq_opts.phase:
    --phase $sec_csq_opts.phase
#end if
## Optional text: skipped when empty; the value is restricted by the
## regex validator declared on the parameter.
#if str($sec_csq_opts.custom_tag):
    --custom-tag '$sec_csq_opts.custom_tag'
#end if
## Optional integer: skipped when left empty.
#if str($sec_csq_opts.trim_protein_seq):
    --trim-protein-seq $sec_csq_opts.trim_protein_seq
#end if

## Subset section
#set $section = $sec_subset
@SAMPLES@

## Filter section
#set $section = $sec_restrict
@INCLUDE@
@EXCLUDE@
@REGIONS@
@TARGETS@

@OUTPUT_TYPE@

## Primary Input/Outputs
## bcftools writes to stdout, which is redirected into the output dataset.
@INPUT_FILE@
> '$output_file'

]]>
    </command>
    <inputs>
        <!-- Primary input dataset; the parameter is declared by the macro_input
             macro in macros.xml (not part of this file). -->
        <expand macro="macro_input" />
        <!-- Inputs bcftools csq cannot run without: the reference sequence
             (declared by macro_fasta_ref) and a GFF3 annotation. -->
        <section name="sec_required" expanded="true" title="Required References">
            <expand macro="macro_fasta_ref" />
            <param name="gff_annot" type="data" format="gff3" label="GFF3 annotation file"
                 help="From Ensembl:  ftp://ftp.ensembl.org/pub/current_gff3/"/>
        </section>
        <!-- Options specific to the csq subcommand. Each one maps to the
             long option of the same name in the command template. -->
        <section name="sec_csq_opts" expanded="true" title="CSQ Options">
            <param name="ncsq" type="integer" value="16" min="1" max="50" label="maximum number of consequences to consider per site"
                help="--ncsq 16"/>
            <param name="local_csq" type="boolean" truevalue="--local-csq" falsevalue="" checked="false" label="run localized predictions considering only one VCF record at a time"
                 help="--local-csq switch off haplotype-aware calling, run localized predictions considering only one VCF record at a time"/>
            <!-- Optional: when nothing is selected the phase option is omitted
                 from the command line. -->
            <param name="phase" type="select" optional="true" label="phase"
                 help="how to construct haplotypes and how to deal with unphased data">
                <option value="a">take GTs as is, create haplotypes regardless of phase (0/1 -> 0|1)</option>
                <option value="m">merge *all* GTs into a single haplotype (0/1 -> 1, 1/2 -> 1)</option>
                <option value="r">require phased GTs, throw an error on unphased het GTs</option>
                <option value="R">create non-reference haplotypes if possible (0/1 -> 1|1, 1/2 -> 1|2)</option>
                <option value="s">skip unphased GTs</option>
            </param>
            <!-- The value is placed inside single quotes on the command line,
                 so the validator restricts it to word characters (or empty). -->
            <param name="custom_tag" type="text" value="" optional="true"
                 label="use this custom tag to store consequences rather than the default BCSQ tag">
                 <validator type="regex" message="The tag name may only contain word characters (letters, digits and underscores)">^(\w+)?$</validator>
            </param>
            <param argument="--trim-protein-seq" type="integer" min="0" value="" optional="true" label="Abbreviate protein-changing predictions"
                help="Abbreviate protein-changing predictions to max INT aminoacids" />
        </section>

        <!-- Region/target restriction and include/exclude filter expressions;
             all parameters come from macros in macros.xml. -->
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_restrict" />
            <expand macro="macro_restrict" type="target" label_type="Target" />
            <expand macro="macro_include" />
            <expand macro="macro_exclude" />
        </section>
        <section name="sec_subset" expanded="false" title="Subset Options">
            <expand macro="macro_samples" />
        </section>
        <expand macro="macro_select_output_type" />
    </inputs>
    <!-- Output declaration is provided by macro_vcf_output in macros.xml (not
         part of this file). The command template redirects bcftools stdout to
         $output_file and the tests check an output named "output_file", so the
         macro is expected to declare a dataset with that name. -->
    <outputs>
        <expand macro="macro_vcf_output" />
    </outputs>
    <tests>
        <!-- Basic run with the reference given as csq.fa through the
             test_using_reference macro with its default source; the output
             must carry the BCSQ annotation. -->
        <test>
            <param name="input_file" ftype="vcf" value="csq.vcf" />
            <expand macro="test_using_reference" ref="csq.fa" />
            <param name="gff_annot" ftype="gff3" value="csq.gff3" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="BCSQ" />
                </assert_contents>
            </output>
        </test>
        <!-- Same run, but the reference is selected from the cached entries
             (select_from="cached", ref="csq"). -->
        <test>
            <param name="input_file" ftype="vcf" value="csq.vcf" />
            <expand macro="test_using_reference" select_from="cached" ref="csq" />
            <param name="gff_annot" ftype="gff3" value="csq.gff3" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="BCSQ" />
                </assert_contents>
            </output>
        </test>
        <!-- Test abbreviation of protein-changing predictions: the
             trim_protein_seq value must reach the command line unchanged. -->
        <test>
            <param name="input_file" ftype="vcf" value="csq.vcf" />
            <expand macro="test_using_reference" select_from="cached" ref="csq" />
            <param name="gff_annot" ftype="gff3" value="csq.gff3" />
            <param name="output_type" value="v" />
            <section name="sec_csq_opts">
                <param name="trim_protein_seq" value="10"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="BCSQ" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--trim-protein-seq 10" />
            </assert_command>
        </test>
        <!-- Test region overlap: setting regions_overlap must add the
             corresponding option to the command line. -->
        <test>
            <param name="input_file" ftype="vcf" value="csq.vcf" />
            <expand macro="test_using_reference" select_from="cached" ref="csq" />
            <param name="gff_annot" ftype="gff3" value="csq.gff3" />
            <param name="output_type" value="v" />
            <section name="sec_restrict">
                <param name="regions_overlap" value="1"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="BCSQ" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--regions-overlap" />
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@
=====================================

Haplotype aware consequence predictor which correctly handles combined variants such as MNPs split over
multiple VCF records, SNPs separated by an intron (but adjacent in the spliced transcript) or nearby
frame-shifting indels which in combination in fact are not frame-shifting.

The output VCF is annotated with INFO/BCSQ and FORMAT/BCSQ tags (configurable with the -c option).
The latter is a bitmask of indexes to INFO/BCSQ, with interleaved haplotypes. See the usage examples
below for using the %TBCSQ converter in query for extracting a more human readable form from this bitmask.
The construction of the bitmask limits the number of consequences that can be referenced in the FORMAT/BCSQ tags.
By default this is 16, but if more are required, see the --ncsq option.

The program requires as input a VCF/BCF file, the reference genome in fasta format (--fasta-ref)
and genomic features in the GFF3 format downloadable from the Ensembl website (--gff-annot),
and outputs an annotated VCF/BCF file. Currently, only Ensembl GFF3 files are supported.

By default, the input VCF should be phased. If phase is unknown, or only partially known,
the --phase option can be used to indicate how to handle unphased data. Alternatively,
haplotype aware calling can be turned off with the --local-csq option.

If conflicting (overlapping) variants within one haplotype are detected, a warning will
be emitted and predictions will be based on only the first variant in the analysis.

Symbolic alleles are not supported. They will remain unannotated in the output VCF and are
ignored for the prediction analysis.


@REGIONS_HELP@
@TARGETS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
]]>
    </help>
    <expand macro="citations" />
</tool>