view bcftools_call.xml @ 22:bf06b66ab5c2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 868f4386992de55d25da197660a43a913a09c8df
author iuc
date Wed, 14 Aug 2024 16:43:44 +0000
parents 1001172b5089
children 07396b423218
line wrap: on
line source

<?xml version='1.0' encoding='utf-8'?>
<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy4" profile="@PROFILE@">
    <description>SNP/indel variant calling from VCF/BCF</description>
    <macros>
        <!-- Sub-command name; substituted into the tool name and id, the
             command line and the help text of this file. -->
        <token name="@EXECUTABLE@">call</token>
        <import>macros.xml</import>
        <!-- Three optional float inputs for the novel-mutation rate used by
             trio-constrained calling, one each for SNPs, deletions and
             insertions. Expanded inside the "trio" branch of both genotypes
             conditionals. -->
        <xml name="macro_novel_rate">
            <param name="novel_rate_snp" type="float" value="" optional="true" label="Novel rate for SNPs"
                   help="Likelihood of novel mutation for constrained trio calling, see man page for details" />
            <param name="novel_rate_del" type="float" value="" optional="true" label="Novel rate for deletions"
                   help="Likelihood of novel mutation for constrained trio calling, see man page for details" />
            <param name="novel_rate_ins" type="float" value="" optional="true" label="Novel rate for insertions"
                   help="Likelihood of novel mutation for constrained trio calling, see man page for details" />
        </xml>
        <!-- Command-line fragment for the novel-rate option. It expects the
             Cheetah variable "section" to be bound so that section.genotypes
             holds the macro_novel_rate inputs. Non-empty values are collected
             in SNP, deletion, insertion order and joined with commas; the
             option is emitted only when at least one value is present.
             NOTE(review): empty fields are skipped rather than kept as
             placeholders, so a value entered only for deletions or insertions
             moves to an earlier position. The tool help describes how one,
             two or three values are interpreted - confirm this is intended. -->
        <token name="@NOVEL_RATE@">
            #set $novel_rate = []
            #if str($section.genotypes.novel_rate_snp):
              #silent $novel_rate.append(str($section.genotypes.novel_rate_snp))
            #end if
            #if str($section.genotypes.novel_rate_del):
              #silent $novel_rate.append(str($section.genotypes.novel_rate_del))
            #end if
            #if str($section.genotypes.novel_rate_ins):
              #silent $novel_rate.append(str($section.genotypes.novel_rate_ins))
            #end if
            #if len($novel_rate) > 0:
               --novel-rate '#echo ','.join($novel_rate)#'
            #end if
        </token>
    </macros>
    <!-- Shared macros; none of these three is defined in this file, so they
         presumably come from the imported macros.xml. -->
    <expand macro="bio_tools" />
    <expand macro="requirements" />
    <expand macro="version_command" />
    <!-- Cheetah template for the command line. Layout:
         1. Preparation tokens run first; the targets-file preparation is only
            expanded for the multiallelic caller with constrain = alleles.
         2. "bcftools call" followed by the caller options:
            - multiallelic: -m, then the optional gvcf, prior-freqs and prior
              values; constrain = alleles adds "constrain alleles", the
              insert_missed flag and the targets-file token; otherwise
              constrain = trio adds "constrain trio" plus the novel-rate
              fragment, and the targets token is expanded for both trio and
              unconstrained calling.
            - consensus: -c and the optional pval-threshold only.
         3. Regions and samples tokens (expanded with "section" bound to
            sec_restrict), ploidy settings, input/output flags, annotate tags,
            output type, threads, and finally the input file with stdout
            redirected to output_file.
         The variable "section" is re-bound before each shared token; the
         novel-rate token in this file reads section.genotypes, and the tokens
         from macros.xml presumably follow the same convention (confirm there).
         NOTE(review): the consensus branch never references the genotypes
         conditional or the target restriction declared for it under inputs,
         so those settings do not reach the command line - confirm whether
         that is intended. -->
    <command detect_errors="aggressive"><![CDATA[
@PREPARE_ENV@
@PREPARE_INPUT_FILE@
#set $section = $sec_consensus_variant_calling.variant_calling
#set $targets_path = None
#if $section.method == 'multiallelic':
    #if $section.genotypes.constrain == 'alleles':
        #set $section = $sec_consensus_variant_calling.variant_calling.genotypes
        @PREPARE_TARGETS_FILE@
    #end if
#end if
#set $section = $sec_restrict
@PREPARE_REGIONS_FILE@

bcftools @EXECUTABLE@

#set $section = $sec_consensus_variant_calling.variant_calling
#if $section.method == 'multiallelic':
    -m
    #if str($section.gvcf):
        --gvcf $section.gvcf
    #end if
    #if str($section.prior_freqs):
        --prior-freqs '$section.prior_freqs'
    #end if
    #if str($section.prior):
        --prior $section.prior
    #end if
    #if $section.genotypes.constrain == 'alleles':
        --constrain alleles $section.genotypes.insert_missed
        #set $section = $sec_consensus_variant_calling.variant_calling.genotypes
        @TARGETS_FILE@
    #else
        #if $section.genotypes.constrain == 'trio':
            --constrain trio
            @NOVEL_RATE@
        #end if
        #set $section = $sec_consensus_variant_calling.variant_calling.genotypes
        @TARGETS@
    #end if
#else
    -c
    #if str($section.pval_threshold):
        --pval-threshold $section.pval_threshold
    #end if
#end if

#set $section = $sec_restrict
@REGIONS@
@SAMPLES@

## File format section
#set $section = $sec_file_format
#if $section.ploidy:
    --ploidy ${section.ploidy}
#end if
#if $section.ploidy_file:
    --ploidy-file '${section.ploidy_file}'
#end if

## Input/output section
#set $section = $sec_input_output
${section.group_samples}
${section.keep_alts}
#if $section.format_fields:
    --format-fields '${section.format_fields}'
#end if
${section.keep_masked_ref}
#if $section.skip_variants:
    --skip-variants ${section.skip_variants}
#end if
${section.variants_only}

#if $section.output_tags
    --annotate $section.output_tags
#end if

@OUTPUT_TYPE@
@THREADS@

## Primary Input/Outputs
@INPUT_FILE@
> '$output_file'
    ]]></command>
    <inputs>
        <expand macro="macro_input" />
        <!-- Region and sample restrictions. The command template binds
             "section" to this element before expanding the regions and
             samples tokens. -->
        <section name="sec_restrict" title="Restrict to" expanded="false">
            <expand macro="macro_restrict"/>
            <expand macro="macro_samples"/>
        </section>
        <!-- Caller selection and caller-specific options. The command template
             emits -m for "multiallelic" and -c for "consensus". -->
        <section name="sec_consensus_variant_calling" expanded="true" title="Consensus/variant calling Options">
            <conditional name="variant_calling">
                <param name="method" type="select" label="Calling method">
                    <option value="multiallelic">Multiallelic and rare-variant caller</option>
                    <option value="consensus">Consensus caller</option>
                </param>
                <when value="multiallelic">
                    <!-- Genotype constraint. Every branch carries its own
                         target restriction: "alleles" uses
                         macro_restrictions_file, the other two modes use
                         macro_restrict. -->
                    <conditional name="genotypes">
                        <param argument="--constrain" type="select" label="Constrain" help="One of: alleles, trio (see manual)">
                            <option value="none">Do not constrain</option>
                            <option value="alleles">alleles - call genotypes given alleles</option>
                            <option value="trio">trio - call genotypes given the father-mother-child constraint</option>
                        </param>
                        <when value="none">
                            <expand macro="macro_restrict" type="target" label_type="Target">
                                <expand macro="macro_invert_targets"/>
                            </expand>
                        </when>
                        <when value="alleles">
                            <expand macro="macro_restrictions_file" type="target" label_type="Target" />
                            <expand macro="macro_invert_targets"/>
                            <param argument="--insert-missed" type="boolean" truevalue="--insert-missed" falsevalue="" label="Insert missed" help="Output also sites missed by mpileup but present in -T" />
                        </when>
                        <when value="trio">
                            <expand macro="macro_restrict" type="target" label_type="Target">
                                <expand macro="macro_invert_targets"/>
                            </expand>
                            <expand macro="macro_novel_rate" />
                        </when>
                    </conditional>
                    <param argument="--prior-freqs" type="text" value="" optional="true" label="Use prior knowledge of population allele frequencies">
                        <help>
<![CDATA[
For example: --prior-freqs REF_AN,REF_AC
<br>if the input VCF has the following INFO tags:
<br>##INFO=&lt;ID=REF_AN,Number=1,Type=Integer,Description="Total number of alleles in reference genotypes"&gt;
<br>##INFO=&lt;ID=REF_AC,Number=A,Type=Integer,Description="Allele count in reference genotypes for each ALT allele"&gt;
]]>
                        </help>
                        <validator type="regex" message="The INFO tags (separated by a comma), e.g. AN,AC">^(\w+,\w+)?$</validator>
                    </param>
                    <param argument="--prior" type="float" value="1.1e-3" optional="true" label="Prior" help="Expected substitution rate" />
                    <param argument="--gvcf" type="integer" optional="true" label="Output also gVCF blocks of homozygous REF calls" help="The parameter value is the minimum per-sample depth required to include a site in the non-variant block" />
                </when>
                <when value="consensus">
                    <!-- Only "none" and "trio" can be selected here, so the
                         help text no longer advertises "alleles".
                         NOTE(review): the consensus branch of the command
                         template emits only -c and the pval-threshold option;
                         this conditional and the target restriction below are
                         not referenced there - confirm whether they should
                         be. -->
                    <conditional name="genotypes">
                        <param argument="--constrain" type="select" label="Constrain" help="Only trio can be selected for the consensus caller (see manual)">
                            <option value="none">Do not constrain</option>
                            <option value="trio">trio - call genotypes given the father-mother-child constraint</option>
                        </param>
                        <when value="none" />
                        <when value="trio">
                            <expand macro="macro_novel_rate" />
                        </when>
                    </conditional>
                    <expand macro="macro_restrict" type="target" label_type="Target">
                        <expand macro="macro_invert_targets" type="target" label_type="Target" />
                    </expand>
                    <param argument="--pval-threshold" type="float" value="0.5" optional="true" label="Pval Threshold" help="Accept variant if P(ref|D)&lt;FLOAT" />
                </when>
            </conditional>
        </section>
        <!-- Ploidy settings: a predefined ploidy name and/or a ploidy file.
             The command template emits each option only when its parameter
             has a value. -->
        <section name="sec_file_format" expanded="false" title="File format Options">
            <param argument="--ploidy" type="select" optional="true" label="Select predefined ploidy">
                <option value="GRCh37">GRCh37 - Human Genome reference assembly GRCh37 / hg19</option>
                <option value="GRCh38">GRCh38 - Human Genome reference assembly GRCh38 / hg38</option>
                <option value="X">X - Treat male samples as haploid and female as diploid regardless of the chromosome name</option>
                <option value="Y">Y - Treat male samples as haploid and female as no-copy, regardless of the chromosome name</option>
                <option value="1">1 - Treat all samples as haploid</option>
            </param>
            <param argument="--ploidy-file" type="data" format="tabular" optional="true" label="Ploidy file" help="Space/tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY" />
            <!-- NOTE(review): the same two macros are also expanded in
                 sec_restrict, and the command template binds "section" to
                 sec_restrict before expanding the regions and samples tokens,
                 while with "section" bound to this element it reads only
                 ploidy and ploidy_file. The copies below therefore look
                 unused; they are kept because removing them would change the
                 parameter paths of existing jobs and workflows - confirm. -->
            <expand macro="macro_restrict" />
            <expand macro="macro_samples" />
        </section>
        <!-- Output-shaping options. Each boolean carries its literal
             command-line flag as truevalue and an empty falsevalue, so the
             command template inserts the parameter value directly; the text
             and select parameters are wrapped in an if-guard there. -->
        <section name="sec_input_output" title="Input/output Options" expanded="false">
            <param argument="--group-samples"
                   type="boolean"
                   truevalue="--group-samples -"
                   falsevalue=""
                   label="Single Sample Calling"
                   help="Group samples by population for single-sample calling (-G - is the only option implemented so far)"/>
            <param argument="--keep-alts"
                   type="boolean"
                   truevalue="--keep-alts"
                   falsevalue=""
                   label="Keep alts"
                   help="Output all alternate alleles present in the alignments even if they do not appear in any of the genotypes"/>
            <param argument="--format-fields"
                   type="text"
                   value=""
                   optional="true"
                   label="Comma-separated list of FORMAT fields to output for each sample"
                   help="Currently GQ and GP fields are supported">
                <validator type="regex" message="FORMAT terms separated by commas">^([A-Za-z]+(,[A-Za-z]+)*)?$</validator>
            </param>
            <param argument="--keep-masked-ref"
                   type="boolean"
                   truevalue="--keep-masked-ref"
                   falsevalue=""
                   label="Keep masked reference alleles"
                   help="Output sites where REF allele is N"/>
            <param argument="--skip-variants"
                   type="select"
                   optional="true"
                   label="Skip variants"
                   help="Skip indels/SNP sites">
                <option value="indels">indels</option>
                <option value="snps">snps</option>
            </param>
            <param argument="--variants-only"
                   type="boolean"
                   truevalue="--variants-only"
                   falsevalue=""
                   label="Output variant sites only"/>
            <expand macro="macro_output_tags">
                <option value="INFO/PV4">INFO/PV4: P-values for strand bias, baseQ bias, mapQ bias and tail</option>
                <option value="FORMAT/GQ">FORMAT/GQ: Phred-scaled genotype quality</option>
                <option value="FORMAT/GP">FORMAT/GP: Phred-scaled genotype posterior probabilities</option>
            </expand>
        </section>
        <expand macro="macro_select_output_type" />
    </inputs>
    <!-- The command template redirects standard output to output_file, and
         the tests refer to an output of that name; the output element itself
         comes from macro_vcf_output, which is not defined in this file. -->
    <outputs>
        <expand macro="macro_vcf_output"/>
    </outputs>
    <tests>
        <!-- Multiallelic caller with variants_only enabled -->
        <test>
            <param name="input_file" ftype="vcf" value="mpileup.vcf" />
            <param name="method" value="multiallelic" />
            <param name="variants_only" value="true" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="DP4=2,4,8,11;MQ=49" />
                </assert_contents>
            </output>
        </test>
        <!-- Multiallelic caller with gvcf set to 0; expects MinDP in the output -->
        <test>
            <param name="input_file" ftype="vcf" value="mpileup.vcf" />
            <param name="method" value="multiallelic" />
            <param name="gvcf" value="0" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="MinDP" />
                    <has_text text="DP4=2,4,8,11;MQ=49" />
                </assert_contents>
            </output>
        </test>
        <!-- Multiallelic caller with a ploidy file and a samples file.
             NOTE(review): samples_file is given by bare name although
             macro_samples is expanded in both sec_restrict and
             sec_file_format; if that macro defines samples_file, the name is
             ambiguous - confirm which parameter this test sets. -->
        <test>
            <param name="input_file" ftype="vcf" value="mpileup.X.vcf" />
            <param name="method" value="multiallelic" />
            <param name="ploidy_file" value="mpileup.ploidy" />
            <param name="samples_file" value="mpileup.samples" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="DP4=2,4,8,11;MQ=49" />
                </assert_contents>
            </output>
        </test>
        <!-- Consensus caller with a ploidy file; expects a PV4 annotation -->
        <test>
            <param name="input_file" ftype="vcf" value="mpileup.X.vcf" />
            <param name="method" value="consensus" />
            <param name="output_type" value="v" />
            <param name="ploidy_file" value="mpileup.ploidy" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="DP4=2,4,8,11" />
                    <has_text text="PV4=1,1,1,1" />
                </assert_contents>
            </output>
        </test>
        <!-- Test annotate: consensus caller with the INFO/PV4 output tag;
             also asserts that the annotate option appears in the command -->
        <test>
            <param name="input_file" ftype="vcf" value="mpileup.X.vcf" />
            <param name="method" value="consensus" />
            <param name="output_type" value="v" />
            <param name="ploidy_file" value="mpileup.ploidy" />
            <section name="sec_input_output">
                <param name="output_tags" value="INFO/PV4"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="DP4=2,4,8,11" />
                    <has_text text="PV4=1,1,1,1" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--annotate" />
            </assert_command>
        </test>
        <!-- Test region overlap: asserts that the regions-overlap option
             appears in the command -->
        <test>
            <param name="input_file" ftype="vcf" value="mpileup.vcf" />
            <param name="method" value="multiallelic" />
            <param name="variants_only" value="true" />
            <param name="output_type" value="v" />
            <section name="sec_restrict">
                <param name="regions_overlap" value="1"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="DP4=2,4,8,11;MQ=49" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--regions-overlap" />
            </assert_command>
        </test>
        <!-- Test group samples option: expects the bcftools_callCommand text
             in the output to include the group-samples flag -->
        <test>
            <param name="input_file" ftype="vcf" value="mpileup.AD.vcf" />
            <param name="method" value="multiallelic" />
            <param name="output_type" value="v" />
            <section name="sec_input_output">
                <param name="group_samples" value="true" />
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="bcftools_callCommand=call -m --prior 0.0011 --group-samples - " />
                    <has_text text="DP=1;ADF=1;ADR=0;DPR=1;MQ0F=0;AN=2;DP4=1,0,0,0;MQ=37" />
                </assert_contents>
            </output>
        </test>
        <test>
            <!-- Test targets-file option: the constraint is left at its
                 default and a targets file is supplied; expects the
                 targets-file option in the output text and the record
                 asserted by the group samples test to be absent -->
            <param name="input_file" ftype="vcf" value="mpileup.AD.vcf" />
            <param name="method" value="multiallelic" />
            <param name="output_type" value="v" />
            <section name="sec_consensus_variant_calling">
                <conditional name="variant_calling">
                    <conditional name="genotypes">
                        <conditional name="targets">
                            <param name="targets_src" value="targets_file"/>
                            <param name="targets_file" value="targets_file.tab"/>
                        </conditional>
                    </conditional>
                </conditional>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="--targets-file" />
                    <not_has_text text="DP=1;ADF=1;ADR=0;DPR=1;MQ0F=0;AN=2;DP4=1,0,0,0;MQ=37" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
==================================
 bcftools @EXECUTABLE@
==================================

SNP/indel variant calling from VCF/BCF. To be used in conjunction with samtools mpileup.

  - This command replaces the former "bcftools view" caller.
  - Some of the original functionality has been temporarily lost in the process of transition to htslib, but will be added back on popular demand.
  - The original calling model can be invoked with the -c option.

The novel-rate option sets the likelihood of a novel mutation for constrained (-C trio) calling. Trio genotype calling maximizes the likelihood of a particular combination of genotypes for the father, mother and child: P(F=i,M=j,C=k) = P(unconstrained) * Pn + P(constrained) * (1-Pn). If three values are given, the mutation rate Pn is set explicitly for SNPs, deletions and insertions, respectively. If two values are given, the first is interpreted as the mutation rate of SNPs and the second is used to calculate the mutation rate of indels according to their length as Pn=float*exp(-a-b*len), where a=22.8689, b=0.2994 for insertions and a=21.9313, b=0.2856 for deletions [pubmed:23975140]. If only one value is given, the same mutation rate Pn is used for SNPs and indels.


@REGIONS_HELP@
@TARGETS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
]]>
    </help>
    <expand macro="citations" />
</tool>