view bcftools_view.xml @ 24:1a2ef636f48e draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 784611c9caf2680d41414ca2880b93a69d719701
author iuc
date Sun, 18 Aug 2024 10:19:46 +0000
parents abc3ced31cd1
children
line wrap: on
line source

<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>VCF/BCF conversion, view, subset and filter VCF/BCF files</description>
    <macros>
        <!-- Subcommand name; substituted wherever @EXECUTABLE@ appears (tool name/id, command line, help). -->
        <token name="@EXECUTABLE@">view</token>
        <import>macros.xml</import>
        <!-- Variant type choices shared by the "types" and "exclude_types" selects in the filter section. -->
        <xml name="macro_types_options">
            <option value="snps">snps</option>
            <option value="indels">indels</option>
            <option value="mnps">mnps</option>
            <option value="other">other</option>
        </xml>
    </macros>
    <expand macro="bio_tools" />
    <expand macro="requirements" />
    <expand macro="version_command" />
    <!--
        Cheetah template that assembles the bcftools command line.
        $section is re-bound before each group of options (restrict, subset, filter, output,
        then restrict again) so that the lines and tokens that follow read from the matching
        input section. The @...@ tokens are not defined in this file; presumably they come
        from the imported macros.xml (confirm there what each one expands to).
        Optional integer/float params are tested with str(...) so that an empty field emits nothing.
        The result is written to $output_file via shell redirection of stdout.
    -->
    <command detect_errors="aggressive"><![CDATA[
@PREPARE_ENV@
@PREPARE_INPUT_FILE@
#set $section = $sec_restrict
@PREPARE_TARGETS_FILE@
@PREPARE_REGIONS_FILE@

bcftools @EXECUTABLE@

## Subset section
#set $section = $sec_subset
${section.trim_alt_alleles}
${section.no_update}
@SAMPLES@
${section.force_samples}


## Filter section
#set $section = $sec_filter
#if str($section.min_ac)
  --min-ac ${section.min_ac}
#end if
#if str($section.max_ac)
  --max-ac ${section.max_ac}
#end if
#if str($section.select_genotype) != "None":
  --genotype "${section.select_genotype}"
#end if
## known or novel
#if $section.known_or_novel:
  ${section.known_or_novel}
#end if
#if str($section.min_alleles)
  --min-alleles ${section.min_alleles}
#end if
#if str($section.max_alleles)
  --max-alleles ${section.max_alleles}
#end if
#if $section.phased:
  ${section.phased}
#end if
#if str($section.min_af)
  --min-af ${section.min_af}
#end if
#if str($section.max_af)
  --max-af ${section.max_af}
#end if
#if $section.uncalled:
  ${section.uncalled}
#end if
#if $section.types:
  --types "${section.types}"
#end if
#if $section.exclude_types:
  --exclude-types "${section.exclude_types}"
#end if
#if $section.private:
  ${section.private}
#end if

## Output section
#set $section = $sec_output
${section.drop_genotypes}
#if $section.header:
  ${section.header}
#end if
#if str($section.compression_level)
  --compression-level ${section.compression_level}
#end if

#set $section = $sec_restrict
@APPLY_FILTERS@
@INCLUDE@
@EXCLUDE@
@REGIONS@
@TARGETS@

@OUTPUT_TYPE@
@THREADS@

## Primary Input/Outputs
@INPUT_FILE@
> '$output_file'
]]>
    </command>
    <inputs>
        <expand macro="macro_input" />
        <!--
            Restriction options, built entirely from macros that are not defined in this file
            (presumably in the imported macros.xml). macro_restrict is expanded twice: once with
            its defaults and once with type="target" / label_type="Target".
            The command template binds $section to this section before the matching tokens.
        -->
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_apply_filters" />
            <expand macro="macro_restrict" />
            <expand macro="macro_restrict" type="target" label_type="Target" />
            <expand macro="macro_include" />
            <expand macro="macro_exclude" />
        </section>
        <!--
            Sample subsetting switches. Each boolean yields its bcftools flag when checked and an
            empty string otherwise, so the command template can emit the value unconditionally.
        -->
        <section title="Subset Options" name="sec_subset" expanded="false">
            <expand macro="macro_samples"/>
            <param type="boolean" name="force_samples"
                   label="Force Samples"
                   help="Only warn about unknown subset samples"
                   truevalue="--force-samples" falsevalue=""/>
            <param type="boolean" name="no_update"
                   label="No Update"
                   help="Do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)"
                   truevalue="--no-update" falsevalue=""/>
            <param type="boolean" name="trim_alt_alleles"
                   label="Trim Alt Alleles"
                   help="Trim alternate alleles not seen in the subset"
                   truevalue="--trim-alt-alleles" falsevalue=""/>
        </section>
        <!--
            Site-level filters. Every parameter is optional; the "Filter section" of the command
            template emits an option only when the corresponding parameter has a value.
            For the select params whose option values start with two dashes, the value itself is
            the long bcftools flag and is written to the command line verbatim.
            Short flags quoted in the labels follow the bcftools view manual
            (-k is known, -n is novel).
        -->
        <section name="sec_filter" expanded="false" title="Filter Options">
            <param name="min_ac" type="integer" label="Min Ac" optional="True"
                   help="(-c --min-ac) Minimum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles" />
            <param name="max_ac" type="integer" label="Max Ac" optional="True"
                   help="(-C --max-ac) Maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles" />
            <param name="select_genotype" type="select" label="Select Genotype" optional="True">
                <help>
                Include only sites with one or more homozygous (hom), heterozygous (het) or missing (miss) genotypes.
                When prefixed with ^, the logic is reversed.
                Please notice that if the input file doesn't have any genotype columns,
                then this option is ignored altogether.
                </help>
                <option value="hom">hom</option>
                <option value="het">het</option>
                <option value="miss">miss</option>
                <option value="^hom">^hom</option>
                <option value="^het">^het</option>
                <option value="^miss">^miss</option>
            </param>
            <param name="types" type="select" label="Select Types"  multiple="true" optional="True">
                <help>
                 List of variant types to select. Site is selected if any of the ALT alleles is of the type requested.
                 Types are determined by comparing the REF and ALT alleles in the VCF record.
                </help>
                <expand macro="macro_types_options" />
            </param>

            <param name="exclude_types" type="select" label="Exclude Types"  multiple="true" optional="True">
                <help>
                 List of variant types to exclude. Site is excluded if any of the ALT alleles is of the type requested.
                 Types are determined by comparing the REF and ALT alleles in the VCF record.
                </help>
                <expand macro="macro_types_options" />
            </param>

            <!-- The option value is the flag passed to bcftools; the label only documents it. -->
            <param name="known_or_novel" type="select" label="filter known or novel ID" optional="true">
                <option value="--novel">(-n) print novel sites only (ID column is ".")</option>
                <option value="--known">(-k) print known sites only (ID column is not ".")</option>
            </param>

            <param name="min_alleles" type="integer" label="Min Alleles" optional="True"
                   help="(-m) Minimum number of alleles listed in REF and ALT (e.g. -m2)" />
            <param name="max_alleles" type="integer" label="Max Alleles" optional="True"
                   help="(-M) Maximum number of alleles listed in REF and ALT (e.g. -M2 for biallelic sites)" />

            <param name="phased" type="select" label="filter phased" optional="true">
                <help>
                Haploid genotypes are considered phased. Missing genotypes considered unphased unless the phased bit is set.
                </help>
                <option value="--phased">(-p) print sites where all samples are phased</option>
                <option value="--exclude-phased">(-P) exclude sites where all samples are phased</option>
            </param>

            <!-- TODO optional select for type: nref,alt1,minor,major,sum -->
            <param name="min_af" type="float" label="Min Af" optional="True"
                   help="(-q) Minimum frequency for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles" />
            <param name="max_af" type="float" label="Max Af" optional="True"
                   help="(-Q) Maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles" />

            <param name="uncalled" type="select" label="filter on genotype uncalled" optional="true">
                <option value="--uncalled">uncalled - print sites without a called genotype</option>
                <option value="--exclude-uncalled">exclude-uncalled - exclude sites without a called genotype</option>
            </param>

            <param name="private" type="select" label="filter on private" optional="true">
                <help><![CDATA[
                 Private: Where only the subset samples carry an non-reference allele.
                 ( Ignored if samples are not included )
                ]]></help>
                <option value="--private">(-x) private - print sites where only the subset samples carry an non-reference allele</option>
                <option value="--exclude-private">(-X) exclude-private - exclude sites where only the subset samples carry an non-reference allele</option>
            </param>
        </section>

        <!--
            Output shaping options, read by the "Output section" of the command template:
            drop_genotypes, header and compression_level are each emitted there.
            NOTE(review): invert_targets_file is not referenced by name anywhere in this file's
            command template, and $section is bound to sec_restrict (not this section) when
            @TARGETS@ is expanded. Confirm against macros.xml whether this parameter has any
            effect or is a leftover.
        -->
        <section name="sec_output" expanded="false" title="Output Options">
            <param name="drop_genotypes" type="boolean" truevalue="--drop-genotypes" falsevalue="" label="Drop Genotypes"
                   help="Drop individual genotype information (after subsetting if -s option set)" />
            <param name="header" type="select" label="output header" optional="true">
                <option value="--no-header">(-h) no-header</option>
                <option value="--header-only">(-H) header-only</option>
            </param>
            <param name="compression_level" type="integer" min="0" max="9" label="Compression Level" optional="True"
                   help="Compression level: 0 uncompressed, 1 best speed, 9 best compression" />
            <param name="invert_targets_file" type="boolean" truevalue="^" falsevalue="" label="Invert Targets File"
                   help="Inverts the query/filtering applied by Targets File" />
        </section>
        <expand macro="macro_select_output_type" />
    </inputs>
    <outputs>
        <expand macro="macro_vcf_output" />
    </outputs>
    <!--
        Functional tests. All of them request output_type "v" and check the output by
        asserting that one marker string is present and another is absent.
    -->
    <tests>
        <!-- VCF input: allele count pinned to 1 (min and max), sample NA00002, SNPs only. -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="min_ac" value="1" />
            <param name="max_ac" value="1" />
            <param name="samples" value="NA00002" />
            <param name="types" value="snps" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="rs2298108" />
                    <not_has_text text="rs6111385" />
                </assert_contents>
            </output>
        </test>
        <!-- Same options as the previous test, with BCF input. -->
        <test>
            <param name="input_file" ftype="bcf" value="view.bcf" />
            <param name="min_ac" value="1" />
            <param name="max_ac" value="1" />
            <param name="samples" value="NA00002" />
            <param name="types" value="snps" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="rs2298108" />
                    <not_has_text text="rs6111385" />
                </assert_contents>
            </output>
        </test>
        <!-- Novel sites, private sites excluded, sample NA00003, region Y from position 20; also checks the generated regions argument. -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="known_or_novel" value="--novel" />
            <param name="private" value="--exclude-private" />
            <param name="samples" value="NA00003" />
            <section name="sec_restrict">
                <conditional name="regions">
                    <param name="regions_src" value="regions"/>
                    <repeat name="regions">
                        <param name="chrom" value="Y" />
                        <param name="start" value="20" />
                    </repeat>
                </conditional>
            </section>
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="8657215" />
                    <not_has_text text="rs6111385" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--regions 'Y:20-'"/>
            </assert_command>
        </test>
        <!-- Known sites with apply_filters set to PASS; same sample, private exclusion and region as the previous test. -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="known_or_novel" value="--known" />
            <param name="private" value="--exclude-private" />
            <param name="apply_filters" value="PASS" />
            <param name="samples" value="NA00003" />
            <section name="sec_restrict">
                <conditional name="regions">
                    <param name="regions_src" value="regions"/>
                    <repeat name="regions">
                        <param name="chrom" value="Y" />
                        <param name="start" value="20" />
                    </repeat>
                </conditional>
            </section>
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="Minimum number of alternate bases" />
                    <not_has_text text="8657215" />
                </assert_contents>
            </output>
        </test>
        <!-- Private sites for sample NA00003. -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="private" value="--private" />
            <param name="samples" value="NA00003" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="rs62584840" />
                    <not_has_text text="8657215" />
                </assert_contents>
            </output>
        </test>
        <!-- Include expression combining QUAL, FS, ICF and HWE terms (XML-escaped operators). -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="include" value="QUAL==999 &amp;&amp; (FS&lt;20 || FS&gt;=41.02) &amp;&amp; ICF&gt;-0.1 &amp;&amp; HWE*2&gt;1.2" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="rs5939407" />
                    <not_has_text text="8657215" />
                </assert_contents>
            </output>
        </test>
        <!-- Phased option. -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="phased" value="--phased" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="rs78249411" />
                    <not_has_text text="rs6111385" />
                </assert_contents>
            </output>
        </test>
        <!-- Exclude-phased option; the assertions mirror the previous test. -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="phased" value="--exclude-phased" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="rs6111385" />
                    <not_has_text text="rs78249411" />
                </assert_contents>
            </output>
        </test>
        <!-- Allele number pinned to 2, allele frequency between 0.3 and 0.7, header-only output. -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="min_alleles" value="2" />
            <param name="max_alleles" value="2" />
            <param name="min_af" value="0.3" />
            <param name="max_af" value="0.7" />
            <param name="header" value="--header-only" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="##bcftools_viewCommand" />
                    <not_has_text text="rs78249411" />
                </assert_contents>
            </output>
        </test>
        <!-- Uncalled option with the header suppressed; the bcftools_viewCommand header line must be absent. -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="uncalled" value="--uncalled" />
            <param name="header" value="--no-header" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="##bcftools_viewCommand" />
                    <has_text text="5464562" />
                </assert_contents>
            </output>
        </test>
        <!-- Exclude SNPs and drop genotypes; the GT FORMAT header line must be absent. -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="exclude_types" value="snps" />
            <param name="drop_genotypes" value="true" />
            <param name="output_type" value="v" />
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="##FORMAT=&lt;ID=GT" />
                    <not_has_text text="rs62584840" />
                    <has_text text="2343543" />
                </assert_contents>
            </output>
        </test>
        <!-- Phased option combined with select_genotype ^het; the output must contain the genotype option text. -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="phased" value="--phased" />
            <param name="output_type" value="v" />
            <param name="select_genotype" value="^het" />
            <output name="output_file">
                <assert_contents>
                    <has_text text="--genotype ^het" />
                    <has_text text="rs78249411" />
                    <not_has_text text="3048719" />
                </assert_contents>
            </output>
        </test>
        <!-- Test region overlap option -->
        <test>
            <param name="input_file" ftype="vcf" value="view.vcf" />
            <param name="min_ac" value="1" />
            <param name="max_ac" value="1" />
            <param name="samples" value="NA00002" />
            <param name="types" value="snps" />
            <param name="output_type" value="v" />
            <section name="sec_restrict">
                <param name="regions_overlap" value="1"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="rs2298108" />
                    <not_has_text text="rs6111385" />
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--regions-overlap" />
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@
=====================================

VCF/BCF conversion, view, subset and filter VCF/BCF files.

@REGIONS_HELP@
@TARGETS_HELP@
@EXPRESSIONS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
]]>
    </help>
    <expand macro="citations" />
</tool>