Mercurial > repos > iuc > bcftools_convert_from_vcf

<?xml version='1.0' encoding='utf-8'?>
<tool name="bcftools @EXECUTABLE@ from vcf" id="bcftools_@EXECUTABLE@_from_vcf" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Converts VCF/BCF to IMPUTE2/SHAPEIT formats</description>
    <macros>
        <!-- bcftools sub-command name; the EXECUTABLE token is substituted into the tool name, id, command line and help text of this file. -->
        <token name="@EXECUTABLE@">convert</token>
        <!-- Presumably supplies the tokens and macros used below that are not defined in this file; verify against macros.xml. -->
        <import>macros.xml</import>
        <!-- Boolean toggle expanded in every conversion mode of the "convert" conditional.
             When checked, its truevalue is written into the command line as-is.
             The help text shows the flag exactly as it is emitted (hyphen, not underscore). -->
        <xml name="macro_vcf_ids">
            <param name="vcf_ids" type="boolean" truevalue="--vcf-ids" falsevalue="" checked="false"
                   label="Output VCF IDs instead of CHROM:POS_REF_ALT" help="(\-\-vcf-ids)"/>
        </xml>
        <!-- Boolean toggle used by the hap_sample and hap_legend_sample modes;
             renders as the haploid2diploid flag when checked and as an empty string otherwise. -->
        <xml name="macro_haploid2diploid">
            <param name="haploid2diploid"
                   type="boolean"
                   checked="false"
                   truevalue="--haploid2diploid"
                   falsevalue=""
                   label="convert haploid genotypes to diploid homozygotes"
                   help="(\-\-haploid2diploid)"/>
        </xml>
        <!-- Optional per-sample sex designations, expanded in every conversion mode.
             Three sources are offered: none, a tabular history dataset, or free text typed into the form.
             Free text is validated line by line as "name, one space or TAB, M or F"; the configfile
             of this tool later turns the spaces into TABs, so either separator is acceptable here. -->
        <xml name="macro_sexinfo">
            <conditional name="sex_info">
                <param name="sex_info_src" type="select" label="sex column">
                    <help><![CDATA[
SampleName SexDesignation:
<br>MaleSample    M
<br>FemaleSample  F
                    ]]></help>
                    <option value="none">None</option>
                    <option value="history">designations from history dataset</option>
                    <option value="entry">designations from text input</option>
                </param>
                <when value="none"/>
                <when value="history">
                    <param name="sex_info_file" type="data" format="tabular" label="per sample sex designation file"/>
                </when>
                <when value="entry">
                    <param name="sex_info_lines" type="text" area="True" label="per sample sex designation" >
                        <!-- The message mirrors what the pattern really accepts: a single space or a TAB between the two fields. -->
                        <validator type="regex" message="One sample per line: sample name, a single space or TAB, then M or F">^(\S+[ \t][MF](\n\S+[ \t][MF])*)$</validator>
                        <!-- Sanitizing is switched off so that the TAB and newline characters of the entry are kept. -->
                        <sanitizer sanitize="False"/>
                    </param>
                </when>
            </conditional>
        </xml>
        <!-- Command-line fragment for the sex option, substituted into the command template.
             "entry" points at the configfile named entered_sex_file, "history" at the selected
             dataset; for "none" nothing is emitted. -->
        <token name="@SAMPLE_SEX@">
#if $convert.sex_info.sex_info_src == 'entry':
  --sex "$entered_sex_file"
#elif $convert.sex_info.sex_info_src == 'history':
  --sex "$convert.sex_info.sex_info_file"
#end if
        </token>
    </macros>
    <expand macro="bio_tools" />
    <expand macro="requirements" />
    <expand macro="version_command" />
    <!-- Cheetah template that assembles the bcftools command line. Tokens written between
         at-signs that are not defined in this file are expected to come from macros.xml. -->
    <command detect_errors="aggressive"><![CDATA[
@PREPARE_ENV@
@PREPARE_INPUT_FILE@
#set $section = $sec_restrict
@PREPARE_TARGETS_FILE@
@PREPARE_REGIONS_FILE@

bcftools @EXECUTABLE@

## Output mode: exactly one branch is taken, chosen by the convert_to select.
#if $convert.convert_to == 'gen_sample':
--tag $convert.tag $convert.convert_3N6 $convert.vcf_ids ## chrom option has been deprecated
--gensample "$output_gen,$output_samples"
#elif $convert.convert_to == 'hap_sample':
$convert.vcf_ids $convert.haploid2diploid
--hapsample "$output_hap,$output_samples"
#elif $convert.convert_to == 'hap_legend_sample':
$convert.vcf_ids $convert.haploid2diploid
--haplegendsample "$output_hap,$output_legend,$output_samples"
#end if
@SAMPLE_SEX@

## keep_duplicates is a boolean whose rendered value is already the complete
## flag (or an empty string when unchecked), so it is emitted as-is. Guarding
## it with a conditional and prefixing the flag again printed the option twice.
$keep_duplicates

## VCF input section
#set $section = $sec_restrict
@INCLUDE@
@EXCLUDE@
@REGIONS@
@TARGETS@
@SAMPLES@


## Primary Input/Outputs
## NOTE(review): the raw dataset path is passed here although an input
## preparation macro is expanded at the top of this template, and a lone "."
## follows the path -- confirm against macros.xml that both are intended.
"$input_file" .
]]>
    </command>
    <configfiles>
        <!-- File handed to the sex option when designations are typed into the form.
             For the "entry" source it holds the stripped text with every space replaced by a TAB;
             for the other sources the conditional emits nothing. -->
        <configfile name="entered_sex_file"><![CDATA[#slurp
#if $convert.sex_info.sex_info_src == 'entry':
$convert.sex_info.sex_info_lines.__str__.strip().replace(' ','\t')#slurp
#end if]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="macro_input" />
        <!-- Optional restrictions; the command template reads this section through its "section" variable. -->
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_restrict" />
            <expand macro="macro_restrict" type="target" label_type="Target" />
            <expand macro="macro_samples" />
            <expand macro="macro_include" />
            <expand macro="macro_exclude" />
        </section>
        <param argument="--keep-duplicates" type="boolean" truevalue="--keep-duplicates" falsevalue="" checked="false" label="Keep duplicates" help="Keep all multiallelic variants" />
        <!-- Output format selector; each branch exposes only the options its format uses. -->
        <conditional name="convert">
            <param name="convert_to" type="select" label="convert to">
                <option value="gen_sample">gen sample - IMPUTE2 or SHAPEIT</option>
                <option value="hap_sample">hap sample - IMPUTE2 or SHAPEIT</option>
                <option value="hap_legend_sample">hap legend sample - IMPUTE2 or SHAPEIT</option>
            </param>
            <when value="gen_sample">
                <!-- The label lists only the values that can be selected; GL stays disabled below. -->
                <param name="tag" type="select" label="tag to take values for .gen file: GT,PL,GP">
                    <option value="GT">GT</option>
                    <option value="PL">PL</option>
                    <option value="GP">GP</option>
                    <!--
                    <option value="GL">GL</option>
                    -->
                </param>
                <param name="convert_3N6" argument="--3N6" type="boolean" truevalue="--3N6" falsevalue="" checked="false" label="3N6 format" help="Use 3N+6 column format instead of the old 3N+5 column format" />
                <expand macro="macro_vcf_ids"/>
                <expand macro="macro_sexinfo"/>
            </when>
            <when value="hap_sample">
                <expand macro="macro_haploid2diploid"/>
                <expand macro="macro_vcf_ids"/>
                <expand macro="macro_sexinfo"/>
            </when>
            <when value="hap_legend_sample">
                <expand macro="macro_haploid2diploid"/>
                <expand macro="macro_vcf_ids"/>
                <expand macro="macro_sexinfo"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Every label is the input dataset name without its last extension, plus a format suffix. -->
        <!-- Created only for the gen_sample conversion. -->
        <data name="output_gen"
              format="tabular"
              label="${input_file.name.rsplit('.',1)[0]}.gen">
            <filter>convert['convert_to'] == 'gen_sample'</filter>
        </data>
        <!-- Created for both haplotype conversions. -->
        <data name="output_hap"
              format="tabular"
              label="${input_file.name.rsplit('.',1)[0]}.haps">
            <filter>convert['convert_to'] in ('hap_sample','hap_legend_sample')</filter>
        </data>
        <!-- Created only for the hap_legend_sample conversion. -->
        <data name="output_legend"
              format="tabular"
              label="${input_file.name.rsplit('.',1)[0]}.legend">
            <filter>convert['convert_to'] == 'hap_legend_sample'</filter>
        </data>
        <!-- Unfiltered: present for every conversion mode. -->
        <data name="output_samples"
              format="tabular"
              label="${input_file.name.rsplit('.',1)[0]}.samples"/>
    </outputs>
    <tests>
        <!-- gen_sample with default options: checks one row of the .gen output and one row of the .samples output. -->
        <test expect_num_outputs="2">
            <param name="input_file" ftype="vcf" value="convert.vcf" />
            <param name="convert_to"  value="gen_sample" />
            <output name="output_gen">
                <assert_contents>
                    <has_text text="X:2698560_G_A X:2698560_G_A 2698560 G A 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0"/>
                </assert_contents>
            </output>
            <output name="output_samples">
                <assert_contents>
                    <has_text text="NA00001 NA00001 0"/>
                </assert_contents>
            </output>
        </test>
        <!-- gen_sample taking values from the GP tag with the 3N+6 column format enabled. -->
        <test expect_num_outputs="2">
            <param name="input_file" ftype="vcf" value="convert.vcf" />
            <param name="convert_to"  value="gen_sample" />
            <param name="tag"  value="GP" />
            <param name="convert_3N6"  value="True" />
            <output name="output_gen">
                <assert_contents>
                    <has_text text="X:2698630_A_G X:2698630_A_G 2698630 A G 1.000000 0.000000 0.000000 1.000000 0.000000 0.000000 1.000000"/>
                </assert_contents>
            </output>
        </test>
        <!-- gen_sample on check.vcf with the GT tag, the 3N+6 column format and VCF IDs enabled. -->
        <test expect_num_outputs="2">
            <param name="input_file" ftype="vcf" value="check.vcf" />
            <param name="convert_to"  value="gen_sample" />
            <param name="tag"  value="GT" />
            <param name="convert_3N6"  value="True" />
            <param name="vcf_ids"  value="True" />
            <output name="output_gen">
                <assert_contents>
                    <has_text text="1:3062915_GTTT_G id3D 3062915 GTTT G 0 1 0 0 1 0"/>
                </assert_contents>
            </output>
        </test>
        <!-- hap_sample with default options: checks one row of the .haps output. -->
        <test expect_num_outputs="2">
            <param name="input_file" ftype="vcf" value="convert.vcf" />
            <param name="convert_to"  value="hap_sample" />
            <output name="output_hap">
                <assert_contents>
                    <has_text text="X X:2698769_AAG_A 2698769 AAG A 1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0"/>
                </assert_contents>
            </output>
        </test>
        <!-- hap_legend_sample with default options: checks the .haps, .legend and .samples outputs. -->
        <test expect_num_outputs="3">
            <param name="input_file" ftype="vcf" value="convert.vcf" />
            <param name="convert_to"  value="hap_legend_sample" />
            <output name="output_hap">
                <assert_contents>
                    <has_text text="1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0"/>
                </assert_contents>
            </output>
            <output name="output_legend">
                <assert_contents>
                    <has_text text="X:2698769_AAG_A 2698769 AAG A"/>
                </assert_contents>
            </output>
            <output name="output_samples">
                <assert_contents>
                    <has_text text="sample population group sex"/>
                    <has_text text="NA00001 NA00001 NA00001 2"/>
                </assert_contents>
            </output>
        </test>
        <!-- keep-duplicates option: same output checks as the default hap_legend_sample test, and the flag must appear in the generated command line. -->
        <test expect_num_outputs="3">
            <param name="input_file" ftype="vcf" value="convert.vcf" />
            <param name="convert_to"  value="hap_legend_sample" />
            <param name="keep_duplicates" value="true"/>
            <output name="output_hap">
                <assert_contents>
                    <has_text text="1 0 1 1 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0"/>
                </assert_contents>
            </output>
            <output name="output_legend">
                <assert_contents>
                    <has_text text="X:2698769_AAG_A 2698769 AAG A"/>
                </assert_contents>
            </output>
            <output name="output_samples">
                <assert_contents>
                    <has_text text="sample population group sex"/>
                    <has_text text="NA00001 NA00001 NA00001 2"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--keep-duplicates" />
            </assert_command>
        </test>
        <!-- regions_overlap set inside the sec_restrict section: same output checks as the default gen_sample test, and the regions-overlap option must appear in the generated command line. -->
        <test expect_num_outputs="2">
            <param name="input_file" ftype="vcf" value="convert.vcf" />
            <param name="convert_to"  value="gen_sample" />
            <section name="sec_restrict">
                <param name="regions_overlap" value="1"/>
            </section>
            <output name="output_gen">
                <assert_contents>
                    <has_text text="X:2698560_G_A X:2698560_G_A 2698560 G A 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0 1 0 0"/>
                </assert_contents>
            </output>
            <output name="output_samples">
                <assert_contents>
                    <has_text text="NA00001 NA00001 0"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--regions-overlap" />
            </assert_command>
        </test>
    </tests>

    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@ from vcf
=====================================

Converts VCF/BCF to other formats. See the man page for details of the file formats.

@REGIONS_HELP@
@TARGETS_HELP@
@EXPRESSIONS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
]]>
    </help>
    <expand macro="citations" />
</tool>
author	iuc
date	Tue, 16 Jul 2024 17:00:03 +0000
parents	9b29129fb491
children	012656fd329f