Mercurial > repos > iuc > gemini_load

<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.1">
    <description>Loading a VCF file into GEMINI</description>
    <!-- Imports the shared GEMINI macro file and defines the BINARY token, which supplies the
         gemini subcommand ("load") used in the tool id, the tool name and the command line. -->
    <macros>
        <import>gemini_macros.xml</import>
        <token name="@BINARY@">load</token>
    </macros>
    <!-- These three macros are not defined in this file (presumably they come from
         gemini_macros.xml; confirm there). -->
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version_command" />
    <!-- Command template: links the input VCF into the working directory, compresses it with
         bgzip, indexes it with tabix, then runs the gemini subcommand on the compressed file
         and writes the database to the output dataset. -->
    <command>
<![CDATA[
        @PROVIDE_ANNO_DATA@

        ## Use a fixed local name so the compressed copy and its tabix index
        ## are created next to it in the current working directory.
        ln -s "${ infile }" input.vcf &&
        bgzip -c input.vcf > input.vcf.gz &&
        tabix -p vcf input.vcf.gz &&

        gemini
            @BINARY@
            -v input.vcf.gz
            ## Only pass -t when an annotation type other than "None" was selected.
            #if str( $annotation_type ) != "None":
                -t "$annotation_type"
            #end if

            ## The PED file is optional; its path is quoted like the other file arguments.
            #if $ped:
                -p "$ped"
            #end if

            ## Each boolean below renders as its flag when checked and as an empty string otherwise.
            $skip_gerp_bp
            $skip_cadd
            $skip_gene_tables
            $no_load_genotypes
            $no_genotypes
            $passonly
            $infostring
            ## The dollar sign is escaped so the shell, not the template engine, expands
            ## GALAXY_SLOTS; 4 cores are used when it is unset.
            --cores \${GALAXY_SLOTS:-4}

            "${ outfile }"
]]>
    </command>
    <!-- Tool form parameters. Each boolean parameter renders directly as its command line flag
         (truevalue) or as an empty string (falsevalue) in the command template. -->
    <inputs>
        <!-- NOTE(review): the add_value filters presumably restrict the input to datasets with
             one of these build 37 dbkeys; confirm against the Galaxy data parameter docs. -->
        <param name="infile" type="data" format="vcf" label="VCF file to be loaded in the GEMINI database" help="Only build 37 (aka hg19) of the human genome is supported.">
            <options>
                <filter type="add_value" value="hg19" />
                <filter type="add_value" value="Homo_sapiens_nuHg19_mtrCRS" />
                <filter type="add_value" value="hg_g1k_v37" />
            </options>
        </param>

        <!-- The value "None" is special-cased in the command template: no -t option is passed. -->
        <param name="annotation_type" type="select" label="The annotations to be used with the input vcf" help="(-t)">
            <option value="None">None (not recommended)</option>
            <option value="snpEff" selected="True">snpEff annotated VCF file</option>
            <option value="VEP">VEP annotated VCF file</option>
        </param>
        <param name="ped" type="data" format="tabular" optional="True" label="Sample information file in PED+ format" help="(-p)" />
        <!-- Defined outside this file; presumably provides the annotation_databases parameter
             that the tests set (confirm in the macro file). -->
        <expand macro="annotation_dir" />

        <param name="skip_gerp_bp" type="boolean" truevalue="--skip-gerp-bp" falsevalue="" checked="False"
            label="Do not load GERP scores at base pair resolution" help="(--skip-gerp-bp)"/>

        <param name="skip_cadd" type="boolean" truevalue="--skip-cadd" falsevalue="" checked="False"
            label="Do not load CADD scores" help="(--skip-cadd)"/>

        <param name="skip_gene_tables" type="boolean" truevalue="--skip-gene-tables" falsevalue="" checked="False"
            label="Do not load gene tables" help="(--skip-gene-tables)"/>

        <param name="no_load_genotypes" type="boolean" truevalue="--no-load-genotypes" falsevalue="" checked="False"
            label="Genotypes exist in the file, but should not be stored" help="(--no-load-genotypes)"/>

        <param name="no_genotypes" type="boolean" truevalue="--no-genotypes" falsevalue="" checked="False"
            label="There are no genotypes in the file" help="e.g. some 1000G VCFs (--no-genotypes)"/>

        <!-- The help text previously repeated the 1000G example from no_genotypes, which does
             not describe this option. -->
        <param name="passonly" type="boolean" truevalue="--passonly" falsevalue="" checked="False"
            label="Keep only variants that pass all filters" help="(--passonly)"/>

        <param name="infostring" type="boolean" truevalue="--save-info-string" falsevalue="" checked="False"
            label="Load INFO string from VCF file" help="(--save-info-string)"/>
    </inputs>
    <!-- Single output dataset: the database file that the command template passes to gemini
         as its last argument. -->
    <outputs>
        <data name="outfile"
              format="gemini.sqlite" />
    </outputs>
    <!-- Functional tests. Every expected database is compared by size only (sim_size),
         allowing a difference of up to 1000 bytes. -->
    <tests>
        <!-- Test 1: GERP and CADD loading skipped, gene tables and genotypes loaded. -->
        <test>
            <param name="annotation_databases" value="1999-01-01" />
            <param name="infile" dbkey="hg19" value="gemini_load_input.vcf" ftype="vcf" />
            <param name="skip_gene_tables" value="False" />
            <param name="skip_gerp_bp" value="True" />
            <param name="skip_cadd" value="True" />
            <param name="no_genotypes" value="False" />
            <output name="outfile"
                    file="gemini_load_result1.db"
                    ftype="gemini.sqlite"
                    compare="sim_size"
                    delta="1000" />
        </test>
        <!-- Test 2: nothing skipped; stderr must report that the CADD and GERP annotation
             files could not be found, and the result is compared to the same database as test 1. -->
        <test>
            <param name="annotation_databases" value="1999-01-01" />
            <param name="infile" dbkey="hg19" value="gemini_load_input.vcf" ftype="vcf" />
            <param name="skip_gene_tables" value="False" />
            <param name="skip_gerp_bp" value="False" />
            <param name="skip_cadd" value="False" />
            <param name="no_genotypes" value="False" />
            <output name="outfile"
                    file="gemini_load_result1.db"
                    ftype="gemini.sqlite"
                    compare="sim_size"
                    delta="1000" />
            <assert_stderr>
                <has_text text="CADD scores are not being loaded because the annotation file could not be found." />
                <has_text text="GERP per bp is not being loaded because the annotation file could not be found." />
            </assert_stderr>
        </test>
        <!-- Test 3: gene tables, GERP and CADD skipped, and the input treated as having no genotypes. -->
        <test>
            <param name="annotation_databases" value="1999-01-01" />
            <param name="infile" dbkey="hg19" value="gemini_load_input.vcf" ftype="vcf" />
            <param name="skip_gene_tables" value="True" />
            <param name="skip_gerp_bp" value="True" />
            <param name="skip_cadd" value="True" />
            <param name="no_genotypes" value="True" />
            <output name="outfile"
                    file="gemini_load_result2.db"
                    ftype="gemini.sqlite"
                    compare="sim_size"
                    delta="1000" />
        </test>
        <!-- Test 4: a different VCF loaded together with a PED sample information file. -->
        <test>
            <param name="annotation_databases" value="1999-01-01" />
            <param name="infile" dbkey="hg19" value="gemini_amend.vcf" ftype="vcf" />
            <param name="skip_gene_tables" value="False" />
            <param name="skip_gerp_bp" value="True" />
            <param name="skip_cadd" value="True" />
            <param name="no_genotypes" value="False" />
            <param name="ped" value="gemini_amend.ped" ftype="tabular" />
            <output name="outfile"
                    file="gemini_auto_rec_input.db"
                    ftype="gemini.sqlite"
                    compare="sim_size"
                    delta="1000" />
        </test>
    </tests>
    <!-- User-facing help text for the tool; the content is wrapped in CDATA so it needs no
         XML escaping. -->
    <help><![CDATA[
**What it does**

Before we can use GEMINI to explore genetic variation, we must first load our VCF file into the GEMINI database framework.
We expect you to have first annotated the functional consequence of each variant in your VCF using either VEP or snpEff.

    ]]></help>
    <!-- Citation entries come from a macro that is not defined in this file. -->
    <expand macro="citations"/>
</tool>
author	iuc
date	Fri, 14 Dec 2018 13:01:22 -0500
parents	269c40fdcccb
children	b5b53c27baca