view GetHaplotypesFromPhasedVCF/getHaplotypesFromPhasedVCF.xml @ 13:734a3572c1d6 draft

Uploaded
author dereeper
date Tue, 08 Jan 2019 08:45:34 -0500
parents 88748d846a20
children
line wrap: on
line source

<tool id="getHaplotypesFromPhasedVCF" name="Get Haplotypes From Phased VCF" version="2.0.0">
    <description>Get Haplotypes From Phased VCF</description>

    <!-- External dependency: a perl executable is required to run the wrapped script. -->
    <requirements>
        <requirement type="binary">perl</requirement>
    </requirements>
    <!-- Any exit status of 1 or greater is treated as a fatal job error. -->
    <stdio>
        <exit_code range="1:" level="fatal"/>
    </stdio>
    <!--
        Runs the Perl script on the phased VCF with the user-supplied label as
        output prefix, then moves the three files named after that prefix onto
        the Galaxy output datasets. The steps are chained with "&&" so a failure
        stops the chain. Every substituted value is single-quoted so that a
        label or path containing spaces stays a single shell argument.
    -->
    <command interpreter="perl"><![CDATA[
        GetHaplotypesFromPhasedVCF.pl '$input' '$output_label'
        && mv '${output_label}.distinct_haplotypes.txt' '$output_distinct'
        && mv '${output_label}.haplo.fas' '$output_haplo'
        && mv '${output_label}.distinct_haplotypes.fa' '$output_distinct_fasta'
    ]]></command>
    <inputs>
        <!-- Phased VCF dataset; passed to the script as its first argument. -->
        <param name="input" type="data" format="vcf" label="Phased VCF"/>
        <!-- Prefix of the files moved onto the outputs; also reused in the output dataset labels. -->
        <param name="output_label" type="text" value="Haplotypes" label="Output_label"/>
    </inputs>
    <outputs>
        <!-- Each dataset receives one of the files moved by the command; labels reuse the user-supplied prefix. -->
        <data name="output_distinct"
              format="txt"
              label="${output_label}.distinct_haplotypes.txt"/>
        <data name="output_haplo"
              format="fasta"
              label="${output_label}.haplo.fas"/>
        <data name="output_distinct_fasta"
              format="fasta"
              label="${output_label}.distinct_haplotypes.fa"/>
    </outputs>
    <tests>
        <!--
            Single test: runs the tool on the test VCF without setting
            output_label (so its default value applies) and checks each output
            against its reference file by size only (compare="sim_size" with
            delta="0").
        -->
        <test>
            <param name="input" value="getHaplotypesFromPhasedVCF-input.vcf"/>
            <output name="output_distinct"
                    file="getHaplotypesFromPhasedVCF-result.distinct_haplotypes.txt"
                    compare="sim_size"
                    delta="0"/>
            <output name="output_haplo"
                    file="getHaplotypesFromPhasedVCF-result.haplo.fas"
                    compare="sim_size"
                    delta="0"/>
            <!-- NOTE(review): this reference file ends in ".fas" while the command moves a ".fa" file; confirm the test-data file name. -->
            <output name="output_distinct_fasta"
                    file="getHaplotypesFromPhasedVCF-result.distinct_haplotypes.fas"
                    compare="sim_size"
                    delta="0"/>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**Authors** Dereeper Alexis (alexis.dereeper@ird.fr), IRD, South Green platform

 | **Please cite** "SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations", **Dereeper A. et al.**, Nucl. Acids Res. (1 July 2015) 43 (W1).

.. class:: infomark

**Galaxy integration** Provided by Southgreen & Andres Gwendoline (Institut Français de Bioinformatique) & Marcon Valentin (IFB & INRA)

.. class:: infomark

**Support** For any questions about Galaxy integration, please send an e-mail to alexis.dereeper@ird.fr

---------------------------------------------------

==============================
Get Haplotypes From Phased VCF
==============================

-----------
Description
-----------

    | Get haplotypes from a phased VCF file

----------
Input file
----------

VCF file
        Phased VCF file

---------
Parameter
---------

Output file basename
        Prefix for the output files

------------
Output files
------------


Distinct Haplotypes text file
        File describing distinct haplotypes

Fasta file
        Fasta file with haplotypes

Distinct Haplotypes fasta file
        Fasta file with distinct haplotypes


---------------------------------------------------

---------------
Working example
---------------

Input file
==========

VCF file
---------

::

        ##fileformat=VCFv4.1
        ##FILTER=<ID=LowQual,Description="Low quality">
        ##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
        [...]
        #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  AZUCENA
        Chr1    4299    .       G       A       .       PASS    AR2=1;DR2=1;AF=0.168    GT:DS:GP        0|0:0:1,0,0

                              
Parameter
=========

Output name -> haplotypes


Output files
============

haplotypes.distinct_haplotypes.txt
----------------------------------

::

	===Chr10===
	haplo1:2:CIRAD403_1,CIRAD403_2,
	TTTAAGAAATTCCTATATAGGTCTTCTAAGCGTATCTATTAACAT
	haplo2:2:MAHAE_1,MAHAE_2,
	TAAATCTTGGTGCTGATCTGATATTTAATGCGT


haplotypes.haplo.fas
--------------------

::

	>Chr10_AZUCENA_1
	TTTAAGAAATTCCTATATAGGTCTTCTAAGCGTATCTATTAACAT
	>Chr10_AZUCENA_2
	TAAATCTTGGTGCTGATCTGATATTTAATGCGT

haplotypes.distinct_haplotypes.fa
---------------------------------

::

	>haplo1|2
	CAATTTATATATACTTGTATATAACCACAACGAGAGAGTTTTACCT
	TTTATAAAAAATAAATAATGTATTACGGCTAATATAGCAATCTTTT
	AAAATAAATCTATATTTAAATGACTATGGAATTACTAATCACAATA
	ACAGGATCTTGTTATTTTTAGCTTGTGTACTTATAATGATCCGATG
	>haplo2|2
	GCTACTTAAATATCTAGCATTAATCCACAACGAGAGGCTCTTACCT
	TTAAAAAAGGGTCATCGCCTATAGGTTAGATAATCGACACATATAA
	TTATAAGAAATTATATATAATTTTTAATCTAGTTCATTCTTGTGCA
	TCATTATGTTATATAATAATAAACGTAACAAATATTGATACTACTC


    ]]></help>
    <citations>
        <!-- SNiPlay3 publication, given as a single BibTeX entry (braces balanced). -->
        <citation type="bibtex">@article{Dereeper03062015,
        author = {Dereeper, Alexis and Homa, Felix and Andres, Gwendoline and Sempere, Guilhem and Sarah, Gautier and Hueber, Yann and Dufayard, Jean-François and Ruiz, Manuel},
        title = {SNiPlay3: a web-based application for exploration and large scale analyses of genomic variations},
        year = {2015},
        doi = {10.1093/nar/gkv351},
        abstract ={SNiPlay is a web-based tool for detection, management and analysis of genetic variants including both single nucleotide polymorphisms (SNPs) and InDels. Version 3 now extends functionalities in order to easily manage and exploit SNPs derived from next generation sequencing technologies, such as GBS (genotyping by sequencing), WGRS (whole gre-sequencing) and RNA-Seq technologies. Based on the standard VCF (variant call format) format, the application offers an intuitive interface for filtering and comparing polymorphisms using user-defined sets of individuals and then establishing a reliable genotyping data matrix for further analyses. Namely, in addition to the various scaled-up analyses allowed by the application (genomic annotation of SNP, diversity analysis, haplotype reconstruction and network, linkage disequilibrium), SNiPlay3 proposes new modules for GWAS (genome-wide association studies), population stratification, distance tree analysis and visualization of SNP density. Additionally, we developed a suite of Galaxy wrappers for each step of the SNiPlay3 process, so that the complete pipeline can also be deployed on a Galaxy instance using the Galaxy ToolShed procedure and then be computed as a Galaxy workflow. SNiPlay is accessible at http://sniplay.southgreen.fr.},
        URL = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.abstract},
        eprint = {http://nar.oxfordjournals.org/content/early/2015/06/03/nar.gkv351.full.pdf+html},
        journal = {Nucleic Acids Research}
        }</citation>
    </citations>

</tool>