view VCFToolsStats/vcfToolsStats.xml @ 21:50bd37c444ac draft

Uploaded
author dereeper
date Mon, 23 Mar 2015 05:35:48 -0400
parents
children
line wrap: on
line source

<tool id="sniplay_vcftoolsstats" name="VCF tools Stats" version="1.0.0">
    
    <!-- [REQUIRED] Tool description displayed after the tool name -->
    <description> Various statistics from VCF using VCFtools</description>
    
    <!-- [OPTIONAL] External dependencies (binaries, packages) the tool needs in order to run -->
    <requirements>
        <requirement type="binary">perl</requirement>
        <requirement type="package" version="0.1.13">VCFtools</requirement>
    </requirements>
    
    <!-- [OPTIONAL] Command to be executed to get the tool's version string -->
    <version_command>
<!--
        Placeholder: no version command is configured yet.
        Replace this comment with the real command, for example:
        tool_binary -v
-->
    </version_command>
    
    <!-- [REQUIRED] The command to execute -->
    <!-- The wrapper is a shell script (.sh), so it is launched with bash rather than perl.
         Arguments, in order: input VCF, output basename, then the annotation, het, imiss,
         TsTv summary and log output datasets. Each argument is quoted so that a basename
         or path containing whitespace is still passed as a single argument. -->
    <command interpreter="bash">
        vcfToolsStats.sh '$filein' '$fileout_label' '$fileout_annot' '$fileout_het' '$fileout_imiss' '$fileout_sum' '$filelog'
    </command>
     
    <!-- [REQUIRED] Input files and tool parameters -->
    <inputs>
        <!-- VCF dataset to compute statistics on -->
        <param name="filein" type="data" format="vcf" label="VCF input" optional="false" />
        <!-- Basename passed to the script and used to label the output datasets -->
        <param name="fileout_label" type="text" label="Output file basename" value="vcf_stats" optional="false" />
    </inputs>
    
    <!-- [REQUIRED] Output files -->
    <!-- Five text datasets, each labelled with the user-supplied basename plus a fixed suffix -->
    <outputs>
        <data name="fileout_annot" label="${fileout_label}.annotation" format="txt" />
        <data name="fileout_het" label="${fileout_label}.het" format="txt" />
        <data name="fileout_imiss" label="${fileout_label}.imiss" format="txt" />
        <data name="fileout_sum" label="${fileout_label}.TsTv.summary" format="txt" />
        <data name="filelog" label="${fileout_label}.log" format="txt" />
    </outputs>
    
    <!-- [STRONGLY RECOMMENDED] Exit code rules -->
    <stdio>
        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
        <!-- Any exit code of 1 or above is reported as a fatal error -->
        <exit_code level="fatal" range="1:" />
    </stdio>
    
    <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
    <tests>
        <!-- [HELP] Test files have to be in the ~/test-data directory -->
        <!-- Single test: one input VCF, one expected file for each of the five outputs -->
        <test>
            <param name="filein" value="sample.vcf" />
            <output name="fileout_annot" file="result.annotation" />
            <output name="fileout_het" file="result.het" />
            <output name="fileout_imiss" file="result.imiss" />
            <output name="fileout_sum" file="result.TsTv.summary" />
            <output name="filelog" file="result.log" />
        </test>
    </tests>
    
    <!-- [OPTIONAL] Help displayed in Galaxy -->
    <help>

.. class:: infomark

**Authors** 

---------------------------------------------------

.. class:: infomark

**Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep.

---------------------------------------------------

===============
VCF tools Stats
===============

-----------
Description
-----------

  Compute statistics on VCF file 

-----------------
Workflow position
-----------------

**Upstream tools**

=========== ========================== =======
Name            output file(s)         format 
=========== ========================== =======
=========== ========================== =======


**Downstream tools**

=========== ========================== =======
Name            output file(s)         format
=========== ========================== =======
=========== ========================== =======


----------
Input file
----------

VCF file
	VCF file with all SNPs

----------
Parameters
----------

Output file basename
	Prefix used to name the output files

------------
Output files
------------

.annotation file
	Statistics on annotation/location along genome

.het file 
	Statistics on heterozygosity of the individuals

.imiss file
	Statistics on missing data of the individuals

.TsTv.summary file
	Statistics on mutation types and transition/transversion number

.log file

---------------------------------------------------

---------------
Working example
---------------

Input files
===========

VCF file
---------

::

	#fileformat=VCFv4.1
	#FILTER=&lt;ID=LowQual,Description="Low quality">
	#FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
	[...]
	CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	CATB1
	chr1	2209	.	G	T	213.84	.	AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)	GT:AD:DP:GQ:PL	1/1:0,7:7:18:242,18,0


Parameters
==========

Output file basename -> vcf_stats


Output files
============

.annotation file
----------------

::

	Genic	4489
	Intergenic	466
	========
	Intron	960
	Exon	3248
	UTR	281
	========
	Non-syn	226
	Synonym	3022
        
.het file
---------

::

	INDV	O(HOM)	E(HOM)	N_SITES	F
	CATB1	0	0.0	3616	0.00000

.imiss file
-----------

::

	INDV	N_DATA	N_GENOTYPES_FILTERED	N_MISS	F_MISS
	CATB1	4813	0	0	0

.TsTv.summary file
------------------

::

	MODEL	COUNT
	AC	371
	AG	1467
	AT	562
	CG	330
	CT	1659
	GT	397
	Ts	3126
	Tv	1660


    </help>
    
</tool>