view VCFToolFilter/vcfToolsFilter.xml @ 21:50bd37c444ac draft

Uploaded
author dereeper
date Mon, 23 Mar 2015 05:35:48 -0400
parents
children
line wrap: on
line source

<tool id="sniplay_vcftoolsfilter" name="VCF tools filter" version="1.1.1">
    
    <!-- [REQUIRED] Tool description displayed after the tool name -->
    <description> Filter VCF using VCFtools</description>
    
    <!-- [OPTIONAL] External programs and packages the tool depends on at run time -->
    <requirements>
        <requirement type="binary">perl</requirement>
        <requirement type="package" version="0.1.12b">VCFtools</requirement>
    </requirements>
    
    <!-- [OPTIONAL] Command to be executed to get the tool's version string.
         No command is configured here: the element body holds only the
         commented-out template placeholder below. -->
    <version_command>
<!--
        Template placeholder, not an actual command:
        tool_binary -v
-->
    </version_command>
    
    <!-- [REQUIRED] The command to execute.
         vcfToolsFilter.sh receives its arguments positionally, in this order:
           input VCF, output basename, main output, log output, export format,
           minimum MAF, maximum MAF, missing data proportion,
           minimum alleles, maximum alleles, polymorphism type,
           lower bound, upper bound,
           samples (or 'None' when empty), chromosomes (or 'None' when empty),
           PLINK map output (or an empty string when the export format is not plink).
         The output basename is free text, so it is single-quoted to keep a value
         containing spaces as one argument and the following positions stable.
         The wrapper has a .sh extension and is therefore launched with bash.
         NOTE(review): confirm vcfToolsFilter.sh is bash-compatible. -->
    <command interpreter="bash">
        vcfToolsFilter.sh $filein '$fileout_label' $fileout $filelog $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end
        #if str( $samples ) == "":
        'None'
        #else
        $samples
        #end if
        #if str( $chromosomes ) == "":
        'None'
        #else
        $chromosomes
        #end if
        #if str( $export ) == "plink":
        $fileout_map
        #else
        ''
        #end if
    </command>
     
    <!-- [REQUIRED] Input files and tool parameters -->
    <inputs>
        <!-- Dataset to filter and the basename used to label every output -->
        <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />
        <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>

        <!-- Optional restrictions; both must be comma separated lists of word characters -->
        <param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">
            <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
        </param>
        <param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. Comma separated list">
            <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
        </param>

        <!-- Export format; it also drives the output labels, formats and the optional map file -->
        <param name="export" type="select" label="Output format" >
            <option value="VCF" selected="true">VCF</option>
            <option value="freq">freq</option>
            <option value="plink">plink</option>
        </param>

        <!-- Frequency thresholds are proportions, so they are range-checked to [0, 1]
             in the same way as allow_missing -->
        <param name="frequency" type="float" value="0.001" min="0" max="1" label="Minimum MAF." help="Minimum frequency." />
        <param name="max_freq" type="float" value="0.5" min="0" max="1" label="Maximum MAF." help="Maximum frequency." />
        <param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />

        <!-- Accepted allele count range per site -->
        <param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />
        <param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />

        <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >
            <option value="ALL" selected="true">All</option>
            <option value="SNP">SNP</option>
            <option value="INDEL">Indel</option>
        </param>

        <!-- Range of sites to be processed -->
        <param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />
        <param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." />
    </inputs>
    
    <!-- [REQUIRED] Output files -->
    <outputs>
        <!-- Main result. The label is "<basename>.<ext>" where ext is ped, frq or vcf
             depending on the selected export format. A single inline #if chain is used
             so that no literal spaces are emitted between the basename and the extension. -->
        <data name="fileout" format="vcf" label="${fileout_label}.#if str($export)=='plink'#ped#else if str($export)=='freq'#frq#else if str($export)=='VCF'#vcf#end if#" >
            <!-- The dataset format follows the export format; VCF keeps the default -->
            <change_format>
                <when input="export" value="freq" format="tabular" />
                <when input="export" value="plink" format="txt" />
            </change_format>
        </data>
        <!-- Map file, only created when the export format is plink -->
        <data name="fileout_map" format="txt" label="${fileout_label}.map">
            <filter>(export == 'plink')</filter>
        </data>
        <!-- Log file, always created -->
        <data name="filelog" format="txt" label="${fileout_label}.log" />
    </outputs>
    
    <!-- [STRONGLY RECOMMENDED] Exit code rules -->
    <stdio>
        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
        <!-- Any exit code of 1 or greater is reported as a fatal error -->
        <exit_code range="1:" level="fatal" />
    </stdio>
    
    <!-- [OPTIONAL] Functional test, to be run manually by the Galaxy admin -->
    <tests>
        <!-- [HELP] Test files have to be in the ~/test-data directory -->
        <test>
            <!-- Filter sample.vcf on chr1, VCF export, up to four alleles, no missing data allowed -->
            <param name="filein" value="sample.vcf" />
            <param name="chromosomes" value="chr1" />
            <param name="export" value="VCF" />
            <param name="frequency" value="0.001" />
            <param name="max_freq" value="0.5" />
            <param name="allow_missing" value="0" />
            <param name="nb_alleles_min" value="2" />
            <param name="nb_alleles_max" value="4" />
            <param name="type_p" value="ALL" />
            <param name="bound_start" value="1" />
            <param name="bound_end" value="100000000" />
            <!-- Expected filtered VCF and log -->
            <output name="fileout" file="result.vcf" />
            <output name="filelog" file="result.log" />
        </test>
    </tests>
    
    <!-- [OPTIONAL] Help displayed in Galaxy -->
    <help>

.. class:: infomark

**Authors** 

---------------------------------------------------

.. class:: infomark

**Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep.

---------------------------------------------------

================
VCF tools filter
================

-----------
Description
-----------

  Filter VCF file 

-----------------
Workflow position
-----------------

**Upstream tools**

=========== ========================== =======
Name            output file(s)         format 
=========== ========================== =======
=========== ========================== =======


**Downstream tools**

=========== ========================== =======
Name            output file(s)         format
=========== ========================== =======
=========== ========================== =======


----------
Input file
----------

VCF file
	VCF file with all SNPs

----------
Parameters
----------

Output file basename
	Prefix for the output VCF file

Samples
        Samples to be analyzed. Comma separated list

Chromosomes
	Chromosomes to be analyzed. Comma separated list

Output format
	VCF/freq/plink

Minimum MAF
	Minimum frequency

Maximum MAF
	Maximum frequency

Missing data proportion
	Allowed missing data proportion per site. Must be comprised between 0 and 1.

Number of alleles
	Accepted number of alleles min and max.

Polymorphisms
	Type of polymorphisms to keep (ALL/SNP/INDEL).

Bounds
	Lower bound and upper bound for a range of sites to be processed.

------------
Output files
------------

VCF file
	VCF file filtered 

Log file

---------------------------------------------------

---------------
Working example
---------------

Input files
===========

VCF file
---------

::

	#fileformat=VCFv4.1
	#FILTER=&lt;ID=LowQual,Description="Low quality">
	#FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
	[...]
	CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	CATB1
	chr1	2209	.	G	T	213.84	.	AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)	GT:AD:DP:GQ:PL	1/1:0,7:7:18:242,18,0


Parameters
==========

Output name -> filtered_chr1

Chromosomes -> chr1

Output format -> VCF

Minimum MAF -> 0.001

Maximum MAF -> 0.5

Missing data proportion -> 1

Number of alleles min -> 2

Number of alleles max -> 4

Polymorphisms -> All

Lower bound -> 1

Upper bound -> 100000000


Output files
============

filtered_chr1.vcf
-----------------

::

        #fileformat=VCFv4.1
        #FILTER=&lt;ID=LowQual,Description="Low quality"&gt;
        #FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
        [...]
        CHROM   POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  CATB1
	chr1	5059	.	C	G	146.84	.	AC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)	GT:AD:DP:GQ:PL	1/1:0,8:8:18:175,18,0


    </help>
    
</tool>