diff VCFToolFilter/vcfToolsFilter.xml @ 24:21d878747ac6 draft default tip

Uploaded
author dereeper
date Mon, 23 Mar 2015 05:53:20 -0400
parents 50bd37c444ac
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/VCFToolFilter/vcfToolsFilter.xml	Mon Mar 23 05:53:20 2015 -0400
@@ -0,0 +1,268 @@
+<tool id="sniplay_vcftoolsfilter" name="VCF tools filter" version="1.1.1">
+    
+    <!-- [REQUIRED] Short tool description; Galaxy displays it right after the tool name -->
+    <description> Filter VCF using VCFtools</description>
+    
+    <!-- [OPTIONAL] External dependencies (third-party tools, binaries, modules...) the tool needs in order to run -->
+    <requirements>
+        <requirement type="binary">perl</requirement>
+        <requirement version="0.1.12b" type="package">VCFtools</requirement>
+    </requirements>
+    
+    <!-- [OPTIONAL] Command to be executed to get the tool's version string.
+         No command is configured yet: the element only holds a commented-out placeholder. -->
+    <version_command>
+<!--
+        tool_binary -v
+-->
+    </version_command>
+    
+    <!-- [REQUIRED] The command to execute.
+         Calls the vcfToolsFilter.sh wrapper with purely positional arguments, in this order:
+         input VCF, output basename, main output, log output, export format, minimum MAF,
+         maximum MAF, missing data proportion, minimum and maximum allele count, polymorphism
+         type, lower and upper bound, samples, chromosomes, PLINK map output.
+         The output basename is free text, so it is single-quoted to stay a single argument
+         even when it contains blanks; otherwise every following argument would shift.
+         Empty optional lists are passed as the literal 'None', and the map output is passed
+         as an empty string unless the plink export is selected.
+         NOTE(review): the wrapper has a .sh extension, so it is launched with bash instead
+         of perl; confirm that vcfToolsFilter.sh really is a shell script. -->
+    <command interpreter="bash">
+        vcfToolsFilter.sh $filein '$fileout_label' $fileout $filelog $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end
+        #if str( $samples ) == "":
+        'None'
+        #else
+        $samples
+        #end if
+        #if str( $chromosomes ) == "":
+        'None'
+        #else
+        $chromosomes
+        #end if
+        #if str( $export ) == "plink":
+        $fileout_map
+        #else
+        ''
+        #end if
+    </command>
+     
+    <!-- [REQUIRED] Input files and tool parameters.
+         Samples and chromosomes are optional comma separated lists checked by a regex validator.
+         Both MAF thresholds are limited to the 0..1 range, like the missing data proportion,
+         so that out-of-range frequencies are rejected by the form instead of being handed to
+         the wrapper script. Allele counts are limited to 2..4. -->
+    <inputs>
+        <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />
+        <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>
+        <param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">
+            <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
+        </param>
+        <param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. Comma separated list">
+            <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
+        </param>
+        <param name="export" type="select" label="Output format" >
+            <option value="VCF" selected="true">VCF</option>
+            <option value="freq">freq</option>
+            <option value="plink">plink</option>
+        </param>
+        <param name="frequency" type="float" value="0.001" min="0" max="1" label="Minimum MAF." help="Minimum frequency." />
+        <param name="max_freq" type="float" value="0.5" min="0" max="1" label="Maximum MAF." help="Maximum frequency." />
+        <param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />
+        <param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />
+        <param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />
+        <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >
+            <option value="ALL" selected="true">All</option>
+            <option value="SNP">SNP</option>
+            <option value="INDEL">Indel</option>
+        </param>
+        <param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />
+        <param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." />
+    </inputs>
+    
+    <!-- [REQUIRED] Output files.
+         fileout is declared as vcf and switched to tabular (freq export) or txt (plink export);
+         fileout_map is only produced when the plink export is selected; filelog is always produced.
+         NOTE(review): the blanks separating the inline #if directives in the fileout label look
+         like they are emitted verbatim, which would leave stray spaces in the dataset name;
+         confirm before changing the label. -->
+    <outputs>
+        <data name="fileout" format="vcf" label="${fileout_label}.#if str($export)=='plink' then 'ped' else '' # #if str($export)=='freq' then 'frq' else '' # #if str($export)=='VCF' then 'vcf' else '' #" >
+            <change_format>
+                <when input="export" value="freq" format="tabular" />
+                <when input="export" value="plink" format="txt" />
+            </change_format>
+        </data>
+        <data name="fileout_map" format="txt" label="${fileout_label}.map">
+            <filter>(export == 'plink')</filter>
+        </data>
+        <data name="filelog" format="txt" label="${fileout_label}.log" />
+    </outputs>
+    
+    <!-- [STRONGLY RECOMMENDED] Exit code rules: any exit code of 1 or above is treated as a fatal error -->
+    <stdio>
+        <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+    
+    <!-- [OPTIONAL] Functional tests, to be run manually by the Galaxy admin -->
+    <tests>
+        <!-- [HELP] Test files have to be in the ~/test-data directory -->
+        <!-- VCF export restricted to chr1; the outputs are compared with result.vcf and result.log -->
+        <test>
+            <param name="filein" value="sample.vcf" />
+            <param name="chromosomes" value="chr1" />
+            <param name="export" value="VCF" />
+            <param name="frequency" value="0.001" />
+            <param name="max_freq" value="0.5" />
+            <param name="allow_missing" value="0" />
+            <param name="nb_alleles_min" value="2" />
+            <param name="nb_alleles_max" value="4" />
+            <param name="type_p" value="ALL" />
+            <param name="bound_start" value="1" />
+            <param name="bound_end" value="100000000" />
+            <output name="fileout" file="result.vcf" />
+            <output name="filelog" file="result.log" />
+        </test>
+    </tests>
+    
+    <!-- [OPTIONAL] Help displayed in Galaxy -->
+    <help>
+
+.. class:: infomark
+
+**Authors** 
+
+---------------------------------------------------
+
+.. class:: infomark
+
+**Please cite** If you use this tool, please cite Dereeper et al. 2015 in prep.
+
+---------------------------------------------------
+
+================
+VCF tools filter
+================
+
+-----------
+Description
+-----------
+
+  Filters a VCF file using VCFtools.
+
+-----------------
+Workflow position
+-----------------
+
+**Upstream tools**
+
+=========== ========================== =======
+Name            output file(s)         format 
+=========== ========================== =======
+=========== ========================== =======
+
+
+**Downstream tools**
+
+=========== ========================== =======
+Name            output file(s)         format
+=========== ========================== =======
+=========== ========================== =======
+
+
+----------
+Input file
+----------
+
+VCF file
+	VCF file with all SNPs
+
+----------
+Parameters
+----------
+
+Output file basename
+	Prefix for the output VCF file
+
+Samples
+        Samples to be analyzed. Comma separated list
+
+Chromosomes
+	Chromosomes to be analyzed. Comma separated list
+
+Output format
+	VCF/freq/plink
+
+Minimum MAF
+	Minimum frequency
+
+Maximum MAF
+	Maximum frequency
+
+Missing data proportion
+	Allowed missing data proportion per site. Must be between 0 and 1.
+
+Number of alleles
+	Accepted number of alleles min and max.
+
+Polymorphisms
+	Type of polymorphisms to keep (ALL/SNP/INDEL).
+
+Bounds
+	Lower bound and upper bound for a range of sites to be processed.
+
+------------
+Output files
+------------
+
+VCF file
+	Filtered VCF file (or freq / plink files, depending on the selected output format)
+
+Log file
+	Log file of the filtering run
+
+---------------------------------------------------
+
+---------------
+Working example
+---------------
+
+Input files
+===========
+
+VCF file
+---------
+
+::
+
+	#fileformat=VCFv4.1
+	#FILTER=&lt;ID=LowQual,Description="Low quality">
+	#FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+	[...]
+	CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	CATB1
+	chr1	2209	.	G	T	213.84	.	AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)	GT:AD:DP:GQ:PL	1/1:0,7:7:18:242,18,0
+
+
+Parameters
+==========
+
+Output file basename -> filtered_chr1
+
+Chromosomes -> chr1
+
+Output format -> VCF
+
+Minimum MAF -> 0.001
+
+Maximum MAF -> 0.5
+
+Missing data proportion -> 1
+
+Number of alleles min -> 2
+
+Number of alleles max -> 4
+
+Polymorphisms -> All
+
+Lower bound -> 1
+
+Upper bound -> 100000000
+
+
+Output files
+============
+
+filtered_chr1.vcf
+-----------------
+
+::
+
+        #fileformat=VCFv4.1
+        #FILTER=&lt;ID=LowQual,Description="Low quality"&gt;
+        #FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+        [...]
+        CHROM   POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  CATB1
+	chr1	5059	.	C	G	146.84	.	AC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|)	GT:AD:DP:GQ:PL	1/1:0,8:8:18:175,18,0
+
+
+    </help>
+    
+</tool>