diff vcf_gff.xml @ 1:a0689dc29b7f draft

Updated vcf to gff conversion tool
author john-mccallum
date Tue, 31 Jul 2012 00:33:11 -0400
parents
children b321e0517be3
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcf_gff.xml	Tue Jul 31 00:33:11 2012 -0400
@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+<tool id="vcf2gff_1" name="Convert vcf to gff">
+  <description>Convert vcf to gff</description>
+  <command interpreter="python">vcf_gff.py $inputVcf  $outputfile</command>
+  <inputs>
+    <param format="vcf" name="inputVcf" type="data" label="vcf file containing SNP data"/>
+  </inputs>
+  <outputs>
+     <data format="gff3" name="outputfile" />
+  </outputs>
+<help>
+
+** Convert vcf to gff3**
+
+This tool takes vcf output from Samtools mpileup and converts to gff3 format.
+It converts a single vcf output file containing output from a single bam file or from multiple bam files.
+Read counts and GT scores are used as an indicator of whether a mutation is homozygous or heterozygous and outputs in the INFO section.
+
+**TIP**
+mpileup **must** be run with Genotype Likelihood Computation selected (-g flag)to generate the GT flag in BCF/VCF output. 
+This then used to estimate the SNP presence in one or more samples.  
+More info is available in the manual pages at: http://samtools.sourceforge.net/mpileup.shtml
+
+
+**Example**
+
+--input vcf
+
+::
+
+	#CHROM    POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  bam
+	PGSC0003DMB000000001    430     .       A       T       3.41    .       DP=4;AF1=0.9904;CI95=0.125,1;DP4=0,0,2,0;MQ=17  PL:DP:GT:GQ     32,6,0:2:1/1:23
+	PGSC0003DMB000000001    445     .       G       T       3.41    .       DP=4;AF1=0.9904;CI95=0.125,1;DP4=0,0,2,0;MQ=17  PL:DP:GT:GQ     32,6,0:2:1/1:23 
+	PGSC0003DMB000000001    446     .       G       A       3.41    .       DP=4;AF1=0.9904;CI95=0.125,1;DP4=0,0,2,0;MQ=17  PL:DP:GT:GQ     32,6,0:2:1/1:23 
+	PGSC0003DMB000000001    452     .       C       T       3.41    .       DP=4;AF1=0.9904;CI95=0.125,1;DP4=0,0,2,0;MQ=17  PL:DP:GT:GQ     32,6,0:2:1/1:23
+
+--output gff
+
+::
+
+	#CHROM    SOURCE    TYPE    START    STOP    QUAL    STRAND   PHASE    INFO
+	PGSC0003DMB000000001    SAMTOOLS        SNP     430     430     3.41    .       .       ID=PGSC0003DMB000000001:SNP:430;Variant_seq=T;Reference_seq=A;Total_reads=4:Zygosity=2:HOM_mut
+	PGSC0003DMB000000001    SAMTOOLS        SNP     445     445     3.41    .       .       ID=PGSC0003DMB000000001:SNP:445;Variant_seq=T;Reference_seq=G;Total_reads=4:Zygosity=2:HOM_mut
+	PGSC0003DMB000000001    SAMTOOLS        SNP     446     446     3.41    .       .       ID=PGSC0003DMB000000001:SNP:446;Variant_seq=A;Reference_seq=G;Total_reads=4:Zygosity=2:HOM_mut
+	PGSC0003DMB000000001    SAMTOOLS        SNP     452     452     3.41    .       .       ID=PGSC0003DMB000000001:SNP:452;Variant_seq=T;Reference_seq=C;Total_reads=4:Zygosity=2:HOM_mut
+
+
+
+-----------------------
+
+*If you use this tool please cite:*
+
+A Toolkit For Bulk PCR-Based Marker Design From Next-Generation Sequence Data: 
+Application For  Development Of A Framework Linkage Map In Bulb Onion (*Allium cepa* L.)
+(2012)
+
+Samantha Baldwin, Roopashree Revanna, Susan Thomson, Meeghan Pither-Joyce, Kathryn Wright, 
+Ross Crowhurst, Mark Fiers, Leshi Chen, Richard MacKnight, John A. McCallum
+
+</help>
+</tool>
\ No newline at end of file