changeset 1:85bdf226b67b default tip

add vcf2genotypes and test-data
author superyuan <shuaiyuan.emory@gmail.com>
date Mon, 30 Jun 2014 10:49:54 -0400
parents 9259f2939357
children
files test-data/hg19test.fa test-data/test.fastq test-data/test.vcf tool_conf.xml tools/refeditor/vcf2genotypes.py tools/refeditor/vcf2genotypes.xml
diffstat 6 files changed, 109 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19test.fa	Mon Jun 30 10:49:54 2014 -0400
@@ -0,0 +1,12 @@
+>chr1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+TCAGTGTCCTCCCATGGATGTTAATGTTCTCCATATGATGTCAGTGTCCT
+CCATATGATGTCAGTGTCCTCCATATGACATCAATATCCTCCATATGATA
+TCAATATCCTCTGTATTGATATTGATATTGATATTTGGAGGATATCAATA
+TCCTCCAAATGATGTCAGTGTCCTCCATATGATGTCAATGTCCTCCATAT
+GATGTCAATATCCTCCGTATGATGTCAATATCCTCCGTATGATGTCAATA
+TCCTCCATATGATGTCAGTGTCCTCTGTATGACATCAATATCCTCCATAC
+GATGCCCCTGTCCTTCATATGATGTCAGTGTCCTTTTGTGAGCACCAGTG
+TCCTTTGTATGACATCAGTAGTCTCCCATGAATGTCACTGTCTTCCCATA
+GATGTCAGTGTCCTCTccaaaagacaagcagaagctgttcatggaatgat
+gtgggggaaccttccagaaagaggcaacatcatgtgctaaggtcccaggt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fastq	Mon Jun 30 10:49:54 2014 -0400
@@ -0,0 +1,4 @@
+@ERR000038.1 BGI-FC302WFAAXX_5_1_1204:1431
+CCTTCATATGATGTCAGAGTCCTCTGTATGACCTCA
++
+IIIIII$IIII(IIIIIIIIIIIII2@IIIII:I>&
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.vcf	Mon Jun 30 10:49:54 2014 -0400
@@ -0,0 +1,8 @@
+##fileformat=VCFv4.0
+##fileDate=20110201
+##reference=NCBI37
+##source=Affymetrix_Axiom_DB
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA18501	NA18502	NA18504	NA18505	NA18507	NA18508	NA18516	NA18517	NA18522	NA18523	NA18852	NA18853	NA18855	NA18856	NA18858	NA18859	NA18861	NA18862	NA18870	NA18871	NA18912	NA18913	NA19092	NA19093	NA19098	NA19099	NA19101	NA19102	NA19116	NA19119	NA19127	NA19128	NA19130	NA19131	NA19137	NA19138	NA19140	NA19141	NA19143	NA19144	NA19152	NA19153	NA19159	NA19160	NA19171	NA19172	NA19192	NA19193	NA19200	NA19201	NA19203	NA19204	NA19206	NA19207	NA19209	NA19210	NA19222	NA19223	NA19238	NA19239
+1	305	rs5	C	T	.	PASS	.	GT	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/0	0/1	0/1	0/0
+1	334	.	A	C	.	PASS	.	GT	0/0	0/0	./.	0/0	./.	0/0	./.	./.	0/0	./.	0/0	0/0	./.	0/0	./.	0/0	0/0	0/0	./.	./.	0/0	./.	0/0	0/0	./.	0/0	./.	./.	./.	0/0	0/0	./.	0/0	0/0	0/0	./.	0/0	0/0	0/0	./.	0/0	./.	./.	0/0	./.	0/0	./.	0/0	0/0	0/0	./.	./.	0/0	0/0	0/0	./.	0/0	0/0	0/1	0/0
--- a/tool_conf.xml	Thu Jun 12 04:56:10 2014 -0400
+++ b/tool_conf.xml	Mon Jun 30 10:49:54 2014 -0400
@@ -1,6 +1,7 @@
 <?xml version="1.0"?>
 <toolbox>
  <section name="Reference Editor" id="refeditor">
+    <tool file="refeditor/vcf2genotypes.xml" />
     <tool file="refeditor/DiploidConstructor.xml" />
     <tool file="refeditor/MappingConverter.xml" />
   </section>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/refeditor/vcf2genotypes.py	Mon Jun 30 10:49:54 2014 -0400
@@ -0,0 +1,66 @@
+# Filename: vcf2genotypes.py
+# Author: Shuai Yuan
+# Version: 06/29/2012
+#
+# This script is a wrapper for vcf2genotypes
+#
+#  vcf2genotypes is launched based on these inputs:
+#  -i		input VCF file
+#  -p		target individual
+#  -o		output genotypes file
+
+import sys
+import os
+import re
+import string
+import commands
+from tempfile import NamedTemporaryFile
+
+# This function is exceedingly useful, perhaps package for reuse?
+def getopts(argv):
+    """Map each '-flag' word to the word after it (IndexError if none)."""
+    opts, i = {}, 0
+    while i < len(argv):
+        if argv[i].startswith('-'):  # safe for '' (argv[i][0] was not)
+            opts[argv[i]] = argv[i + 1]
+            i += 1  # skip the value just consumed
+        i += 1
+    return opts
+
+def main():
+    """Run vcf2genotypes for one individual and return an exit status.
+
+    Options come from sys.argv: -i input VCF, -p individual, -o output file.
+    Returns 2 for malformed options, -1/-2/-6 when -i/-p/-o is missing or
+    empty, 127 when the run cannot be started, else the tool's return code.
+    """
+    import subprocess  # local import: only this function needs it
+    try:
+        opts = getopts(sys.argv[1:])
+    except IndexError:  # getopts: an option was given without its value
+        sys.stderr.write("Usage: vcf2genotypes.py -i VCF -p INDIVIDUAL -o OUT\n")
+        return 2
+    required = (("-i", "No input VCF file specified.", -1),
+                ("-p", "No valid individual specified.", -2),
+                ("-o", "No output file specified.", -6))
+    for flag, message, code in required:
+        if not opts.get(flag):  # absent, or present but empty (e.g. -p '')
+            sys.stderr.write(message + "\n")
+            return code
+
+    # No shell: arguments go verbatim, redirection is done by open() here.
+    try:
+        with open(opts["-o"], "w") as out:
+            tool = subprocess.Popen(["vcf2genotypes", opts["-i"], opts["-p"]],
+                                    stdout=out, stderr=subprocess.PIPE,
+                                    universal_newlines=True)
+            errors = tool.communicate()[1]
+    except (IOError, OSError) as exc:
+        sys.stderr.write("Could not run vcf2genotypes: %s\n" % exc)
+        return 127
+    if tool.returncode and errors:  # relay diagnostics only on failure
+        sys.stderr.write(errors)
+    return tool.returncode
+
+if __name__ == "__main__":
+    sys.exit(main())
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/refeditor/vcf2genotypes.xml	Mon Jun 30 10:49:54 2014 -0400
@@ -0,0 +1,18 @@
+<tool id="vcf2genotypes1" name="vcf2genotypes" version="1.0.1">
+  <description>Extracts genotypes for a particular individual from VCF File</description>
+  <command interpreter="python">vcf2genotypes.py
+        -i '$input'
+        -p '$person'
+        -o '$out_file1'
+  </command>
+  <inputs>
+    <param format="vcf" name="input" type="data" label="Input VCF File"/>
+    <param format="txt" name="person" type="text" value="" label="Target individual (e.g. NA19238)"/>
+  </inputs>
+  <outputs>
+    <data format="genotypes" name="out_file1" metadata_source="input"/>
+  </outputs>
+  <help>
+Extracts the genotypes of one individual from a VCF file. Choose the input VCF and enter the target individual's identifier (e.g. NA19238).
+</help>
+</tool>