annotate tools/refeditor/vcf2genotypes.py @ 1:85bdf226b67b default tip

add vcf2genotypes and test-data
author superyuan <shuaiyuan.emory@gmail.com>
date Mon, 30 Jun 2014 10:49:54 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
1 # Filename: vcf2genotypes.py
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
2 # Author: Shuai Yuan
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
3 # Version: 06/29/2012
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
4 #
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
5 # This script is a wrapper for vcf2genotypes
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
6 #
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
7 # vcf2genotypes is launched based on these inputs:
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
8 # -i input VCF file
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
9 # -p target individual
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
10 # -o output genotypes file
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
11
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
12 import sys
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
13 import os
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
14 import re
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
15 import string
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
16 import commands
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
17 from tempfile import NamedTemporaryFile
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
18
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
19 # This function is exceedingly useful, perhaps package for reuse?
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
def getopts(argv):
    """Collect ``-flag value`` pairs from a list of command-line tokens.

    The tokens are scanned left to right. A token that begins with ``-`` is
    treated as a flag and the token immediately after it becomes its value
    (even if that value itself begins with ``-``). Every other token is
    skipped. When a flag is repeated, the last value wins.

    Args:
        argv: sequence of argument strings, program name already removed.

    Returns:
        dict mapping each flag (leading dash included) to its value.

    Raises:
        IndexError: if the last token is a flag with no value following it.
    """
    opts = {}
    pos = 0
    total = len(argv)
    while pos < total:
        token = argv[pos]
        # startswith() instead of token[0]: an empty-string argument is
        # simply skipped rather than raising IndexError.
        if token.startswith('-'):
            # Deliberately unguarded: a flag without a value must raise
            # IndexError so the caller can report a usage error.
            opts[token] = argv[pos + 1]
            pos += 2
        else:
            pos += 1
    return opts
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
29
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
def main():
    """Run the ``vcf2genotypes`` program with options read from ``sys.argv``.

    Options:
        -i  input VCF file
        -p  target individual
        -o  output genotypes file

    The program's standard output is written to the ``-o`` file.

    Returns:
        0 after printing the usage banner (a flag was given without a value);
        -1, -2 or -6 when the ``-i``, ``-p`` or ``-o`` option is missing;
        1 if the output file cannot be opened; 127 if ``vcf2genotypes``
        cannot be started; otherwise the exit status of ``vcf2genotypes``.
    """
    # Local import keeps this function self-contained; the module header
    # does not import subprocess.
    import subprocess

    args = sys.argv[1:]

    try:
        opts = getopts(args)
    except IndexError:
        print("Usage:")
        return 0

    vcf = opts.get("-i")
    if vcf is None:
        print("No input VCF file specified.")
        return -1

    individual = opts.get("-p")
    if individual is None:
        print("No valid individual specified.")
        return -2

    outputfile = opts.get("-o")
    if outputfile is None:
        print("No output file specified.")
        return -6

    # All inputs have been specified at this point.

    # Open the destination ourselves instead of using a shell redirect.
    try:
        sink = open(outputfile, "w")
    except (IOError, OSError):
        # A failed shell redirect would also have ended with status 1.
        return 1

    with sink:
        # Passing an argument list (no shell) means paths or names that
        # contain spaces or shell metacharacters are handed over verbatim
        # and cannot be interpreted as extra commands.
        try:
            child = subprocess.Popen(
                ["vcf2genotypes", vcf, individual],
                stdout=sink,
                stderr=subprocess.PIPE,
            )
        except OSError:
            # Executable missing or not runnable; 127 is the shell's
            # conventional "command not found" status.
            return 127
        # stderr is collected and dropped, as the previous implementation
        # did; only the exit status is reported.
        child.communicate()

    return child.returncode
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
64
85bdf226b67b add vcf2genotypes and test-data
superyuan <shuaiyuan.emory@gmail.com>
parents:
diff changeset
if __name__ == "__main__":
    # Pass main()'s return value to the interpreter so that a failure is
    # visible in the process exit status instead of being discarded.
    sys.exit(main())