comparison discover_familial_relationships.py @ 31:a631c2f6d913

Update to Miller Lab devshed revision 3c4110ffacc3
author Richard Burhans <burhans@bx.psu.edu>
date Fri, 20 Sep 2013 13:25:27 -0400
parents
children
comparison
equal deleted inserted replaced
30:4188853b940b 31:a631c2f6d913
1 #!/usr/bin/env python
2
3 import sys
4 import gd_util
5
6 from Population import Population
7
8 ################################################################################
9
10 if len(sys.argv) != 6:
11 gd_util.die('Usage')
12
13 input, input_type, ind_arg, pop_input, output = sys.argv[1:]
14
15 p_total = Population()
16 p_total.from_wrapped_dict(ind_arg)
17
18 p1 = Population()
19 p1.from_population_file(pop_input)
20 if not p_total.is_superset(p1):
21 gd_util.die('There is an individual in the population that is not in the SNP table')
22
23 ################################################################################
24
25 prog = 'kinship_prep'
26
27 args = [ prog ]
28 args.append(input) # a Galaxy SNP table
29 args.append(0) # required number of reads for each individual to use a SNP
30 args.append(0) # required genotype quality for each individual to use a SNP
31 args.append(0) # minimum spacing between SNPs on the same scaffold
32
33 for tag in p1.tag_list():
34 if input_type == 'gd_genotype':
35 column, name = tag.split(':')
36 tag = '{0}:{1}'.format(int(column) - 2, name)
37 args.append(tag)
38
39 gd_util.run_program(prog, args)
40
41 # kinship.map
42 # kinship.ped
43 # kinship.dat
44
45 ################################################################################
46
47 prog = 'king'
48
49 args = [ prog ]
50 args.append('-d')
51 args.append('kinship.dat')
52 args.append('-p')
53 args.append('kinship.ped')
54 args.append('-m')
55 args.append('kinship.map')
56 args.append('--kinship')
57
58 gd_util.run_program(prog, args)
59
60 # king.kin
61
62 ################################################################################
63
64 valid_header = 'FID\tID1\tID2\tN_SNP\tZ0\tPhi\tHetHet\tIBS0\tKinship\tError\n'
65
66 with open('king.kin') as fh:
67 header = fh.readline()
68 if header != valid_header:
69 gd_util.die('crap')
70
71 with open(output, 'w') as ofh:
72
73 for line in fh:
74 elems = line.split('\t')
75 if len(elems) != 10:
76 gd_util.die('crap')
77
78 x = elems[1]
79 y = elems[2]
80 z = elems[8]
81
82 f = float(z)
83
84 message = ''
85
86 if f > 0.354:
87 message = 'duplicate or MZ twin'
88 elif f >= 0.177:
89 message = '1st degree relatives'
90 elif f >= 0.0884:
91 message = '2nd degree relatives'
92 elif f >= 0.0442:
93 message = '3rd degree relatives'
94
95 print >> ofh, '\t'.join([x, y, z, message])
96
97 ################################################################################
98
99 sys.exit(0)
100