Mercurial > repos > miller-lab > genome_diversity
comparison discover_familial_relationships.py @ 31:a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 20 Sep 2013 13:25:27 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
30:4188853b940b | 31:a631c2f6d913 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import sys | |
4 import gd_util | |
5 | |
6 from Population import Population | |
7 | |
8 ################################################################################ | |
9 | |
10 if len(sys.argv) != 6: | |
11 gd_util.die('Usage') | |
12 | |
13 input, input_type, ind_arg, pop_input, output = sys.argv[1:] | |
14 | |
15 p_total = Population() | |
16 p_total.from_wrapped_dict(ind_arg) | |
17 | |
18 p1 = Population() | |
19 p1.from_population_file(pop_input) | |
20 if not p_total.is_superset(p1): | |
21 gd_util.die('There is an individual in the population that is not in the SNP table') | |
22 | |
23 ################################################################################ | |
24 | |
25 prog = 'kinship_prep' | |
26 | |
27 args = [ prog ] | |
28 args.append(input) # a Galaxy SNP table | |
29 args.append(0) # required number of reads for each individual to use a SNP | |
30 args.append(0) # required genotype quality for each individual to use a SNP | |
31 args.append(0) # minimum spacing between SNPs on the same scaffold | |
32 | |
33 for tag in p1.tag_list(): | |
34 if input_type == 'gd_genotype': | |
35 column, name = tag.split(':') | |
36 tag = '{0}:{1}'.format(int(column) - 2, name) | |
37 args.append(tag) | |
38 | |
39 gd_util.run_program(prog, args) | |
40 | |
41 # kinship.map | |
42 # kinship.ped | |
43 # kinship.dat | |
44 | |
45 ################################################################################ | |
46 | |
47 prog = 'king' | |
48 | |
49 args = [ prog ] | |
50 args.append('-d') | |
51 args.append('kinship.dat') | |
52 args.append('-p') | |
53 args.append('kinship.ped') | |
54 args.append('-m') | |
55 args.append('kinship.map') | |
56 args.append('--kinship') | |
57 | |
58 gd_util.run_program(prog, args) | |
59 | |
60 # king.kin | |
61 | |
62 ################################################################################ | |
63 | |
64 valid_header = 'FID\tID1\tID2\tN_SNP\tZ0\tPhi\tHetHet\tIBS0\tKinship\tError\n' | |
65 | |
66 with open('king.kin') as fh: | |
67 header = fh.readline() | |
68 if header != valid_header: | |
69 gd_util.die('crap') | |
70 | |
71 with open(output, 'w') as ofh: | |
72 | |
73 for line in fh: | |
74 elems = line.split('\t') | |
75 if len(elems) != 10: | |
76 gd_util.die('crap') | |
77 | |
78 x = elems[1] | |
79 y = elems[2] | |
80 z = elems[8] | |
81 | |
82 f = float(z) | |
83 | |
84 message = '' | |
85 | |
86 if f > 0.354: | |
87 message = 'duplicate or MZ twin' | |
88 elif f >= 0.177: | |
89 message = '1st degree relatives' | |
90 elif f >= 0.0884: | |
91 message = '2nd degree relatives' | |
92 elif f >= 0.0442: | |
93 message = '3rd degree relatives' | |
94 | |
95 print >> ofh, '\t'.join([x, y, z, message]) | |
96 | |
97 ################################################################################ | |
98 | |
99 sys.exit(0) | |
100 |