Mercurial > repos > miller-lab > genome_diversity
annotate diversity_pi.py @ 32:03c22b722882
remove BeautifulSoup dependency
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 20 Sep 2013 13:54:23 -0400 |
parents | a631c2f6d913 |
children |
rev | line source |
---|---|
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
2 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
3 import gd_util |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
4 import sys |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
5 from Population import Population |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
6 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
7 ################################################################################ |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
8 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
9 def load_pop(file, wrapped_dict): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
10 if file == '/dev/null': |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
11 pop = None |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
12 else: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
13 pop = Population() |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
14 pop.from_wrapped_dict(wrapped_dict) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
15 return pop |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
16 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
17 def append_tags(the_list, p, p_type, val): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
18 if p is None: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
19 return |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
20 for tag in p.tag_list(): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
21 column, name = tag.split(':') |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
22 if p_type == 'gd_genotype': |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
23 column = int(column) - 2 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
24 the_list.append('{0}:{1}:{2}'.format(val, column, name)) |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
25 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
26 ################################################################################ |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
27 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
28 if len(sys.argv) != 11: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
29 gd_util.die('Usage') |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
30 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
31 snp_input, snp_ext, snp_arg, cov_input, cov_ext, cov_arg, indiv_input, min_coverage, req_thresh, output = sys.argv[1:] |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
32 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
33 p_snp = load_pop(snp_input, snp_arg) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
34 p_cov = load_pop(cov_input, cov_arg) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
35 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
36 p_ind = Population() |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
37 p_ind.from_population_file(indiv_input) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
38 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
39 if not p_snp.is_superset(p_ind): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
40 gd_util.die('There is an individual in the population individuals that is not in the SNP/Genotype table') |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
41 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
42 if p_cov is not None and (not p_cov.is_superset(p_ind)): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
43 gd_util.die('There is an individual in the population individuals that is not in the Coverage table') |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
44 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
45 ################################################################################ |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
46 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
47 prog = 'mito_pi' |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
48 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
49 args = [ prog ] |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
50 args.append(snp_input) |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
51 args.append(cov_input) |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
52 args.append(min_coverage) |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
53 args.append(req_thresh) |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
54 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
55 append_tags(args, p_ind, 'gd_indivs', 0) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
56 append_tags(args, p_snp, snp_ext, 1) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
57 append_tags(args, p_cov, cov_ext, 2) |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
58 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
59 with open(output, 'w') as fh: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
60 gd_util.run_program(prog, args, stdout=fh) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
61 |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
62 sys.exit(0) |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
63 |