Mercurial > repos > miller-lab > genome_diversity
annotate draw_variants.py @ 32:03c22b722882
remove BeautifulSoup dependency
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 20 Sep 2013 13:54:23 -0400 |
parents | a631c2f6d913 |
children | ea52b23f1141 |
rev | line source |
---|---|
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
2 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
3 import gd_util |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
4 import sys |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
5 from Population import Population |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
6 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
7 ################################################################################ |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
8 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
9 def load_pop(file, wrapped_dict): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
10 if file == '/dev/null': |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
11 pop = None |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
12 else: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
13 pop = Population() |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
14 pop.from_wrapped_dict(wrapped_dict) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
15 return pop |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
16 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
17 def append_tags(the_list, p, p_type, val): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
18 if p is None: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
19 return |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
20 for tag in p.tag_list(): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
21 column, name = tag.split(':') |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
22 if p_type == 'gd_genotype': |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
23 column = int(column) - 2 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
24 the_list.append('{0}:{1}:{2}'.format(val, column, name)) |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
25 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
26 ################################################################################ |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
27 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
28 if len(sys.argv) != 11: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
29 gd_util.die('Usage') |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
30 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
31 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
32 snp_file, snp_ext, snp_arg, indiv_input, annotation_input, cov_file, cov_ext, cov_arg, min_coverage, output = sys.argv[1:] |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
33 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
34 p_snp = load_pop(snp_file, snp_arg) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
35 p_cov = load_pop(cov_file, cov_arg) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
36 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
37 if indiv_input == '/dev/null': |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
38 if p_snp is not None: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
39 p_ind = p_snp |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
40 elif p_cov is not None: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
41 p_ind = p_cov |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
42 else: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
43 p_ind = None |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
44 order_p_ind = True |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
45 else: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
46 p_ind = Population() |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
47 p_ind.from_population_file(indiv_input) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
48 order_p_ind = False |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
49 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
50 ## p ind must be from either p_snp or p_cov |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
51 if p_snp is not None and p_cov is not None: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
52 if not (p_snp.is_superset(p_ind) or p_cov.is_superset(p_ind)): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
53 gd_util.die('There is an individual in the population individuals that is not in the SNP/Genotype or Coverage table') |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
54 elif p_snp is not None: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
55 if not p_snp.is_superset(p_ind): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
56 gd_util.die('There is an individual in the population individuals that is not in the SNP/Genotype table') |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
57 elif p_cov is not None: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
58 if not p_cov.is_superset(p_ind): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
59 gd_util.die('There is an individual in the population individuals that is not in the Coverage table') |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
60 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
61 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
62 ################################################################################ |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
63 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
64 prog = 'mito_draw' |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
65 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
66 args = [ prog ] |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
67 args.append(snp_file) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
68 args.append(cov_file) |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
69 args.append(annotation_input) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
70 args.append(min_coverage) |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
71 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
72 if order_p_ind: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
73 for column in sorted(p_ind.column_list()): |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
74 individual = p_ind.individual_with_column(column) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
75 name = individual.name.split()[0] |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
76 args.append('{0}:{1}:{2}'.format(0, column, name)) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
77 else: |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
78 append_tags(args, p_ind, 'gd_indivs', 0) |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
79 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
80 append_tags(args, p_snp, snp_ext, 1) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
81 append_tags(args, p_cov, cov_ext, 2) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
82 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
83 with open('Ji.spec', 'w') as fh: |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
84 gd_util.run_program(prog, args, stdout=fh) |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
85 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
86 ################################################################################ |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
87 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
88 prog = 'varplot' |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
89 |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
90 args = [ prog ] |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
91 args.append('-w') |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
92 args.append(3) |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
93 args.append('-s') |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
94 args.append(0.3) |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
95 args.append('-g') |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
96 args.append(0.2) |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
97 args.append('Ji.spec') |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
98 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
99 with open('Ji.svg', 'w') as fh: |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
100 gd_util.run_program(prog, args, stdout=fh) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
101 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
102 ################################################################################ |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
103 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
104 prog = 'convert' |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
105 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
106 args = [ prog ] |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
107 args.append('-density') |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
108 args.append(100) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
109 args.append('Ji.svg') |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
110 args.append('tiff:{0}'.format(output)) |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
111 |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
112 gd_util.run_program(prog, args) |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
113 sys.exit(0) |