Mercurial > repos > miller-lab > genome_diversity
annotate dpmix.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 248b06e86022 |
children | a631c2f6d913 |
rev | line source |
---|---|
12 | 1 #!/usr/bin/env python |
2 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
3 import gd_util |
12 | 4 import sys |
5 import os | |
6 from Population import Population | |
7 import gd_composite | |
8 from dpmix_plot import make_dpmix_plot | |
9 from LocationFile import LocationFile | |
10 | |
11 ################################################################################ | |
12 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
13 if len(sys.argv) != 22: |
12 | 14 print "usage" |
15 sys.exit(1) | |
16 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
17 input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:] |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
18 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
19 if ap3_input == '/dev/null': |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
20 populations = 2 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
21 else: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
22 populations = 3 |
12 | 23 |
24 chrom = 'all' | |
25 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
26 if het_arg == 'use_installed': |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
27 loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
28 location_file = LocationFile(loc_path) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
29 heterochrom_path = location_file.get_values_if_exists(dbkey) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
30 if heterochrom_path is None: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
31 heterochrom_path = '/dev/null' |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
32 elif het_arg == 'use_none': |
12 | 33 heterochrom_path = '/dev/null' |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
34 else: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
35 heterochrom_path = het_arg |
12 | 36 |
37 population_list = [] | |
38 | |
39 p_total = Population() | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
40 p_total.from_wrapped_dict(ind_arg) |
12 | 41 |
42 ap1 = Population(name='Ancestral population 1') | |
43 ap1.from_population_file(ap1_input) | |
44 population_list.append(ap1) | |
45 if not p_total.is_superset(ap1): | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
46 gd_util.die('There is an individual in ancestral population 1 that is not in the SNP table') |
12 | 47 |
48 ap2 = Population(name='Ancestral population 2') | |
49 ap2.from_population_file(ap2_input) | |
50 population_list.append(ap2) | |
51 if not p_total.is_superset(ap2): | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
52 gd_util.die('There is an individual in ancestral population 2 that is not in the SNP table') |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
53 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
54 if populations == 3: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
55 ap3 = Population(name='Ancestral population 3') |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
56 ap3.from_population_file(ap3_input) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
57 population_list.append(ap3) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
58 if not p_total.is_superset(ap3): |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
59 gd_util.die('There is an individual in ancestral population 3 that is not in the SNP table') |
12 | 60 |
61 p = Population(name='Potentially admixed') | |
62 p.from_population_file(p_input) | |
63 population_list.append(p) | |
64 if not p_total.is_superset(p): | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
65 gd_util.die('There is an individual in the population that is not in the SNP table') |
12 | 66 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
67 gd_util.mkdir_p(output2_dir) |
12 | 68 |
69 ################################################################################ | |
70 # Create tabular file | |
71 ################################################################################ | |
72 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
73 misc_file = os.path.join(output2_dir, 'summary.txt') |
12 | 74 |
75 prog = 'dpmix' | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
76 |
12 | 77 args = [ prog ] |
78 args.append(input) | |
79 args.append(ref_column) | |
80 args.append(chrom) | |
81 args.append(data_source) | |
82 args.append(add_logs) | |
83 args.append(switch_penalty) | |
84 args.append(heterochrom_path) | |
85 args.append(misc_file) | |
86 | |
87 columns = ap1.column_list() | |
88 for column in columns: | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
89 col = int(column) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
90 name = ap1.individual_with_column(column).name |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
91 first_token = name.split()[0] |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
92 if input_type == 'gd_genotype': |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
93 col -= 2 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
94 args.append('{0}:1:{1}'.format(col, first_token)) |
12 | 95 |
96 columns = ap2.column_list() | |
97 for column in columns: | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
98 col = int(column) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
99 name = ap2.individual_with_column(column).name |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
100 first_token = name.split()[0] |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
101 if input_type == 'gd_genotype': |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
102 col -= 2 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
103 args.append('{0}:2:{1}'.format(col, first_token)) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
104 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
105 if populations == 3: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
106 columns = ap3.column_list() |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
107 for column in columns: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
108 col = int(column) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
109 name = ap3.individual_with_column(column).name |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
110 first_token = name.split()[0] |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
111 if input_type == 'gd_genotype': |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
112 col -= 2 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
113 args.append('{0}:3:{1}'.format(col, first_token)) |
12 | 114 |
115 columns = p.column_list() | |
116 for column in columns: | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
117 col = int(column) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
118 name = p.individual_with_column(column).name |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
119 first_token = name.split()[0] |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
120 if input_type == 'gd_genotype': |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
121 col -= 2 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
122 args.append('{0}:0:{1}'.format(col, first_token)) |
12 | 123 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
124 with open(output, 'w') as fh: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
125 gd_util.run_program(prog, args, stdout=fh) |
12 | 126 |
127 ################################################################################ | |
128 # Create pdf file | |
129 ################################################################################ | |
130 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
131 if populations == 3: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
132 state2name = { |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
133 0:'heterochromatin', |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
134 1:ap1_name, |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
135 2:ap2_name, |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
136 3:ap3_name |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
137 } |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
138 else: |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
139 state2name = { |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
140 0:'heterochromatin', |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
141 1:ap1_name, |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
142 2:ap2_name |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
143 } |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
144 |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
145 pdf_file = os.path.join(output2_dir, 'picture.pdf') |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
146 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir, state2name=state2name, populations=populations) |
12 | 147 |
148 ################################################################################ | |
149 # Create html | |
150 ################################################################################ | |
151 | |
152 info_page = gd_composite.InfoPage() | |
153 info_page.set_title('dpmix Galaxy Composite Dataset') | |
154 | |
155 display_file = gd_composite.DisplayFile() | |
156 display_value = gd_composite.DisplayValue() | |
157 | |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
158 out_pdf = gd_composite.Parameter(name='picture.pdf', value='picture.pdf', display_type=display_file) |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
159 out_misc = gd_composite.Parameter(name='summary.txt', value='summary.txt', display_type=display_file) |
12 | 160 |
161 info_page.add_output_parameter(out_pdf) | |
162 info_page.add_output_parameter(out_misc) | |
163 | |
164 if data_source == '0': | |
165 data_source_value = 'sequence coverage' | |
166 elif data_source == '1': | |
167 data_source_value = 'estimated genotype' | |
168 | |
169 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) | |
170 in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value) | |
171 | |
172 info_page.add_input_parameter(in_data_source) | |
173 info_page.add_input_parameter(in_switch_penalty) | |
174 | |
175 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) | |
176 | |
177 info_page.add_misc(misc_populations) | |
178 | |
179 with open(output2, 'w') as ofh: | |
180 print >> ofh, info_page.render() | |
181 | |
182 sys.exit(0) | |
183 |