Mercurial > repos > miller-lab > genome_diversity
annotate dpmix.py @ 28:184d14e4270d
Update to Miller Lab devshed revision 4ede22dd5500
| author | Richard Burhans <burhans@bx.psu.edu> |
|---|---|
| date | Wed, 17 Jul 2013 12:46:46 -0400 |
| parents | 8997f2ca8c7a |
| children | a631c2f6d913 |
| rev | line source |
|---|---|
| 12 | 1 #!/usr/bin/env python |
| 2 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
3 import gd_util |
| 12 | 4 import sys |
| 5 import os | |
| 6 from Population import Population | |
| 7 import gd_composite | |
| 8 from dpmix_plot import make_dpmix_plot | |
| 9 from LocationFile import LocationFile | |
| 10 | |
| 11 ################################################################################ | |
| 12 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
13 if len(sys.argv) != 22: |
| 12 | 14 print "usage" |
| 15 sys.exit(1) | |
| 16 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
17 input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:] |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
18 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
19 if ap3_input == '/dev/null': |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
20 populations = 2 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
21 else: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
22 populations = 3 |
| 12 | 23 |
| 24 chrom = 'all' | |
| 25 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
26 if het_arg == 'use_installed': |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
27 loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
28 location_file = LocationFile(loc_path) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
29 heterochrom_path = location_file.get_values_if_exists(dbkey) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
30 if heterochrom_path is None: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
31 heterochrom_path = '/dev/null' |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
32 elif het_arg == 'use_none': |
| 12 | 33 heterochrom_path = '/dev/null' |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
34 else: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
35 heterochrom_path = het_arg |
| 12 | 36 |
| 37 population_list = [] | |
| 38 | |
| 39 p_total = Population() | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
40 p_total.from_wrapped_dict(ind_arg) |
| 12 | 41 |
| 42 ap1 = Population(name='Ancestral population 1') | |
| 43 ap1.from_population_file(ap1_input) | |
| 44 population_list.append(ap1) | |
| 45 if not p_total.is_superset(ap1): | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
46 gd_util.die('There is an individual in ancestral population 1 that is not in the SNP table') |
| 12 | 47 |
| 48 ap2 = Population(name='Ancestral population 2') | |
| 49 ap2.from_population_file(ap2_input) | |
| 50 population_list.append(ap2) | |
| 51 if not p_total.is_superset(ap2): | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
52 gd_util.die('There is an individual in ancestral population 2 that is not in the SNP table') |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
53 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
54 if populations == 3: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
55 ap3 = Population(name='Ancestral population 3') |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
56 ap3.from_population_file(ap3_input) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
57 population_list.append(ap3) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
58 if not p_total.is_superset(ap3): |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
59 gd_util.die('There is an individual in ancestral population 3 that is not in the SNP table') |
| 12 | 60 |
| 61 p = Population(name='Potentially admixed') | |
| 62 p.from_population_file(p_input) | |
| 63 population_list.append(p) | |
| 64 if not p_total.is_superset(p): | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
65 gd_util.die('There is an individual in the population that is not in the SNP table') |
| 12 | 66 |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
67 gd_util.mkdir_p(output2_dir) |
| 12 | 68 |
| 69 ################################################################################ | |
| 70 # Create tabular file | |
| 71 ################################################################################ | |
| 72 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
73 misc_file = os.path.join(output2_dir, 'summary.txt') |
| 12 | 74 |
| 75 prog = 'dpmix' | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
76 |
| 12 | 77 args = [ prog ] |
| 78 args.append(input) | |
| 79 args.append(ref_column) | |
| 80 args.append(chrom) | |
| 81 args.append(data_source) | |
| 82 args.append(add_logs) | |
| 83 args.append(switch_penalty) | |
| 84 args.append(heterochrom_path) | |
| 85 args.append(misc_file) | |
| 86 | |
| 87 columns = ap1.column_list() | |
| 88 for column in columns: | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
89 col = int(column) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
90 name = ap1.individual_with_column(column).name |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
91 first_token = name.split()[0] |
|
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
92 if input_type == 'gd_genotype': |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
93 col -= 2 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
94 args.append('{0}:1:{1}'.format(col, first_token)) |
| 12 | 95 |
| 96 columns = ap2.column_list() | |
| 97 for column in columns: | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
98 col = int(column) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
99 name = ap2.individual_with_column(column).name |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
100 first_token = name.split()[0] |
|
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
101 if input_type == 'gd_genotype': |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
102 col -= 2 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
103 args.append('{0}:2:{1}'.format(col, first_token)) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
104 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
105 if populations == 3: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
106 columns = ap3.column_list() |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
107 for column in columns: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
108 col = int(column) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
109 name = ap3.individual_with_column(column).name |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
110 first_token = name.split()[0] |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
111 if input_type == 'gd_genotype': |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
112 col -= 2 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
113 args.append('{0}:3:{1}'.format(col, first_token)) |
| 12 | 114 |
| 115 columns = p.column_list() | |
| 116 for column in columns: | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
117 col = int(column) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
118 name = p.individual_with_column(column).name |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
119 first_token = name.split()[0] |
|
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
12
diff
changeset
|
120 if input_type == 'gd_genotype': |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
121 col -= 2 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
122 args.append('{0}:0:{1}'.format(col, first_token)) |
| 12 | 123 |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
124 with open(output, 'w') as fh: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
125 gd_util.run_program(prog, args, stdout=fh) |
| 12 | 126 |
| 127 ################################################################################ | |
| 128 # Create pdf file | |
| 129 ################################################################################ | |
| 130 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
131 if populations == 3: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
132 state2name = { |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
133 0:'heterochromatin', |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
134 1:ap1_name, |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
135 2:ap2_name, |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
136 3:ap3_name |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
137 } |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
138 else: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
139 state2name = { |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
140 0:'heterochromatin', |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
141 1:ap1_name, |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
142 2:ap2_name |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
143 } |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
144 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
145 pdf_file = os.path.join(output2_dir, 'picture.pdf') |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
146 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir, state2name=state2name, populations=populations) |
| 12 | 147 |
| 148 ################################################################################ | |
| 149 # Create html | |
| 150 ################################################################################ | |
| 151 | |
| 152 info_page = gd_composite.InfoPage() | |
| 153 info_page.set_title('dpmix Galaxy Composite Dataset') | |
| 154 | |
| 155 display_file = gd_composite.DisplayFile() | |
| 156 display_value = gd_composite.DisplayValue() | |
| 157 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
158 out_pdf = gd_composite.Parameter(name='picture.pdf', value='picture.pdf', display_type=display_file) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
159 out_misc = gd_composite.Parameter(name='summary.txt', value='summary.txt', display_type=display_file) |
| 12 | 160 |
| 161 info_page.add_output_parameter(out_pdf) | |
| 162 info_page.add_output_parameter(out_misc) | |
| 163 | |
| 164 if data_source == '0': | |
| 165 data_source_value = 'sequence coverage' | |
| 166 elif data_source == '1': | |
| 167 data_source_value = 'estimated genotype' | |
| 168 | |
| 169 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) | |
| 170 in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value) | |
| 171 | |
| 172 info_page.add_input_parameter(in_data_source) | |
| 173 info_page.add_input_parameter(in_switch_penalty) | |
| 174 | |
| 175 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) | |
| 176 | |
| 177 info_page.add_misc(misc_populations) | |
| 178 | |
| 179 with open(output2, 'w') as ofh: | |
| 180 print >> ofh, info_page.render() | |
| 181 | |
| 182 sys.exit(0) | |
| 183 |
