Mercurial > repos > miller-lab > genome_diversity
annotate dpmix.py @ 39:e56023008e36 default tip
Changed revision of package_fisher_0_1_4 to be2fc454d121
Changed revision of package_matplotlib_1_2 to a03ee94316b5
| author | miller-lab |
|---|---|
| date | Mon, 06 Jul 2015 10:32:24 -0400 |
| parents | a631c2f6d913 |
| children |
| rev | line source |
|---|---|
| 12 | 1 #!/usr/bin/env python |
| 2 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
3 import gd_util |
| 12 | 4 import sys |
| 5 import os | |
| 6 from Population import Population | |
| 7 import gd_composite | |
| 8 from dpmix_plot import make_dpmix_plot | |
| 9 from LocationFile import LocationFile | |
| 10 | |
|
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
11 def load_and_check_pop(name, file, total_pop): |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
12 p = Population(name=name) |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
13 p.from_population_file(file) |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
14 if not total_pop.is_superset(p): |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
15 gd_util.die('There is an individual in {0} that is not in the SNP table'.format(name)) |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
16 return p |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
17 |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
18 def append_pop_tags(the_list, p, input_type, number): |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
19 for tag in p.tag_list(): |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
20 column, name = tag.split(':') |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
21 if input_type == 'gd_genotype': |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
22 column = int(column) - 2 |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
23 the_list.append('{0}:{1}:{2}'.format(column, number, name)) |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
24 |
| 12 | 25 ################################################################################ |
| 26 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
27 if len(sys.argv) != 22: |
| 12 | 28 print "usage" |
| 29 sys.exit(1) | |
| 30 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
31 input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:] |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
32 |
|
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
33 if ap1_input == '/dev/null': |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
34 use_reference = True |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
35 else: |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
36 use_reference = False |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
37 |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
38 if ap3_input == '/dev/null': |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
39 populations = 2 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
40 else: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
41 populations = 3 |
| 12 | 42 |
| 43 chrom = 'all' | |
| 44 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
45 if het_arg == 'use_installed': |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
46 loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
47 location_file = LocationFile(loc_path) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
48 heterochrom_path = location_file.get_values_if_exists(dbkey) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
49 if heterochrom_path is None: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
50 heterochrom_path = '/dev/null' |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
51 elif het_arg == 'use_none': |
| 12 | 52 heterochrom_path = '/dev/null' |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
53 else: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
54 heterochrom_path = het_arg |
| 12 | 55 |
| 56 population_list = [] | |
| 57 | |
| 58 p_total = Population() | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
59 p_total.from_wrapped_dict(ind_arg) |
| 12 | 60 |
|
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
61 if not use_reference: |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
62 ap1 = load_and_check_pop('Ancestral population 1', ap1_input, p_total) |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
63 population_list.append(ap1) |
| 12 | 64 |
|
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
65 ap2 = load_and_check_pop('Ancestral population 2', ap2_input, p_total) |
| 12 | 66 population_list.append(ap2) |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
67 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
68 if populations == 3: |
|
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
69 ap3 = load_and_check_pop('Ancestral population 3', ap3_input, p_total) |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
70 population_list.append(ap3) |
| 12 | 71 |
|
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
72 p = load_and_check_pop('Potentially admixed', p_input, p_total) |
| 12 | 73 population_list.append(p) |
| 74 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
75 gd_util.mkdir_p(output2_dir) |
| 12 | 76 |
| 77 ################################################################################ | |
| 78 # Create tabular file | |
| 79 ################################################################################ | |
| 80 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
81 misc_file = os.path.join(output2_dir, 'summary.txt') |
| 12 | 82 |
| 83 prog = 'dpmix' | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
84 |
| 12 | 85 args = [ prog ] |
| 86 args.append(input) | |
| 87 args.append(ref_column) | |
| 88 args.append(chrom) | |
| 89 args.append(data_source) | |
| 90 args.append(add_logs) | |
| 91 args.append(switch_penalty) | |
| 92 args.append(heterochrom_path) | |
| 93 args.append(misc_file) | |
| 94 | |
|
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
95 if use_reference: |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
96 args.append('0:1:reference') |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
97 else: |
|
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
98 append_pop_tags(args, ap1, input_type, 1) |
| 12 | 99 |
|
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
100 append_pop_tags(args, ap2, input_type, 2) |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
101 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
102 if populations == 3: |
|
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
103 append_pop_tags(args, ap3, input_type, 3) |
| 12 | 104 |
|
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
105 append_pop_tags(args, p, input_type, 0) |
| 12 | 106 |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
107 with open(output, 'w') as fh: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
108 gd_util.run_program(prog, args, stdout=fh) |
| 12 | 109 |
| 110 ################################################################################ | |
| 111 # Create pdf file | |
| 112 ################################################################################ | |
| 113 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
114 if populations == 3: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
115 state2name = { |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
116 0:'heterochromatin', |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
117 1:ap1_name, |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
118 2:ap2_name, |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
119 3:ap3_name |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
120 } |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
121 else: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
122 state2name = { |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
123 0:'heterochromatin', |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
124 1:ap1_name, |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
125 2:ap2_name |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
126 } |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
127 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
128 pdf_file = os.path.join(output2_dir, 'picture.pdf') |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
129 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir, state2name=state2name, populations=populations) |
| 12 | 130 |
| 131 ################################################################################ | |
| 132 # Create html | |
| 133 ################################################################################ | |
| 134 | |
| 135 info_page = gd_composite.InfoPage() | |
| 136 info_page.set_title('dpmix Galaxy Composite Dataset') | |
| 137 | |
| 138 display_file = gd_composite.DisplayFile() | |
| 139 display_value = gd_composite.DisplayValue() | |
| 140 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
141 out_pdf = gd_composite.Parameter(name='picture.pdf', value='picture.pdf', display_type=display_file) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
142 out_misc = gd_composite.Parameter(name='summary.txt', value='summary.txt', display_type=display_file) |
| 12 | 143 |
| 144 info_page.add_output_parameter(out_pdf) | |
| 145 info_page.add_output_parameter(out_misc) | |
| 146 | |
| 147 if data_source == '0': | |
| 148 data_source_value = 'sequence coverage' | |
| 149 elif data_source == '1': | |
| 150 data_source_value = 'estimated genotype' | |
| 151 | |
| 152 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) | |
| 153 in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value) | |
| 154 | |
| 155 info_page.add_input_parameter(in_data_source) | |
| 156 info_page.add_input_parameter(in_switch_penalty) | |
| 157 | |
| 158 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) | |
| 159 | |
| 160 info_page.add_misc(misc_populations) | |
| 161 | |
| 162 with open(output2, 'w') as ofh: | |
| 163 print >> ofh, info_page.render() | |
| 164 | |
| 165 sys.exit(0) | |
| 166 |
