Mercurial > repos > miller-lab > genome_diversity
comparison dpmix.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 248b06e86022 |
children | a631c2f6d913 |
comparison
equal
deleted
inserted
replaced
26:91e835060ad2 | 27:8997f2ca8c7a |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 import errno | 3 import gd_util |
4 import sys | 4 import sys |
5 import os | 5 import os |
6 import subprocess | |
7 from Population import Population | 6 from Population import Population |
8 import gd_composite | 7 import gd_composite |
9 from dpmix_plot import make_dpmix_plot | 8 from dpmix_plot import make_dpmix_plot |
10 from LocationFile import LocationFile | 9 from LocationFile import LocationFile |
11 | 10 |
12 ################################################################################ | 11 ################################################################################ |
13 | 12 |
14 def mkdir_p(path): | 13 if len(sys.argv) != 22: |
15 try: | |
16 os.makedirs(path) | |
17 except OSError, e: | |
18 if e.errno <> errno.EEXIST: | |
19 raise | |
20 | |
21 def run_program(prog, args, stdout_file=None, space_to_tab=False): | |
22 #print "args: ", ' '.join(args) | |
23 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
24 (stdoutdata, stderrdata) = p.communicate() | |
25 rc = p.returncode | |
26 | |
27 if stdout_file is not None: | |
28 with open(stdout_file, 'w') as ofh: | |
29 lines = stdoutdata.split('\n') | |
30 for line in lines: | |
31 line = line.strip() | |
32 if line: | |
33 if space_to_tab: | |
34 line = line.replace(' ', '\t') | |
35 print >> ofh, line | |
36 | |
37 if rc != 0: | |
38 print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) | |
39 print >> sys.stderr, stderrdata | |
40 sys.exit(1) | |
41 | |
42 ################################################################################ | |
43 | |
44 if len(sys.argv) < 16: | |
45 print "usage" | 14 print "usage" |
46 sys.exit(1) | 15 sys.exit(1) |
47 | 16 |
48 input, input_type, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file = sys.argv[1:15] | 17 input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:] |
49 individual_metadata = sys.argv[15:] | 18 |
19 if ap3_input == '/dev/null': | |
20 populations = 2 | |
21 else: | |
22 populations = 3 | |
50 | 23 |
51 chrom = 'all' | 24 chrom = 'all' |
52 add_logs = '0' | |
53 | 25 |
54 loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file) | 26 if het_arg == 'use_installed': |
55 location_file = LocationFile(loc_path) | 27 loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file) |
56 heterochrom_path = location_file.get_values_if_exists(dbkey) | 28 location_file = LocationFile(loc_path) |
57 if heterochrom_path is None: | 29 heterochrom_path = location_file.get_values_if_exists(dbkey) |
30 if heterochrom_path is None: | |
31 heterochrom_path = '/dev/null' | |
32 elif het_arg == 'use_none': | |
58 heterochrom_path = '/dev/null' | 33 heterochrom_path = '/dev/null' |
34 else: | |
35 heterochrom_path = het_arg | |
59 | 36 |
60 population_list = [] | 37 population_list = [] |
61 | 38 |
62 p_total = Population() | 39 p_total = Population() |
63 p_total.from_tag_list(individual_metadata) | 40 p_total.from_wrapped_dict(ind_arg) |
64 | 41 |
65 ap1 = Population(name='Ancestral population 1') | 42 ap1 = Population(name='Ancestral population 1') |
66 ap1.from_population_file(ap1_input) | 43 ap1.from_population_file(ap1_input) |
67 population_list.append(ap1) | 44 population_list.append(ap1) |
68 if not p_total.is_superset(ap1): | 45 if not p_total.is_superset(ap1): |
69 print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table' | 46 gd_util.die('There is an individual in ancestral population 1 that is not in the SNP table') |
70 sys.exit(1) | |
71 | 47 |
72 ap2 = Population(name='Ancestral population 2') | 48 ap2 = Population(name='Ancestral population 2') |
73 ap2.from_population_file(ap2_input) | 49 ap2.from_population_file(ap2_input) |
74 population_list.append(ap2) | 50 population_list.append(ap2) |
75 if not p_total.is_superset(ap2): | 51 if not p_total.is_superset(ap2): |
76 print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table' | 52 gd_util.die('There is an individual in ancestral population 2 that is not in the SNP table') |
77 sys.exit(1) | 53 |
54 if populations == 3: | |
55 ap3 = Population(name='Ancestral population 3') | |
56 ap3.from_population_file(ap3_input) | |
57 population_list.append(ap3) | |
58 if not p_total.is_superset(ap3): | |
59 gd_util.die('There is an individual in ancestral population 3 that is not in the SNP table') | |
78 | 60 |
79 p = Population(name='Potentially admixed') | 61 p = Population(name='Potentially admixed') |
80 p.from_population_file(p_input) | 62 p.from_population_file(p_input) |
81 population_list.append(p) | 63 population_list.append(p) |
82 if not p_total.is_superset(p): | 64 if not p_total.is_superset(p): |
83 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' | 65 gd_util.die('There is an individual in the population that is not in the SNP table') |
84 sys.exit(1) | |
85 | 66 |
86 mkdir_p(output2_dir) | 67 gd_util.mkdir_p(output2_dir) |
87 | 68 |
88 ################################################################################ | 69 ################################################################################ |
89 # Create tabular file | 70 # Create tabular file |
90 ################################################################################ | 71 ################################################################################ |
91 | 72 |
92 misc_file = os.path.join(output2_dir, 'misc.txt') | 73 misc_file = os.path.join(output2_dir, 'summary.txt') |
93 | 74 |
94 prog = 'dpmix' | 75 prog = 'dpmix' |
76 | |
95 args = [ prog ] | 77 args = [ prog ] |
96 args.append(input) | 78 args.append(input) |
97 args.append(ref_column) | 79 args.append(ref_column) |
98 args.append(chrom) | 80 args.append(chrom) |
99 args.append(data_source) | 81 args.append(data_source) |
102 args.append(heterochrom_path) | 84 args.append(heterochrom_path) |
103 args.append(misc_file) | 85 args.append(misc_file) |
104 | 86 |
105 columns = ap1.column_list() | 87 columns = ap1.column_list() |
106 for column in columns: | 88 for column in columns: |
89 col = int(column) | |
90 name = ap1.individual_with_column(column).name | |
91 first_token = name.split()[0] | |
107 if input_type == 'gd_genotype': | 92 if input_type == 'gd_genotype': |
108 args.append('{0}:1:{1}'.format(int(column) - 2, ap1.individual_with_column(column).name)) | 93 col -= 2 |
109 else: | 94 args.append('{0}:1:{1}'.format(col, first_token)) |
110 args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name)) | |
111 | 95 |
112 columns = ap2.column_list() | 96 columns = ap2.column_list() |
113 for column in columns: | 97 for column in columns: |
98 col = int(column) | |
99 name = ap2.individual_with_column(column).name | |
100 first_token = name.split()[0] | |
114 if input_type == 'gd_genotype': | 101 if input_type == 'gd_genotype': |
115 args.append('{0}:2:{1}'.format(int(column) - 2, ap2.individual_with_column(column).name)) | 102 col -= 2 |
116 else: | 103 args.append('{0}:2:{1}'.format(col, first_token)) |
117 args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name)) | 104 |
105 if populations == 3: | |
106 columns = ap3.column_list() | |
107 for column in columns: | |
108 col = int(column) | |
109 name = ap3.individual_with_column(column).name | |
110 first_token = name.split()[0] | |
111 if input_type == 'gd_genotype': | |
112 col -= 2 | |
113 args.append('{0}:3:{1}'.format(col, first_token)) | |
118 | 114 |
119 columns = p.column_list() | 115 columns = p.column_list() |
120 for column in columns: | 116 for column in columns: |
117 col = int(column) | |
118 name = p.individual_with_column(column).name | |
119 first_token = name.split()[0] | |
121 if input_type == 'gd_genotype': | 120 if input_type == 'gd_genotype': |
122 args.append('{0}:0:{1}'.format(int(column) - 2, p.individual_with_column(column).name)) | 121 col -= 2 |
123 else: | 122 args.append('{0}:0:{1}'.format(col, first_token)) |
124 args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name)) | |
125 | 123 |
126 run_program(None, args, stdout_file=output, space_to_tab=True) | 124 with open(output, 'w') as fh: |
125 gd_util.run_program(prog, args, stdout=fh) | |
127 | 126 |
128 ################################################################################ | 127 ################################################################################ |
129 # Create pdf file | 128 # Create pdf file |
130 ################################################################################ | 129 ################################################################################ |
131 | 130 |
132 pdf_file = os.path.join(output2_dir, 'dpmix.pdf') | 131 if populations == 3: |
133 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir) | 132 state2name = { |
133 0:'heterochromatin', | |
134 1:ap1_name, | |
135 2:ap2_name, | |
136 3:ap3_name | |
137 } | |
138 else: | |
139 state2name = { | |
140 0:'heterochromatin', | |
141 1:ap1_name, | |
142 2:ap2_name | |
143 } | |
144 | |
145 pdf_file = os.path.join(output2_dir, 'picture.pdf') | |
146 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir, state2name=state2name, populations=populations) | |
134 | 147 |
135 ################################################################################ | 148 ################################################################################ |
136 # Create html | 149 # Create html |
137 ################################################################################ | 150 ################################################################################ |
138 | 151 |
140 info_page.set_title('dpmix Galaxy Composite Dataset') | 153 info_page.set_title('dpmix Galaxy Composite Dataset') |
141 | 154 |
142 display_file = gd_composite.DisplayFile() | 155 display_file = gd_composite.DisplayFile() |
143 display_value = gd_composite.DisplayValue() | 156 display_value = gd_composite.DisplayValue() |
144 | 157 |
145 out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file) | 158 out_pdf = gd_composite.Parameter(name='picture.pdf', value='picture.pdf', display_type=display_file) |
146 out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file) | 159 out_misc = gd_composite.Parameter(name='summary.txt', value='summary.txt', display_type=display_file) |
147 | 160 |
148 info_page.add_output_parameter(out_pdf) | 161 info_page.add_output_parameter(out_pdf) |
149 info_page.add_output_parameter(out_misc) | 162 info_page.add_output_parameter(out_misc) |
150 | 163 |
151 if data_source == '0': | 164 if data_source == '0': |
166 with open(output2, 'w') as ofh: | 179 with open(output2, 'w') as ofh: |
167 print >> ofh, info_page.render() | 180 print >> ofh, info_page.render() |
168 | 181 |
169 sys.exit(0) | 182 sys.exit(0) |
170 | 183 |
171 |