Mercurial > repos > miller-lab > genome_diversity
comparison dpmix.py @ 12:4b6590dd7250
Uploaded
author | miller-lab |
---|---|
date | Wed, 12 Sep 2012 17:10:26 -0400 |
parents | |
children | 248b06e86022 |
comparison
equal
deleted
inserted
replaced
11:d4ec09e8079f | 12:4b6590dd7250 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import errno | |
4 import sys | |
5 import os | |
6 import subprocess | |
7 from Population import Population | |
8 import gd_composite | |
9 from dpmix_plot import make_dpmix_plot | |
10 from LocationFile import LocationFile | |
11 | |
12 ################################################################################ | |
13 | |
14 def mkdir_p(path): | |
15 try: | |
16 os.makedirs(path) | |
17 except OSError, e: | |
18 if e.errno <> errno.EEXIST: | |
19 raise | |
20 | |
21 def run_program(prog, args, stdout_file=None, space_to_tab=False): | |
22 #print "args: ", ' '.join(args) | |
23 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
24 (stdoutdata, stderrdata) = p.communicate() | |
25 rc = p.returncode | |
26 | |
27 if stdout_file is not None: | |
28 with open(stdout_file, 'w') as ofh: | |
29 lines = stdoutdata.split('\n') | |
30 for line in lines: | |
31 line = line.strip() | |
32 if line: | |
33 if space_to_tab: | |
34 line = line.replace(' ', '\t') | |
35 print >> ofh, line | |
36 | |
37 if rc != 0: | |
38 print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) | |
39 print >> sys.stderr, stderrdata | |
40 sys.exit(1) | |
41 | |
42 ################################################################################ | |
43 | |
44 if len(sys.argv) < 15: | |
45 print "usage" | |
46 sys.exit(1) | |
47 | |
48 input, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file = sys.argv[1:14] | |
49 individual_metadata = sys.argv[14:] | |
50 | |
51 chrom = 'all' | |
52 add_logs = '0' | |
53 | |
54 loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file) | |
55 location_file = LocationFile(loc_path) | |
56 heterochrom_path = location_file.get_values_if_exists(dbkey) | |
57 if heterochrom_path is None: | |
58 heterochrom_path = '/dev/null' | |
59 | |
60 population_list = [] | |
61 | |
62 p_total = Population() | |
63 p_total.from_tag_list(individual_metadata) | |
64 | |
65 ap1 = Population(name='Ancestral population 1') | |
66 ap1.from_population_file(ap1_input) | |
67 population_list.append(ap1) | |
68 if not p_total.is_superset(ap1): | |
69 print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table' | |
70 sys.exit(1) | |
71 | |
72 ap2 = Population(name='Ancestral population 2') | |
73 ap2.from_population_file(ap2_input) | |
74 population_list.append(ap2) | |
75 if not p_total.is_superset(ap2): | |
76 print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table' | |
77 sys.exit(1) | |
78 | |
79 p = Population(name='Potentially admixed') | |
80 p.from_population_file(p_input) | |
81 population_list.append(p) | |
82 if not p_total.is_superset(p): | |
83 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' | |
84 sys.exit(1) | |
85 | |
86 mkdir_p(output2_dir) | |
87 | |
88 ################################################################################ | |
89 # Create tabular file | |
90 ################################################################################ | |
91 | |
92 misc_file = os.path.join(output2_dir, 'misc.txt') | |
93 | |
94 prog = 'dpmix' | |
95 args = [ prog ] | |
96 args.append(input) | |
97 args.append(ref_column) | |
98 args.append(chrom) | |
99 args.append(data_source) | |
100 args.append(add_logs) | |
101 args.append(switch_penalty) | |
102 args.append(heterochrom_path) | |
103 args.append(misc_file) | |
104 | |
105 columns = ap1.column_list() | |
106 for column in columns: | |
107 args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name)) | |
108 | |
109 columns = ap2.column_list() | |
110 for column in columns: | |
111 args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name)) | |
112 | |
113 columns = p.column_list() | |
114 for column in columns: | |
115 args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name)) | |
116 | |
117 run_program(None, args, stdout_file=output, space_to_tab=True) | |
118 | |
119 ################################################################################ | |
120 # Create pdf file | |
121 ################################################################################ | |
122 | |
123 pdf_file = os.path.join(output2_dir, 'dpmix.pdf') | |
124 make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir) | |
125 | |
126 ################################################################################ | |
127 # Create html | |
128 ################################################################################ | |
129 | |
130 info_page = gd_composite.InfoPage() | |
131 info_page.set_title('dpmix Galaxy Composite Dataset') | |
132 | |
133 display_file = gd_composite.DisplayFile() | |
134 display_value = gd_composite.DisplayValue() | |
135 | |
136 out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file) | |
137 out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file) | |
138 | |
139 info_page.add_output_parameter(out_pdf) | |
140 info_page.add_output_parameter(out_misc) | |
141 | |
142 if data_source == '0': | |
143 data_source_value = 'sequence coverage' | |
144 elif data_source == '1': | |
145 data_source_value = 'estimated genotype' | |
146 | |
147 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) | |
148 in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value) | |
149 | |
150 info_page.add_input_parameter(in_data_source) | |
151 info_page.add_input_parameter(in_switch_penalty) | |
152 | |
153 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) | |
154 | |
155 info_page.add_misc(misc_populations) | |
156 | |
157 with open(output2, 'w') as ofh: | |
158 print >> ofh, info_page.render() | |
159 | |
160 sys.exit(0) | |
161 | |
162 |