Mercurial > repos > miller-lab > genome_diversity
comparison coverage_distributions.py @ 12:4b6590dd7250
Uploaded
author | miller-lab |
---|---|
date | Wed, 12 Sep 2012 17:10:26 -0400 |
parents | 2c498d40ecde |
children | 8997f2ca8c7a |
comparison
equal
deleted
inserted
replaced
11:d4ec09e8079f | 12:4b6590dd7250 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import os | |
4 import errno | |
5 import sys | |
6 import shutil | |
7 import subprocess | |
8 from Population import Population | |
9 import gd_composite | |
10 | |
11 ################################################################################ | |
12 | |
13 def mkdir_p(path): | |
14 try: | |
15 os.makedirs(path) | |
16 except OSError, e: | |
17 if e.errno <> errno.EEXIST: | |
18 raise | |
19 | |
20 ################################################################################ | |
21 | |
22 if len(sys.argv) < 7: | |
23 print >> sys.stderr, "Usage" | |
24 sys.exit(1) | |
25 | |
26 input, data_source, output, extra_files_path = sys.argv[1:5] | |
27 | |
28 individual_metadata = [] | |
29 population_info = [] | |
30 p1_input = None | |
31 all_individuals = False | |
32 | |
33 for arg in sys.argv[5:]: | |
34 if arg == 'all_individuals': | |
35 all_individuals = True | |
36 elif len(arg) > 12 and arg[:12] == 'individuals:': | |
37 p1_input = arg[12:] | |
38 elif len(arg) > 11: | |
39 if arg[:11] == 'population:': | |
40 file, name = arg[11:].split(':', 1) | |
41 population_info.append((file, name)) | |
42 elif arg[:11] == 'individual:': | |
43 individual_metadata.append(arg[11:]) | |
44 | |
45 p_total = Population() | |
46 p_total.from_tag_list(individual_metadata) | |
47 | |
48 ################################################################################ | |
49 | |
50 mkdir_p(extra_files_path) | |
51 | |
52 ################################################################################ | |
53 | |
54 prog = 'coverage' | |
55 | |
56 args = [] | |
57 args.append(prog) | |
58 args.append(input) | |
59 args.append(data_source) | |
60 | |
61 user_coverage_file = os.path.join(extra_files_path, 'coverage.txt') | |
62 args.append(user_coverage_file) | |
63 | |
64 population_list = [] | |
65 | |
66 if all_individuals: | |
67 tags = p_total.tag_list() | |
68 elif p1_input is not None: | |
69 p1 = Population() | |
70 this_pop = Population() | |
71 this_pop.from_population_file(p1_input) | |
72 population_list.append(this_pop) | |
73 p1.from_population_file(p1_input) | |
74 if not p_total.is_superset(p1): | |
75 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' | |
76 sys.exit(1) | |
77 tags = p1.tag_list() | |
78 else: | |
79 tags = [] | |
80 for population_file, population_name in population_info: | |
81 population = Population() | |
82 this_pop = Population() | |
83 this_pop.from_population_file(population_file) | |
84 population_list.append(this_pop) | |
85 population.from_population_file(population_file) | |
86 if not p_total.is_superset(population): | |
87 print >> sys.stderr, 'There is an individual in the {} population that is not in the SNP table'.format(population_name) | |
88 sys.exit(1) | |
89 columns = population.column_list() | |
90 for column in columns: | |
91 tags.append('{0}:{1}'.format(column, population_name)) | |
92 | |
93 for tag in tags: | |
94 args.append(tag) | |
95 | |
96 ## text output | |
97 coverage_file = 'coverage.txt' | |
98 fh = open(coverage_file, 'w') | |
99 #print "args:", ' '.join(args) | |
100 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr) | |
101 rc = p.wait() | |
102 fh.close() | |
103 | |
104 ## graphical output | |
105 fh = open(coverage_file) | |
106 coverage2_file = 'coverage2.txt' | |
107 ofh = open(coverage2_file, 'w') | |
108 | |
109 for line in fh: | |
110 line = line.rstrip('\r\n') | |
111 elems = line.split('\t') | |
112 name = elems.pop(0) | |
113 values = [ elems[0] ] | |
114 for idx in range(1, len(elems)): | |
115 val = str(float(elems[idx]) - float(elems[idx-1])) | |
116 values.append(val) | |
117 print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values)) | |
118 | |
119 fh.close() | |
120 ofh.close() | |
121 | |
122 ################################################################################ | |
123 | |
124 prog = 'R' | |
125 | |
126 args = [] | |
127 args.append(prog) | |
128 args.append('--vanilla') | |
129 args.append('--quiet') | |
130 | |
131 _realpath = os.path.realpath(__file__) | |
132 _script_dir = os.path.dirname(_realpath) | |
133 r_script_file = os.path.join(_script_dir, 'coverage_plot.r') | |
134 | |
135 ifh = open(r_script_file) | |
136 ofh = open('/dev/null', 'w') | |
137 #print "args:", ' '.join(args) | |
138 p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None) | |
139 rc = p.wait() | |
140 ifh.close() | |
141 ofh.close() | |
142 | |
143 pdf_file = os.path.join(extra_files_path, 'coverage.pdf') | |
144 shutil.copy2('coverage.pdf', pdf_file) | |
145 os.remove('coverage.pdf') | |
146 os.remove(coverage2_file) | |
147 | |
148 ################################################################################ | |
149 | |
150 info_page = gd_composite.InfoPage() | |
151 info_page.set_title('Coverage distributions Galaxy Composite Dataset') | |
152 | |
153 display_file = gd_composite.DisplayFile() | |
154 display_value = gd_composite.DisplayValue() | |
155 | |
156 out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file) | |
157 out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file) | |
158 | |
159 info_page.add_output_parameter(out_pdf) | |
160 info_page.add_output_parameter(out_txt) | |
161 | |
162 | |
163 if data_source == '0': | |
164 data_source_value = 'sequence coverage' | |
165 elif data_source == '1': | |
166 data_source_value = 'estimated genotype' | |
167 | |
168 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) | |
169 | |
170 info_page.add_input_parameter(in_data_source) | |
171 | |
172 if population_list: | |
173 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) | |
174 info_page.add_misc(misc_populations) | |
175 else: | |
176 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList()) | |
177 info_page.add_misc(misc_individuals) | |
178 | |
179 | |
180 | |
181 | |
182 with open (output, 'w') as ofh: | |
183 print >> ofh, info_page.render() | |
184 | |
185 | |
186 sys.exit(0) | |
187 |