comparison coverage_distributions.py @ 12:4b6590dd7250

Uploaded
author miller-lab
date Wed, 12 Sep 2012 17:10:26 -0400
parents 2c498d40ecde
children 8997f2ca8c7a
comparison
equal deleted inserted replaced
11:d4ec09e8079f 12:4b6590dd7250
1 #!/usr/bin/env python
2
3 import os
4 import errno
5 import sys
6 import shutil
7 import subprocess
8 from Population import Population
9 import gd_composite
10
11 ################################################################################
12
13 def mkdir_p(path):
14 try:
15 os.makedirs(path)
16 except OSError, e:
17 if e.errno <> errno.EEXIST:
18 raise
19
20 ################################################################################
21
22 if len(sys.argv) < 7:
23 print >> sys.stderr, "Usage"
24 sys.exit(1)
25
26 input, data_source, output, extra_files_path = sys.argv[1:5]
27
28 individual_metadata = []
29 population_info = []
30 p1_input = None
31 all_individuals = False
32
33 for arg in sys.argv[5:]:
34 if arg == 'all_individuals':
35 all_individuals = True
36 elif len(arg) > 12 and arg[:12] == 'individuals:':
37 p1_input = arg[12:]
38 elif len(arg) > 11:
39 if arg[:11] == 'population:':
40 file, name = arg[11:].split(':', 1)
41 population_info.append((file, name))
42 elif arg[:11] == 'individual:':
43 individual_metadata.append(arg[11:])
44
45 p_total = Population()
46 p_total.from_tag_list(individual_metadata)
47
48 ################################################################################
49
50 mkdir_p(extra_files_path)
51
52 ################################################################################
53
54 prog = 'coverage'
55
56 args = []
57 args.append(prog)
58 args.append(input)
59 args.append(data_source)
60
61 user_coverage_file = os.path.join(extra_files_path, 'coverage.txt')
62 args.append(user_coverage_file)
63
64 population_list = []
65
66 if all_individuals:
67 tags = p_total.tag_list()
68 elif p1_input is not None:
69 p1 = Population()
70 this_pop = Population()
71 this_pop.from_population_file(p1_input)
72 population_list.append(this_pop)
73 p1.from_population_file(p1_input)
74 if not p_total.is_superset(p1):
75 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
76 sys.exit(1)
77 tags = p1.tag_list()
78 else:
79 tags = []
80 for population_file, population_name in population_info:
81 population = Population()
82 this_pop = Population()
83 this_pop.from_population_file(population_file)
84 population_list.append(this_pop)
85 population.from_population_file(population_file)
86 if not p_total.is_superset(population):
87 print >> sys.stderr, 'There is an individual in the {} population that is not in the SNP table'.format(population_name)
88 sys.exit(1)
89 columns = population.column_list()
90 for column in columns:
91 tags.append('{0}:{1}'.format(column, population_name))
92
93 for tag in tags:
94 args.append(tag)
95
96 ## text output
97 coverage_file = 'coverage.txt'
98 fh = open(coverage_file, 'w')
99 #print "args:", ' '.join(args)
100 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
101 rc = p.wait()
102 fh.close()
103
104 ## graphical output
105 fh = open(coverage_file)
106 coverage2_file = 'coverage2.txt'
107 ofh = open(coverage2_file, 'w')
108
109 for line in fh:
110 line = line.rstrip('\r\n')
111 elems = line.split('\t')
112 name = elems.pop(0)
113 values = [ elems[0] ]
114 for idx in range(1, len(elems)):
115 val = str(float(elems[idx]) - float(elems[idx-1]))
116 values.append(val)
117 print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))
118
119 fh.close()
120 ofh.close()
121
122 ################################################################################
123
124 prog = 'R'
125
126 args = []
127 args.append(prog)
128 args.append('--vanilla')
129 args.append('--quiet')
130
131 _realpath = os.path.realpath(__file__)
132 _script_dir = os.path.dirname(_realpath)
133 r_script_file = os.path.join(_script_dir, 'coverage_plot.r')
134
135 ifh = open(r_script_file)
136 ofh = open('/dev/null', 'w')
137 #print "args:", ' '.join(args)
138 p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
139 rc = p.wait()
140 ifh.close()
141 ofh.close()
142
143 pdf_file = os.path.join(extra_files_path, 'coverage.pdf')
144 shutil.copy2('coverage.pdf', pdf_file)
145 os.remove('coverage.pdf')
146 os.remove(coverage2_file)
147
148 ################################################################################
149
150 info_page = gd_composite.InfoPage()
151 info_page.set_title('Coverage distributions Galaxy Composite Dataset')
152
153 display_file = gd_composite.DisplayFile()
154 display_value = gd_composite.DisplayValue()
155
156 out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file)
157 out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file)
158
159 info_page.add_output_parameter(out_pdf)
160 info_page.add_output_parameter(out_txt)
161
162
163 if data_source == '0':
164 data_source_value = 'sequence coverage'
165 elif data_source == '1':
166 data_source_value = 'estimated genotype'
167
168 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
169
170 info_page.add_input_parameter(in_data_source)
171
172 if population_list:
173 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
174 info_page.add_misc(misc_populations)
175 else:
176 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
177 info_page.add_misc(misc_individuals)
178
179
180
181
182 with open (output, 'w') as ofh:
183 print >> ofh, info_page.render()
184
185
186 sys.exit(0)
187