Mercurial > repos > miller-lab > genome_diversity
comparison phylogenetic_tree.py @ 12:4b6590dd7250
Uploaded
author | miller-lab |
---|---|
date | Wed, 12 Sep 2012 17:10:26 -0400 |
parents | 2c498d40ecde |
children | 248b06e86022 |
comparison
equal
deleted
inserted
replaced
11:d4ec09e8079f | 12:4b6590dd7250 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import os | |
4 import errno | |
5 import sys | |
6 import subprocess | |
7 import shutil | |
8 from Population import Population | |
9 import gd_composite | |
10 | |
11 ################################################################################ | |
12 | |
13 def mkdir_p(path): | |
14 try: | |
15 os.makedirs(path) | |
16 except OSError, e: | |
17 if e.errno <> errno.EEXIST: | |
18 raise | |
19 | |
20 ################################################################################ | |
21 | |
22 if len(sys.argv) < 11: | |
23 print >> sys.stderr, "Usage" | |
24 sys.exit(1) | |
25 | |
26 input, p1_input, output, extra_files_path, minimum_coverage, minimum_quality, dbkey, data_source, draw_tree_options = sys.argv[1:10] | |
27 | |
28 individual_metadata = sys.argv[10:] | |
29 | |
30 # note: TEST THIS | |
31 if dbkey in ['', '?', 'None']: | |
32 dbkey = 'none' | |
33 | |
34 p_total = Population() | |
35 p_total.from_tag_list(individual_metadata) | |
36 | |
37 | |
38 ################################################################################ | |
39 | |
40 mkdir_p(extra_files_path) | |
41 | |
42 ################################################################################ | |
43 | |
44 def run_program(prog, args, ofh): | |
45 #print "args: ", ' '.join(args) | |
46 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=ofh, stderr=subprocess.PIPE) | |
47 (stdoutdata, stderrdata) = p.communicate() | |
48 rc = p.returncode | |
49 ofh.close() | |
50 | |
51 if rc != 0: | |
52 #print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) | |
53 print >> sys.stderr, stderrdata | |
54 sys.exit(1) | |
55 | |
56 ################################################################################ | |
57 | |
58 phylip_outfile = os.path.join(extra_files_path, 'distance_matrix.phylip') | |
59 newick_outfile = os.path.join(extra_files_path, 'phylogenetic_tree.newick') | |
60 ps_outfile = 'tree.ps' | |
61 pdf_outfile = os.path.join(extra_files_path, 'tree.pdf') | |
62 | |
63 ################################################################################ | |
64 | |
65 informative_snp_file = os.path.join(extra_files_path, 'informative_snps.txt') | |
66 mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt') | |
67 | |
68 prog = 'dist_mat' | |
69 | |
70 args = [] | |
71 args.append(prog) | |
72 args.append(input) | |
73 args.append(minimum_coverage) | |
74 args.append(minimum_quality) | |
75 args.append(dbkey) | |
76 args.append(data_source) | |
77 args.append(informative_snp_file) | |
78 args.append(mega_distance_matrix_file) | |
79 | |
80 if p1_input == "all_individuals": | |
81 tags = p_total.tag_list() | |
82 else: | |
83 p1 = Population() | |
84 p1.from_population_file(p1_input) | |
85 if not p_total.is_superset(p1): | |
86 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' | |
87 sys.exit(1) | |
88 tags = p1.tag_list() | |
89 | |
90 for tag in tags: | |
91 args.append(tag) | |
92 | |
93 fh = open(phylip_outfile, 'w') | |
94 run_program(None, args, fh) | |
95 | |
96 ################################################################################ | |
97 | |
98 prog = 'quicktree' | |
99 | |
100 args = [] | |
101 args.append(prog) | |
102 args.append('-in') | |
103 args.append('m') | |
104 args.append('-out') | |
105 args.append('t') | |
106 args.append(phylip_outfile) | |
107 | |
108 fh = open(newick_outfile, 'w') | |
109 run_program(None, args, fh) | |
110 | |
111 ################################################################################ | |
112 | |
113 prog = 'draw_tree' | |
114 | |
115 args = [] | |
116 args.append(prog) | |
117 if draw_tree_options: | |
118 args.append(draw_tree_options) | |
119 args.append(newick_outfile) | |
120 | |
121 fh = open(ps_outfile, 'w') | |
122 run_program(None, args, fh) | |
123 | |
124 ################################################################################ | |
125 | |
126 prog = 'ps2pdf' | |
127 | |
128 args = [] | |
129 args.append(prog) | |
130 args.append('-dPDFSETTINGS=/prepress') | |
131 args.append(ps_outfile) | |
132 args.append('-') | |
133 | |
134 fh = open(pdf_outfile, 'w') | |
135 run_program(None, args, fh) | |
136 | |
137 shutil.copyfile(pdf_outfile, output) | |
138 | |
139 ################################################################################ | |
140 | |
141 info_page = gd_composite.InfoPage() | |
142 info_page.set_title('Phylogenetic tree Galaxy Composite Dataset') | |
143 | |
144 display_file = gd_composite.DisplayFile() | |
145 display_value = gd_composite.DisplayValue() | |
146 | |
147 out_pdf = gd_composite.Parameter(name='tree.pdf', value='tree.pdf', display_type=display_file) | |
148 out_newick = gd_composite.Parameter(value='phylogenetic_tree.newick', name='phylogenetic tree (newick)', display_type=display_file) | |
149 out_phylip = gd_composite.Parameter(value='distance_matrix.phylip', name='Phylip distance matrix', display_type=display_file) | |
150 out_mega = gd_composite.Parameter(value='mega_distance_matrix.txt', name='Mega distance matrix', display_type=display_file) | |
151 out_snps = gd_composite.Parameter(value='informative_snps.txt', name='informative SNPs', display_type=display_file) | |
152 | |
153 info_page.add_output_parameter(out_pdf) | |
154 info_page.add_output_parameter(out_newick) | |
155 info_page.add_output_parameter(out_phylip) | |
156 info_page.add_output_parameter(out_mega) | |
157 info_page.add_output_parameter(out_snps) | |
158 | |
159 in_min_cov = gd_composite.Parameter(description='Minimum coverage', value=minimum_coverage, display_type=display_value) | |
160 in_min_qual = gd_composite.Parameter(description='Minimum quality', value=minimum_quality, display_type=display_value) | |
161 | |
162 include_ref_value = 'no' | |
163 if dbkey != 'none': | |
164 include_ref_value = 'yes' | |
165 | |
166 in_include_ref = gd_composite.Parameter(description='Include reference sequence', value=include_ref_value, display_type=display_value) | |
167 | |
168 if data_source == '0': | |
169 data_source_value = 'sequence coverage' | |
170 elif data_source == '1': | |
171 data_source_value = 'estimated genotype' | |
172 | |
173 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) | |
174 | |
175 branch_type_value = 'square' | |
176 if 'd' in draw_tree_options: | |
177 branch_type_value = 'diagonal' | |
178 | |
179 in_branch_type = gd_composite.Parameter(description='Branch type', value=branch_type_value, display_type=display_value) | |
180 | |
181 branch_scale_value = 'yes' | |
182 if 's' in draw_tree_options: | |
183 branch_scale_value = 'no' | |
184 | |
185 in_branch_scale = gd_composite.Parameter(description='Draw branches to scale', value=branch_scale_value, display_type=display_value) | |
186 | |
187 branch_length_value = 'yes' | |
188 if 'b' in draw_tree_options: | |
189 branch_length_value = 'no' | |
190 | |
191 in_branch_length = gd_composite.Parameter(description='Show branch lengths', value=branch_length_value, display_type=display_value) | |
192 | |
193 tree_layout_value = 'horizontal' | |
194 if 'v' in draw_tree_options: | |
195 tree_layout_value = 'vertical' | |
196 | |
197 in_tree_layout = gd_composite.Parameter(description='Tree layout', value=tree_layout_value, display_type=display_value) | |
198 | |
199 info_page.add_input_parameter(in_min_cov) | |
200 info_page.add_input_parameter(in_min_qual) | |
201 info_page.add_input_parameter(in_include_ref) | |
202 info_page.add_input_parameter(in_data_source) | |
203 info_page.add_input_parameter(in_branch_type) | |
204 info_page.add_input_parameter(in_branch_scale) | |
205 info_page.add_input_parameter(in_branch_length) | |
206 info_page.add_input_parameter(in_tree_layout) | |
207 | |
208 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList()) | |
209 | |
210 info_page.add_misc(misc_individuals) | |
211 | |
212 | |
213 with open(output, 'w') as ofh: | |
214 print >> ofh, info_page.render() | |
215 | |
216 ################################################################################ | |
217 | |
218 sys.exit(0) | |
219 |