comparison phylogenetic_tree.py @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 248b06e86022
children
comparison
equal deleted inserted replaced
26:91e835060ad2 27:8997f2ca8c7a
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import gd_util
3 import os 4 import os
4 import errno
5 import sys 5 import sys
6 import subprocess
7 import shutil
8 from Population import Population 6 from Population import Population
9 import gd_composite 7 import gd_composite
10 8
11 ################################################################################ 9 ################################################################################
12 10
13 def mkdir_p(path): 11 if len(sys.argv) != 12:
14 try: 12 gd_util.die('Usage')
15 os.makedirs(path)
16 except OSError, e:
17 if e.errno <> errno.EEXIST:
18 raise
19 13
20 ################################################################################ 14 input, output, extra_files_path, input_type, data_source_arg, minimum_coverage, minimum_quality, p1_input, dbkey, draw_tree_options, ind_arg = sys.argv[1:]
21
22 # <command interpreter="python">
23 # phylogenetic_tree.py "$input" "$output" "$output.files_path"
24 #
25 # #if $input_type.choice == '0'
26 # "gd_snp"
27 # #if $input_type.data_source.choice == '0'
28 # "sequence_coverage"
29 # "$input_type.data_source.minimum_coverage"
30 # "$input_type.data_source.minimum_quality"
31 # #else if $input_type.data_source.choice == '1'
32 # "estimated_genotype"
33 # #else if $input_type.choice == '1'
34 # "gd_genotype"
35 # #end if
36 #
37 # #if $individuals.choice == '0'
38 # "all_individuals"
39 # #else if $individuals.choice == '1'
40 # "$individuals.p1_input"
41 # #end if
42 #
43 # #if ((str($input.metadata.scaffold) == str($input.metadata.ref)) and (str($input.metadata.pos) == str($input.metadata.rPos))) or (str($include_reference) == '0')
44 # "none"
45 # #else
46 # "$input.metadata.dbkey"
47 # #end if
48 #
49 # #set $draw_tree_options = ''.join(str(x) for x in [$branch_style, $scale_style, $length_style, $layout_style])
50 # #if $draw_tree_options == ''
51 # ""
52 # #else
53 # "-$draw_tree_options"
54 # #end if
55 #
56 # #for $individual_name, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
57 # #set $arg = '%s:%s' % ($individual_col, $individual_name)
58 # "$arg"
59 # #end for
60 # </command>
61
62 ################################################################################
63
64 # if len(sys.argv) < 11:
65 # print >> sys.stderr, "Usage"
66 # sys.exit(1)
67 #
68 # input, p1_input, output, extra_files_path, minimum_coverage, minimum_quality, dbkey, data_source, draw_tree_options = sys.argv[1:10]
69 #
70 # individual_metadata = sys.argv[10:]
71 #
72 # # note: TEST THIS
73 # if dbkey in ['', '?', 'None']:
74 # dbkey = 'none'
75 #
76 # p_total = Population()
77 # p_total.from_tag_list(individual_metadata)
78
79 if len(sys.argv) < 5:
80 print >> sys.stderr, 'Usage'
81 sys.exit(1)
82
83 input, output, extra_files_path, input_type = sys.argv[1:5]
84 args = sys.argv[5:]
85
86 data_source = '1'
87 minimum_coverage = '0'
88 minimum_quality = '0'
89 15
90 if input_type == 'gd_snp': 16 if input_type == 'gd_snp':
91 data_source_arg = args.pop(0)
92 if data_source_arg == 'sequence_coverage': 17 if data_source_arg == 'sequence_coverage':
93 data_source = '0' 18 data_source = 0
94 minimum_coverage = args.pop(0)
95 minimum_quality = args.pop(0)
96 elif data_source_arg == 'estimated_genotype': 19 elif data_source_arg == 'estimated_genotype':
97 pass 20 data_source = 1
98 else: 21 else:
99 print >> sys.stderr, 'Unsupported data_source:', data_source_arg 22 gd_util.die('Unsupported data_source: {0}'.format(data_source_arg))
100 sys.exit(1)
101 elif input_type == 'gd_genotype': 23 elif input_type == 'gd_genotype':
102 pass 24 data_source = 1
25 minimum_coverage = 0
26 minimum_quality = 0
103 else: 27 else:
104 print >> sys.stderr, 'Unsupported input_type:', input_type 28 gd_util.die('Unsupported input_type:: {0}'.format(input_type))
105 sys.exit(1)
106
107 p1_input, dbkey, draw_tree_options = args[:3]
108 29
109 # note: TEST THIS 30 # note: TEST THIS
110 if dbkey in ['', '?', 'None']: 31 if dbkey in ['', '?', 'None']:
111 dbkey = 'none' 32 dbkey = 'none'
112 33
113 individual_metadata = args[3:] 34 p_total = Population()
35 p_total.from_wrapped_dict(ind_arg)
114 36
115 p_total = Population() 37 if p1_input == "all_individuals":
116 p_total.from_tag_list(individual_metadata) 38 tags = p_total.tag_list()
39 else:
40 p1 = Population()
41 p1.from_population_file(p1_input)
42 if not p_total.is_superset(p1):
43 gd_util.die('There is an individual in the population that is not in the SNP table')
44 tags = p1.tag_list()
117 45
118 ################################################################################ 46 ################################################################################
119 47
120 mkdir_p(extra_files_path) 48 gd_util.mkdir_p(extra_files_path)
121
122 ################################################################################
123
124 def run_program(prog, args, ofh):
125 #print "args: ", ' '.join(args)
126 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=ofh, stderr=subprocess.PIPE)
127 (stdoutdata, stderrdata) = p.communicate()
128 rc = p.returncode
129 ofh.close()
130
131 if rc != 0:
132 #print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
133 print >> sys.stderr, stderrdata
134 sys.exit(1)
135
136 ################################################################################
137
138 phylip_outfile = os.path.join(extra_files_path, 'distance_matrix.phylip') 49 phylip_outfile = os.path.join(extra_files_path, 'distance_matrix.phylip')
139 newick_outfile = os.path.join(extra_files_path, 'phylogenetic_tree.newick') 50 newick_outfile = os.path.join(extra_files_path, 'phylogenetic_tree.newick')
140 ps_outfile = 'tree.ps' 51 ps_outfile = 'tree.ps'
141 pdf_outfile = os.path.join(extra_files_path, 'tree.pdf') 52 pdf_outfile = os.path.join(extra_files_path, 'tree.pdf')
53 informative_snp_file = os.path.join(extra_files_path, 'informative_snps.txt')
54 mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt')
142 55
143 ################################################################################ 56 ################################################################################
144 57
145 informative_snp_file = os.path.join(extra_files_path, 'informative_snps.txt')
146 mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt')
147
148 prog = 'dist_mat' 58 prog = 'dist_mat'
149 59
150 args = [] 60 args = [ prog ]
151 args.append(prog)
152 args.append(input) 61 args.append(input)
153 args.append(minimum_coverage) 62 args.append(minimum_coverage)
154 args.append(minimum_quality) 63 args.append(minimum_quality)
155 args.append(dbkey) 64 args.append(dbkey)
156 args.append(data_source) 65 args.append(data_source)
157 args.append(informative_snp_file) 66 args.append(informative_snp_file)
158 args.append(mega_distance_matrix_file) 67 args.append(mega_distance_matrix_file)
159 68
160 if p1_input == "all_individuals":
161 tags = p_total.tag_list()
162 else:
163 p1 = Population()
164 p1.from_population_file(p1_input)
165 if not p_total.is_superset(p1):
166 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
167 sys.exit(1)
168 tags = p1.tag_list()
169
170 for tag in tags: 69 for tag in tags:
171 if input_type == 'gd_genotype': 70 if input_type == 'gd_genotype':
172 column, name = tag.split(':') 71 column, name = tag.split(':')
173 tag = '{0}:{1}'.format(int(column) - 2, name) 72 tag = '{0}:{1}'.format(int(column) - 2, name)
174 args.append(tag) 73 args.append(tag)
175 74
176 fh = open(phylip_outfile, 'w') 75 with open(phylip_outfile, 'w') as fh:
177 run_program(None, args, fh) 76 gd_util.run_program(prog, args, stdout=fh)
178 77
179 ################################################################################ 78 ################################################################################
180 79
181 prog = 'quicktree' 80 prog = 'quicktree'
182 81
183 args = [] 82 args = [ prog ]
184 args.append(prog)
185 args.append('-in') 83 args.append('-in')
186 args.append('m') 84 args.append('m')
187 args.append('-out') 85 args.append('-out')
188 args.append('t') 86 args.append('t')
189 args.append(phylip_outfile) 87 args.append(phylip_outfile)
190 88
191 fh = open(newick_outfile, 'w') 89 with open(newick_outfile, 'w') as fh:
192 run_program(None, args, fh) 90 gd_util.run_program(prog, args, stdout=fh)
193 91
194 ################################################################################ 92 ################################################################################
195 93
196 prog = 'draw_tree' 94 prog = 'draw_tree'
197 95
198 args = [] 96 args = [ prog ]
199 args.append(prog) 97
200 if draw_tree_options: 98 if draw_tree_options:
201 args.append(draw_tree_options) 99 args.append(draw_tree_options)
100
202 args.append(newick_outfile) 101 args.append(newick_outfile)
203 102
204 fh = open(ps_outfile, 'w') 103 with open(ps_outfile, 'w') as fh:
205 run_program(None, args, fh) 104 gd_util.run_program(prog, args, stdout=fh)
206 105
207 ################################################################################ 106 ################################################################################
208 107
209 prog = 'ps2pdf' 108 prog = 'ps2pdf'
210 109
211 args = [] 110 args = [ prog ]
212 args.append(prog)
213 args.append('-dPDFSETTINGS=/prepress') 111 args.append('-dPDFSETTINGS=/prepress')
214 args.append(ps_outfile) 112 args.append(ps_outfile)
215 args.append('-') 113 args.append(pdf_outfile)
216 114
217 fh = open(pdf_outfile, 'w') 115 gd_util.run_program(prog, args)
218 run_program(None, args, fh)
219
220 shutil.copyfile(pdf_outfile, output)
221 116
222 ################################################################################ 117 ################################################################################
223 118
224 info_page = gd_composite.InfoPage() 119 info_page = gd_composite.InfoPage()
225 info_page.set_title('Phylogenetic tree Galaxy Composite Dataset') 120 info_page.set_title('Phylogenetic tree Galaxy Composite Dataset')
246 if dbkey != 'none': 141 if dbkey != 'none':
247 include_ref_value = 'yes' 142 include_ref_value = 'yes'
248 143
249 in_include_ref = gd_composite.Parameter(description='Include reference sequence', value=include_ref_value, display_type=display_value) 144 in_include_ref = gd_composite.Parameter(description='Include reference sequence', value=include_ref_value, display_type=display_value)
250 145
251 if data_source == '0': 146 if data_source == 0:
252 data_source_value = 'sequence coverage' 147 data_source_value = 'sequence coverage'
253 elif data_source == '1': 148 elif data_source == 1:
254 data_source_value = 'estimated genotype' 149 data_source_value = 'estimated genotype'
255 150
256 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) 151 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
257 152
258 branch_type_value = 'square' 153 branch_type_value = 'square'