0
|
1 #!/usr/bin/env python
|
|
"""
Runs RAxML on a sequence file.
For use with RAxML version 7.3.0

Example RAxML invocations:
    raxmlHPC-PTHREADS-SSE3 -T 2 -f c -m GTRGAMMA -F -s "/Users/om/Downloads/rana.phy" -n rana_red -w "/Users/om/Downloads/" 0
    raxmlHPC-PTHREADS-SSE3 -T 2 -m GTRGAMMA -n test -p 323483 -s reduced.phy

Command template used by the Galaxy tool XML:
    raxmlHPC-HYBRID-SSE3 -T 4 -f ${search_algorithm} -m ${smodel} -N ${repeats} -o "${html_outfile.files_path}" -s "$input1"
"""
|
|
11 import os, shutil, subprocess, sys, optparse, fnmatch, glob
|
|
12
|
|
def stop_err(msg):
    """Write *msg* (plus a newline) to stderr and terminate the script.

    The process exits with status 1 so that callers inspecting the exit
    code, and not only stderr, see the failure.

    Raises:
        SystemExit: always.
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
|
|
16
|
|
def getint(name):
    """Return the run number that follows 'RUN.' in a file name.

    Used as a sort key for RAxML per-run output files such as
    'RAxML_log.galaxy.RUN.12'.

    Only the leading digits after the first 'RUN.' are used, so a name
    with further suffixes after the run number still yields that number.
    Names with no 'RUN.' marker, or with no digits right after it, yield
    -1 so that every key is an int and such names sort first.
    """
    suffix = name.partition('RUN.')[2]
    digits = ''
    for char in suffix:
        if not char.isdigit():
            break
        digits += char
    if not digits:
        return -1
    return int(digits)
|
|
22
|
|
def _partition_sort_key(name):
    """Return a sort key ordering output names by run, then partition number."""
    run = name.partition('RUN.')[2].partition('.')[0]
    part = name.partition('PARTITION.')[2].partition('.')[0]
    return (
        int(run) if run.isdigit() else -1,
        int(part) if part.isdigit() else -1,
        name,
    )


def _concatenate_outputs(filenames, pattern, merged_name):
    """Copy each file in *filenames* matching *pattern* into *merged_name*.

    Every copied file is preceded by a line holding its own name.
    """
    with open(merged_name, 'w') as merged:
        for filename in filenames:
            if fnmatch.fnmatch(filename, pattern):
                merged.write("%s\n" % filename)
                with open(filename, 'r') as part:
                    shutil.copyfileobj(part, merged)


def _merge_partition_outputs(pattern, merged_name):
    """Merge the per-partition files matching *pattern* into *merged_name*.

    When no file matches, *merged_name* receives a one-line notice instead.
    """
    files = glob.glob(pattern)
    if files:
        files.sort(key=_partition_sort_key)
        _concatenate_outputs(files, pattern, merged_name)
    else:
        with open(merged_name, 'w') as merged:
            merged.write("No partition files were produced.\n")


def __main__():
    """Parse the wrapper options, run RAxML and post-process its outputs.

    Steps:
      1. Parse the command line (sys.argv) with optparse.
      2. Build the RAxML command line; the run name is always "galaxy".
      3. Run the command through the shell and relay its stdout/stderr.
      4. When several runs were requested without bootstrapping, merge the
         per-run log, parsimony-tree and result files into single files.
      5. When a multiple-model file was given, merge per-partition files.
      6. Append a dump of the parsed options to RAxML_info.galaxy.

    All output files are read from and written to the current directory.
    Missing --binary, --threads or --source ends the script through
    parser.error() (usage message, exit status 2).
    """
    usage = "usage: %prog -T <threads> -s <input> -n <output> -m <model> [optional arguments]"

    # Parse the primary wrapper's command line options.
    # Each entry: (long option name == dest, optparse type or None for a
    # store_true switch, help text).  Trailing comments give the RAxML flag.
    option_table = (
        # raxml binary name, hardcoded in the xml file
        ("binary", "string", "Command to run"),
        ("weightfile", "string", "Column weight file"),  # -a
        ("secondary_structure_model", "string", "Secondary structure model"),  # -A
        ("bootseed", "int", "Bootstrap random number seed"),  # -b
        ("numofcats", "int", "Number of distinct rate categories"),  # -c
        ("search_complete_random_tree", None, "Search with a complete random starting tree"),  # -d
        ("ml_search_convergence", None, "ML search onvergence criterion"),  # -D
        ("model_opt_precision", "float", "Model Optimization Precision (-e)"),  # -e
        ("excludefile", "string", "Exclude File Name"),  # -E
        ("search_algorithm", "string", "Search Algorithm"),  # -f
        ("save_memory_cat_model", None, "Save memory under CAT and GTRGAMMA models"),  # -F
        ("groupingfile", "string", "Grouping File Name"),  # -g
        ("enable_evol_heuristics", None, "Enable evol algo heuristics"),  # -G
        ("initial_rearrangement_setting", "int", "Initial Rearrangement Setting"),  # -i
        ("posterior_bootstopping_analysis", "string", "Posterior bootstopping analysis"),  # -I
        ("majority_rule_consensus", "string", "Majority rule consensus"),  # -J
        ("print_branch_lengths", None, "Print branch lengths"),  # -k
        ("multistate_sub_model", "string", "Multistate substitution model"),  # -K
        ("model_type", "string", "Model Type"),  # -m
        ("base_model", "string", "Base Model"),
        ("aa_empirical_freq", None, "Use AA Empirical base frequences"),
        ("aa_search_matrix", "string", "AA Search Matrix"),
        ("name", "string", "Run Name"),  # -n
        ("number_of_runs", "int", "Number of alternative runs"),  # -N/#
        ("number_of_runs_bootstop", "string", "Number of alternative runs based on the bootstop criteria"),
        ("estimate_individual_branch_lengths", None, "Estimate individual branch lengths"),  # -M
        ("outgroup_name", "string", "Outgroup Name"),  # -o
        ("disable_undetermined_seq_check", None, "Disable undetermined sequence check"),  # -O
        ("random_seed", "int", "Random Number Seed"),  # -p
        ("external_protein_model", "string", "External Protein Model"),  # -P
        ("multiple_model", "string", "Multiple Model File"),  # -q
        ("constraint_file", "string", "Constraint File"),  # -r
        ("bin_model_parameter_file", "string", "Binary Model Parameter File"),  # -R
        ("source", "string", "Input file"),  # -s
        ("secondary_structure_file", "string", "Secondary structure file"),  # -S
        ("starting_tree", "string", "Starting Tree"),  # -t
        ("threads", "int", "Number of threads to use"),  # -T
        ("use_median_approximation", None, "Use median approximation"),  # -u
        ("save_memory_gappy_alignments", None, "Save memory in large gapped alignments"),  # -U
        ("disable_rate_heterogeneity", None, "Disable rate heterogeneity"),  # -V
        ("sliding_window_size", "string", "Sliding window size"),  # -W
        ("rapid_bootstrap_random_seed", "int", "Rapid Boostrap Random Seed"),  # -x
        ("parsimony_starting_tree_only", None, "Generate a parsimony starting tree only"),  # -y
        ("file_multiple_trees", "string", "Multiple Trees File"),  # -z
    )
    parser = optparse.OptionParser(usage=usage)
    for opt_name, opt_type, opt_help in option_table:
        if opt_type is None:
            parser.add_option("--" + opt_name, action="store_true",
                              dest=opt_name, help=opt_help)
        else:
            parser.add_option("--" + opt_name, action="store", type=opt_type,
                              dest=opt_name, help=opt_help)

    options, _ = parser.parse_args()

    # Fail with a usage message instead of a TypeError further down.
    if not options.binary:
        parser.error("--binary is required")
    if options.threads is None:
        parser.error("--threads is required")
    if not options.source:
        parser.error("--source is required")

    cmd = []

    def add(template, value):
        """Append *template* (filled with *value* if it has a placeholder) when *value* is truthy."""
        if value:
            cmd.append(template % value if '%' in template else template)

    # Required parameters
    cmd.append(options.binary)
    cmd.append("-T %d" % options.threads)
    cmd.append("-s %s" % options.source)
    # Hardcode to "galaxy" to simplify the output part of the wrapper;
    # --name is accepted but deliberately not used.
    cmd.append("-n galaxy")

    # Model (-m), plus the external protein model (-P) for amino acids
    base_model = options.base_model
    if options.model_type == 'aminoacid':
        if options.aa_empirical_freq:
            model = "-m %s%s%s" % (base_model, options.aa_search_matrix, 'F')
        else:
            model = "-m %s%s" % (base_model, options.aa_search_matrix)
        add("-P %s", options.external_protein_model)
    else:
        model = "-m %s" % base_model
    cmd.append(model)
    # Compare the bare model name: `model` already carries the "-m " prefix.
    if base_model == "GTRCAT":
        add("-c %d", options.numofcats)

    # Optional parameters
    add("-N %s", options.number_of_runs_bootstop)
    add("-N %d", options.number_of_runs)
    add("-a %s", options.weightfile)
    add("-A %s", options.secondary_structure_model)
    add("-b %d", options.bootseed)
    # -C - doesn't work in pthreads version, skipped
    add("-d", options.search_complete_random_tree)
    add("-D", options.ml_search_convergence)
    add("-e %f", options.model_opt_precision)
    add("-E %s", options.excludefile)
    add("-f %s", options.search_algorithm)
    add("-F", options.save_memory_cat_model)
    add("-g %s", options.groupingfile)
    # The option is a boolean switch, so this always renders "-G 1.000000".
    add("-G %f", options.enable_evol_heuristics)
    add("-i %s", options.initial_rearrangement_setting)
    add("-I %s", options.posterior_bootstopping_analysis)
    add("-J %s", options.majority_rule_consensus)
    add("-k", options.print_branch_lengths)
    add("-K %s", options.multistate_sub_model)
    add("-M", options.estimate_individual_branch_lengths)
    add("-o %s", options.outgroup_name)
    add("-O", options.disable_undetermined_seq_check)
    add("-p %d", options.random_seed)
    add("-q %s", options.multiple_model)
    add("-r %s", options.constraint_file)
    if options.bin_model_parameter_file:
        # Needs testing. Is the hardcoded name or the real path needed?
        # Both are provided: a symlink under the hardcoded name and the
        # real path on the command line.
        os.symlink(options.bin_model_parameter_file,
                   "RAxML_binaryModelParameters.galaxy")
        cmd.append("-R %s" % options.bin_model_parameter_file)
    add("-S %s", options.secondary_structure_file)
    add("-t %s", options.starting_tree)
    add("-u", options.use_median_approximation)
    add("-U", options.save_memory_gappy_alignments)
    add("-V", options.disable_rate_heterogeneity)
    # sliding_window_size is parsed as a string, so it must not use %d.
    add("-W %s", options.sliding_window_size)
    add("-x %d", options.rapid_bootstrap_random_seed)
    add("-y", options.parsimony_starting_tree_only)
    add("-z %s", options.file_multiple_trees)

    sys.stdout.write("cmd list:  %s \n\n" % (cmd,))

    # NOTE(review): option values are joined unquoted and run through the
    # shell, so values containing spaces or shell metacharacters will be
    # re-interpreted by the shell.
    full_cmd = " ".join(cmd)
    sys.stdout.write("Command string: %s\n" % full_cmd)

    try:
        # universal_newlines=True yields text (not bytes) on Python 3.
        proc = subprocess.Popen(args=full_cmd, shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
    except Exception as err:
        sys.stderr.write("Error invoking command: \n%s\n\n%s\n" % (cmd, err))
        sys.exit(1)
    stdout, stderr = proc.communicate()
    return_code = proc.returncode
    if return_code:
        # NOTE(review): the failure is only reported on stderr; the script
        # carries on and does not propagate the return code.
        sys.stdout.write(stdout)
        sys.stderr.write(stderr)
        sys.stderr.write("Return error code %i from command:\n" % return_code)
        sys.stderr.write("%s\n" % cmd)
    else:
        sys.stdout.write(stdout)
        sys.stdout.write(stderr)

    # Multiple runs - concatenate (only when no bootstrapping was requested)
    if (options.number_of_runs and not options.bootseed
            and not options.rapid_bootstrap_random_seed):
        runfiles = glob.glob('RAxML*RUN*')
        runfiles.sort(key=getint)
        # Logs
        _concatenate_outputs(runfiles, 'RAxML_log.galaxy.RUN.*',
                             'RAxML_log.galaxy')
        # Parsimony Trees
        _concatenate_outputs(runfiles, 'RAxML_parsimonyTree.galaxy.RUN.*',
                             'RAxML_parsimonyTree.galaxy')
        # Results
        _concatenate_outputs(runfiles, 'RAxML_result.galaxy.RUN.*',
                             'RAxML_result.galaxy')

    # Multiple Model Partition Files
    if options.multiple_model:
        # Best Tree Partitions
        _merge_partition_outputs('RAxML_bestTree.galaxy.PARTITION.*',
                                 'RAxML_bestTreePartitions.galaxy')
        # Result Partitions
        _merge_partition_outputs('RAxML_result.galaxy.PARTITION.*',
                                 'RAxML_resultPartitions.galaxy')

    # DEBUG options
    with open('RAxML_info.galaxy', 'a') as infof:
        infof.write('\nOM: CLI options DEBUG START:\n')
        infof.write(repr(options))
        infof.write('\nOM: CLI options DEBUG END\n')
|
|
364
|
|
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
|