Mercurial > repos > malex > raxml
comparison raxml.py @ 0:d95d87f40f47 draft default tip
Uploaded the first beta.
| author | malex |
|---|---|
| date | Tue, 02 Oct 2012 17:35:46 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d95d87f40f47 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Runs RAxML on a sequence file. | |
| 4 For use with RAxML version 7.3.0 | |
| 5 | |
| 6 usage: | |
| 7 <!-- raxmlHPC-PTHREADS-SSE3 -T 2 -f c -m GTRGAMMA -F -s "/Users/om/Downloads/rana.phy" -n rana_red -w "/Users/om/Downloads/" 0 | |
| 8 ## raxmlHPC-PTHREADS-SSE3 -T 2 -m GTRGAMMA -n test -p 323483 -s reduced.phy | |
| 9 command>raxmlHPC-HYBRID-SSE3 -T 4 -f ${search_algorithm} -m ${smodel} -N ${repeats} -o "${html_outfile.files_path}" -s "$input1" | |
| 10 """ | |
| 11 import os, shutil, subprocess, sys, optparse, fnmatch, glob | |
| 12 | |
| 13 def stop_err(msg): | |
| 14 sys.stderr.write("%s\n" % msg) | |
| 15 sys.exit() | |
| 16 | |
| 17 def getint(name): | |
| 18 basename = name.partition('RUN.') | |
| 19 if basename[2] != '': | |
| 20 num = basename[2] | |
| 21 return int(num) | |
| 22 | |
| 23 def __main__(): | |
| 24 usage = "usage: %prog -T <threads> -s <input> -n <output> -m <model> [optional arguments]" | |
| 25 | |
| 26 # Parse the primary wrapper's command line options | |
| 27 parser = optparse.OptionParser(usage = usage) | |
| 28 # raxml binary name, hardcoded in the xml file | |
| 29 parser.add_option("--binary", action="store", type="string", dest="binary", help="Command to run") | |
| 30 # (-a) | |
| 31 parser.add_option("--weightfile", action="store", type="string", dest="weightfile", help="Column weight file") | |
| 32 # (-A) | |
| 33 parser.add_option("--secondary_structure_model", action="store", type="string", dest="secondary_structure_model", help="Secondary structure model") | |
| 34 # (-b) | |
| 35 parser.add_option("--bootseed", action="store", type="int", dest="bootseed", help="Bootstrap random number seed") | |
| 36 # (-c) | |
| 37 parser.add_option("--numofcats", action="store", type="int", dest="numofcats", help="Number of distinct rate categories") | |
| 38 # (-d) | |
| 39 parser.add_option("--search_complete_random_tree", action="store_true", dest="search_complete_random_tree", help="Search with a complete random starting tree") | |
| 40 # (-D) | |
| 41 parser.add_option("--ml_search_convergence", action="store_true", dest="ml_search_convergence", help="ML search onvergence criterion") | |
| 42 # (-e) | |
| 43 parser.add_option("--model_opt_precision", action="store", type="float", dest="model_opt_precision", help="Model Optimization Precision (-e)") | |
| 44 # (-E) | |
| 45 parser.add_option("--excludefile", action="store", type="string", dest="excludefile", help="Exclude File Name") | |
| 46 # (-f) | |
| 47 parser.add_option("--search_algorithm", action="store", type="string", dest="search_algorithm", help="Search Algorithm") | |
| 48 # (-F) | |
| 49 parser.add_option("--save_memory_cat_model", action="store_true", dest="save_memory_cat_model", help="Save memory under CAT and GTRGAMMA models") | |
| 50 # (-g) | |
| 51 parser.add_option("--groupingfile", action="store", type="string", dest="groupingfile", help="Grouping File Name") | |
| 52 # (-G) | |
| 53 parser.add_option("--enable_evol_heuristics", action="store_true", dest="enable_evol_heuristics", help="Enable evol algo heuristics") | |
| 54 # (-i) | |
| 55 parser.add_option("--initial_rearrangement_setting", action="store", type="int", dest="initial_rearrangement_setting", help="Initial Rearrangement Setting") | |
| 56 # (-I) | |
| 57 parser.add_option("--posterior_bootstopping_analysis", action="store", type="string", dest="posterior_bootstopping_analysis", help="Posterior bootstopping analysis") | |
| 58 # (-J) | |
| 59 parser.add_option("--majority_rule_consensus", action="store", type="string", dest="majority_rule_consensus", help="Majority rule consensus") | |
| 60 # (-k) | |
| 61 parser.add_option("--print_branch_lengths", action="store_true", dest="print_branch_lengths", help="Print branch lengths") | |
| 62 # (-K) | |
| 63 parser.add_option("--multistate_sub_model", action="store", type="string", dest="multistate_sub_model", help="Multistate substitution model") | |
| 64 # (-m) | |
| 65 parser.add_option("--model_type", action="store", type="string", dest="model_type", help="Model Type") | |
| 66 parser.add_option("--base_model", action="store", type="string", dest="base_model", help="Base Model") | |
| 67 parser.add_option("--aa_empirical_freq", action="store_true", dest="aa_empirical_freq", help="Use AA Empirical base frequences") | |
| 68 parser.add_option("--aa_search_matrix", action="store", type="string", dest="aa_search_matrix", help="AA Search Matrix") | |
| 69 # (-n) | |
| 70 parser.add_option("--name", action="store", type="string", dest="name", help="Run Name") | |
| 71 # (-N/#) | |
| 72 parser.add_option("--number_of_runs", action="store", type="int", dest="number_of_runs", help="Number of alternative runs") | |
| 73 parser.add_option("--number_of_runs_bootstop", action="store", type="string", dest="number_of_runs_bootstop", help="Number of alternative runs based on the bootstop criteria") | |
| 74 # (-M) | |
| 75 parser.add_option("--estimate_individual_branch_lengths", action="store_true", dest="estimate_individual_branch_lengths", help="Estimate individual branch lengths") | |
| 76 # (-o) | |
| 77 parser.add_option("--outgroup_name", action="store", type="string", dest="outgroup_name", help="Outgroup Name") | |
| 78 # (-O) | |
| 79 parser.add_option("--disable_undetermined_seq_check", action="store_true", dest="disable_undetermined_seq_check", help="Disable undetermined sequence check") | |
| 80 # (-p) | |
| 81 parser.add_option("--random_seed", action="store", type="int", dest="random_seed", help="Random Number Seed") | |
| 82 # (-P) | |
| 83 parser.add_option("--external_protein_model", action="store", type="string", dest="external_protein_model", help="External Protein Model") | |
| 84 # (-q) | |
| 85 parser.add_option("--multiple_model", action="store", type="string", dest="multiple_model", help="Multiple Model File") | |
| 86 # (-r) | |
| 87 parser.add_option("--constraint_file", action="store", type="string", dest="constraint_file", help="Constraint File") | |
| 88 # (-R) | |
| 89 parser.add_option("--bin_model_parameter_file", action="store", type="string", dest="bin_model_parameter_file", help="Constraint File") | |
| 90 # (-s) | |
| 91 parser.add_option("--source", action="store", type="string", dest="source", help="Input file") | |
| 92 # (-S) | |
| 93 parser.add_option("--secondary_structure_file", action="store", type="string", dest="secondary_structure_file", help="Secondary structure file") | |
| 94 # (-t) | |
| 95 parser.add_option("--starting_tree", action="store", type="string", dest="starting_tree", help="Starting Tree") | |
| 96 # (-T) | |
| 97 parser.add_option("--threads", action="store", type="int", dest="threads", help="Number of threads to use") | |
| 98 # (-u) | |
| 99 parser.add_option("--use_median_approximation", action="store_true", dest="use_median_approximation", help="Use median approximation") | |
| 100 # (-U) | |
| 101 parser.add_option("--save_memory_gappy_alignments", action="store_true", dest="save_memory_gappy_alignments", help="Save memory in large gapped alignments") | |
| 102 # (-V) | |
| 103 parser.add_option("--disable_rate_heterogeneity", action="store_true", dest="disable_rate_heterogeneity", help="Disable rate heterogeneity") | |
| 104 # (-W) | |
| 105 parser.add_option("--sliding_window_size", action="store", type="string", dest="sliding_window_size", help="Sliding window size") | |
| 106 # (-x) | |
| 107 parser.add_option("--rapid_bootstrap_random_seed", action="store", type="int", dest="rapid_bootstrap_random_seed", help="Rapid Boostrap Random Seed") | |
| 108 # (-y) | |
| 109 parser.add_option("--parsimony_starting_tree_only", action="store_true", dest="parsimony_starting_tree_only", help="Generate a parsimony starting tree only") | |
| 110 # (-z) | |
| 111 parser.add_option("--file_multiple_trees", action="store", type="string", dest="file_multiple_trees", help="Multiple Trees File") | |
| 112 | |
| 113 (options, args) = parser.parse_args() | |
| 114 cmd = [] | |
| 115 | |
| 116 # Required parameters | |
| 117 binary = options.binary | |
| 118 cmd.append(binary) | |
| 119 # Threads | |
| 120 threads = "-T %d" % options.threads | |
| 121 cmd.append(threads) | |
| 122 # Source | |
| 123 source = "-s %s" % options.source | |
| 124 cmd.append(source) | |
| 125 #Hardcode to "galaxy" first to simplify the output part of the wrapper | |
| 126 #name = "-n %s" % options.name | |
| 127 name = "-n galaxy" | |
| 128 cmd.append(name) | |
| 129 ## Model | |
| 130 model_type = options.model_type | |
| 131 base_model = options.base_model | |
| 132 aa_search_matrix = options.aa_search_matrix | |
| 133 aa_empirical_freq = options.aa_empirical_freq | |
| 134 if model_type == 'aminoacid': | |
| 135 model = "-m %s%s" % (base_model, aa_search_matrix) | |
| 136 if aa_empirical_freq: | |
| 137 model = "-m %s%s%s" % (base_model, aa_search_matrix, 'F') | |
| 138 # (-P) | |
| 139 if options.external_protein_model: | |
| 140 external_protein_model = "-P %s" % options.external_protein_model | |
| 141 cmd.append(external_protein_model) | |
| 142 else: | |
| 143 model = "-m %s" % base_model | |
| 144 cmd.append(model) | |
| 145 if model == "GTRCAT": | |
| 146 # (-c) | |
| 147 if options.numofcats: | |
| 148 numofcats = "-c %d" % options.numofcats | |
| 149 cmd.append(numofcats) | |
| 150 # Optional parameters | |
| 151 if options.number_of_runs_bootstop: | |
| 152 number_of_runs_bootstop = "-N %s" % options.number_of_runs_bootstop | |
| 153 cmd.append(number_of_runs_bootstop) | |
| 154 else: | |
| 155 number_of_runs_bootstop = '' | |
| 156 if options.number_of_runs: | |
| 157 number_of_runs_opt = "-N %d" % options.number_of_runs | |
| 158 cmd.append(number_of_runs_opt) | |
| 159 else: | |
| 160 number_of_runs_opt = 0 | |
| 161 # (-a) | |
| 162 if options.weightfile: | |
| 163 weightfile = "-a %s" % options.weightfile | |
| 164 cmd.append(weightfile) | |
| 165 # (-A) | |
| 166 if options.secondary_structure_model: | |
| 167 secondary_structure_model = "-A %s" % options.secondary_structure_model | |
| 168 cmd.append(secondary_structure_model ) | |
| 169 # (-b) | |
| 170 if options.bootseed: | |
| 171 bootseed = "-b %d" % options.bootseed | |
| 172 cmd.append(bootseed) | |
| 173 else: | |
| 174 bootseed = 0 | |
| 175 # -C - doesn't work in pthreads version, skipped | |
| 176 if options.search_complete_random_tree: | |
| 177 cmd.append("-d") | |
| 178 if options.ml_search_convergence: | |
| 179 cmd.append("-D" ) | |
| 180 if options.model_opt_precision: | |
| 181 model_opt_precision = "-e %f" % options.model_opt_precision | |
| 182 cmd.append(model_opt_precision) | |
| 183 if options.excludefile: | |
| 184 excludefile = "-E %s" % options.excludefile | |
| 185 cmd.append(excludefile) | |
| 186 if options.search_algorithm: | |
| 187 search_algorithm = "-f %s" % options.search_algorithm | |
| 188 cmd.append(search_algorithm) | |
| 189 if options.save_memory_cat_model: | |
| 190 cmd.append("-F") | |
| 191 if options.groupingfile: | |
| 192 groupingfile = "-g %s" % options.groupingfile | |
| 193 cmd.append(groupingfile) | |
| 194 if options.enable_evol_heuristics: | |
| 195 enable_evol_heuristics = "-G %f" % options.enable_evol_heuristics | |
| 196 cmd.append(enable_evol_heuristics ) | |
| 197 if options.initial_rearrangement_setting: | |
| 198 initial_rearrangement_setting = "-i %s" % options.initial_rearrangement_setting | |
| 199 cmd.append(initial_rearrangement_setting) | |
| 200 if options.posterior_bootstopping_analysis: | |
| 201 posterior_bootstopping_analysis = "-I %s" % options.posterior_bootstopping_analysis | |
| 202 cmd.append(posterior_bootstopping_analysis) | |
| 203 if options.majority_rule_consensus: | |
| 204 majority_rule_consensus = "-J %s" % options.majority_rule_consensus | |
| 205 cmd.append(majority_rule_consensus) | |
| 206 if options.print_branch_lengths: | |
| 207 cmd.append("-k") | |
| 208 if options.multistate_sub_model: | |
| 209 multistate_sub_model = "-K %s" % options.multistate_sub_model | |
| 210 cmd.append(multistate_sub_model) | |
| 211 if options.estimate_individual_branch_lengths: | |
| 212 cmd.append("-M") | |
| 213 if options.outgroup_name: | |
| 214 outgroup_name = "-o %s" % options.outgroup_name | |
| 215 cmd.append(outgroup_name) | |
| 216 if options.disable_undetermined_seq_check: | |
| 217 cmd.append("-O") | |
| 218 if options.random_seed: | |
| 219 random_seed = "-p %d" % options.random_seed | |
| 220 cmd.append(random_seed) | |
| 221 multiple_model = None | |
| 222 if options.multiple_model: | |
| 223 multiple_model = "-q %s" % options.multiple_model | |
| 224 cmd.append(multiple_model) | |
| 225 if options.constraint_file: | |
| 226 constraint_file = "-r %s" % options.constraint_file | |
| 227 cmd.append(constraint_file) | |
| 228 if options.bin_model_parameter_file: | |
| 229 bin_model_parameter_file_name = "RAxML_binaryModelParameters.galaxy" | |
| 230 os.symlink(options.bin_model_parameter_file, bin_model_parameter_file_name ) | |
| 231 bin_model_parameter_file = "-R %s" % options.bin_model_parameter_file | |
| 232 #Needs testing. Is the hardcoded name or the real path needed? | |
| 233 cmd.append(bin_model_parameter_file) | |
| 234 if options.secondary_structure_file: | |
| 235 secondary_structure_file = "-S %s" % options.secondary_structure_file | |
| 236 cmd.append(secondary_structure_file) | |
| 237 if options.starting_tree: | |
| 238 starting_tree = "-t %s" % options.starting_tree | |
| 239 cmd.append(starting_tree) | |
| 240 if options.use_median_approximation: | |
| 241 cmd.append("-u") | |
| 242 if options.save_memory_gappy_alignments: | |
| 243 cmd.append("-U") | |
| 244 if options.disable_rate_heterogeneity: | |
| 245 cmd.append("-V") | |
| 246 if options.sliding_window_size: | |
| 247 sliding_window_size = "-W %d" % options.sliding_window_size | |
| 248 cmd.append(sliding_window_size) | |
| 249 if options.rapid_bootstrap_random_seed: | |
| 250 rapid_bootstrap_random_seed = "-x %d" % options.rapid_bootstrap_random_seed | |
| 251 cmd.append(rapid_bootstrap_random_seed) | |
| 252 else: | |
| 253 rapid_bootstrap_random_seed = 0 | |
| 254 if options.parsimony_starting_tree_only: | |
| 255 cmd.append("-y") | |
| 256 if options.file_multiple_trees: | |
| 257 file_multiple_trees = "-z %s" % options.file_multiple_trees | |
| 258 cmd.append(file_multiple_trees) | |
| 259 | |
| 260 print "cmd list: ", cmd, "\n" | |
| 261 | |
| 262 full_cmd = " ".join(cmd) | |
| 263 print "Command string: %s" % full_cmd | |
| 264 | |
| 265 try: | |
| 266 proc = subprocess.Popen(args=full_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
| 267 except Exception, err: | |
| 268 sys.stderr.write("Error invoking command: \n%s\n\n%s\n" % (cmd, err)) | |
| 269 sys.exit(1) | |
| 270 stdout, stderr = proc.communicate() | |
| 271 return_code = proc.returncode | |
| 272 if return_code: | |
| 273 sys.stdout.write(stdout) | |
| 274 sys.stderr.write(stderr) | |
| 275 sys.stderr.write("Return error code %i from command:\n" % return_code) | |
| 276 sys.stderr.write("%s\n" % cmd) | |
| 277 else: | |
| 278 sys.stdout.write(stdout) | |
| 279 sys.stdout.write(stderr) | |
| 280 | |
| 281 #Multiple runs - concatenate | |
| 282 if number_of_runs_opt > 0: | |
| 283 if (bootseed == 0) and (rapid_bootstrap_random_seed == 0 ): | |
| 284 runfiles = glob.glob('RAxML*RUN*') | |
| 285 runfiles.sort(key=getint) | |
| 286 # Logs | |
| 287 outfile = open('RAxML_log.galaxy','w') | |
| 288 for filename in runfiles: | |
| 289 if fnmatch.fnmatch(filename, 'RAxML_log.galaxy.RUN.*'): | |
| 290 infile = open(filename, 'r') | |
| 291 filename_line = "%s\n" % filename | |
| 292 outfile.write(filename_line) | |
| 293 for line in infile: | |
| 294 outfile.write(line) | |
| 295 infile.close() | |
| 296 outfile.close() | |
| 297 # Parsimony Trees | |
| 298 outfile = open('RAxML_parsimonyTree.galaxy','w') | |
| 299 for filename in runfiles: | |
| 300 if fnmatch.fnmatch(filename, 'RAxML_parsimonyTree.galaxy.RUN.*'): | |
| 301 infile = open(filename, 'r') | |
| 302 filename_line = "%s\n" % filename | |
| 303 outfile.write(filename_line) | |
| 304 for line in infile: | |
| 305 outfile.write(line) | |
| 306 infile.close() | |
| 307 outfile.close() | |
| 308 # Results | |
| 309 outfile = open('RAxML_result.galaxy','w') | |
| 310 for filename in runfiles: | |
| 311 if fnmatch.fnmatch(filename, 'RAxML_result.galaxy.RUN.*'): | |
| 312 infile = open(filename, 'r') | |
| 313 filename_line = "%s\n" % filename | |
| 314 outfile.write(filename_line) | |
| 315 for line in infile: | |
| 316 outfile.write(line) | |
| 317 infile.close() | |
| 318 outfile.close() | |
| 319 # Multiple Model Partition Files | |
| 320 if multiple_model: | |
| 321 files = glob.glob('RAxML_bestTree.galaxy.PARTITION.*') | |
| 322 if len(files) > 0: | |
| 323 files.sort(key=getint) | |
| 324 outfile = open('RAxML_bestTreePartitions.galaxy','w') | |
| 325 # Best Tree Partitions | |
| 326 for filename in files: | |
| 327 if fnmatch.fnmatch(filename, 'RAxML_bestTree.galaxy.PARTITION.*'): | |
| 328 infile = open(filename, 'r') | |
| 329 filename_line = "%s\n" % filename | |
| 330 outfile.write(filename_line) | |
| 331 for line in infile: | |
| 332 outfile.write(line) | |
| 333 infile.close() | |
| 334 outfile.close() | |
| 335 else: | |
| 336 outfile = open('RAxML_bestTreePartitions.galaxy','w') | |
| 337 outfile.write("No partition files were produced.\n") | |
| 338 outfile.close() | |
| 339 | |
| 340 # Result Partitions | |
| 341 files = glob.glob('RAxML_result.galaxy.PARTITION.*') | |
| 342 if len(files) > 0: | |
| 343 files.sort(key=getint) | |
| 344 outfile = open('RAxML_resultPartitions.galaxy','w') | |
| 345 for filename in files: | |
| 346 if fnmatch.fnmatch(filename, 'RAxML_result.galaxy.PARTITION.*'): | |
| 347 infile = open(filename, 'r') | |
| 348 filename_line = "%s\n" % filename | |
| 349 outfile.write(filename_line) | |
| 350 for line in infile: | |
| 351 outfile.write(line) | |
| 352 infile.close() | |
| 353 outfile.close() | |
| 354 else: | |
| 355 outfile = open('RAxML_resultPartitions.galaxy','w') | |
| 356 outfile.write("No partition files were produced.\n") | |
| 357 outfile.close() | |
| 358 | |
| 359 # DEBUG options | |
| 360 infof = open('RAxML_info.galaxy','a') | |
| 361 infof.write('\nOM: CLI options DEBUG START:\n') | |
| 362 infof.write(options.__repr__()) | |
| 363 infof.write('\nOM: CLI options DEBUG END\n') | |
| 364 | |
| 365 if __name__=="__main__": __main__() |
