0
+ − 1 #!/usr/bin/env python
"""
Runs RAxML on a sequence file.
For use with RAxML version 7.3.0

Example RAxML command lines:
    raxmlHPC-PTHREADS-SSE3 -T 2 -f c -m GTRGAMMA -F -s "/Users/om/Downloads/rana.phy" -n rana_red -w "/Users/om/Downloads/"
    raxmlHPC-PTHREADS-SSE3 -T 2 -m GTRGAMMA -n test -p 323483 -s reduced.phy

Command template from the Galaxy tool XML:
    raxmlHPC-HYBRID-SSE3 -T 4 -f ${search_algorithm} -m ${smodel} -N ${repeats} -o "${html_outfile.files_path}" -s "$input1"
"""
+ − 11 import os, shutil, subprocess, sys, optparse, fnmatch, glob
+ − 12
def stop_err(msg):
    """Write *msg* to stderr and terminate the process with a failure status.

    Raises:
        SystemExit: always, with exit code 1 so that callers (shells, job
            runners) can tell the run failed.
    """
    sys.stderr.write("%s\n" % msg)
    # A bare sys.exit() reports success (status 0); an error path must not.
    sys.exit(1)
+ − 16
def getint(name):
    """Return the numeric index embedded in a RAxML output file name.

    The index is the integer that follows a ``RUN.`` or ``PARTITION.``
    marker, e.g. ``RAxML_log.galaxy.RUN.12`` -> 12.  The function is used as
    a sort key, so it always returns an int: names without a usable index
    yield -1, which sorts them ahead of every numbered file instead of
    producing ``None`` keys (not orderable against ints on Python 3) or
    raising ``ValueError``.
    """
    for marker in ('RUN.', 'PARTITION.'):
        suffix = name.partition(marker)[2]
        if suffix == '':
            continue
        try:
            return int(suffix)
        except ValueError:
            # Something other than a plain number follows this marker;
            # give the next marker a chance before falling back.
            continue
    return -1
+ − 22
def _build_option_parser():
    """Create the optparse parser for the wrapper's command line.

    The comment above each option names the RAxML flag it is translated to.
    """
    usage = "usage: %prog -T <threads> -s <input> -n <output> -m <model> [optional arguments]"

    # Parse the primary wrapper's command line options
    parser = optparse.OptionParser(usage=usage)
    # raxml binary name, hardcoded in the xml file
    parser.add_option("--binary", action="store", type="string", dest="binary", help="Command to run")
    # (-a)
    parser.add_option("--weightfile", action="store", type="string", dest="weightfile", help="Column weight file")
    # (-A)
    parser.add_option("--secondary_structure_model", action="store", type="string", dest="secondary_structure_model", help="Secondary structure model")
    # (-b)
    parser.add_option("--bootseed", action="store", type="int", dest="bootseed", help="Bootstrap random number seed")
    # (-c)
    parser.add_option("--numofcats", action="store", type="int", dest="numofcats", help="Number of distinct rate categories")
    # (-d)
    parser.add_option("--search_complete_random_tree", action="store_true", dest="search_complete_random_tree", help="Search with a complete random starting tree")
    # (-D)
    parser.add_option("--ml_search_convergence", action="store_true", dest="ml_search_convergence", help="ML search convergence criterion")
    # (-e)
    parser.add_option("--model_opt_precision", action="store", type="float", dest="model_opt_precision", help="Model Optimization Precision (-e)")
    # (-E)
    parser.add_option("--excludefile", action="store", type="string", dest="excludefile", help="Exclude File Name")
    # (-f)
    parser.add_option("--search_algorithm", action="store", type="string", dest="search_algorithm", help="Search Algorithm")
    # (-F)
    parser.add_option("--save_memory_cat_model", action="store_true", dest="save_memory_cat_model", help="Save memory under CAT and GTRGAMMA models")
    # (-g)
    parser.add_option("--groupingfile", action="store", type="string", dest="groupingfile", help="Grouping File Name")
    # (-G)
    parser.add_option("--enable_evol_heuristics", action="store_true", dest="enable_evol_heuristics", help="Enable evol algo heuristics")
    # (-i)
    parser.add_option("--initial_rearrangement_setting", action="store", type="int", dest="initial_rearrangement_setting", help="Initial Rearrangement Setting")
    # (-I)
    parser.add_option("--posterior_bootstopping_analysis", action="store", type="string", dest="posterior_bootstopping_analysis", help="Posterior bootstopping analysis")
    # (-J)
    parser.add_option("--majority_rule_consensus", action="store", type="string", dest="majority_rule_consensus", help="Majority rule consensus")
    # (-k)
    parser.add_option("--print_branch_lengths", action="store_true", dest="print_branch_lengths", help="Print branch lengths")
    # (-K)
    parser.add_option("--multistate_sub_model", action="store", type="string", dest="multistate_sub_model", help="Multistate substitution model")
    # (-m)
    parser.add_option("--model_type", action="store", type="string", dest="model_type", help="Model Type")
    parser.add_option("--base_model", action="store", type="string", dest="base_model", help="Base Model")
    parser.add_option("--aa_empirical_freq", action="store_true", dest="aa_empirical_freq", help="Use AA Empirical base frequencies")
    parser.add_option("--aa_search_matrix", action="store", type="string", dest="aa_search_matrix", help="AA Search Matrix")
    # (-n)
    parser.add_option("--name", action="store", type="string", dest="name", help="Run Name")
    # (-N/#)
    parser.add_option("--number_of_runs", action="store", type="int", dest="number_of_runs", help="Number of alternative runs")
    parser.add_option("--number_of_runs_bootstop", action="store", type="string", dest="number_of_runs_bootstop", help="Number of alternative runs based on the bootstop criteria")
    # (-M)
    parser.add_option("--estimate_individual_branch_lengths", action="store_true", dest="estimate_individual_branch_lengths", help="Estimate individual branch lengths")
    # (-o)
    parser.add_option("--outgroup_name", action="store", type="string", dest="outgroup_name", help="Outgroup Name")
    # (-O)
    parser.add_option("--disable_undetermined_seq_check", action="store_true", dest="disable_undetermined_seq_check", help="Disable undetermined sequence check")
    # (-p)
    parser.add_option("--random_seed", action="store", type="int", dest="random_seed", help="Random Number Seed")
    # (-P)
    parser.add_option("--external_protein_model", action="store", type="string", dest="external_protein_model", help="External Protein Model")
    # (-q)
    parser.add_option("--multiple_model", action="store", type="string", dest="multiple_model", help="Multiple Model File")
    # (-r)
    parser.add_option("--constraint_file", action="store", type="string", dest="constraint_file", help="Constraint File")
    # (-R)
    parser.add_option("--bin_model_parameter_file", action="store", type="string", dest="bin_model_parameter_file", help="Binary model parameter file")
    # (-s)
    parser.add_option("--source", action="store", type="string", dest="source", help="Input file")
    # (-S)
    parser.add_option("--secondary_structure_file", action="store", type="string", dest="secondary_structure_file", help="Secondary structure file")
    # (-t)
    parser.add_option("--starting_tree", action="store", type="string", dest="starting_tree", help="Starting Tree")
    # (-T)
    parser.add_option("--threads", action="store", type="int", dest="threads", help="Number of threads to use")
    # (-u)
    parser.add_option("--use_median_approximation", action="store_true", dest="use_median_approximation", help="Use median approximation")
    # (-U)
    parser.add_option("--save_memory_gappy_alignments", action="store_true", dest="save_memory_gappy_alignments", help="Save memory in large gapped alignments")
    # (-V)
    parser.add_option("--disable_rate_heterogeneity", action="store_true", dest="disable_rate_heterogeneity", help="Disable rate heterogeneity")
    # (-W)
    parser.add_option("--sliding_window_size", action="store", type="string", dest="sliding_window_size", help="Sliding window size")
    # (-x)
    parser.add_option("--rapid_bootstrap_random_seed", action="store", type="int", dest="rapid_bootstrap_random_seed", help="Rapid Bootstrap Random Seed")
    # (-y)
    parser.add_option("--parsimony_starting_tree_only", action="store_true", dest="parsimony_starting_tree_only", help="Generate a parsimony starting tree only")
    # (-z)
    parser.add_option("--file_multiple_trees", action="store", type="string", dest="file_multiple_trees", help="Multiple Trees File")
    return parser


def _build_command(options):
    """Translate parsed wrapper options into the list of RAxML command pieces.

    Each list element is one "flag value" fragment; the caller joins them
    with spaces.  Side effect: when --bin_model_parameter_file is given, a
    symlink named RAxML_binaryModelParameters.galaxy is created in the
    current directory.
    """
    # Required parameters: binary, threads, source.
    cmd = [options.binary, "-T %d" % options.threads, "-s %s" % options.source]
    # The run name is hardcoded to "galaxy" to simplify the output part of
    # the wrapper; --name is accepted but not used.
    cmd.append("-n galaxy")

    # Model
    base_model = options.base_model
    if options.model_type == 'aminoacid':
        model = "%s%s" % (base_model, options.aa_search_matrix)
        if options.aa_empirical_freq:
            model += 'F'
        # (-P)
        if options.external_protein_model:
            cmd.append("-P %s" % options.external_protein_model)
    else:
        model = base_model
    cmd.append("-m %s" % model)
    # (-c) Compare the bare model name: the "-m ..." fragment can never
    # equal "GTRCAT", which previously made this branch unreachable.
    if base_model == "GTRCAT" and options.numofcats:
        cmd.append("-c %d" % options.numofcats)

    # Optional parameters
    if options.number_of_runs_bootstop:
        cmd.append("-N %s" % options.number_of_runs_bootstop)
    if options.number_of_runs:
        cmd.append("-N %d" % options.number_of_runs)
    # (-a)
    if options.weightfile:
        cmd.append("-a %s" % options.weightfile)
    # (-A)
    if options.secondary_structure_model:
        cmd.append("-A %s" % options.secondary_structure_model)
    # (-b)
    if options.bootseed:
        cmd.append("-b %d" % options.bootseed)
    # -C - doesn't work in pthreads version, skipped
    if options.search_complete_random_tree:
        cmd.append("-d")
    if options.ml_search_convergence:
        cmd.append("-D")
    if options.model_opt_precision:
        cmd.append("-e %f" % options.model_opt_precision)
    if options.excludefile:
        cmd.append("-E %s" % options.excludefile)
    if options.search_algorithm:
        cmd.append("-f %s" % options.search_algorithm)
    if options.save_memory_cat_model:
        cmd.append("-F")
    if options.groupingfile:
        cmd.append("-g %s" % options.groupingfile)
    if options.enable_evol_heuristics:
        # NOTE(review): the option is a store_true flag, so this always
        # renders as "-G 1.000000"; confirm whether a float threshold option
        # was intended.
        cmd.append("-G %f" % options.enable_evol_heuristics)
    if options.initial_rearrangement_setting:
        cmd.append("-i %s" % options.initial_rearrangement_setting)
    if options.posterior_bootstopping_analysis:
        cmd.append("-I %s" % options.posterior_bootstopping_analysis)
    if options.majority_rule_consensus:
        cmd.append("-J %s" % options.majority_rule_consensus)
    if options.print_branch_lengths:
        cmd.append("-k")
    if options.multistate_sub_model:
        cmd.append("-K %s" % options.multistate_sub_model)
    if options.estimate_individual_branch_lengths:
        cmd.append("-M")
    if options.outgroup_name:
        cmd.append("-o %s" % options.outgroup_name)
    if options.disable_undetermined_seq_check:
        cmd.append("-O")
    if options.random_seed:
        cmd.append("-p %d" % options.random_seed)
    if options.multiple_model:
        cmd.append("-q %s" % options.multiple_model)
    if options.constraint_file:
        cmd.append("-r %s" % options.constraint_file)
    if options.bin_model_parameter_file:
        bin_model_parameter_file_name = "RAxML_binaryModelParameters.galaxy"
        os.symlink(options.bin_model_parameter_file, bin_model_parameter_file_name)
        # Needs testing. Is the hardcoded name or the real path needed?
        cmd.append("-R %s" % options.bin_model_parameter_file)
    if options.secondary_structure_file:
        cmd.append("-S %s" % options.secondary_structure_file)
    if options.starting_tree:
        cmd.append("-t %s" % options.starting_tree)
    if options.use_median_approximation:
        cmd.append("-u")
    if options.save_memory_gappy_alignments:
        cmd.append("-U")
    if options.disable_rate_heterogeneity:
        cmd.append("-V")
    if options.sliding_window_size:
        # The option is parsed as a string, so "%d" would raise TypeError.
        cmd.append("-W %s" % options.sliding_window_size)
    if options.rapid_bootstrap_random_seed:
        cmd.append("-x %d" % options.rapid_bootstrap_random_seed)
    if options.parsimony_starting_tree_only:
        cmd.append("-y")
    if options.file_multiple_trees:
        cmd.append("-z %s" % options.file_multiple_trees)
    return cmd


def _concatenate_outputs(filenames, pattern, destination):
    """Merge the files in *filenames* matching *pattern* into *destination*.

    Each source file's content is preceded by a line holding its name.
    *destination* is created (or truncated) even when nothing matches.
    """
    with open(destination, 'w') as outfile:
        for filename in filenames:
            if fnmatch.fnmatch(filename, pattern):
                outfile.write("%s\n" % filename)
                with open(filename, 'r') as infile:
                    for line in infile:
                        outfile.write(line)


def _collect_partitions(pattern, destination):
    """Merge per-partition files matching *pattern*, or note that none exist."""
    files = glob.glob(pattern)
    if files:
        files.sort(key=getint)
        _concatenate_outputs(files, pattern, destination)
    else:
        with open(destination, 'w') as outfile:
            outfile.write("No partition files were produced.\n")


def __main__():
    """Run RAxML with the options given on the command line.

    Parses sys.argv, builds and executes the RAxML command, relays its
    output, merges per-run and per-partition result files into single files
    named RAxML_*.galaxy in the current directory, and appends a dump of the
    parsed options to RAxML_info.galaxy.

    Exits via parser.error (status 2) when --binary, --threads or --source
    is missing, and with status 1 when the command cannot be started.
    """
    parser = _build_option_parser()
    options, _unused_args = parser.parse_args()
    # Without these the command cannot be assembled at all.
    for required in ("binary", "threads", "source"):
        if getattr(options, required) is None:
            parser.error("--%s is required" % required)

    cmd = _build_command(options)
    sys.stdout.write("cmd list:  %s \n\n" % (cmd,))

    full_cmd = " ".join(cmd)
    sys.stdout.write("Command string: %s\n" % full_cmd)

    # NOTE(review): the command is a space-joined string run through the
    # shell, so option values containing spaces or shell metacharacters are
    # interpreted by the shell; values are assumed to be sanitised upstream.
    try:
        # universal_newlines makes the captured output text on Python 2 and 3.
        proc = subprocess.Popen(args=full_cmd, shell=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                universal_newlines=True)
    except Exception as err:
        sys.stderr.write("Error invoking command: \n%s\n\n%s\n" % (cmd, err))
        sys.exit(1)
    stdout, stderr = proc.communicate()
    return_code = proc.returncode
    if return_code:
        sys.stdout.write(stdout)
        sys.stderr.write(stderr)
        sys.stderr.write("Return error code %i from command:\n" % return_code)
        sys.stderr.write("%s\n" % cmd)
    else:
        # On success RAxML's stderr is folded into stdout.
        sys.stdout.write(stdout)
        sys.stdout.write(stderr)

    # Multiple runs - concatenate (only when no bootstrap seed was given).
    # Test the option values directly instead of comparing the formatted
    # "-N ..." / "-b ..." strings against 0.
    if options.number_of_runs and not options.bootseed and not options.rapid_bootstrap_random_seed:
        runfiles = glob.glob('RAxML*RUN*')
        runfiles.sort(key=getint)
        # Logs
        _concatenate_outputs(runfiles, 'RAxML_log.galaxy.RUN.*', 'RAxML_log.galaxy')
        # Parsimony Trees
        _concatenate_outputs(runfiles, 'RAxML_parsimonyTree.galaxy.RUN.*', 'RAxML_parsimonyTree.galaxy')
        # Results
        _concatenate_outputs(runfiles, 'RAxML_result.galaxy.RUN.*', 'RAxML_result.galaxy')

    # Multiple Model Partition Files
    if options.multiple_model:
        # Best Tree Partitions
        _collect_partitions('RAxML_bestTree.galaxy.PARTITION.*', 'RAxML_bestTreePartitions.galaxy')
        # Result Partitions
        _collect_partitions('RAxML_result.galaxy.PARTITION.*', 'RAxML_resultPartitions.galaxy')

    # DEBUG options
    with open('RAxML_info.galaxy', 'a') as infof:
        infof.write('\nOM: CLI options DEBUG START:\n')
        infof.write(repr(options))
        infof.write('\nOM: CLI options DEBUG END\n')
+ − 364
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()