| 2 | 1 #!/usr/bin/env python | 
|  | 2 """A simple wrapper script to call MIRA and collect its output. | 
|  | 3 """ | 
| 4 | 4 | 
|  | 5 from __future__ import print_function | 
|  | 6 | 
| 2 | 7 import os | 
| 4 | 8 import shutil | 
| 2 | 9 import subprocess | 
| 4 | 10 import sys | 
|  | 11 import tempfile | 
| 2 | 12 import time | 
| 4 | 13 | 
| 2 | 14 from optparse import OptionParser | 
|  | 15 | 
| 4 | 16 # Do we need any PYTHONPATH magic? | 
| 2 | 17 from mira4_make_bam import make_bam | 
|  | 18 | 
# Version of this wrapper script (not of MIRA itself); it is shown in the
# usage text and in the --version output.
WRAPPER_VER = "0.0.11"  # Keep in sync with the XML file
| 2 | 20 | 
|  | 21 | 
def get_version(mira_binary):
    """Return the first line printed by ``<mira_binary> -v``.

    Standard error is merged into standard output, and the first line of
    the combined text is returned with surrounding whitespace removed.

    If the process cannot be started, the command and the error are
    written to stderr and the script exits with status 1.
    """
    # At the command line one would use: mira -v | head -n 1
    # however there is some pipe error when doing that here.
    version_cmd = [mira_binary, "-v"]
    try:
        proc = subprocess.Popen(
            version_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
    except Exception as err:
        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(version_cmd), err))
        sys.exit(1)
    # Only the captured stdout matters; stderr was redirected into it.
    output = proc.communicate()[0]
    first_line = output.split("\n", 1)[0]
    return first_line.strip()
|  | 37 | 
| 4 | 38 | 
# Parse Command Line
usage = """Galaxy MIRA4 wrapper script v%s - use as follows:

$ python mira4.py ...

This will run the MIRA binary and collect its output files as directed.
""" % WRAPPER_VER
parser = OptionParser(usage=usage)
parser.add_option("-m", "--manifest", dest="manifest",
                  default=None, metavar="FILE",
                  help="MIRA manifest filename")
# The four output options only differ in their name and help text; each
# defaults to "-" which the rest of the script treats as "not requested".
for _flag, _help_text in (
    ("maf", "MIRA MAF output filename"),
    ("bam", "Unpadded BAM output filename"),
    ("fasta", "Unpadded FASTA output filename"),
    ("log", "MIRA logging output filename"),
):
    parser.add_option("--" + _flag, dest=_flag,
                      default="-", metavar="FILE",
                      help=_help_text)
parser.add_option("-v", "--version", dest="version",
                  default=False, action="store_true",
                  help="Show version and quit")
options, args = parser.parse_args()

# Short module-level aliases used by the rest of the script.
manifest, out_maf, out_bam, out_fasta, out_log = (
    options.manifest,
    options.maf,
    options.bam,
    options.fasta,
    options.log,
)
|  | 71 | 
# Locate the MIRA executables: inside the folder named by $MIRA4 when that
# environment variable is set, otherwise assume they are found via $PATH.
if "MIRA4" in os.environ:
    mira_path = os.environ["MIRA4"]
    mira_binary = os.path.join(mira_path, "mira")
    if not os.path.isfile(mira_binary):
        sys.exit("Missing mira under $MIRA4, %r\nFolder contained: %s"
                 % (mira_binary, ", ".join(os.listdir(mira_path))))
    mira_convert = os.path.join(mira_path, "miraconvert")
    if not os.path.isfile(mira_convert):
        sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
                 % (mira_convert, ", ".join(os.listdir(mira_path))))
else:
    sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n")
    mira_path = None
    mira_binary = "mira"
    mira_convert = "miraconvert"

# Both tools must report a version string starting with "4.0".
mira_ver = get_version(mira_binary)
if not mira_ver.strip().startswith("4.0"):
    sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary))
mira_convert_ver = get_version(mira_convert)
if not mira_convert_ver.strip().startswith("4.0"):
    # Report miraconvert's own version string here (previously this printed
    # the version of the mira binary, which had already passed the check).
    sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert))
if options.version:
    # --version: print the versions and stop before touching any manifest.
    print("%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER))
    if mira_ver != mira_convert_ver:
        print("WARNING: miraconvert %s" % mira_convert_ver)
    sys.exit(0)
|  | 99 | 
# The manifest is mandatory and must already exist on disk.
if not manifest:
    sys.exit("Manifest is required")
elif not os.path.isfile(manifest):
    sys.exit("Missing input MIRA manifest file: %r" % manifest)


# Thread count comes from $GALAXY_SLOTS; default to a single thread when
# the variable is unset or is not an integer.
try:
    threads = int(os.environ.get("GALAXY_SLOTS", "1"))
except ValueError:
    threads = 1
# Explicit check rather than an assert, so that the validation still
# happens when Python is run with -O (which strips assert statements).
if threads < 1:
    sys.exit("Invalid thread count %r from $GALAXY_SLOTS, must be at least 1" % threads)
|  | 111 | 
|  | 112 | 
def override_temp(manifest):
    """Override ``-DI:trt=/tmp`` in manifest with environment variable.

    Currently MIRA 4 does not allow environment variables like ``$TMP``
    inside the manifest, which is a problem if you need to override
    the default at run time.

    The tool XML will use ``/tmp`` and we replace that here with
    ``tempfile.gettempdir()`` which will respect $TMPDIR, $TEMP, $TMP
    as explained in the Python standard library documentation:
    http://docs.python.org/2/library/tempfile.html#tempfile.tempdir

    By default MIRA 4 would write its temporary files within the output
    folder, which is a problem if that is a network drive.

    The manifest file is rewritten in place (even if nothing matched),
    and the new content is flushed and fsync'ed before returning.

    :param manifest: filename of the MIRA manifest to edit in place.
    :returns: None
    """
    # Context managers make sure the handles are closed even if the
    # read, write or fsync raises.
    with open(manifest, "r") as handle:
        text = handle.read()

    # At time of writing, this is at the end of a file,
    # but could be followed by a space in future...
    # This is a plain substring replacement of every occurrence.
    text = text.replace("-DI:trt=/tmp", "-DI:trt=" + tempfile.gettempdir())

    # Want to try to ensure this gets written to disk before MIRA attempts
    # to open it - any networked file system may impose a delay...
    with open(manifest, "w") as handle:
        handle.write(text)
        handle.flush()
        os.fsync(handle.fileno())
|  | 143 | 
|  | 144 | 
def log_manifest(manifest):
    """Dump the manifest file to stderr between banner lines.

    The file content is copied line by line, preceded by a
    "Manifest file" banner and followed by an "End of manifest" banner.
    """
    rule = "=" * 60
    emit = sys.stderr.write
    emit("\n%s\nManifest file\n%s\n" % (rule, rule))
    with open(manifest) as source:
        for row in source:
            emit(row)
    emit("\n%s\nEnd of manifest\n%s\n" % (rule, rule))
| 2 | 152 | 
|  | 153 | 
def collect_output(temp, name, handle):
    """Move MIRA's result files to the requested output filenames.

    Results are looked for in ``<temp>/<name>_assembly/<name>_d_results``.
    The destinations and tool paths are read from module-level globals:
    ``out_maf``, ``out_fasta``, ``out_bam``, ``mira_convert`` and
    ``manifest`` (the latter only for diagnostic logging).

    :param temp: folder in which MIRA was run.
    :param name: MIRA project name, used to build the result filenames.
    :param handle: writable log handle; progress lines are written to it
        and it is also passed on to ``make_bam``.

    Exits the script (``sys.exit``) if the results folder is missing or
    empty, or if ``make_bam`` returns a true value (which is used as the
    exit message). If the expected MAF or FASTA file is absent, the
    manifest and the folder listing are written to stderr, but the
    function carries on.
    """
    f = "%s/%s_assembly/%s_d_results" % (temp, name, name)
    if not os.path.isdir(f):
        log_manifest(manifest)
        sys.exit("Missing output folder")
    if not os.listdir(f):
        log_manifest(manifest)
        sys.exit("Empty output folder")

    old_maf = "%s/%s_out.maf" % (f, name)
    if not os.path.isfile(old_maf):
        # Triggered extractLargeContigs.sh?
        old_maf = "%s/%s_LargeContigs_out.maf" % (f, name)

    # De novo or single strain mapping,
    old_fasta = "%s/%s_out.unpadded.fasta" % (f, name)
    ref_fasta = "%s/%s_out.padded.fasta" % (f, name)
    if not os.path.isfile(old_fasta):
        # Mapping (StrainX versus reference) or de novo
        old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name)
        ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name)
    if not os.path.isfile(old_fasta):
        # Last candidate tried: the ReferenceStrain filenames
        old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name)
        ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name)

    # Move each file which exists and was asked for; an output filename
    # which is empty or "-" means that output was not requested.
    missing = False
    for old, new in [(old_maf, out_maf),
                     (old_fasta, out_fasta)]:
        if not os.path.isfile(old):
            missing = True
        elif not new or new == "-":
            handle.write("Ignoring %s\n" % old)
        else:
            handle.write("Capturing %s\n" % old)
            shutil.move(old, new)
    if missing:
        # Diagnostics only - deliberately not treated as fatal here.
        log_manifest(manifest)
        sys.stderr.write("Contents of %r:\n" % f)
        for filename in sorted(os.listdir(f)):
            sys.stderr.write("%s\n" % filename)

    # For mapping mode, probably most people would expect a BAM file
    # using the reference FASTA file...
    if out_bam and out_bam != "-":
        if out_maf and out_maf != "-":
            # The MAF file has already been moved to its final location
            msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle)
        else:
            # Not collecting the MAF file, use original location
            msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle)
        if msg:
            sys.exit(msg)
|  | 207 | 
| 2 | 208 | 
def clean_up(temp, name):
    """Delete the ``<temp>/<name>_assembly`` folder tree, if it exists."""
    assembly_dir = "%s/%s_assembly" % (temp, name)
    if not os.path.isdir(assembly_dir):
        # Nothing to remove
        return
    shutil.rmtree(assembly_dir)
|  | 213 | 
| 4 | 214 | 
# MIRA project name; it determines the "<name>_assembly" output folder.
name = "MIRA"

# TODO - Run MIRA in /tmp or a configurable directory?
# Currently Galaxy puts us somewhere safe like:
# /opt/galaxy-dist/database/job_working_directory/846/
temp = "."

# Point MIRA's temporary folder at the system temp directory.
override_temp(manifest)

start_time = time.time()
cmd_list = [mira_binary, "-t", str(threads), manifest]
# Human readable form of the command, only used in log and error messages.
cmd = " ".join(cmd_list)

assert os.path.isdir(temp)
d = "%s_assembly" % name
# This can fail on my development machine if stale folders exist
# under Galaxy's .../database/job_working_directory/ tree:
assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d))
try:
    # Creating the folder up front doubles as a check of path access.
    os.mkdir(d)
except Exception as err:
    log_manifest(manifest)
    sys.stderr.write("Error making directory %s\n%s" % (d, err))
    sys.exit(1)
|  | 240 | 
| 4 | 241 # print(os.path.abspath(".")) | 
|  | 242 # print(cmd) | 
| 2 | 243 | 
# Open the log: a real file if requested, otherwise discard everything.
if out_log and out_log != "-":
    handle = open(out_log, "w")
else:
    handle = open(os.devnull, "w")
handle.write("======================== MIRA manifest (instructions) ========================\n")
# Plain text mode: the old "U" mode flag was removed in Python 3.11 (where
# it raises ValueError), and universal newlines is the Python 3 default.
with open(manifest, "r") as m:
    for line in m:
        handle.write(line)
del m
handle.write("\n")
handle.write("============================ Starting MIRA now ===============================\n")
# Flush our own text before the child process starts writing to the same file.
handle.flush()
try:
    # Run MIRA, sending both its stdout and stderr to the log handle
    child = subprocess.Popen(cmd_list,
                             stdout=handle,
                             stderr=subprocess.STDOUT)
except Exception as err:
    log_manifest(manifest)
    sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
    # TODO - call clean up?
    handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
    handle.close()
    sys.exit(1)
# Use .communicate as can get deadlocks with .wait(),
stdout, stderr = child.communicate()
assert not stdout and not stderr  # Should be empty as sent to handle
run_time = time.time() - start_time
return_code = child.returncode
handle.write("\n")
handle.write("============================ MIRA has finished ===============================\n")
handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0))
if return_code:
    # MIRA failed: record the details, remove the assembly folder, and
    # exit with MIRA's own return code.
    print("MIRA took %0.2f hours" % (run_time / 3600.0))
    handle.write("Return error code %i from command:\n" % return_code)
    handle.write(cmd + "\n")
    handle.close()
    clean_up(temp, name)
    log_manifest(manifest)
    sys.stderr.write("Return error code %i from command:\n" % return_code)
    sys.stderr.write(cmd + "\n")
    sys.exit(return_code)
handle.flush()

# If this file exists, copy it into the log; it is treated as an error
# later on, after the output files have been collected.
if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):
    handle.write("\n")
    handle.write("====================== Extract Large Contigs failed ==========================\n")
    # Plain "r" rather than the removed "rU" mode, as for the manifest above.
    with open("MIRA_assembly/MIRA_d_results/ec.log", "r") as e:
        for line in e:
            handle.write(line)
    handle.write("============================ (end of ec.log) =================================\n")
    handle.flush()
|  | 298 | 
# Gather the requested output files, timing how long that takes.
start_time = time.time()
collect_output(temp, name, handle)
collect_time = time.time() - start_time

# The same timing summary goes to the log and to stdout.
summary = ("MIRA took %0.2f hours; collecting output %0.2f minutes\n"
           % (run_time / 3600.0, collect_time / 60.0))
handle.write(summary)
print(summary)

ec_log = "MIRA_assembly/MIRA_d_results/ec.log"
if os.path.isfile(ec_log):
    # Treat as an error, but doing this AFTER collect_output
    for stream in (sys.stderr, handle):
        stream.write("Extract Large Contigs failed\n")
    handle.close()
    sys.exit(1)

# Success: remove the assembly folder and close the log.
clean_up(temp, name)

handle.write("\nDone\n")
handle.close()
print("Done")