comparison tools/mira4_0/mira4.py @ 4:1713289d9908 draft default tip

v0.0.11 tweak for use with bioconda dependencies
author peterjc
date Thu, 10 Aug 2017 11:09:10 -0400
parents 4eb32a3d67d1
children
comparison
equal deleted inserted replaced
3:a4f602cc3aa9 4:1713289d9908
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 """A simple wrapper script to call MIRA and collect its output. 2 """A simple wrapper script to call MIRA and collect its output.
3 """ 3 """
4
5 from __future__ import print_function
6
4 import os 7 import os
8 import shutil
9 import subprocess
5 import sys 10 import sys
6 import subprocess 11 import tempfile
7 import shutil
8 import time 12 import time
9 import tempfile 13
10 from optparse import OptionParser 14 from optparse import OptionParser
11 15
12 #Do we need any PYTHONPATH magic? 16 # Do we need any PYTHONPATH magic?
13 from mira4_make_bam import make_bam 17 from mira4_make_bam import make_bam
14 18
15 WRAPPER_VER = "0.0.4" #Keep in sync with the XML file 19 WRAPPER_VER = "0.0.11" # Keep in sync with the XML file
16
17 def sys_exit(msg, err=1):
18 sys.stderr.write(msg+"\n")
19 sys.exit(err)
20 20
21 21
22 def get_version(mira_binary): 22 def get_version(mira_binary):
23 """Run MIRA to find its version number""" 23 """Run MIRA to find its version number."""
24 # At the commend line I would use: mira -v | head -n 1 24 # At the commend line I would use: mira -v | head -n 1
25 # however there is some pipe error when doing that here. 25 # however there is some pipe error when doing that here.
26 cmd = [mira_binary, "-v"] 26 cmd = [mira_binary, "-v"]
27 try: 27 try:
28 child = subprocess.Popen(cmd, 28 child = subprocess.Popen(cmd, universal_newlines=True,
29 stdout=subprocess.PIPE, 29 stdout=subprocess.PIPE,
30 stderr=subprocess.STDOUT) 30 stderr=subprocess.STDOUT)
31 except Exception, err: 31 except Exception as err:
32 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) 32 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
33 sys.exit(1) 33 sys.exit(1)
34 ver, tmp = child.communicate() 34 ver, tmp = child.communicate()
35 del child 35 del child
36 return ver.split("\n", 1)[0].strip() 36 return ver.split("\n", 1)[0].strip()
37 37
38 #Parse Command Line 38
39 # Parse Command Line
39 usage = """Galaxy MIRA4 wrapper script v%s - use as follows: 40 usage = """Galaxy MIRA4 wrapper script v%s - use as follows:
40 41
41 $ python mira4.py ... 42 $ python mira4.py ...
42 43
43 This will run the MIRA binary and collect its output files as directed. 44 This will run the MIRA binary and collect its output files as directed.
66 out_maf = options.maf 67 out_maf = options.maf
67 out_bam = options.bam 68 out_bam = options.bam
68 out_fasta = options.fasta 69 out_fasta = options.fasta
69 out_log = options.log 70 out_log = options.log
70 71
71 try: 72 if "MIRA4" in os.environ:
72 mira_path = os.environ["MIRA4"] 73 mira_path = os.environ["MIRA4"]
73 except KeyError: 74 mira_binary = os.path.join(mira_path, "mira")
74 sys_exit("Environment variable $MIRA4 not set") 75 if not os.path.isfile(mira_binary):
75 mira_binary = os.path.join(mira_path, "mira") 76 sys.exit("Missing mira under $MIRA4, %r\nFolder contained: %s"
76 if not os.path.isfile(mira_binary): 77 % (mira_binary, ", ".join(os.listdir(mira_path))))
77 sys_exit("Missing mira under $MIRA4, %r\nFolder contained: %s" 78 mira_convert = os.path.join(mira_path, "miraconvert")
78 % (mira_binary, ", ".join(os.listdir(mira_path)))) 79 if not os.path.isfile(mira_convert):
79 mira_convert = os.path.join(mira_path, "miraconvert") 80 sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
80 if not os.path.isfile(mira_convert): 81 % (mira_convert, ", ".join(os.listdir(mira_path))))
81 sys_exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" 82 else:
82 % (mira_convert, ", ".join(os.listdir(mira_path)))) 83 sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n")
84 mira_path = None
85 mira_binary = "mira"
86 mira_convert = "miraconvert"
83 87
84 mira_ver = get_version(mira_binary) 88 mira_ver = get_version(mira_binary)
85 if not mira_ver.strip().startswith("4.0"): 89 if not mira_ver.strip().startswith("4.0"):
86 sys_exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary)) 90 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary))
87 mira_convert_ver = get_version(mira_convert) 91 mira_convert_ver = get_version(mira_convert)
88 if not mira_convert_ver.strip().startswith("4.0"): 92 if not mira_convert_ver.strip().startswith("4.0"):
89 sys_exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) 93 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert))
90 if options.version: 94 if options.version:
91 print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) 95 print("%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER))
92 if mira_ver != mira_convert_ver: 96 if mira_ver != mira_convert_ver:
93 print "WARNING: miraconvert %s" % mira_convert_ver 97 print("WARNING: miraconvert %s" % mira_convert_ver)
94 sys.exit(0) 98 sys.exit(0)
95 99
96 if not manifest: 100 if not manifest:
97 sys_exit("Manifest is required") 101 sys.exit("Manifest is required")
98 elif not os.path.isfile(manifest): 102 elif not os.path.isfile(manifest):
99 sys_exit("Missing input MIRA manifest file: %r" % manifest) 103 sys.exit("Missing input MIRA manifest file: %r" % manifest)
100 104
101 105
102 try: 106 try:
103 threads = int(os.environ.get("GALAXY_SLOTS", "1")) 107 threads = int(os.environ.get("GALAXY_SLOTS", "1"))
104 except ValueError: 108 except ValueError:
123 """ 127 """
124 handle = open(manifest, "r") 128 handle = open(manifest, "r")
125 text = handle.read() 129 text = handle.read()
126 handle.close() 130 handle.close()
127 131
128 #At time of writing, this is at the end of a file, 132 # At time of writing, this is at the end of a file,
129 #but could be followed by a space in future... 133 # but could be followed by a space in future...
130 text = text.replace("-DI:trt=/tmp", "-DI:trt=" + tempfile.gettempdir()) 134 text = text.replace("-DI:trt=/tmp", "-DI:trt=" + tempfile.gettempdir())
131 135
132 #Want to try to ensure this gets written to disk before MIRA attempts 136 # Want to try to ensure this gets written to disk before MIRA attempts
133 #to open it - any networked file system may impose a delay... 137 # to open it - any networked file system may impose a delay...
134 handle = open(manifest, "w") 138 handle = open(manifest, "w")
135 handle.write(text) 139 handle.write(text)
136 handle.flush() 140 handle.flush()
137 os.fsync(handle.fileno()) 141 os.fsync(handle.fileno())
138 handle.close() 142 handle.close()
139 143
140 144
141 def log_manifest(manifest): 145 def log_manifest(manifest):
142 """Write the manifest file to stderr.""" 146 """Write the manifest file to stderr."""
143 sys.stderr.write("\n%s\nManifest file\n%s\n" % ("="*60, "="*60)) 147 sys.stderr.write("\n%s\nManifest file\n%s\n" % ("=" * 60, "=" * 60))
144 with open(manifest) as h: 148 with open(manifest) as h:
145 for line in h: 149 for line in h:
146 sys.stderr.write(line) 150 sys.stderr.write(line)
147 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) 151 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("=" * 60, "=" * 60))
148 152
149 153
150 def collect_output(temp, name, handle): 154 def collect_output(temp, name, handle):
151 """Moves files to the output filenames (global variables).""" 155 """Moves files to the output filenames (global variables)."""
152 n3 = (temp, name, name, name)
153 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) 156 f = "%s/%s_assembly/%s_d_results" % (temp, name, name)
154 if not os.path.isdir(f): 157 if not os.path.isdir(f):
155 log_manifest(manifest) 158 log_manifest(manifest)
156 sys_exit("Missing output folder") 159 sys.exit("Missing output folder")
157 if not os.listdir(f): 160 if not os.listdir(f):
158 log_manifest(manifest) 161 log_manifest(manifest)
159 sys_exit("Empty output folder") 162 sys.exit("Empty output folder")
160 missing = [] 163 missing = []
161 164
162 old_maf = "%s/%s_out.maf" % (f, name) 165 old_maf = "%s/%s_out.maf" % (f, name)
163 if not os.path.isfile(old_maf): 166 if not os.path.isfile(old_maf):
164 #Triggered extractLargeContigs.sh? 167 # Triggered extractLargeContigs.sh?
165 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) 168 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name)
166 169
167 #De novo or single strain mapping, 170 # De novo or single strain mapping,
168 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) 171 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name)
169 ref_fasta = "%s/%s_out.padded.fasta" % (f, name) 172 ref_fasta = "%s/%s_out.padded.fasta" % (f, name)
170 if not os.path.isfile(old_fasta): 173 if not os.path.isfile(old_fasta):
171 #Mapping (StrainX versus reference) or de novo 174 # Mapping (StrainX versus reference) or de novo
172 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) 175 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name)
173 ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name) 176 ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name)
174 if not os.path.isfile(old_fasta): 177 if not os.path.isfile(old_fasta):
175 old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name) 178 old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name)
176 ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name) 179 ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name)
177
178 180
179 missing = False 181 missing = False
180 for old, new in [(old_maf, out_maf), 182 for old, new in [(old_maf, out_maf),
181 (old_fasta, out_fasta)]: 183 (old_fasta, out_fasta)]:
182 if not os.path.isfile(old): 184 if not os.path.isfile(old):
190 log_manifest(manifest) 192 log_manifest(manifest)
191 sys.stderr.write("Contents of %r:\n" % f) 193 sys.stderr.write("Contents of %r:\n" % f)
192 for filename in sorted(os.listdir(f)): 194 for filename in sorted(os.listdir(f)):
193 sys.stderr.write("%s\n" % filename) 195 sys.stderr.write("%s\n" % filename)
194 196
195 #For mapping mode, probably most people would expect a BAM file 197 # For mapping mode, probably most people would expect a BAM file
196 #using the reference FASTA file... 198 # using the reference FASTA file...
197 if out_bam and out_bam != "-": 199 if out_bam and out_bam != "-":
198 if out_maf and out_maf != "-": 200 if out_maf and out_maf != "-":
199 msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle) 201 msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle)
200 else: 202 else:
201 #Not collecting the MAF file, use original location 203 # Not collecting the MAF file, use original location
202 msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle) 204 msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle)
203 if msg: 205 if msg:
204 sys_exit(msg) 206 sys.exit(msg)
207
205 208
206 def clean_up(temp, name): 209 def clean_up(temp, name):
207 folder = "%s/%s_assembly" % (temp, name) 210 folder = "%s/%s_assembly" % (temp, name)
208 if os.path.isdir(folder): 211 if os.path.isdir(folder):
209 shutil.rmtree(folder) 212 shutil.rmtree(folder)
210 213
211 #TODO - Run MIRA in /tmp or a configurable directory? 214
212 #Currently Galaxy puts us somewhere safe like: 215 # TODO - Run MIRA in /tmp or a configurable directory?
213 #/opt/galaxy-dist/database/job_working_directory/846/ 216 # Currently Galaxy puts us somewhere safe like:
217 # /opt/galaxy-dist/database/job_working_directory/846/
214 temp = "." 218 temp = "."
215 219
216 name = "MIRA" 220 name = "MIRA"
217 221
218 override_temp(manifest) 222 override_temp(manifest)
221 cmd_list = [mira_binary, "-t", str(threads), manifest] 225 cmd_list = [mira_binary, "-t", str(threads), manifest]
222 cmd = " ".join(cmd_list) 226 cmd = " ".join(cmd_list)
223 227
224 assert os.path.isdir(temp) 228 assert os.path.isdir(temp)
225 d = "%s_assembly" % name 229 d = "%s_assembly" % name
226 #This can fail on my development machine if stale folders exist 230 # This can fail on my development machine if stale folders exist
227 #under Galaxy's .../database/job_working_directory/ tree: 231 # under Galaxy's .../database/job_working_directory/ tree:
228 assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d)) 232 assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d))
229 try: 233 try:
230 #Check path access 234 # Check path access
231 os.mkdir(d) 235 os.mkdir(d)
232 except Exception, err: 236 except Exception as err:
233 log_manifest(manifest) 237 log_manifest(manifest)
234 sys.stderr.write("Error making directory %s\n%s" % (d, err)) 238 sys.stderr.write("Error making directory %s\n%s" % (d, err))
235 sys.exit(1) 239 sys.exit(1)
236 240
237 #print os.path.abspath(".") 241 # print(os.path.abspath("."))
238 #print cmd 242 # print(cmd)
239 243
240 if out_log and out_log != "-": 244 if out_log and out_log != "-":
241 handle = open(out_log, "w") 245 handle = open(out_log, "w")
242 else: 246 else:
243 handle = open(os.devnull, "w") 247 handle = open(os.devnull, "w")
249 del m 253 del m
250 handle.write("\n") 254 handle.write("\n")
251 handle.write("============================ Starting MIRA now ===============================\n") 255 handle.write("============================ Starting MIRA now ===============================\n")
252 handle.flush() 256 handle.flush()
253 try: 257 try:
254 #Run MIRA 258 # Run MIRA
255 child = subprocess.Popen(cmd_list, 259 child = subprocess.Popen(cmd_list,
256 stdout=handle, 260 stdout=handle,
257 stderr=subprocess.STDOUT) 261 stderr=subprocess.STDOUT)
258 except Exception, err: 262 except Exception as err:
259 log_manifest(manifest) 263 log_manifest(manifest)
260 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) 264 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
261 #TODO - call clean up? 265 # TODO - call clean up?
262 handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) 266 handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
263 handle.close() 267 handle.close()
264 sys.exit(1) 268 sys.exit(1)
265 #Use .communicate as can get deadlocks with .wait(), 269 # Use .communicate as can get deadlocks with .wait(),
266 stdout, stderr = child.communicate() 270 stdout, stderr = child.communicate()
267 assert not stdout and not stderr #Should be empty as sent to handle 271 assert not stdout and not stderr # Should be empty as sent to handle
268 run_time = time.time() - start_time 272 run_time = time.time() - start_time
269 return_code = child.returncode 273 return_code = child.returncode
270 handle.write("\n") 274 handle.write("\n")
271 handle.write("============================ MIRA has finished ===============================\n") 275 handle.write("============================ MIRA has finished ===============================\n")
272 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) 276 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0))
273 if return_code: 277 if return_code:
274 print "MIRA took %0.2f hours" % (run_time / 3600.0) 278 print("MIRA took %0.2f hours" % (run_time / 3600.0))
275 handle.write("Return error code %i from command:\n" % return_code) 279 handle.write("Return error code %i from command:\n" % return_code)
276 handle.write(cmd + "\n") 280 handle.write(cmd + "\n")
277 handle.close() 281 handle.close()
278 clean_up(temp, name) 282 clean_up(temp, name)
279 log_manifest(manifest) 283 log_manifest(manifest)
280 sys_exit("Return error code %i from command:\n%s" % (return_code, cmd), 284 sys.stderr.write("Return error code %i from command:\n" % return_code)
281 return_code) 285 sys.stderr.write(cmd + "\n")
286 sys.exit(return_code)
282 handle.flush() 287 handle.flush()
283 288
284 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): 289 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):
285 handle.write("\n") 290 handle.write("\n")
286 handle.write("====================== Extract Large Contigs failed ==========================\n") 291 handle.write("====================== Extract Large Contigs failed ==========================\n")
289 handle.write(line) 294 handle.write(line)
290 e.close() 295 e.close()
291 handle.write("============================ (end of ec.log) =================================\n") 296 handle.write("============================ (end of ec.log) =================================\n")
292 handle.flush() 297 handle.flush()
293 298
294 #print "Collecting output..." 299 # print("Collecting output...")
295 start_time = time.time() 300 start_time = time.time()
296 collect_output(temp, name, handle) 301 collect_output(temp, name, handle)
297 collect_time = time.time() - start_time 302 collect_time = time.time() - start_time
298 handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) 303 handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n"
299 print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) 304 % (run_time / 3600.0, collect_time / 60.0))
305 print("MIRA took %0.2f hours; collecting output %0.2f minutes\n"
306 % (run_time / 3600.0, collect_time / 60.0))
300 307
301 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): 308 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):
302 #Treat as an error, but doing this AFTER collect_output 309 # Treat as an error, but doing this AFTER collect_output
303 sys.stderr.write("Extract Large Contigs failed\n") 310 sys.stderr.write("Extract Large Contigs failed\n")
304 handle.write("Extract Large Contigs failed\n") 311 handle.write("Extract Large Contigs failed\n")
305 handle.close() 312 handle.close()
306 sys.exit(1) 313 sys.exit(1)
307 314
308 #print "Cleaning up..." 315 # print "Cleaning up..."
309 clean_up(temp, name) 316 clean_up(temp, name)
310 317
311 handle.write("\nDone\n") 318 handle.write("\nDone\n")
312 handle.close() 319 handle.close()
313 print("Done") 320 print("Done")