comparison tools/mira4_0/mira4_convert.py @ 4:1713289d9908 draft default tip

v0.0.11 tweak for use with bioconda dependencies
author peterjc
date Thu, 10 Aug 2017 11:09:10 -0400
parents 4eb32a3d67d1
children
comparison
equal deleted inserted replaced
3:a4f602cc3aa9 4:1713289d9908
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 """A simple wrapper script to call MIRA and collect its output. 2 """A simple wrapper script to call MIRA and collect its output.
3 3
4 This focuses on the miraconvert binary. 4 This focuses on the miraconvert binary.
5 """ 5 """
6
7 from __future__ import print_function
8
6 import os 9 import os
10 import shutil
11 import subprocess
7 import sys 12 import sys
8 import subprocess 13
9 import shutil
10 import time
11 import tempfile
12 from optparse import OptionParser 14 from optparse import OptionParser
15
13 try: 16 try:
14 from io import BytesIO 17 from io import BytesIO
15 except ImportError: 18 except ImportError:
16 #Should we worry about Python 2.5 or older? 19 # Should we worry about Python 2.5 or older?
17 from StringIO import StringIO as BytesIO 20 from StringIO import StringIO as BytesIO
18 21
19 #Do we need any PYTHONPATH magic? 22 # Do we need any PYTHONPATH magic?
20 from mira4_make_bam import depad 23 from mira4_make_bam import depad
21 24
22 WRAPPER_VER = "0.0.7" # Keep in sync with the XML file 25 WRAPPER_VER = "0.0.11" # Keep in sync with the XML file
23 26
24 def sys_exit(msg, err=1):
25 sys.stderr.write(msg+"\n")
26 sys.exit(err)
27 27
28 def run(cmd): 28 def run(cmd):
29 #Avoid using shell=True when we call subprocess to ensure if the Python 29 # Avoid using shell=True when we call subprocess to ensure if the Python
30 #script is killed, so too is the child process. 30 # script is killed, so too is the child process.
31 try: 31 try:
32 child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 32 child = subprocess.Popen(cmd, universal_newlines=True,
33 except Exception, err: 33 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
34 sys_exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) 34 except Exception as err:
35 #Use .communicate as can get deadlocks with .wait(), 35 sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
36 # Use .communicate as can get deadlocks with .wait(),
36 stdout, stderr = child.communicate() 37 stdout, stderr = child.communicate()
37 return_code = child.returncode 38 return_code = child.returncode
38 if return_code: 39 if return_code:
39 cmd_str = " ".join(cmd) # doesn't quote spaces etc 40 cmd_str = " ".join(cmd) # doesn't quote spaces etc
40 if stderr and stdout: 41 if stderr and stdout:
41 sys_exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr)) 42 sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr))
42 else: 43 else:
43 sys_exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)) 44 sys.exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr))
45
44 46
45 def get_version(mira_binary): 47 def get_version(mira_binary):
46 """Run MIRA to find its version number""" 48 """Run MIRA to find its version number"""
47 # At the commend line I would use: mira -v | head -n 1 49 # At the commend line I would use: mira -v | head -n 1
48 # however there is some pipe error when doing that here. 50 # however there is some pipe error when doing that here.
49 cmd = [mira_binary, "-v"] 51 cmd = [mira_binary, "-v"]
50 try: 52 try:
51 child = subprocess.Popen(cmd, 53 child = subprocess.Popen(cmd,
52 stdout=subprocess.PIPE, 54 stdout=subprocess.PIPE,
53 stderr=subprocess.STDOUT) 55 stderr=subprocess.STDOUT)
54 except Exception, err: 56 except Exception as err:
55 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) 57 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
56 sys.exit(1) 58 sys.exit(1)
57 ver, tmp = child.communicate() 59 ver, tmp = child.communicate()
58 del child 60 del child
59 return ver.split("\n", 1)[0].strip() 61 return ver.split("\n", 1)[0].strip()
60 62
61 #Parse Command Line 63
64 # Parse Command Line
62 usage = """Galaxy MIRA4 wrapper script v%s - use as follows: 65 usage = """Galaxy MIRA4 wrapper script v%s - use as follows:
63 66
64 $ python mira4_convert.py ... 67 $ python mira4_convert.py ...
65 68
66 This will run the MIRA miraconvert binary and collect its output files as directed. 69 This will run the MIRA miraconvert binary and collect its output files as directed.
96 parser.add_option("-v", "--version", dest="version", 99 parser.add_option("-v", "--version", dest="version",
97 default=False, action="store_true", 100 default=False, action="store_true",
98 help="Show version and quit") 101 help="Show version and quit")
99 options, args = parser.parse_args() 102 options, args = parser.parse_args()
100 if args: 103 if args:
101 sys_exit("Expected options (e.g. --input example.maf), not arguments") 104 sys.exit("Expected options (e.g. --input example.maf), not arguments")
102 105
103 input_maf = options.input 106 input_maf = options.input
104 out_maf = options.maf 107 out_maf = options.maf
105 out_bam = options.bam 108 out_bam = options.bam
106 out_fasta = options.fasta 109 out_fasta = options.fasta
107 out_ace = options.ace 110 out_ace = options.ace
108 out_cstats = options.cstats 111 out_cstats = options.cstats
109 112
110 try: 113 if "MIRA4" in os.environ:
111 mira_path = os.environ["MIRA4"] 114 mira_path = os.environ["MIRA4"]
112 except KeyError: 115 mira_convert = os.path.join(mira_path, "miraconvert")
113 sys_exit("Environment variable $MIRA4 not set") 116 if not os.path.isfile(mira_convert):
114 mira_convert = os.path.join(mira_path, "miraconvert") 117 sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
115 if not os.path.isfile(mira_convert): 118 % (mira_convert, ", ".join(os.listdir(mira_path))))
116 sys_exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" 119 else:
117 % (mira_convert, ", ".join(os.listdir(mira_path)))) 120 sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n")
121 mira_path = None
122 mira_convert = "miraconvert"
118 123
119 mira_convert_ver = get_version(mira_convert) 124 mira_convert_ver = get_version(mira_convert)
120 if not mira_convert_ver.strip().startswith("4.0"): 125 if not mira_convert_ver.strip().startswith("4.0"):
121 sys_exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert)) 126 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert))
122 if options.version: 127 if options.version:
123 print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER)) 128 print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER))
124 sys.exit(0) 129 sys.exit(0)
125 130
126 if not input_maf: 131 if not input_maf:
127 sys_exit("Input MIRA file is required") 132 sys.exit("Input MIRA file is required")
128 elif not os.path.isfile(input_maf): 133 elif not os.path.isfile(input_maf):
129 sys_exit("Missing input MIRA file: %r" % input_maf) 134 sys.exit("Missing input MIRA file: %r" % input_maf)
130 135
131 if not (out_maf or out_bam or out_fasta or out_ace or out_cstats): 136 if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
132 sys_exit("No output requested") 137 sys.exit("No output requested")
133 138
134 139
135 def check_min_int(value, name): 140 def check_min_int(value, name):
136 try: 141 try:
137 i = int(value) 142 i = int(value)
138 except: 143 except ValueError:
139 sys_exit("Bad %s setting, %r" % (name, value)) 144 sys.exit("Bad %s setting, %r" % (name, value))
140 if i < 0: 145 if i < 0:
141 sys_exit("Negative %s setting, %r" % (name, value)) 146 sys.exit("Negative %s setting, %r" % (name, value))
142 return i 147 return i
148
143 149
144 min_length = check_min_int(options.min_length, "minimum length") 150 min_length = check_min_int(options.min_length, "minimum length")
145 min_cover = check_min_int(options.min_cover, "minimum cover") 151 min_cover = check_min_int(options.min_cover, "minimum cover")
146 min_reads = check_min_int(options.min_reads, "minimum reads") 152 min_reads = check_min_int(options.min_reads, "minimum reads")
147 153
148 #TODO - Run MIRA in /tmp or a configurable directory? 154 # TODO - Run MIRA in /tmp or a configurable directory?
149 #Currently Galaxy puts us somewhere safe like: 155 # Currently Galaxy puts us somewhere safe like:
150 #/opt/galaxy-dist/database/job_working_directory/846/ 156 # /opt/galaxy-dist/database/job_working_directory/846/
151 temp = "." 157 temp = "."
152 158
153 159
154 cmd_list = [mira_convert] 160 cmd_list = [mira_convert]
155 if min_length: 161 if min_length:
162 if out_maf: 168 if out_maf:
163 cmd_list.append("maf") 169 cmd_list.append("maf")
164 if out_bam: 170 if out_bam:
165 cmd_list.append("samnbb") 171 cmd_list.append("samnbb")
166 if not out_fasta: 172 if not out_fasta:
167 #Need this for samtools depad 173 # Need this for samtools depad
168 out_fasta = os.path.join(temp, "depadded.fasta") 174 out_fasta = os.path.join(temp, "depadded.fasta")
169 if out_fasta: 175 if out_fasta:
170 cmd_list.append("fasta") 176 cmd_list.append("fasta")
171 if out_ace: 177 if out_ace:
172 cmd_list.append("ace") 178 cmd_list.append("ace")
173 if out_cstats: 179 if out_cstats:
174 cmd_list.append("cstats") 180 cmd_list.append("cstats")
175 run(cmd_list) 181 run(cmd_list)
176 182
183
177 def collect(old, new): 184 def collect(old, new):
178 if not os.path.isfile(old): 185 if not os.path.isfile(old):
179 sys_exit("Missing expected output file %s" % old) 186 sys.exit("Missing expected output file %s" % old)
180 shutil.move(old, new) 187 shutil.move(old, new)
188
181 189
182 if out_maf: 190 if out_maf:
183 collect(os.path.join(temp, "converted.maf"), out_maf) 191 collect(os.path.join(temp, "converted.maf"), out_maf)
184 if out_fasta: 192 if out_fasta:
185 #Can we look at the MAF file to see if there are multiple strains? 193 # Can we look at the MAF file to see if there are multiple strains?
186 old = os.path.join(temp, "converted_AllStrains.unpadded.fasta") 194 old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
187 if os.path.isfile(old): 195 if os.path.isfile(old):
188 collect(old, out_fasta) 196 collect(old, out_fasta)
189 else: 197 else:
190 #Might the output be filtered down to zero contigs? 198 # Might the output be filtered down to zero contigs?
191 old = os.path.join(temp, "converted.fasta") 199 old = os.path.join(temp, "converted.fasta")
192 if not os.path.isfile(old): 200 if not os.path.isfile(old):
193 sys_exit("Missing expected output FASTA file") 201 sys.exit("Missing expected output FASTA file")
194 elif os.path.getsize(old) == 0: 202 elif os.path.getsize(old) == 0:
195 print("Warning - no contigs (harsh filters?)") 203 print("Warning - no contigs (harsh filters?)")
196 collect(old, out_fasta) 204 collect(old, out_fasta)
197 else: 205 else:
198 sys_exit("Missing expected output FASTA file (only generic file present)") 206 sys.exit("Missing expected output FASTA file (only generic file present)")
199 if out_ace: 207 if out_ace:
200 collect(os.path.join(temp, "converted.maf"), out_ace) 208 collect(os.path.join(temp, "converted.maf"), out_ace)
201 if out_cstats: 209 if out_cstats:
202 collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats) 210 collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)
203 211
205 assert os.path.isfile(out_fasta) 213 assert os.path.isfile(out_fasta)
206 old = os.path.join(temp, "converted.samnbb") 214 old = os.path.join(temp, "converted.samnbb")
207 if not os.path.isfile(old): 215 if not os.path.isfile(old):
208 old = os.path.join(temp, "converted.sam") 216 old = os.path.join(temp, "converted.sam")
209 if not os.path.isfile(old): 217 if not os.path.isfile(old):
210 sys_exit("Missing expected intermediate file %s" % old) 218 sys.exit("Missing expected intermediate file %s" % old)
211 h = BytesIO() 219 h = BytesIO()
212 msg = depad(out_fasta, old, out_bam, h) 220 msg = depad(out_fasta, old, out_bam, h)
213 if msg: 221 if msg:
214 print(msg) 222 print(msg)
215 print(h.getvalue()) 223 print(h.getvalue())
216 h.close() 224 h.close()
217 sys.exit(1) 225 sys.exit(1)
218 h.close() 226 h.close()
219 if out_fasta == os.path.join(temp, "depadded.fasta"): 227 if out_fasta == os.path.join(temp, "depadded.fasta"):
220 #Not asked for by Galaxy, no longer needed 228 # Not asked for by Galaxy, no longer needed
221 os.remove(out_fasta) 229 os.remove(out_fasta)
222 230
223 if min_length or min_cover or min_reads: 231 if min_length or min_cover or min_reads:
224 print("Filtered.") 232 print("Filtered.")
225 else: 233 else: