2
|
1 #!/usr/bin/env python
|
|
2 """A simple wrapper script to call MIRA and collect its output.
|
|
3
|
|
4 This focuses on the miraconvert binary.
|
|
5 """
|
4
|
6
|
|
7 from __future__ import print_function
|
|
8
|
2
|
9 import os
|
|
10 import shutil
|
4
|
11 import subprocess
|
|
12 import sys
|
|
13
|
2
|
14 from optparse import OptionParser
|
4
|
15
|
2
|
16 try:
|
|
17 from io import BytesIO
|
|
18 except ImportError:
|
4
|
19 # Should we worry about Python 2.5 or older?
|
2
|
20 from StringIO import StringIO as BytesIO
|
|
21
|
4
|
22 # Do we need any PYTHONPATH magic?
|
2
|
23 from mira4_make_bam import depad
|
|
24
|
4
|
# Version of this wrapper script; it is interpolated into the usage text and
# printed next to the MIRA version when --version is given.
WRAPPER_VER = "0.0.11"  # Keep in sync with the XML file
|
2
|
26
|
|
27
|
|
def run(cmd):
    """Run an external command, aborting the script if it fails.

    Args:
        cmd: The command and its arguments as a list of strings. It is
            passed to ``subprocess.Popen`` without a shell.

    Returns:
        None. The captured output of a successful command is discarded.

    Raises:
        SystemExit: If the command cannot be started, or if it finishes
            with a non-zero return code. The exit message holds the return
            code, the command line and whatever the command wrote to
            stdout and/or stderr.
    """
    # Avoid using shell=True when we call subprocess to ensure if the Python
    # script is killed, so too is the child process.
    try:
        child = subprocess.Popen(cmd, universal_newlines=True,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception as err:
        sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
    # Use .communicate() rather than .wait(): with both streams piped,
    # .wait() can deadlock once a pipe buffer fills up.
    stdout, stderr = child.communicate()
    return_code = child.returncode
    if return_code:
        cmd_str = " ".join(cmd)  # doesn't quote spaces etc
        if stderr and stdout:
            sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s"
                     % (return_code, cmd_str, stdout, stderr))
        else:
            # At most one stream has content; report whichever one does so
            # that diagnostics written only to stdout are not lost.
            sys.exit("Return code %i from command:\n%s\n%s"
                     % (return_code, cmd_str, stderr or stdout))
|
|
45
|
2
|
46
|
|
def get_version(mira_binary):
    """Run MIRA to find its version number.

    Args:
        mira_binary: Name or path of the MIRA executable, which is run
            with the single argument ``-v``.

    Returns:
        The first line of the command's combined stdout/stderr output as
        text, with surrounding whitespace stripped.

    Raises:
        SystemExit: With status 1 (after writing the reason to stderr) if
            the command cannot be started.
    """
    # At the command line I would use: mira -v | head -n 1
    # however there is some pipe error when doing that here.
    cmd = [mira_binary, "-v"]
    try:
        # universal_newlines=True makes the captured output text on both
        # Python 2 and 3, so the str-based split below works on either.
        child = subprocess.Popen(cmd, universal_newlines=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
    except Exception as err:
        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
        sys.exit(1)
    # stderr is merged into stdout, so the second item is always None.
    ver, _ = child.communicate()
    return ver.split("\n", 1)[0].strip()
|
|
62
|
4
|
63
|
|
# Parse Command Line
#
# The options fall into three families (one input file, three numeric
# filters, five output files), so the repetitive declarations are driven
# from small tables. The order matches the order shown by --help.
usage = """Galaxy MIRA4 wrapper script v%s - use as follows:

$ python mira4_convert.py ...

This will run the MIRA miraconvert binary and collect its output files as directed.
""" % WRAPPER_VER
parser = OptionParser(usage=usage)
parser.add_option("--input", dest="input", default=None, metavar="FILE",
                  help="MIRA input filename")

# Contig filters: kept as strings here, validated as integers later on.
for short_flag, dest_name, help_text in (
    ("-x", "min_length", "Minimum contig length"),
    ("-y", "min_cover", "Minimum average contig coverage"),
    ("-z", "min_reads", "Minimum reads per contig"),
):
    parser.add_option(short_flag, "--" + dest_name, dest=dest_name,
                      default="0", help=help_text)

# Output files: an empty string means that output was not requested.
for dest_name, help_text in (
    ("maf", "MIRA MAF output filename"),
    ("ace", "ACE output filename"),
    ("bam", "Unpadded BAM output filename"),
    ("fasta", "Unpadded FASTA output filename"),
    ("cstats", "Contig statistics filename"),
):
    parser.add_option("--" + dest_name, dest=dest_name, default="",
                      metavar="FILE", help=help_text)

parser.add_option("-v", "--version", dest="version", default=False,
                  action="store_true", help="Show version and quit")

options, args = parser.parse_args()
if args:
    # Everything must be given as a named option.
    sys.exit("Expected options (e.g. --input example.maf), not arguments")

input_maf = options.input
out_maf, out_bam, out_fasta, out_ace, out_cstats = (
    options.maf,
    options.bam,
    options.fasta,
    options.ace,
    options.cstats,
)
|
|
112
|
4
|
# Locate the miraconvert binary: inside the folder named by $MIRA4 when that
# is set, otherwise assume it can be found on $PATH.
if "MIRA4" in os.environ:
    mira_path = os.environ["MIRA4"]
    mira_convert = os.path.join(mira_path, "miraconvert")
    if not os.path.isfile(mira_convert):
        # $MIRA4 might not be a readable folder at all; do not let the
        # directory listing raise and replace the message with a traceback.
        try:
            folder_contents = ", ".join(os.listdir(mira_path))
        except OSError as err:
            folder_contents = "(unable to list folder: %s)" % err
        sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
                 % (mira_convert, folder_contents))
else:
    sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n")
    mira_path = None
    mira_convert = "miraconvert"

# Refuse to run against anything other than a 4.0.x binary. This happens
# before --version is handled so the MIRA version can be reported too.
mira_convert_ver = get_version(mira_convert)
if not mira_convert_ver.strip().startswith("4.0"):
    sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert))
if options.version:
    print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER))
    sys.exit(0)

# An existing input file is mandatory.
if not input_maf:
    sys.exit("Input MIRA file is required")
elif not os.path.isfile(input_maf):
    sys.exit("Missing input MIRA file: %r" % input_maf)

# At least one output must have been requested.
if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
    sys.exit("No output requested")
|
2
|
138
|
|
139
|
|
def check_min_int(value, name):
    """Convert a threshold setting to a non-negative integer.

    Args:
        value: The raw setting, converted with ``int()``.
        name: Human readable name of the setting, used in error messages.

    Returns:
        The setting as an integer that is zero or greater.

    Raises:
        SystemExit: If ``int()`` rejects the value with ValueError, or if
            the resulting number is negative.
    """
    try:
        number = int(value)
    except ValueError:
        sys.exit("Bad %s setting, %r" % (name, value))
    if number >= 0:
        return number
    sys.exit("Negative %s setting, %r" % (name, value))
|
|
148
|
4
|
149
|
2
|
# Validate the three numeric filters (each ends up as an int >= 0).
min_length = check_min_int(options.min_length, "minimum length")
min_cover = check_min_int(options.min_cover, "minimum cover")
min_reads = check_min_int(options.min_reads, "minimum reads")

# TODO - Run MIRA in /tmp or a configurable directory?
# Currently Galaxy puts us somewhere safe like:
# /opt/galaxy-dist/database/job_working_directory/846/
temp = "."

# Assemble the miraconvert command line. A filter of zero is simply omitted.
cmd_list = [mira_convert]
for filter_flag, filter_value in (
    ("-x", min_length),
    ("-y", min_cover),
    ("-z", min_reads),
):
    if filter_value:
        cmd_list += [filter_flag, str(filter_value)]
cmd_list += ["-f", "maf", input_maf, os.path.join(temp, "converted")]

if out_bam and not out_fasta:
    # Need this for samtools depad
    out_fasta = os.path.join(temp, "depadded.fasta")

# Append one output type per requested output, in a fixed order.
for requested, output_type in (
    (out_maf, "maf"),
    (out_bam, "samnbb"),
    (out_fasta, "fasta"),
    (out_ace, "ace"),
    (out_cstats, "cstats"),
):
    if requested:
        cmd_list.append(output_type)
run(cmd_list)
|
|
182
|
4
|
183
|
2
|
def collect(old, new):
    """Move an expected output file to its final location.

    Args:
        old: Path where the file is expected to exist.
        new: Destination path handed to ``shutil.move``.

    Raises:
        SystemExit: If ``old`` is not an existing file.
    """
    if os.path.isfile(old):
        shutil.move(old, new)
        return
    sys.exit("Missing expected output file %s" % old)
|
|
188
|
4
|
189
|
2
|
# Move each requested output from the name miraconvert gave it (based on the
# "converted" prefix passed on its command line) to the requested filename.
if out_maf:
    collect(os.path.join(temp, "converted.maf"), out_maf)
if out_fasta:
    # Can we look at the MAF file to see if there are multiple strains?
    old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
    if os.path.isfile(old):
        collect(old, out_fasta)
    else:
        # Might the output be filtered down to zero contigs?
        old = os.path.join(temp, "converted.fasta")
        if not os.path.isfile(old):
            sys.exit("Missing expected output FASTA file")
        elif os.path.getsize(old) == 0:
            # An empty generic FASTA file is accepted as "no contigs".
            print("Warning - no contigs (harsh filters?)")
            collect(old, out_fasta)
        else:
            sys.exit("Missing expected output FASTA file (only generic file present)")
if out_ace:
    # Collect the ACE file itself. The MAF file is a different output and,
    # when requested, has already been moved away above.
    collect(os.path.join(temp, "converted.ace"), out_ace)
if out_cstats:
    collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)

if out_bam:
    # Explicit check rather than an assert, which is skipped under "python -O".
    if not os.path.isfile(out_fasta):
        sys.exit("Missing FASTA file needed to make the BAM file: %s" % out_fasta)
    # Accept either extension for the SAM file produced for "samnbb".
    old = os.path.join(temp, "converted.samnbb")
    if not os.path.isfile(old):
        old = os.path.join(temp, "converted.sam")
    if not os.path.isfile(old):
        sys.exit("Missing expected intermediate file %s" % old)
    h = BytesIO()
    # depad() gets the handle h as its last argument; a true return value is
    # treated as an error message and reported together with h's contents.
    msg = depad(out_fasta, old, out_bam, h)
    if msg:
        print(msg)
        print(h.getvalue())
        h.close()
        sys.exit(1)
    h.close()
    if out_fasta == os.path.join(temp, "depadded.fasta"):
        # Not asked for by Galaxy, no longer needed
        os.remove(out_fasta)

# Short status line: "Filtered." if any filter was non-zero.
if min_length or min_cover or min_reads:
    print("Filtered.")
else:
    print("Converted.")
|