comparison tools/mira4_assembler/mira4_convert.py @ 1:70248e6e3efc draft

v0.0.7 move dependency to package_mira_4_0_2 etc
author peterjc
date Wed, 05 Aug 2015 11:31:05 -0400
parents
children
comparison
equal deleted inserted replaced
0:6a88b42ce6b9 1:70248e6e3efc
1 #!/usr/bin/env python
2 """A simple wrapper script to call MIRA and collect its output.
3
4 This focuses on the miraconvert binary.
5 """
6 import os
7 import sys
8 import subprocess
9 import shutil
10 import time
11 import tempfile
12 from optparse import OptionParser
13 try:
14 from io import BytesIO
15 except ImportError:
16 #Should we worry about Python 2.5 or older?
17 from StringIO import StringIO as BytesIO
18
19 #Do we need any PYTHONPATH magic?
20 from mira4_make_bam import depad
21
22 WRAPPER_VER = "0.0.7" # Keep in sync with the XML file
23
24 def sys_exit(msg, err=1):
25 sys.stderr.write(msg+"\n")
26 sys.exit(err)
27
28 def run(cmd):
29 #Avoid using shell=True when we call subprocess to ensure if the Python
30 #script is killed, so too is the child process.
31 try:
32 child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
33 except Exception, err:
34 sys_exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
35 #Use .communicate as can get deadlocks with .wait(),
36 stdout, stderr = child.communicate()
37 return_code = child.returncode
38 if return_code:
39 cmd_str = " ".join(cmd) # doesn't quote spaces etc
40 if stderr and stdout:
41 sys_exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr))
42 else:
43 sys_exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr))
44
45 def get_version(mira_binary):
46 """Run MIRA to find its version number"""
47 # At the commend line I would use: mira -v | head -n 1
48 # however there is some pipe error when doing that here.
49 cmd = [mira_binary, "-v"]
50 try:
51 child = subprocess.Popen(cmd,
52 stdout=subprocess.PIPE,
53 stderr=subprocess.STDOUT)
54 except Exception, err:
55 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
56 sys.exit(1)
57 ver, tmp = child.communicate()
58 del child
59 return ver.split("\n", 1)[0].strip()
60
61 #Parse Command Line
62 usage = """Galaxy MIRA4 wrapper script v%s - use as follows:
63
64 $ python mira4_convert.py ...
65
66 This will run the MIRA miraconvert binary and collect its output files as directed.
67 """ % WRAPPER_VER
68 parser = OptionParser(usage=usage)
69 parser.add_option("--input", dest="input",
70 default=None, metavar="FILE",
71 help="MIRA input filename")
72 parser.add_option("-x", "--min_length", dest="min_length",
73 default="0",
74 help="Minimum contig length")
75 parser.add_option("-y", "--min_cover", dest="min_cover",
76 default="0",
77 help="Minimum average contig coverage")
78 parser.add_option("-z", "--min_reads", dest="min_reads",
79 default="0",
80 help="Minimum reads per contig")
81 parser.add_option("--maf", dest="maf",
82 default="", metavar="FILE",
83 help="MIRA MAF output filename")
84 parser.add_option("--ace", dest="ace",
85 default="", metavar="FILE",
86 help="ACE output filename")
87 parser.add_option("--bam", dest="bam",
88 default="", metavar="FILE",
89 help="Unpadded BAM output filename")
90 parser.add_option("--fasta", dest="fasta",
91 default="", metavar="FILE",
92 help="Unpadded FASTA output filename")
93 parser.add_option("--cstats", dest="cstats",
94 default="", metavar="FILE",
95 help="Contig statistics filename")
96 parser.add_option("-v", "--version", dest="version",
97 default=False, action="store_true",
98 help="Show version and quit")
99 options, args = parser.parse_args()
100 if args:
101 sys_exit("Expected options (e.g. --input example.maf), not arguments")
102
103 input_maf = options.input
104 out_maf = options.maf
105 out_bam = options.bam
106 out_fasta = options.fasta
107 out_ace = options.ace
108 out_cstats = options.cstats
109
110 try:
111 mira_path = os.environ["MIRA4"]
112 except KeyError:
113 sys_exit("Environment variable $MIRA4 not set")
114 mira_convert = os.path.join(mira_path, "miraconvert")
115 if not os.path.isfile(mira_convert):
116 sys_exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
117 % (mira_convert, ", ".join(os.listdir(mira_path))))
118
119 mira_convert_ver = get_version(mira_convert)
120 if not mira_convert_ver.strip().startswith("4.0"):
121 sys_exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert))
122 if options.version:
123 print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER))
124 sys.exit(0)
125
126 if not input_maf:
127 sys_exit("Input MIRA file is required")
128 elif not os.path.isfile(input_maf):
129 sys_exit("Missing input MIRA file: %r" % input_maf)
130
131 if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
132 sys_exit("No output requested")
133
134
135 def check_min_int(value, name):
136 try:
137 i = int(value)
138 except:
139 sys_exit("Bad %s setting, %r" % (name, value))
140 if i < 0:
141 sys_exit("Negative %s setting, %r" % (name, value))
142 return i
143
144 min_length = check_min_int(options.min_length, "minimum length")
145 min_cover = check_min_int(options.min_cover, "minimum cover")
146 min_reads = check_min_int(options.min_reads, "minimum reads")
147
148 #TODO - Run MIRA in /tmp or a configurable directory?
149 #Currently Galaxy puts us somewhere safe like:
150 #/opt/galaxy-dist/database/job_working_directory/846/
151 temp = "."
152
153
154 cmd_list = [mira_convert]
155 if min_length:
156 cmd_list.extend(["-x", str(min_length)])
157 if min_cover:
158 cmd_list.extend(["-y", str(min_cover)])
159 if min_reads:
160 cmd_list.extend(["-z", str(min_reads)])
161 cmd_list.extend(["-f", "maf", input_maf, os.path.join(temp, "converted")])
162 if out_maf:
163 cmd_list.append("maf")
164 if out_bam:
165 cmd_list.append("samnbb")
166 if not out_fasta:
167 #Need this for samtools depad
168 out_fasta = os.path.join(temp, "depadded.fasta")
169 if out_fasta:
170 cmd_list.append("fasta")
171 if out_ace:
172 cmd_list.append("ace")
173 if out_cstats:
174 cmd_list.append("cstats")
175 run(cmd_list)
176
177 def collect(old, new):
178 if not os.path.isfile(old):
179 sys_exit("Missing expected output file %s" % old)
180 shutil.move(old, new)
181
182 if out_maf:
183 collect(os.path.join(temp, "converted.maf"), out_maf)
184 if out_fasta:
185 #Can we look at the MAF file to see if there are multiple strains?
186 old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
187 if os.path.isfile(old):
188 collect(old, out_fasta)
189 else:
190 #Might the output be filtered down to zero contigs?
191 old = os.path.join(temp, "converted.fasta")
192 if not os.path.isfile(old):
193 sys_exit("Missing expected output FASTA file")
194 elif os.path.getsize(old) == 0:
195 print("Warning - no contigs (harsh filters?)")
196 collect(old, out_fasta)
197 else:
198 sys_exit("Missing expected output FASTA file (only generic file present)")
199 if out_ace:
200 collect(os.path.join(temp, "converted.maf"), out_ace)
201 if out_cstats:
202 collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)
203
204 if out_bam:
205 assert os.path.isfile(out_fasta)
206 old = os.path.join(temp, "converted.samnbb")
207 if not os.path.isfile(old):
208 old = os.path.join(temp, "converted.sam")
209 if not os.path.isfile(old):
210 sys_exit("Missing expected intermediate file %s" % old)
211 h = BytesIO()
212 msg = depad(out_fasta, old, out_bam, h)
213 if msg:
214 print(msg)
215 print(h.getvalue())
216 h.close()
217 sys.exit(1)
218 h.close()
219 if out_fasta == os.path.join(temp, "depadded.fasta"):
220 #Not asked for by Galaxy, no longer needed
221 os.remove(out_fasta)
222
223 if min_length or min_cover or min_reads:
224 print("Filtered.")
225 else:
226 print("Converted.")