Mercurial > repos > peterjc > mira4_assembler
comparison tools/mira4_0/mira4_convert.py @ 4:1713289d9908 draft default tip
v0.0.11 tweak for use with bioconda dependencies
author | peterjc |
---|---|
date | Thu, 10 Aug 2017 11:09:10 -0400 |
parents | 4eb32a3d67d1 |
children |
comparison
equal
deleted
inserted
replaced
3:a4f602cc3aa9 | 4:1713289d9908 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 """A simple wrapper script to call MIRA and collect its output. | 2 """A simple wrapper script to call MIRA and collect its output. |
3 | 3 |
4 This focuses on the miraconvert binary. | 4 This focuses on the miraconvert binary. |
5 """ | 5 """ |
6 | |
7 from __future__ import print_function | |
8 | |
6 import os | 9 import os |
10 import shutil | |
11 import subprocess | |
7 import sys | 12 import sys |
8 import subprocess | 13 |
9 import shutil | |
10 import time | |
11 import tempfile | |
12 from optparse import OptionParser | 14 from optparse import OptionParser |
15 | |
13 try: | 16 try: |
14 from io import BytesIO | 17 from io import BytesIO |
15 except ImportError: | 18 except ImportError: |
16 #Should we worry about Python 2.5 or older? | 19 # Should we worry about Python 2.5 or older? |
17 from StringIO import StringIO as BytesIO | 20 from StringIO import StringIO as BytesIO |
18 | 21 |
19 #Do we need any PYTHONPATH magic? | 22 # Do we need any PYTHONPATH magic? |
20 from mira4_make_bam import depad | 23 from mira4_make_bam import depad |
21 | 24 |
22 WRAPPER_VER = "0.0.7" # Keep in sync with the XML file | 25 WRAPPER_VER = "0.0.11" # Keep in sync with the XML file |
23 | 26 |
24 def sys_exit(msg, err=1): | |
25 sys.stderr.write(msg+"\n") | |
26 sys.exit(err) | |
27 | 27 |
28 def run(cmd): | 28 def run(cmd): |
29 #Avoid using shell=True when we call subprocess to ensure if the Python | 29 # Avoid using shell=True when we call subprocess to ensure if the Python |
30 #script is killed, so too is the child process. | 30 # script is killed, so too is the child process. |
31 try: | 31 try: |
32 child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 32 child = subprocess.Popen(cmd, universal_newlines=True, |
33 except Exception, err: | 33 stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
34 sys_exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) | 34 except Exception as err: |
35 #Use .communicate as can get deadlocks with .wait(), | 35 sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) |
36 # Use .communicate as can get deadlocks with .wait(), | |
36 stdout, stderr = child.communicate() | 37 stdout, stderr = child.communicate() |
37 return_code = child.returncode | 38 return_code = child.returncode |
38 if return_code: | 39 if return_code: |
39 cmd_str = " ".join(cmd) # doesn't quote spaces etc | 40 cmd_str = " ".join(cmd) # doesn't quote spaces etc |
40 if stderr and stdout: | 41 if stderr and stdout: |
41 sys_exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr)) | 42 sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr)) |
42 else: | 43 else: |
43 sys_exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)) | 44 sys.exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)) |
45 | |
44 | 46 |
45 def get_version(mira_binary): | 47 def get_version(mira_binary): |
46 """Run MIRA to find its version number""" | 48 """Run MIRA to find its version number""" |
47 # At the commend line I would use: mira -v | head -n 1 | 49 # At the commend line I would use: mira -v | head -n 1 |
48 # however there is some pipe error when doing that here. | 50 # however there is some pipe error when doing that here. |
49 cmd = [mira_binary, "-v"] | 51 cmd = [mira_binary, "-v"] |
50 try: | 52 try: |
51 child = subprocess.Popen(cmd, | 53 child = subprocess.Popen(cmd, |
52 stdout=subprocess.PIPE, | 54 stdout=subprocess.PIPE, |
53 stderr=subprocess.STDOUT) | 55 stderr=subprocess.STDOUT) |
54 except Exception, err: | 56 except Exception as err: |
55 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) | 57 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) |
56 sys.exit(1) | 58 sys.exit(1) |
57 ver, tmp = child.communicate() | 59 ver, tmp = child.communicate() |
58 del child | 60 del child |
59 return ver.split("\n", 1)[0].strip() | 61 return ver.split("\n", 1)[0].strip() |
60 | 62 |
61 #Parse Command Line | 63 |
64 # Parse Command Line | |
62 usage = """Galaxy MIRA4 wrapper script v%s - use as follows: | 65 usage = """Galaxy MIRA4 wrapper script v%s - use as follows: |
63 | 66 |
64 $ python mira4_convert.py ... | 67 $ python mira4_convert.py ... |
65 | 68 |
66 This will run the MIRA miraconvert binary and collect its output files as directed. | 69 This will run the MIRA miraconvert binary and collect its output files as directed. |
96 parser.add_option("-v", "--version", dest="version", | 99 parser.add_option("-v", "--version", dest="version", |
97 default=False, action="store_true", | 100 default=False, action="store_true", |
98 help="Show version and quit") | 101 help="Show version and quit") |
99 options, args = parser.parse_args() | 102 options, args = parser.parse_args() |
100 if args: | 103 if args: |
101 sys_exit("Expected options (e.g. --input example.maf), not arguments") | 104 sys.exit("Expected options (e.g. --input example.maf), not arguments") |
102 | 105 |
103 input_maf = options.input | 106 input_maf = options.input |
104 out_maf = options.maf | 107 out_maf = options.maf |
105 out_bam = options.bam | 108 out_bam = options.bam |
106 out_fasta = options.fasta | 109 out_fasta = options.fasta |
107 out_ace = options.ace | 110 out_ace = options.ace |
108 out_cstats = options.cstats | 111 out_cstats = options.cstats |
109 | 112 |
110 try: | 113 if "MIRA4" in os.environ: |
111 mira_path = os.environ["MIRA4"] | 114 mira_path = os.environ["MIRA4"] |
112 except KeyError: | 115 mira_convert = os.path.join(mira_path, "miraconvert") |
113 sys_exit("Environment variable $MIRA4 not set") | 116 if not os.path.isfile(mira_convert): |
114 mira_convert = os.path.join(mira_path, "miraconvert") | 117 sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" |
115 if not os.path.isfile(mira_convert): | 118 % (mira_convert, ", ".join(os.listdir(mira_path)))) |
116 sys_exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" | 119 else: |
117 % (mira_convert, ", ".join(os.listdir(mira_path)))) | 120 sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n") |
121 mira_path = None | |
122 mira_convert = "miraconvert" | |
118 | 123 |
119 mira_convert_ver = get_version(mira_convert) | 124 mira_convert_ver = get_version(mira_convert) |
120 if not mira_convert_ver.strip().startswith("4.0"): | 125 if not mira_convert_ver.strip().startswith("4.0"): |
121 sys_exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert)) | 126 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert)) |
122 if options.version: | 127 if options.version: |
123 print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER)) | 128 print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER)) |
124 sys.exit(0) | 129 sys.exit(0) |
125 | 130 |
126 if not input_maf: | 131 if not input_maf: |
127 sys_exit("Input MIRA file is required") | 132 sys.exit("Input MIRA file is required") |
128 elif not os.path.isfile(input_maf): | 133 elif not os.path.isfile(input_maf): |
129 sys_exit("Missing input MIRA file: %r" % input_maf) | 134 sys.exit("Missing input MIRA file: %r" % input_maf) |
130 | 135 |
131 if not (out_maf or out_bam or out_fasta or out_ace or out_cstats): | 136 if not (out_maf or out_bam or out_fasta or out_ace or out_cstats): |
132 sys_exit("No output requested") | 137 sys.exit("No output requested") |
133 | 138 |
134 | 139 |
135 def check_min_int(value, name): | 140 def check_min_int(value, name): |
136 try: | 141 try: |
137 i = int(value) | 142 i = int(value) |
138 except: | 143 except ValueError: |
139 sys_exit("Bad %s setting, %r" % (name, value)) | 144 sys.exit("Bad %s setting, %r" % (name, value)) |
140 if i < 0: | 145 if i < 0: |
141 sys_exit("Negative %s setting, %r" % (name, value)) | 146 sys.exit("Negative %s setting, %r" % (name, value)) |
142 return i | 147 return i |
148 | |
143 | 149 |
144 min_length = check_min_int(options.min_length, "minimum length") | 150 min_length = check_min_int(options.min_length, "minimum length") |
145 min_cover = check_min_int(options.min_cover, "minimum cover") | 151 min_cover = check_min_int(options.min_cover, "minimum cover") |
146 min_reads = check_min_int(options.min_reads, "minimum reads") | 152 min_reads = check_min_int(options.min_reads, "minimum reads") |
147 | 153 |
148 #TODO - Run MIRA in /tmp or a configurable directory? | 154 # TODO - Run MIRA in /tmp or a configurable directory? |
149 #Currently Galaxy puts us somewhere safe like: | 155 # Currently Galaxy puts us somewhere safe like: |
150 #/opt/galaxy-dist/database/job_working_directory/846/ | 156 # /opt/galaxy-dist/database/job_working_directory/846/ |
151 temp = "." | 157 temp = "." |
152 | 158 |
153 | 159 |
154 cmd_list = [mira_convert] | 160 cmd_list = [mira_convert] |
155 if min_length: | 161 if min_length: |
162 if out_maf: | 168 if out_maf: |
163 cmd_list.append("maf") | 169 cmd_list.append("maf") |
164 if out_bam: | 170 if out_bam: |
165 cmd_list.append("samnbb") | 171 cmd_list.append("samnbb") |
166 if not out_fasta: | 172 if not out_fasta: |
167 #Need this for samtools depad | 173 # Need this for samtools depad |
168 out_fasta = os.path.join(temp, "depadded.fasta") | 174 out_fasta = os.path.join(temp, "depadded.fasta") |
169 if out_fasta: | 175 if out_fasta: |
170 cmd_list.append("fasta") | 176 cmd_list.append("fasta") |
171 if out_ace: | 177 if out_ace: |
172 cmd_list.append("ace") | 178 cmd_list.append("ace") |
173 if out_cstats: | 179 if out_cstats: |
174 cmd_list.append("cstats") | 180 cmd_list.append("cstats") |
175 run(cmd_list) | 181 run(cmd_list) |
176 | 182 |
183 | |
177 def collect(old, new): | 184 def collect(old, new): |
178 if not os.path.isfile(old): | 185 if not os.path.isfile(old): |
179 sys_exit("Missing expected output file %s" % old) | 186 sys.exit("Missing expected output file %s" % old) |
180 shutil.move(old, new) | 187 shutil.move(old, new) |
188 | |
181 | 189 |
182 if out_maf: | 190 if out_maf: |
183 collect(os.path.join(temp, "converted.maf"), out_maf) | 191 collect(os.path.join(temp, "converted.maf"), out_maf) |
184 if out_fasta: | 192 if out_fasta: |
185 #Can we look at the MAF file to see if there are multiple strains? | 193 # Can we look at the MAF file to see if there are multiple strains? |
186 old = os.path.join(temp, "converted_AllStrains.unpadded.fasta") | 194 old = os.path.join(temp, "converted_AllStrains.unpadded.fasta") |
187 if os.path.isfile(old): | 195 if os.path.isfile(old): |
188 collect(old, out_fasta) | 196 collect(old, out_fasta) |
189 else: | 197 else: |
190 #Might the output be filtered down to zero contigs? | 198 # Might the output be filtered down to zero contigs? |
191 old = os.path.join(temp, "converted.fasta") | 199 old = os.path.join(temp, "converted.fasta") |
192 if not os.path.isfile(old): | 200 if not os.path.isfile(old): |
193 sys_exit("Missing expected output FASTA file") | 201 sys.exit("Missing expected output FASTA file") |
194 elif os.path.getsize(old) == 0: | 202 elif os.path.getsize(old) == 0: |
195 print("Warning - no contigs (harsh filters?)") | 203 print("Warning - no contigs (harsh filters?)") |
196 collect(old, out_fasta) | 204 collect(old, out_fasta) |
197 else: | 205 else: |
198 sys_exit("Missing expected output FASTA file (only generic file present)") | 206 sys.exit("Missing expected output FASTA file (only generic file present)") |
199 if out_ace: | 207 if out_ace: |
200 collect(os.path.join(temp, "converted.maf"), out_ace) | 208 collect(os.path.join(temp, "converted.maf"), out_ace) |
201 if out_cstats: | 209 if out_cstats: |
202 collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats) | 210 collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats) |
203 | 211 |
205 assert os.path.isfile(out_fasta) | 213 assert os.path.isfile(out_fasta) |
206 old = os.path.join(temp, "converted.samnbb") | 214 old = os.path.join(temp, "converted.samnbb") |
207 if not os.path.isfile(old): | 215 if not os.path.isfile(old): |
208 old = os.path.join(temp, "converted.sam") | 216 old = os.path.join(temp, "converted.sam") |
209 if not os.path.isfile(old): | 217 if not os.path.isfile(old): |
210 sys_exit("Missing expected intermediate file %s" % old) | 218 sys.exit("Missing expected intermediate file %s" % old) |
211 h = BytesIO() | 219 h = BytesIO() |
212 msg = depad(out_fasta, old, out_bam, h) | 220 msg = depad(out_fasta, old, out_bam, h) |
213 if msg: | 221 if msg: |
214 print(msg) | 222 print(msg) |
215 print(h.getvalue()) | 223 print(h.getvalue()) |
216 h.close() | 224 h.close() |
217 sys.exit(1) | 225 sys.exit(1) |
218 h.close() | 226 h.close() |
219 if out_fasta == os.path.join(temp, "depadded.fasta"): | 227 if out_fasta == os.path.join(temp, "depadded.fasta"): |
220 #Not asked for by Galaxy, no longer needed | 228 # Not asked for by Galaxy, no longer needed |
221 os.remove(out_fasta) | 229 os.remove(out_fasta) |
222 | 230 |
223 if min_length or min_cover or min_reads: | 231 if min_length or min_cover or min_reads: |
224 print("Filtered.") | 232 print("Filtered.") |
225 else: | 233 else: |