| 2 | 1 #!/usr/bin/env python | 
|  | 2 """A simple wrapper script to call MIRA and collect its output. | 
|  | 3 | 
|  | 4 This focuses on the miraconvert binary. | 
|  | 5 """ | 
| 4 | 6 | 
|  | 7 from __future__ import print_function | 
|  | 8 | 
| 2 | 9 import os | 
|  | 10 import shutil | 
| 4 | 11 import subprocess | 
|  | 12 import sys | 
|  | 13 | 
| 2 | 14 from optparse import OptionParser | 
| 4 | 15 | 
| 2 | 16 try: | 
|  | 17     from io import BytesIO | 
|  | 18 except ImportError: | 
| 4 | 19     # Should we worry about Python 2.5 or older? | 
| 2 | 20     from StringIO import StringIO as BytesIO | 
|  | 21 | 
| 4 | 22 # Do we need any PYTHONPATH magic? | 
| 2 | 23 from mira4_make_bam import depad | 
|  | 24 | 
| 4 | 25 WRAPPER_VER = "0.0.11"  # Keep in sync with the XML file | 
| 2 | 26 | 
|  | 27 | 
def run(cmd):
    """Run an external command, exiting the script if it fails.

    Arguments:
     - cmd - list of strings giving the program and its arguments; it is
       handed to subprocess.Popen directly (no shell).

    Returns None when the command exits with a zero return code (the
    captured output is discarded). Otherwise calls sys.exit with a message
    containing the return code, the command, and the captured output.
    Also calls sys.exit if the command could not be started at all.
    """
    # Avoid using shell=True when we call subprocess to ensure if the Python
    # script is killed, so too is the child process.
    try:
        child = subprocess.Popen(cmd, universal_newlines=True,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception as err:
        sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
    # Use .communicate as can get deadlocks with .wait(),
    stdout, stderr = child.communicate()
    return_code = child.returncode
    if not return_code:
        return
    cmd_str = " ".join(cmd)  # doesn't quote spaces etc
    if stderr and stdout:
        sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s"
                 % (return_code, cmd_str, stdout, stderr))
    # At most one stream has content here. Report whichever it is, so that
    # a tool which only wrote its error text to stdout is not silenced.
    sys.exit("Return code %i from command:\n%s\n%s"
             % (return_code, cmd_str, stderr or stdout))
|  | 45 | 
| 2 | 46 | 
def get_version(mira_binary):
    """Run MIRA to find its version number.

    Runs ``mira_binary -v`` with stderr merged into stdout, and returns the
    first line of that output with surrounding whitespace removed.

    If the binary cannot be started, an error is written to stderr and the
    script exits with status 1.
    """
    # At the command line I would use: mira -v | head -n 1
    # however there is some pipe error when doing that here.
    cmd = [mira_binary, "-v"]
    try:
        # universal_newlines=True makes the captured output text rather
        # than bytes on Python 3, so the str split below works there too.
        child = subprocess.Popen(cmd, universal_newlines=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
    except Exception as err:
        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
        sys.exit(1)
    # stderr was redirected into stdout, so the second item is always None.
    ver, _ = child.communicate()
    return ver.split("\n", 1)[0].strip()
|  | 62 | 
| 4 | 63 | 
# Parse Command Line
usage = """Galaxy MIRA4 wrapper script v%s - use as follows:

$ python mira4_convert.py ...

This will run the MIRA miraconvert binary and collect its output files as directed.
""" % WRAPPER_VER
parser = OptionParser(usage=usage)
parser.add_option("--input", dest="input",
                  default=None, metavar="FILE",
                  help="MIRA input filename")

# Contig filter thresholds; kept as strings here, with a default of "0".
for short_flag, long_flag, description in [
    ("-x", "--min_length", "Minimum contig length"),
    ("-y", "--min_cover", "Minimum average contig coverage"),
    ("-z", "--min_reads", "Minimum reads per contig"),
]:
    parser.add_option(short_flag, long_flag, dest=long_flag[2:],
                      default="0", help=description)

# One optional output filename per supported format; an empty string
# default means that output was not requested.
for output_name, description in [
    ("maf", "MIRA MAF output filename"),
    ("ace", "ACE output filename"),
    ("bam", "Unpadded BAM output filename"),
    ("fasta", "Unpadded FASTA output filename"),
    ("cstats", "Contig statistics filename"),
]:
    parser.add_option("--" + output_name, dest=output_name,
                      default="", metavar="FILE", help=description)

parser.add_option("-v", "--version", dest="version",
                  default=False, action="store_true",
                  help="Show version and quit")

options, args = parser.parse_args()
if args:
    # Only named options are accepted, no positional arguments.
    sys.exit("Expected options (e.g. --input example.maf), not arguments")
| 2 | 105 | 
# Unpack the parsed options into module-level names.
input_maf = options.input
out_maf, out_bam, out_fasta = options.maf, options.bam, options.fasta
out_ace, out_cstats = options.ace, options.cstats

# Locate the miraconvert binary: under $MIRA4 if that is set, otherwise
# rely on it being found via $PATH.
mira_path = os.environ.get("MIRA4")
if mira_path is None:
    sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n")
    mira_convert = "miraconvert"
else:
    mira_convert = os.path.join(mira_path, "miraconvert")
    if not os.path.isfile(mira_convert):
        folder_listing = ", ".join(os.listdir(mira_path))
        sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
                 % (mira_convert, folder_listing))

# The version check runs before --version is handled, so an unsupported
# MIRA release is reported as an error in either case.
mira_convert_ver = get_version(mira_convert)
is_supported = mira_convert_ver.strip().startswith("4.0")
if not is_supported:
    sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert))
if options.version:
    print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER))
    sys.exit(0)

# Validate the input file.
if not input_maf:
    sys.exit("Input MIRA file is required")
if not os.path.isfile(input_maf):
    sys.exit("Missing input MIRA file: %r" % input_maf)

# At least one output must have been requested.
if not any([out_maf, out_bam, out_fasta, out_ace, out_cstats]):
    sys.exit("No output requested")
| 2 | 138 | 
|  | 139 | 
def check_min_int(value, name):
    """Convert a threshold setting to a non-negative integer.

    Arguments:
     - value - the setting to convert with int().
     - name - description of the setting, used in error messages.

    Returns the integer. Calls sys.exit with an error message if int()
    raises ValueError for the value, or if the result is negative.
    """
    try:
        number = int(value)
    except ValueError:
        sys.exit("Bad %s setting, %r" % (name, value))
    if number >= 0:
        return number
    sys.exit("Negative %s setting, %r" % (name, value))
|  | 148 | 
| 4 | 149 | 
# Validate the three filter thresholds (each exits on bad input).
min_length = check_min_int(options.min_length, "minimum length")
min_cover = check_min_int(options.min_cover, "minimum cover")
min_reads = check_min_int(options.min_reads, "minimum reads")

# TODO - Run MIRA in /tmp or a configurable directory?
# Currently Galaxy puts us somewhere safe like:
# /opt/galaxy-dist/database/job_working_directory/846/
temp = "."

# Build the miraconvert command line; a filter flag is only passed when
# its threshold is non-zero.
cmd_list = [mira_convert]
for flag, threshold in (("-x", min_length),
                        ("-y", min_cover),
                        ("-z", min_reads)):
    if threshold:
        cmd_list += [flag, str(threshold)]
cmd_list += ["-f", "maf", input_maf, os.path.join(temp, "converted")]

if out_bam and not out_fasta:
    # Need this for samtools depad
    out_fasta = os.path.join(temp, "depadded.fasta")

# Append one output format keyword per requested output, in a fixed order.
for wanted, format_name in ((out_maf, "maf"),
                            (out_bam, "samnbb"),
                            (out_fasta, "fasta"),
                            (out_ace, "ace"),
                            (out_cstats, "cstats")):
    if wanted:
        cmd_list.append(format_name)
run(cmd_list)
|  | 182 | 
| 4 | 183 | 
def collect(old, new):
    """Move an expected output file from ``old`` to ``new``.

    Calls sys.exit with an error message if ``old`` is not an existing
    file; otherwise the file is moved with shutil.move.
    """
    if os.path.isfile(old):
        shutil.move(old, new)
        return
    sys.exit("Missing expected output file %s" % old)
|  | 188 | 
| 4 | 189 | 
# Move each requested miraconvert output to the filename asked for.
if out_maf:
    collect(os.path.join(temp, "converted.maf"), out_maf)
if out_fasta:
    # Can we look at the MAF file to see if there are multiple strains?
    old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
    if os.path.isfile(old):
        collect(old, out_fasta)
    else:
        # Might the output be filtered down to zero contigs?
        old = os.path.join(temp, "converted.fasta")
        if not os.path.isfile(old):
            sys.exit("Missing expected output FASTA file")
        elif os.path.getsize(old) == 0:
            print("Warning - no contigs (harsh filters?)")
            collect(old, out_fasta)
        else:
            sys.exit("Missing expected output FASTA file (only generic file present)")
if out_ace:
    # Collect the ACE file itself. Using "converted.maf" here would hand
    # back MAF data as ACE, or fail outright when MAF output was also
    # requested and has already been moved away above.
    collect(os.path.join(temp, "converted.ace"), out_ace)
if out_cstats:
    collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)

if out_bam:
    # Explicit check rather than an assert, which is skipped under -O.
    if not os.path.isfile(out_fasta):
        sys.exit("Missing FASTA file needed to make the BAM file: %s" % out_fasta)
    old = os.path.join(temp, "converted.samnbb")
    if not os.path.isfile(old):
        # Fall back on the plain .sam extension
        old = os.path.join(temp, "converted.sam")
    if not os.path.isfile(old):
        sys.exit("Missing expected intermediate file %s" % old)
    # Handle given to depad; its contents are shown if depad reports a problem
    h = BytesIO()
    try:
        msg = depad(out_fasta, old, out_bam, h)
        if msg:
            print(msg)
            print(h.getvalue())
            sys.exit(1)
    finally:
        # Close the handle on success, failure, or an exception from depad
        h.close()
    if out_fasta == os.path.join(temp, "depadded.fasta"):
        # Not asked for by Galaxy, no longer needed
        os.remove(out_fasta)

if min_length or min_cover or min_reads:
    print("Filtered.")
else:
    print("Converted.")