diff tools/mira4/mira4_convert.py @ 0:6a88b42ce6b9 draft

Uploaded v0.0.4, previously only on the TestToolShed
author peterjc
date Fri, 21 Nov 2014 06:42:56 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_convert.py	Fri Nov 21 06:42:56 2014 -0500
@@ -0,0 +1,225 @@
+#!/usr/bin/env python
+"""A simple wrapper script to call MIRA and collect its output.
+
+This focuses on the miraconvert binary.
+"""
+import os
+import sys
+import subprocess
+import shutil
+import time
+import tempfile
+from optparse import OptionParser
+try:
+    from io import BytesIO
+except ImportError:
+    #Should we worry about Python 2.5 or older?
+    from StringIO import StringIO as BytesIO
+
+#Do we need any PYTHONPATH magic?
+from mira4_make_bam import depad
+
+WRAPPER_VER = "0.0.5" #Keep in sync with the XML file
+
+def stop_err(msg, err=1):
+    sys.stderr.write(msg+"\n")
+    sys.exit(err)
+
+def run(cmd):
+    #Avoid using shell=True when we call subprocess to ensure if the Python
+    #script is killed, so too is the child process.
+    try:
+        child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    except Exception, err:
+        stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
+    #Use .communicate as can get deadlocks with .wait(),
+    stdout, stderr = child.communicate()
+    return_code = child.returncode
+    if return_code:
+        if stderr and stdout:
+            stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, err, stdout, stderr))
+        else:
+            stop_err("Return code %i from command:\n%s\n%s" % (return_code, err, stderr))
+
+def get_version(mira_binary):
+    """Run MIRA to find its version number"""
+    # At the commend line I would use: mira -v | head -n 1
+    # however there is some pipe error when doing that here.
+    cmd = [mira_binary, "-v"]
+    try:
+        child = subprocess.Popen(cmd,
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.STDOUT)
+    except Exception, err:
+        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
+        sys.exit(1)
+    ver, tmp = child.communicate()
+    del child
+    return ver.split("\n", 1)[0].strip()
+
+#Parse Command Line
+usage = """Galaxy MIRA4 wrapper script v%s - use as follows:
+
+$ python mira4_convert.py ...
+
+This will run the MIRA miraconvert binary and collect its output files as directed.
+""" % WRAPPER_VER
+parser = OptionParser(usage=usage)
+parser.add_option("--input", dest="input",
+                  default=None, metavar="FILE",
+                  help="MIRA input filename")
+parser.add_option("-x", "--min_length", dest="min_length",
+                  default="0",
+                  help="Minimum contig length")
+parser.add_option("-y", "--min_cover", dest="min_cover",
+                  default="0",
+                  help="Minimum average contig coverage")
+parser.add_option("-z", "--min_reads", dest="min_reads",
+                  default="0",
+                  help="Minimum reads per contig")
+parser.add_option("--maf", dest="maf",
+                  default="", metavar="FILE",
+                  help="MIRA MAF output filename")
+parser.add_option("--ace", dest="ace",
+                  default="", metavar="FILE",
+                  help="ACE output filename")
+parser.add_option("--bam", dest="bam",
+                  default="", metavar="FILE",
+                  help="Unpadded BAM output filename")
+parser.add_option("--fasta", dest="fasta",
+                  default="", metavar="FILE",
+                  help="Unpadded FASTA output filename")
+parser.add_option("--cstats", dest="cstats",
+                  default="", metavar="FILE",
+                  help="Contig statistics filename")
+parser.add_option("-v", "--version", dest="version",
+                  default=False, action="store_true",
+                  help="Show version and quit")
+options, args = parser.parse_args()
+if args:
+    stop_err("Expected options (e.g. --input example.maf), not arguments")
+
+input_maf = options.input
+out_maf = options.maf
+out_bam = options.bam
+out_fasta = options.fasta
+out_ace = options.ace
+out_cstats = options.cstats
+
+try:
+    mira_path = os.environ["MIRA4"]
+except KeyError:
+    stop_err("Environment variable $MIRA4 not set")
+mira_convert = os.path.join(mira_path, "miraconvert")
+if not os.path.isfile(mira_convert):
+    stop_err("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
+             % (mira_convert, ", ".join(os.listdir(mira_path))))
+
+mira_convert_ver = get_version(mira_convert)
+if not mira_convert_ver.strip().startswith("4.0"):
+    stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert))
+if options.version:
+    print "%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER)
+    sys.exit(0)
+
+if not input_maf:
+    stop_err("Input MIRA file is required")
+elif not os.path.isfile(input_maf):
+    stop_err("Missing input MIRA file: %r" % input_maf)
+
+if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
+    stop_err("No output requested")
+
+
+def check_min_int(value, name):
+    try:
+        i = int(value)
+    except:
+        stop_err("Bad %s setting, %r" % (name, value))
+    if i < 0:
+        stop_err("Negative %s setting, %r" % (name, value))
+    return i
+
+min_length = check_min_int(options.min_length, "minimum length")
+min_cover = check_min_int(options.min_cover, "minimum cover")
+min_reads = check_min_int(options.min_reads, "minimum reads")
+
+#TODO - Run MIRA in /tmp or a configurable directory?
+#Currently Galaxy puts us somewhere safe like:
+#/opt/galaxy-dist/database/job_working_directory/846/
+temp = "."
+
+
+cmd_list = [mira_convert]
+if min_length:
+    cmd_list.extend(["-x", str(min_length)])
+if min_cover:
+    cmd_list.extend(["-y", str(min_cover)])
+if min_reads:
+    cmd_list.extend(["-z", str(min_reads)])
+cmd_list.extend(["-f", "maf", input_maf, os.path.join(temp, "converted")])
+if out_maf:
+    cmd_list.append("maf")
+if out_bam:
+    cmd_list.append("samnbb")
+    if not out_fasta:
+        #Need this for samtools depad
+        out_fasta = os.path.join(temp, "depadded.fasta")
+if out_fasta:
+    cmd_list.append("fasta")
+if out_ace:
+    cmd_list.append("ace")
+if out_cstats:
+    cmd_list.append("cstats")
+run(cmd_list)
+
+def collect(old, new):
+    if not os.path.isfile(old):
+        stop_err("Missing expected output file %s" % old)
+    shutil.move(old, new)
+
+if out_maf:
+    collect(os.path.join(temp, "converted.maf"), out_maf)
+if out_fasta:
+    #Can we look at the MAF file to see if there are multiple strains?
+    old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
+    if os.path.isfile(old):
+        collect(old, out_fasta)
+    else:
+        #Might the output be filtered down to zero contigs?
+        old = os.path.join(temp, "converted.fasta")
+        if not os.path.isfile(old):
+            stop_err("Missing expected output FASTA file")
+        elif os.path.getsize(old) == 0:
+            print("Warning - no contigs (harsh filters?)")
+            collect(old, out_fasta)
+        else:
+            stop_err("Missing expected output FASTA file (only generic file present)")
+if out_ace:
+    collect(os.path.join(temp, "converted.maf"), out_ace)
+if out_cstats:
+    collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)
+
+if out_bam:
+    assert os.path.isfile(out_fasta)
+    old = os.path.join(temp, "converted.samnbb")
+    if not os.path.isfile(old):
+        old = os.path.join(temp, "converted.sam")
+    if not os.path.isfile(old):
+        stop_err("Missing expected intermediate file %s" % old)
+    h = BytesIO()
+    msg = depad(out_fasta, old, out_bam, h)
+    if msg:
+        print(msg)
+        print(h.getvalue())
+        h.close()
+        sys.exit(1)
+    h.close()
+    if out_fasta == os.path.join(temp, "depadded.fasta"):
+        #Not asked for by Galaxy, no longer needed
+        os.remove(out_fasta)
+
+if min_length or min_cover or min_reads:
+    print("Filtered.")
+else:
+    print("Converted.")