diff commons/launcher/launchMreps.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/launchMreps.py	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+
+from commons.core.seq.BioseqDB import BioseqDB
+from commons.core.parsing.MrepsToSet import MrepsToSet
+import subprocess
+import os
+import sys
+import getopt
+
+def help():
+    """
+    Give the list of the command-line options.
+    """
+    print
+    print "usage: ",sys.argv[0],"[ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (format='fasta')"
+    print "     -o: name of the output file (default=inFileName+'.Mreps.set')"
+    print "     -f: error filter (default=1.0)"
+    print "     -c: clean"
+    print "     -v: verbosity level (default=0/1)"
+    print
+
+def main():
+    """
+    Launch Mreps.
+    """
+    inFileName = ""
+    outFileName = ""
+    errorFilter = 1.0
+    clean = False
+    verbose = 0
+
+    try:
+        opts=getopt.getopt(sys.argv[1:],"hi:o:f:cv:")[0]
+    except getopt.GetoptError, err:
+        print str(err)
+        help()
+        sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-o":
+            outFileName = a
+        elif o == "-f":
+            errorFilter = float(a)
+        elif o == "-c":
+            clean = True
+        elif o == "-v":
+            verbose = int(a)
+
+    if inFileName == "":
+        print "ERROR: missing compulsory options"
+        help()
+        sys.exit(1)
+
+    if verbose > 0:
+        print "beginning of %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    # Mreps 2.5 doesn't fully support IUPAC nomenclature
+    if verbose > 0:
+        print "* check IUPAC symbols"; sys.stdout.flush()
+    tmpInFileName = "%s.tmp%i" % ( inFileName, os.getpid() )
+    if os.path.exists( tmpInFileName ):
+        os.system( "rm -f %s" % ( tmpInFileName ) )
+    bsDB = BioseqDB( inFileName )
+    for bs in bsDB.db:
+        if verbose > 0:
+            print bs.header; sys.stdout.flush()
+        bs.partialIUPAC()
+        onlyN = True
+        for nt in ["A","T","G","C"]:
+            if nt in bs.sequence:
+                onlyN = False
+        if onlyN == True:
+            if verbose > 0:
+                print "** Warning: only Ns"; sys.stdout.flush()
+        else:
+            bsDB.save( tmpInFileName )
+
+    if not os.path.exists( tmpInFileName ):
+        sys.exit(0)
+
+    if verbose > 0:
+        print "* remove N stretches"; sys.stdout.flush()
+    prg = os.environ["REPET_PATH"] + "/bin/cutterDB"
+    cmd = prg
+    cmd += " -l 200000"
+    cmd += " -o 0"
+    cmd += " -w 11"
+    cmd += " %s" % ( tmpInFileName )
+    if verbose > 0:
+        print cmd; sys.stdout.flush()
+    log = os.system( cmd )
+    if log != 0:
+        print "ERROR: %s returned %i" % ( prg, log )
+        sys.exit(1)
+
+    # launch Mreps on the input file
+    MrepsOutFileName = "%s.Mreps.xml" % ( tmpInFileName )
+    prg = "mreps"
+    cmd = prg
+    cmd += " -res 3"
+    cmd += " -exp 3.0"
+    cmd += " -maxsize 50"
+    cmd += " -xmloutput %s" % MrepsOutFileName
+    cmd += " -fasta %s_cut" % tmpInFileName
+    process = subprocess.Popen(cmd, shell = True)
+    process.communicate()
+    if process.returncode != 0:
+        raise Exception("ERROR when launching '%s'" % cmd)
+
+    if outFileName == "":
+        outFileName = inFileName + ".Mreps.set"
+
+    # parse Mreps results in xml format
+    iMrepsToSet = MrepsToSet(inFileName, MrepsOutFileName, outFileName, errorFilter)
+    iMrepsToSet.run()
+    if clean:
+        iMrepsToSet.clean()
+
+    # remove temporary input filename
+    os.remove(tmpInFileName)
+    os.remove("%s_cut" % tmpInFileName)
+    os.remove("%s.Nstretch.map" % tmpInFileName)
+
+    if verbose > 0:
+        print "%s finished successfully\n" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+
+    return 0
+
+
+if __name__ == '__main__':
+    main()