Mercurial > repos > yufei-luo > s_mart
diff commons/launcher/launchMreps.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/launcher/launchMreps.py Mon Apr 29 03:20:15 2013 -0400 @@ -0,0 +1,140 @@ +#!/usr/bin/env python + +from commons.core.seq.BioseqDB import BioseqDB +from commons.core.parsing.MrepsToSet import MrepsToSet +import subprocess +import os +import sys +import getopt + +def help(): + """ + Give the list of the command-line options. + """ + print + print "usage: ",sys.argv[0],"[ options ]" + print "options:" + print " -h: this help" + print " -i: name of the input file (format='fasta')" + print " -o: name of the output file (default=inFileName+'.Mreps.set')" + print " -f: error filter (default=1.0)" + print " -c: clean" + print " -v: verbosity level (default=0/1)" + print + +def main(): + """ + Launch Mreps. + """ + inFileName = "" + outFileName = "" + errorFilter = 1.0 + clean = False + verbose = 0 + + try: + opts=getopt.getopt(sys.argv[1:],"hi:o:f:cv:")[0] + except getopt.GetoptError, err: + print str(err) + help() + sys.exit(1) + for o,a in opts: + if o == "-h": + help() + sys.exit(0) + elif o == "-i": + inFileName = a + elif o == "-o": + outFileName = a + elif o == "-f": + errorFilter = float(a) + elif o == "-c": + clean = True + elif o == "-v": + verbose = int(a) + + if inFileName == "": + print "ERROR: missing compulsory options" + help() + sys.exit(1) + + if verbose > 0: + print "beginning of %s" % (sys.argv[0].split("/")[-1]) + sys.stdout.flush() + + # Mreps 2.5 doesn't fully support IUPAC nomenclature + if verbose > 0: + print "* check IUPAC symbols"; sys.stdout.flush() + tmpInFileName = "%s.tmp%i" % ( inFileName, os.getpid() ) + if os.path.exists( tmpInFileName ): + os.system( "rm -f %s" % ( tmpInFileName ) ) + bsDB = BioseqDB( inFileName ) + for bs in bsDB.db: + if verbose > 0: + print bs.header; sys.stdout.flush() + bs.partialIUPAC() + onlyN = True + for nt in ["A","T","G","C"]: + if nt in bs.sequence: + onlyN = False + if onlyN == True: + if verbose > 0: + print "** Warning: only Ns"; sys.stdout.flush() + else: + bsDB.save( tmpInFileName ) + + if not os.path.exists( tmpInFileName ): + sys.exit(0) + + if verbose > 0: + print "* remove N stretches"; sys.stdout.flush() + prg = os.environ["REPET_PATH"] + "/bin/cutterDB" + cmd = prg + cmd += " -l 200000" + cmd += " -o 0" + cmd += " -w 11" + cmd += " %s" % ( tmpInFileName ) + if verbose > 0: + print cmd; sys.stdout.flush() + log = os.system( cmd ) + if log != 0: + print "ERROR: %s returned %i" % ( prg, log ) + sys.exit(1) + + # launch Mreps on the input file + MrepsOutFileName = "%s.Mreps.xml" % ( tmpInFileName ) + prg = "mreps" + cmd = prg + cmd += " -res 3" + cmd += " -exp 3.0" + cmd += " -maxsize 50" + cmd += " -xmloutput %s" % MrepsOutFileName + cmd += " -fasta %s_cut" % tmpInFileName + process = subprocess.Popen(cmd, shell = True) + process.communicate() + if process.returncode != 0: + raise Exception("ERROR when launching '%s'" % cmd) + + if outFileName == "": + outFileName = inFileName + ".Mreps.set" + + # parse Mreps results in xml format + iMrepsToSet = MrepsToSet(inFileName, MrepsOutFileName, outFileName, errorFilter) + iMrepsToSet.run() + if clean: + iMrepsToSet.clean() + + # remove temporary input filename + os.remove(tmpInFileName) + os.remove("%s_cut" % tmpInFileName) + os.remove("%s.Nstretch.map" % tmpInFileName) + + if verbose > 0: + print "%s finished successfully\n" % (sys.argv[0].split("/")[-1]) + sys.stdout.flush() + + return 0 + + +if __name__ == '__main__': + main()