comparison commons/launcher/launchMreps.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 #!/usr/bin/env python
2
3 from commons.core.seq.BioseqDB import BioseqDB
4 from commons.core.parsing.MrepsToSet import MrepsToSet
5 import subprocess
6 import os
7 import sys
8 import getopt
9
10 def help():
11 """
12 Give the list of the command-line options.
13 """
14 print
15 print "usage: ",sys.argv[0],"[ options ]"
16 print "options:"
17 print " -h: this help"
18 print " -i: name of the input file (format='fasta')"
19 print " -o: name of the output file (default=inFileName+'.Mreps.set')"
20 print " -f: error filter (default=1.0)"
21 print " -c: clean"
22 print " -v: verbosity level (default=0/1)"
23 print
24
25 def main():
26 """
27 Launch Mreps.
28 """
29 inFileName = ""
30 outFileName = ""
31 errorFilter = 1.0
32 clean = False
33 verbose = 0
34
35 try:
36 opts=getopt.getopt(sys.argv[1:],"hi:o:f:cv:")[0]
37 except getopt.GetoptError, err:
38 print str(err)
39 help()
40 sys.exit(1)
41 for o,a in opts:
42 if o == "-h":
43 help()
44 sys.exit(0)
45 elif o == "-i":
46 inFileName = a
47 elif o == "-o":
48 outFileName = a
49 elif o == "-f":
50 errorFilter = float(a)
51 elif o == "-c":
52 clean = True
53 elif o == "-v":
54 verbose = int(a)
55
56 if inFileName == "":
57 print "ERROR: missing compulsory options"
58 help()
59 sys.exit(1)
60
61 if verbose > 0:
62 print "beginning of %s" % (sys.argv[0].split("/")[-1])
63 sys.stdout.flush()
64
65 # Mreps 2.5 doesn't fully support IUPAC nomenclature
66 if verbose > 0:
67 print "* check IUPAC symbols"; sys.stdout.flush()
68 tmpInFileName = "%s.tmp%i" % ( inFileName, os.getpid() )
69 if os.path.exists( tmpInFileName ):
70 os.system( "rm -f %s" % ( tmpInFileName ) )
71 bsDB = BioseqDB( inFileName )
72 for bs in bsDB.db:
73 if verbose > 0:
74 print bs.header; sys.stdout.flush()
75 bs.partialIUPAC()
76 onlyN = True
77 for nt in ["A","T","G","C"]:
78 if nt in bs.sequence:
79 onlyN = False
80 if onlyN == True:
81 if verbose > 0:
82 print "** Warning: only Ns"; sys.stdout.flush()
83 else:
84 bsDB.save( tmpInFileName )
85
86 if not os.path.exists( tmpInFileName ):
87 sys.exit(0)
88
89 if verbose > 0:
90 print "* remove N stretches"; sys.stdout.flush()
91 prg = os.environ["REPET_PATH"] + "/bin/cutterDB"
92 cmd = prg
93 cmd += " -l 200000"
94 cmd += " -o 0"
95 cmd += " -w 11"
96 cmd += " %s" % ( tmpInFileName )
97 if verbose > 0:
98 print cmd; sys.stdout.flush()
99 log = os.system( cmd )
100 if log != 0:
101 print "ERROR: %s returned %i" % ( prg, log )
102 sys.exit(1)
103
104 # launch Mreps on the input file
105 MrepsOutFileName = "%s.Mreps.xml" % ( tmpInFileName )
106 prg = "mreps"
107 cmd = prg
108 cmd += " -res 3"
109 cmd += " -exp 3.0"
110 cmd += " -maxsize 50"
111 cmd += " -xmloutput %s" % MrepsOutFileName
112 cmd += " -fasta %s_cut" % tmpInFileName
113 process = subprocess.Popen(cmd, shell = True)
114 process.communicate()
115 if process.returncode != 0:
116 raise Exception("ERROR when launching '%s'" % cmd)
117
118 if outFileName == "":
119 outFileName = inFileName + ".Mreps.set"
120
121 # parse Mreps results in xml format
122 iMrepsToSet = MrepsToSet(inFileName, MrepsOutFileName, outFileName, errorFilter)
123 iMrepsToSet.run()
124 if clean:
125 iMrepsToSet.clean()
126
127 # remove temporary input filename
128 os.remove(tmpInFileName)
129 os.remove("%s_cut" % tmpInFileName)
130 os.remove("%s.Nstretch.map" % tmpInFileName)
131
132 if verbose > 0:
133 print "%s finished successfully\n" % (sys.argv[0].split("/")[-1])
134 sys.stdout.flush()
135
136 return 0
137
138
139 if __name__ == '__main__':
140 main()