18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 from commons.core.seq.BioseqDB import BioseqDB
|
|
4 from commons.core.parsing.MrepsToSet import MrepsToSet
|
|
5 import subprocess
|
|
6 import os
|
|
7 import sys
|
|
8 import getopt
|
|
9
|
|
10 def help():
|
|
11 """
|
|
12 Give the list of the command-line options.
|
|
13 """
|
|
14 print
|
|
15 print "usage: ",sys.argv[0],"[ options ]"
|
|
16 print "options:"
|
|
17 print " -h: this help"
|
|
18 print " -i: name of the input file (format='fasta')"
|
|
19 print " -o: name of the output file (default=inFileName+'.Mreps.set')"
|
|
20 print " -f: error filter (default=1.0)"
|
|
21 print " -c: clean"
|
|
22 print " -v: verbosity level (default=0/1)"
|
|
23 print
|
|
24
|
|
25 def main():
|
|
26 """
|
|
27 Launch Mreps.
|
|
28 """
|
|
29 inFileName = ""
|
|
30 outFileName = ""
|
|
31 errorFilter = 1.0
|
|
32 clean = False
|
|
33 verbose = 0
|
|
34
|
|
35 try:
|
|
36 opts=getopt.getopt(sys.argv[1:],"hi:o:f:cv:")[0]
|
|
37 except getopt.GetoptError, err:
|
|
38 print str(err)
|
|
39 help()
|
|
40 sys.exit(1)
|
|
41 for o,a in opts:
|
|
42 if o == "-h":
|
|
43 help()
|
|
44 sys.exit(0)
|
|
45 elif o == "-i":
|
|
46 inFileName = a
|
|
47 elif o == "-o":
|
|
48 outFileName = a
|
|
49 elif o == "-f":
|
|
50 errorFilter = float(a)
|
|
51 elif o == "-c":
|
|
52 clean = True
|
|
53 elif o == "-v":
|
|
54 verbose = int(a)
|
|
55
|
|
56 if inFileName == "":
|
|
57 print "ERROR: missing compulsory options"
|
|
58 help()
|
|
59 sys.exit(1)
|
|
60
|
|
61 if verbose > 0:
|
|
62 print "beginning of %s" % (sys.argv[0].split("/")[-1])
|
|
63 sys.stdout.flush()
|
|
64
|
|
65 # Mreps 2.5 doesn't fully support IUPAC nomenclature
|
|
66 if verbose > 0:
|
|
67 print "* check IUPAC symbols"; sys.stdout.flush()
|
|
68 tmpInFileName = "%s.tmp%i" % ( inFileName, os.getpid() )
|
|
69 if os.path.exists( tmpInFileName ):
|
|
70 os.system( "rm -f %s" % ( tmpInFileName ) )
|
|
71 bsDB = BioseqDB( inFileName )
|
|
72 for bs in bsDB.db:
|
|
73 if verbose > 0:
|
|
74 print bs.header; sys.stdout.flush()
|
|
75 bs.partialIUPAC()
|
|
76 onlyN = True
|
|
77 for nt in ["A","T","G","C"]:
|
|
78 if nt in bs.sequence:
|
|
79 onlyN = False
|
|
80 if onlyN == True:
|
|
81 if verbose > 0:
|
|
82 print "** Warning: only Ns"; sys.stdout.flush()
|
|
83 else:
|
|
84 bsDB.save( tmpInFileName )
|
|
85
|
|
86 if not os.path.exists( tmpInFileName ):
|
|
87 sys.exit(0)
|
|
88
|
|
89 if verbose > 0:
|
|
90 print "* remove N stretches"; sys.stdout.flush()
|
|
91 prg = os.environ["REPET_PATH"] + "/bin/cutterDB"
|
|
92 cmd = prg
|
|
93 cmd += " -l 200000"
|
|
94 cmd += " -o 0"
|
|
95 cmd += " -w 11"
|
|
96 cmd += " %s" % ( tmpInFileName )
|
|
97 if verbose > 0:
|
|
98 print cmd; sys.stdout.flush()
|
|
99 log = os.system( cmd )
|
|
100 if log != 0:
|
|
101 print "ERROR: %s returned %i" % ( prg, log )
|
|
102 sys.exit(1)
|
|
103
|
|
104 # launch Mreps on the input file
|
|
105 MrepsOutFileName = "%s.Mreps.xml" % ( tmpInFileName )
|
|
106 prg = "mreps"
|
|
107 cmd = prg
|
|
108 cmd += " -res 3"
|
|
109 cmd += " -exp 3.0"
|
|
110 cmd += " -maxsize 50"
|
|
111 cmd += " -xmloutput %s" % MrepsOutFileName
|
|
112 cmd += " -fasta %s_cut" % tmpInFileName
|
|
113 process = subprocess.Popen(cmd, shell = True)
|
|
114 process.communicate()
|
|
115 if process.returncode != 0:
|
|
116 raise Exception("ERROR when launching '%s'" % cmd)
|
|
117
|
|
118 if outFileName == "":
|
|
119 outFileName = inFileName + ".Mreps.set"
|
|
120
|
|
121 # parse Mreps results in xml format
|
|
122 iMrepsToSet = MrepsToSet(inFileName, MrepsOutFileName, outFileName, errorFilter)
|
|
123 iMrepsToSet.run()
|
|
124 if clean:
|
|
125 iMrepsToSet.clean()
|
|
126
|
|
127 # remove temporary input filename
|
|
128 os.remove(tmpInFileName)
|
|
129 os.remove("%s_cut" % tmpInFileName)
|
|
130 os.remove("%s.Nstretch.map" % tmpInFileName)
|
|
131
|
|
132 if verbose > 0:
|
|
133 print "%s finished successfully\n" % (sys.argv[0].split("/")[-1])
|
|
134 sys.stdout.flush()
|
|
135
|
|
136 return 0
|
|
137
|
|
138
|
|
139 if __name__ == '__main__':
|
|
140 main()
|