annotate mdust_wrapper.py @ 0:cc6db1ee2d48 draft

Uploaded
author urgi-team
date Mon, 05 Oct 2015 11:05:21 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
1 #!/usr/bin/env python
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
2
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
3 import subprocess
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
4 import tempfile
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
5 import sys
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
6 import os
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
7 import re
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
8 from optparse import OptionParser
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
9
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
10
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
class MdustWrapper(object):
    """Run the external ``mdust`` program and store its output in a file.

    Typical use::

        wrapper = MdustWrapper()
        wrapper.setAttributesFromCmdLine()
        wrapper.run()

    The parsed command-line options are kept in ``self._options``.
    """

    # Executable that is launched; resolved through PATH by the OS.
    _PROGRAM = "mdust"

    # One line of ``mdust -c`` output as this wrapper expects it: a name
    # followed by three tab-separated integers.  Only the name and the last
    # two integers are used for the BED conversion; the second column is
    # matched but ignored.
    _COORD_LINE = re.compile(r"^(\S+)\t(\d+)\t(\d+)\t(\d+)$")

    def __init__(self):
        """Create a wrapper with no options set yet."""
        self._options = None

    def stop_err(self, msg):
        """Write ``msg`` to stderr and terminate the process.

        Exits with status 1 so that callers relying on the exit code (and
        not only on the presence of stderr text) can detect the failure.

        Raises:
            SystemExit: always.
        """
        sys.stderr.write("%s\n" % msg)
        sys.exit(1)

    def setAttributesFromCmdLine(self):
        """Parse ``sys.argv`` and store the resulting options.

        Raises:
            Exception: if the input or output file option is missing
                (raised by ``_setAttributesFromOptions``).
        """
        description = "mdust_wrapper"
        description += "\nWrapper for mdust\n"
        description += "example: mdust_wrapper.py -i seq.fasta -v 27\n"
        parser = OptionParser(description=description, version="0.1")
        parser.add_option("-i", "--input", dest="FastaFile", action="store", type="string", help="Input Fasta File name [compulsory] [format: Fasta]", default="")
        parser.add_option("-o", "--output", dest="outFile", action="store", type="string", help="output File name [compulsory] [format: fasta,tab or bed]", default="")
        parser.add_option("-v", "--cutoff", dest="cutoff", action="store", type="int", help="cutoff", default=28)
        parser.add_option("-w", "--wsize", dest="wsize", action="store", type="int", help="window size", default=3)
        parser.add_option("-m", "--maskingletter", dest="maskingletter", action="store", type="string", help="masking letter", default="N")
        parser.add_option("-f", "--format", dest="format", action="store", type="string", help="format", default="default")
        options = parser.parse_args()[0]
        self._setAttributesFromOptions(options)

    def _setAttributesFromOptions(self, options):
        """Store ``options`` and check that the compulsory ones are present.

        Args:
            options: object exposing ``FastaFile``, ``outFile``, ``cutoff``,
                ``wsize``, ``maskingletter`` and ``format`` attributes.

        Raises:
            Exception: if ``FastaFile`` or ``outFile`` is an empty string.
        """
        self._options = options

        if self._options.FastaFile == "":
            raise Exception("Missing input file, please provide fasta file with -i file !")
        if self._options.outFile == "":
            raise Exception("Missing output file, please provide output file with -o file !")

    def _buildCommand(self):
        """Return the mdust invocation as an argument list.

        A list (rather than a shell string) is used so that file names
        containing spaces or shell metacharacters are passed through intact.
        """
        opts = self._options
        cmd = [
            self._PROGRAM,
            opts.FastaFile,
            "-v", "%d" % opts.cutoff,
            "-w", "%d" % opts.wsize,
            "-m", opts.maskingletter,
        ]
        # Both "tab" and "bed" are derived from mdust's -c output.
        if opts.format in ("tab", "bed"):
            cmd.append("-c")
        return cmd

    def _execute(self, cmd, outPath, errPath):
        """Run ``cmd``, sending stdout to ``outPath`` and stderr to ``errPath``.

        stdout is streamed straight to disk and never loaded into memory.

        Raises:
            Exception: carrying the captured stderr text if the program
                wrote anything to stderr.
            OSError: if the program cannot be started.
        """
        with open(outPath, 'wb') as outHandle:
            with open(errPath, 'wb') as errHandle:
                proc = subprocess.Popen(args=cmd, shell=False, cwd=".", stdout=outHandle, stderr=errHandle)
                proc.wait()

        with open(errPath, 'rb') as errHandle:
            message = errHandle.read()
        # On Python 3 the data read in binary mode is bytes; turn it into
        # text so it can be concatenated into the error message.
        if not isinstance(message, str):
            message = message.decode("utf-8", "replace")
        if message:
            raise Exception(message)

    def _writeBed(self, tabPath, bedPath):
        """Convert the ``mdust -c`` output in ``tabPath`` into ``bedPath``.

        For every input line the name, the third column minus one and the
        fourth column are written, tab-separated.

        Raises:
            Exception: on the first line that does not match the expected
                four-column layout.
        """
        with open(tabPath, "r") as fin:
            with open(bedPath, "w") as fout:
                for lineNumber, line in enumerate(fin, 1):
                    m = self._COORD_LINE.search(line)
                    if m is None:
                        raise Exception("\nLine %d '%s' does not has a mdust format." % (lineNumber, line))
                    fout.write("%s\t%d\t%d\n" % (m.group(1), int(m.group(3)) - 1, int(m.group(4))))

    def run(self):
        """Run mdust with the stored options and produce the output file.

        mdust's stdout is captured in a private scratch directory created
        in the current working directory.  For the ``bed`` format it is
        converted line by line; for every other format it is moved to the
        requested output path.  The scratch directory is always removed.

        If mdust cannot be started or writes to stderr, the message is
        reported through ``stop_err`` and the process exits.

        Raises:
            SystemExit: when mdust fails (via ``stop_err``).
            Exception: when a line of mdust output cannot be converted to
                BED.
        """
        # Local import: this method is the only user of shutil.
        import shutil

        cmd = self._buildCommand()

        # A unique directory avoids clashes between concurrent runs and makes
        # clean-up a single call.
        workDir = tempfile.mkdtemp(prefix="mdust_", dir=os.getcwd())
        rawOut = os.path.join(workDir, "outfile")
        errLog = os.path.join(workDir, "errfile")
        try:
            try:
                self._execute(cmd, rawOut, errLog)
            except Exception as e:
                self.stop_err('Error with mdust :\n' + str(e))

            if self._options.format == 'bed':
                self._writeBed(rawOut, self._options.outFile)
            else:
                # shutil.move also works when the destination is on another
                # filesystem, where os.rename would fail.
                shutil.move(rawOut, self._options.outFile)
        finally:
            shutil.rmtree(workDir, ignore_errors=True)
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
106
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
107
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
108
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
109
cc6db1ee2d48 Uploaded
urgi-team
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: read the options from the command line, then
    # launch mdust and write the requested output file.
    wrapper = MdustWrapper()
    wrapper.setAttributesFromCmdLine()
    wrapper.run()