18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 ##@file
|
|
4 # usage: getCumulLengthFromTEannot.py [ options ]
|
|
5 # options:
|
|
6 # -h: this help
|
|
7 # -i: table with the annotations (format=path)
|
|
8 # -r: name of a TE reference sequence (if empty, all subjects are considered)
|
|
9 # -g: length of the genome (in bp)
|
|
10 # -C: configuration file
|
|
11 # -c: clean
|
|
12 # -v: verbosity level (default=0/1)
|
|
13
|
|
14
|
|
15 import sys
|
|
16 import os
|
|
17 import getopt
|
|
18 from commons.core.sql.DbMySql import DbMySql
|
|
19 from commons.core.sql.TablePathAdaptator import TablePathAdaptator
|
|
20
|
|
21
|
|
22 class getCumulLengthFromTEannot( object ):
|
|
23 """
|
|
24 Give the cumulative length of TE annotations (subjects mapped on queries).
|
|
25 """
|
|
26
|
|
27 def __init__( self ):
|
|
28 """
|
|
29 Constructor.
|
|
30 """
|
|
31 self._tableName = ""
|
|
32 self._TErefseq = ""
|
|
33 self._genomeLength = 0
|
|
34 self._configFileName = ""
|
|
35 self._clean = False
|
|
36 self._verbose = 0
|
|
37 self._db = None
|
|
38 self._tpA = None
|
|
39
|
|
40
|
|
41 def help( self ):
|
|
42 """
|
|
43 Display the help on stdout.
|
|
44 """
|
|
45 print
|
|
46 print "usage: getCumulLengthFromTEannot.py [ options ]"
|
|
47 print "options:"
|
|
48 print " -h: this help"
|
|
49 print " -i: table with the annotations (format=path)"
|
|
50 print " -r: name of a TE reference sequence (if empty, all subjects are considered)"
|
|
51 print " -g: length of the genome (in bp)"
|
|
52 print " -C: configuration file"
|
|
53 print " -c: clean"
|
|
54 print " -v: verbosity level (default=0/1)"
|
|
55 print
|
|
56
|
|
57
|
|
58 def setAttributesFromCmdLine( self ):
|
|
59 """
|
|
60 Set the attributes from the command-line.
|
|
61 """
|
|
62 try:
|
|
63 opts, args = getopt.getopt(sys.argv[1:],"hi:r:g:C:cv:")
|
|
64 except getopt.GetoptError, err:
|
|
65 print str(err); self.help(); sys.exit(1)
|
|
66 for o,a in opts:
|
|
67 if o == "-h":
|
|
68 self.help(); sys.exit(0)
|
|
69 elif o == "-i":
|
|
70 self.setInputTable( a )
|
|
71 elif o == "-r":
|
|
72 self.setTErefseq( a )
|
|
73 elif o == "-g":
|
|
74 self.setGenomeLength( a )
|
|
75 elif o == "-C":
|
|
76 self.setConfigFileName( a )
|
|
77 elif o == "-c":
|
|
78 self.setClean()
|
|
79 elif o == "-v":
|
|
80 self.setVerbosityLevel( a )
|
|
81
|
|
82
|
|
83 def setInputTable( self, inTable ):
|
|
84 self._tableName = inTable
|
|
85
|
|
86 def setTErefseq( self, a ):
|
|
87 self._TErefseq = a
|
|
88
|
|
89 def setGenomeLength( self, genomeLength ):
|
|
90 self._genomeLength = int(genomeLength)
|
|
91
|
|
92 def setConfigFileName( self, configFileName ):
|
|
93 self._configFileName = configFileName
|
|
94
|
|
95 def setClean( self ):
|
|
96 self._clean = True
|
|
97
|
|
98 def setVerbosityLevel( self, verbose ):
|
|
99 self._verbose = int(verbose)
|
|
100
|
|
101 def checkAttributes( self ):
|
|
102 """
|
|
103 Check the attributes are valid before running the algorithm.
|
|
104 """
|
|
105 if self._tableName == "":
|
|
106 print "ERROR: missing input table"; self.help(); sys.exit(1)
|
|
107
|
|
108
|
|
109 def setAdaptatorToTable( self ):
|
|
110 self._db = DbMySql( cfgFileName=self._configFileName )
|
|
111 self._tpA = TablePathAdaptator( self._db, self._tableName )
|
|
112
|
|
113
|
|
114 def getAllSubjectsAsMapOfQueries( self ):
|
|
115 mapFileName = "%s.map" % self._tableName
|
|
116 mapFile = open( mapFileName, "w" )
|
|
117 if self._TErefseq != "":
|
|
118 lPathnums = self._tpA.getIdListFromSubject( self._TErefseq )
|
|
119 else:
|
|
120 lPathnums = self._tpA.getIdList()
|
|
121 if self._verbose > 0:
|
|
122 print "nb of paths: %i" % ( len(lPathnums) )
|
|
123 for pathnum in lPathnums:
|
|
124 lPaths = self._tpA.getPathListFromId( pathnum )
|
|
125 for path in lPaths:
|
|
126 map = path.getSubjectAsMapOfQuery()
|
|
127 map.write( mapFile )
|
|
128 mapFile.close()
|
|
129 return mapFileName
|
|
130
|
|
131
|
|
132 def mergeRanges( self, mapFileName ):
|
|
133 mergeFileName = "%s.merge" % mapFileName
|
|
134 prg = os.environ["REPET_PATH"] + "/bin/mapOp"
|
|
135 cmd = prg
|
|
136 cmd += " -q %s" % ( mapFileName )
|
|
137 cmd += " -m"
|
|
138 cmd += " 2>&1 > /dev/null"
|
|
139 log = os.system( cmd )
|
|
140 if log != 0:
|
|
141 print "*** Error: %s returned %i" % ( prg, log )
|
|
142 sys.exit(1)
|
|
143 if self._clean:
|
|
144 os.remove( mapFileName )
|
|
145 return mergeFileName
|
|
146
|
|
147
|
|
148 def getCumulLength( self, mergeFileName ):
|
|
149 mergeFile = open( mergeFileName, "r" )
|
|
150 total = 0
|
|
151 while True:
|
|
152 line = mergeFile.readline()
|
|
153 if line == "":
|
|
154 break
|
|
155 tok = line.split("\t")
|
|
156 total += abs( int(tok[3]) - int(tok[2]) ) + 1
|
|
157 mergeFile.close()
|
|
158 if self._clean:
|
|
159 os.remove( mergeFileName )
|
|
160 return total
|
|
161
|
|
162
|
|
163 def start( self ):
|
|
164 """
|
|
165 Useful commands before running the program.
|
|
166 """
|
|
167 self.checkAttributes()
|
|
168 if self._verbose > 0:
|
|
169 print "START %s" % ( type(self).__name__ ); sys.stdout.flush()
|
|
170 self.setAdaptatorToTable()
|
|
171
|
|
172
|
|
173 def end( self, mapFileName, mergeFileName ):
|
|
174 """
|
|
175 Useful commands before ending the program.
|
|
176 """
|
|
177 self._db.close()
|
|
178 if self._verbose > 0:
|
|
179 print "END %s" % ( type(self).__name__ ); sys.stdout.flush()
|
|
180
|
|
181
|
|
182 def run( self ):
|
|
183 """
|
|
184 Run the program.
|
|
185 """
|
|
186 self.start()
|
|
187
|
|
188 mapFileName = self.getAllSubjectsAsMapOfQueries()
|
|
189 mergeFileName = self.mergeRanges( mapFileName )
|
|
190 total = self.getCumulLength( mergeFileName )
|
|
191 print "cumulative length: %i bp" % total
|
|
192 if self._genomeLength > 0:
|
|
193 print "TE content: %.2f%%" % ( 100 * total / float(self._genomeLength) )
|
|
194
|
|
195 self.end( mapFileName, mergeFileName )
|
|
196
|
|
197
|
|
198 if __name__ == "__main__":
|
|
199 i = getCumulLengthFromTEannot()
|
|
200 i.setAttributesFromCmdLine()
|
|
201 i.run()
|