6
|
1 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
2 # http://www.inra.fr
|
|
3 # http://urgi.versailles.inra.fr
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31 import time
|
|
32
|
|
33 from commons.core.coord.Range import Range
|
|
34 from commons.core.coord.Map import Map
|
|
35
|
|
36
|
|
37 ## Handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity)
|
|
38 #
|
|
class Align( object ):

    ## Number of tab-separated fields expected by setFromTuple()
    #  (queryName, queryStart, queryEnd, subjectName, subjectStart, subjectEnd, E-value, score, identity)
    #
    _NB_FIELDS = 9

    ## Constructor
    #
    # @param range_q: a Range instance for the query (a new empty Range if None)
    # @param range_s: a Range instance for the subject (a new empty Range if None)
    # @param e_value: E-value of the match
    # @param score: score of the match
    # @param identity: identity percentage of the match
    #
    # @note the ranges default to None rather than to 'Range()': a default
    #       written in the signature is evaluated only once, so all the
    #       instances built without explicit ranges would share, and mutate
    #       through reset() or the setters, the same two Range objects
    #
    def __init__(self, range_q=None, range_s=None, e_value=0, score=0, identity=0):
        if range_q is None:
            range_q = Range()
        if range_s is None:
            range_s = Range()
        self.range_query = range_q
        self.range_subject = range_s
        self.e_value = float(e_value)
        self.score = float(score)
        self.identity = float(identity)

    ## Return True if the instance is empty, False otherwise
    #
    # @note the instance is empty as soon as its query or its subject is empty
    #
    def isEmpty(self):
        return self.range_query.isEmpty() or self.range_subject.isEmpty()

    ## Equal operator
    #
    # @param o the object to compare with
    # @return True if 'o' is an Align instance with the same ranges, E-value,
    #         score and identity, False otherwise (including when 'o' is not
    #         an Align instance, e.g. None)
    #
    def __eq__(self, o):
        if not isinstance(o, Align):
            return False
        if self.range_query == o.range_query \
        and self.range_subject == o.range_subject \
        and self.e_value == o.e_value \
        and self.score == o.score \
        and self.identity == o.identity:
            return True
        return False

    ## Unequal operator
    #
    # @param o the object to compare with (usually an Align instance)
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Convert the object into a string
    #
    # @note used in 'print myObject'
    #
    def __str__( self ):
        return self.toString()

    ## Read attributes from an Align file
    #
    # @param fileHandler: file handler of the file being read
    # @return: 1 on success, 0 at the end of the file or if the line has too few fields
    #
    # @note the instance is reset before reading, hence it is left reset when 0 is returned
    #
    def read(self, fileHandler):
        self.reset()
        line = fileHandler.readline()
        if line == "":
            return 0
        tokens = line.split("\t")
        # setFromTuple() indexes fields 0 to 8, so at least 9 fields are needed;
        # the historical lower bound (number of instance attributes) is kept as
        # well, presumably for subclasses adding attributes -- TODO confirm
        minNbFields = max(self._NB_FIELDS, len(self.__dict__))
        if len(tokens) < minNbFields:
            return 0
        self.setFromTuple(tokens)
        return 1

    ## Set attributes from tuple
    #
    # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStart,subjectEnd,E-value,score,identity)
    # @note data are loaded such that the query is always on the direct strand
    # @note the ranges are replaced by new Range instances, they are not updated in place
    #
    def setFromTuple( self, tuple ):
        #TODO: we need to create Range instances because of __eq__() and isEmpty() tests, but WHY ???
        self.range_query = Range()
        self.range_subject = Range()
        # NOTE(review): when queryStart == queryEnd the 'else' branch is taken
        # and the subject coordinates are swapped as well -- confirm this is intended
        if int(tuple[1]) < int(tuple[2]):
            self.range_query.setFromTuple( ( tuple[0], tuple[1], tuple[2] ) )
            self.range_subject.setFromTuple( ( tuple[3], tuple[4], tuple[5] ) )
        else:
            # query given on the reverse strand: swap start and end of both ranges
            self.range_query.setFromTuple( ( tuple[0], tuple[2], tuple[1] ) )
            self.range_subject.setFromTuple( ( tuple[3], tuple[5], tuple[4] ) )
        self.e_value = float(tuple[6])
        self.score = float(tuple[7])
        self.identity = float(tuple[8])

    ## Reset
    #
    # @note both ranges are reset in place; E-value, score and identity are set to 0
    #
    def reset( self ):
        self.range_query.reset()
        self.range_subject.reset()
        self.e_value = 0
        self.score = 0
        self.identity = 0

    ## Return the attributes as a formatted string
    #
    # @return a tab-separated string: query range, subject range, E-value, score (as an integer), identity
    #
    def toString(self):
        fields = [ self.range_query.toString(),
                   self.range_subject.toString(),
                   "%g\t%i\t%f" % ( self.e_value, self.score, self.identity ) ]
        return "\t".join( fields )

    ## Return the attributes as a GFF-formatted string
    #
    # @param source value of the 'source' column
    # @param type value of the 'type' column
    # @param phase value of the 'phase' column
    # @param ID value of the 'ID' attribute (if empty, an identifier is derived from the current time)
    # @param Parent value of the 'Parent' attribute (omitted if empty)
    #
    # @note the E-value is written in the 'score' column
    # @warning if the subject is not on the direct strand, reverse() is called
    #          on the instance itself, which is therefore modified by this method
    #
    def toStringAsGff( self, source="REPET", type="match", phase=".", ID="", Parent="" ):
        if not self.isSubjectOnDirectStrand():
            self.reverse()
        if ID == "":
            # the identifier is a string of digits, hence '%s' (formatting it with '%i' raises a TypeError)
            ID = "%s" % ( str(time.time())[-8:-1].replace(".","") )
        attributes = "ID=%s" % ( ID )
        if Parent != "":
            attributes += ";Parent=%s" % ( Parent )
        attributes += ";Target=%s %i %i" % ( self.getSubjectName(), self.getSubjectStart(), self.getSubjectEnd() )
        columns = [ "%s" % ( self.getQueryName() ),
                    "%s" % ( source ),
                    "%s" % ( type ),
                    "%s" % ( self.getQueryMin() ),
                    "%s" % ( self.getQueryMax() ),
                    "%g" % ( self.e_value ),
                    "%s" % ( self.getQueryStrand() ),
                    "%s" % ( phase ),
                    attributes ]
        return "\t".join( columns )

    ## Reverse query and subject
    #
    def reverse(self):
        self.range_query.reverse()
        self.range_subject.reverse()

    ## Show the attributes
    #
    # @note print the string returned by toString() on the standard output
    #
    def show(self):
        print( self.toString() )

    ## Write attributes into an Align file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def write(self, fileHandler):
        fileHandler.write("%s\n" % (self.toString()))

    ## Save attributes into an Align file
    #
    # @param file: name of the file being filled (opened in append mode)
    #
    def save(self, file):
        # the 'with' statement closes the file even if write() raises
        with open( file, "a" ) as fileHandler:
            self.write( fileHandler )

    ## Return the score
    #
    def getScore(self):
        return self.score

    ## Return the identity
    #
    def getIdentity(self):
        return self.identity

    ## Return the E-value
    #
    def getEvalue(self):
        return self.e_value

    ## Return the length on the query
    #
    def getLengthOnQuery(self):
        return self.range_query.getLength()

    ## Return the name of the query
    #
    def getQueryName( self ):
        return self.range_query.seqname

    ## Return the start of the query
    #
    def getQueryStart( self ):
        return self.range_query.start

    ## Return the end of the query
    #
    def getQueryEnd( self ):
        return self.range_query.end

    ## Return the min of the query
    #
    def getQueryMin( self ):
        return self.range_query.getMin()

    ## Return the max of the query
    #
    def getQueryMax( self ):
        return self.range_query.getMax()

    ## Return the strand of the query
    #
    def getQueryStrand( self ):
        return self.range_query.getStrand()

    ## Return the length on the subject
    #
    def getLengthOnSubject(self):
        return self.range_subject.getLength()

    ## Return the name of the subject
    #
    def getSubjectName( self ):
        return self.range_subject.seqname

    ## Return the start of the subject
    #
    def getSubjectStart( self ):
        return self.range_subject.start

    ## Return the end of the subject
    #
    def getSubjectEnd( self ):
        return self.range_subject.end

    ## Return the min of the subject
    #
    def getSubjectMin( self ):
        return self.range_subject.getMin()

    ## Return the max of the subject
    #
    def getSubjectMax( self ):
        return self.range_subject.getMax()

    ## Return the strand of the subject
    #
    def getSubjectStrand( self ):
        return self.range_subject.getStrand()

    ## Return the query as a Range instance
    #
    # @note the instance's own Range is returned, not a copy
    #
    def getQueryAsRange( self ):
        return self.range_query

    ## Return the subject as a Range instance
    #
    # @note the instance's own Range is returned, not a copy
    #
    def getSubjectAsRange( self ):
        return self.range_subject

    ## Set the name of the query
    #
    def setQueryName( self, name ):
        self.range_query.seqname = name

    ## Set the start of the query
    #
    def setQueryStart( self, start ):
        self.range_query.start = start

    ## Set the end of the query
    #
    def setQueryEnd( self, end ):
        self.range_query.end = end

    ## Set the name of the subject
    #
    def setSubjectName( self, name ):
        self.range_subject.seqname = name

    ## Set the start of the subject
    #
    def setSubjectStart( self, start ):
        self.range_subject.start = start

    ## Set the end of the subject
    #
    def setSubjectEnd( self, end ):
        self.range_subject.end = end

    ## Merge the instance with another Align instance
    #
    # @param o an Align instance
    #
    # @note nothing is done if the query names or the subject names differ
    # @note the ranges are merged, the highest score and identity and the lowest E-value are kept
    #
    def merge(self, o):
        sameQuery = self.range_query.seqname == o.range_query.seqname
        sameSubject = self.range_subject.seqname == o.range_subject.seqname
        if not ( sameQuery and sameSubject ):
            return
        self.range_query.merge(o.range_query)
        self.range_subject.merge(o.range_subject)
        self.score = max(self.score,o.score)
        self.e_value = min(self.e_value,o.e_value)
        self.identity = max(self.identity,o.identity)

    ## Return a Map instance with the subject mapped on the query
    #
    # @note the Map is named after the subject and located on the query;
    #       the query coordinates are swapped if the subject is not on the direct strand
    #
    def getSubjectAsMapOfQuery(self):
        iMap = Map()
        iMap.name = self.range_subject.seqname
        iMap.seqname = self.range_query.seqname
        if self.range_subject.isOnDirectStrand():
            iMap.start = self.range_query.start
            iMap.end = self.range_query.end
        else:
            iMap.start = self.range_query.end
            iMap.end = self.range_query.start
        return iMap

    ## Return True if query is on direct strand
    #
    def isQueryOnDirectStrand( self ):
        return self.range_query.isOnDirectStrand()

    ## Return True if subject is on direct strand
    #
    def isSubjectOnDirectStrand( self ):
        return self.range_subject.isOnDirectStrand()

    ## Return True if query and subject are on the same strand, False otherwise
    #
    def areQrySbjOnSameStrand(self):
        return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()

    ## Return False if query and subject are on the same strand, True otherwise
    #
    def areQrySbjOnOppositeStrands(self):
        return not self.areQrySbjOnSameStrand()

    ## Set attributes from string
    #
    # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity
    # @param sep field separator
    #
    # @note a single trailing newline, if any, is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        # endswith() is safe on an empty string, unlike string[-1]
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple( string.split(sep) )

    ## Return a first Map instance for the query and a second for the subject
    #
    # @note both Map instances are named "repet"
    #
    def getMapsOfQueryAndSubject(self):
        iMapQuery = Map( name="repet",
                         seqname=self.range_query.seqname,
                         start=self.range_query.start,
                         end=self.range_query.end )
        iMapSubject = Map( name="repet",
                           seqname=self.range_subject.seqname,
                           start=self.range_subject.start,
                           end=self.range_subject.end )
        return iMapQuery, iMapSubject

    ## Write the subject, as a Map on the query, in a file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def writeSubjectAsMapOfQuery( self, fileHandler ):
        m = self.getSubjectAsMapOfQuery()
        m.write( fileHandler )

    ## Return a bin for fast database access
    #
    # @note the bin is the one of the query range
    #
    def getBin(self):
        return self.range_query.getBin()

    ## Switch query and subject
    #
    # @note if the new query is not on the direct strand, both ranges are reversed
    #
    def switchQuerySubject( self ):
        self.range_query, self.range_subject = self.range_subject, self.range_query
        if not self.isQueryOnDirectStrand():
            self.reverse()

    ## Return True if the query overlaps with the query of another Align instance, False otherwise
    #
    def isQueryOverlapping( self, iAlign ):
        return self.getQueryAsRange().isOverlapping( iAlign.getQueryAsRange() )

    ## Return True if the subject overlaps with the subject of another Align instance, False otherwise
    #
    def isSubjectOverlapping( self, iAlign ):
        return self.getSubjectAsRange().isOverlapping( iAlign.getSubjectAsRange() )

    ## Return True if the Align instance overlaps with another Align instance, False otherwise
    #
    # @note both the queries and the subjects have to overlap
    #
    def isOverlapping( self, iAlign ):
        if self.isQueryOverlapping( iAlign ) and self.isSubjectOverlapping( iAlign ):
            return True
        return False

    ## Update the score
    #
    # @note the new score is the length on the query times the percentage of identity
    #
    def updateScore( self ):
        self.score = self.getLengthOnQuery() * self.getIdentity() / 100.0
|