Mercurial > repos > yufei-luo > s_mart
comparison commons/core/coord/Align.py @ 36:44d5973c188c
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 15:02:29 -0400 |
parents | 769e306b7933 |
children |
comparison
equal
deleted
inserted
replaced
35:d94018ca4ada | 36:44d5973c188c |
---|---|
1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
2 # http://www.inra.fr | |
3 # http://urgi.versailles.inra.fr | |
4 # | |
5 # This software is governed by the CeCILL license under French law and | |
6 # abiding by the rules of distribution of free software. You can use, | |
7 # modify and/ or redistribute the software under the terms of the CeCILL | |
8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
9 # "http://www.cecill.info". | |
10 # | |
11 # As a counterpart to the access to the source code and rights to copy, | |
12 # modify and redistribute granted by the license, users are provided only | |
13 # with a limited warranty and the software's author, the holder of the | |
14 # economic rights, and the successive licensors have only limited | |
15 # liability. | |
16 # | |
17 # In this respect, the user's attention is drawn to the risks associated | |
18 # with loading, using, modifying and/or developing or reproducing the | |
19 # software by the user in light of its specific status of free software, | |
20 # that may mean that it is complicated to manipulate, and that also | |
21 # therefore means that it is reserved for developers and experienced | |
22 # professionals having in-depth computer knowledge. Users are therefore | |
23 # encouraged to load and test the software's suitability as regards their | |
24 # requirements in conditions enabling the security of their systems and/or | |
25 # data to be ensured and, more generally, to use and operate it in the | |
26 # same conditions as regards security. | |
27 # | |
28 # The fact that you are presently reading this means that you have had | |
29 # knowledge of the CeCILL license and that you accept its terms. | |
30 | |
31 import time | |
32 | |
33 from commons.core.coord.Range import Range | |
34 from commons.core.coord.Map import Map | |
35 | |
36 | |
37 ## Handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity) | |
38 # | |
class Align(object):

    ## Minimal number of tab-separated fields in a line of an Align file
    #
    # @note fields are: queryName, queryStart, queryEnd, subjectName, subjectStart, subjectEnd, E-value, score, identity
    #
    _NB_FIELDS = 9

    ## Constructor
    #
    # @param range_q: a Range instance for the query (a new empty Range if None)
    # @param range_s: a Range instance for the subject (a new empty Range if None)
    # @param e_value: E-value of the match (converted to float)
    # @param score: score of the match (converted to float)
    # @param identity: identity percentage of the match (converted to float)
    #
    def __init__(self, range_q=None, range_s=None, e_value=0, score=0, identity=0):
        # Default ranges are built here rather than in the signature: a
        # 'Range()' default is evaluated only once, so it would be shared by
        # all the instances and modified in place by reset(), reverse(),
        # merge() and the setters.
        if range_q is None:
            range_q = Range()
        if range_s is None:
            range_s = Range()
        self.range_query = range_q
        self.range_subject = range_s
        self.e_value = float(e_value)
        self.score = float(score)
        self.identity = float(identity)

    ## Return True if the instance is empty, False otherwise
    #
    # @note the instance is empty as soon as its query or its subject is empty
    #
    def isEmpty(self):
        return self.range_query.isEmpty() or self.range_subject.isEmpty()

    ## Equal operator
    #
    # @param o an Align instance
    # @return True if both ranges, the E-value, the score and the identity are equal, False otherwise (also when 'o' is not an Align instance)
    #
    def __eq__(self, o):
        if not isinstance(o, Align):
            return False
        if self.range_query != o.range_query or self.range_subject != o.range_subject:
            return False
        return self.e_value == o.e_value \
            and self.score == o.score \
            and self.identity == o.identity

    ## Unequal operator
    #
    # @param o an Align instance
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Convert the object into a string
    #
    # @note used in 'print myObject'
    #
    def __str__(self):
        return self.toString()

    ## Read attributes from an Align file
    #
    # @param fileHandler: file handler of the file being read
    # @return: 1 on success, 0 at the end of the file or if the line has too few fields
    # @note the instance is reset before reading
    #
    def read(self, fileHandler):
        self.reset()
        line = fileHandler.readline()
        if line == "":
            return 0
        tokens = line.split("\t")
        # setFromTuple() reads the first 9 fields: a shorter line cannot be loaded
        if len(tokens) < self._NB_FIELDS:
            return 0
        self.setFromTuple(tokens)
        return 1

    ## Set attributes from tuple
    #
    # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStart,subjectEnd,E-value,score,identity)
    # @note data are loaded such that the query is always on the direct strand
    #
    def setFromTuple(self, tuple):
        # New Range instances are created so that the ranges previously held
        # (possibly shared with other objects) are not modified in place.
        self.range_query = Range()
        self.range_subject = Range()
        if int(tuple[1]) < int(tuple[2]):
            self.range_query.setFromTuple((tuple[0], tuple[1], tuple[2]))
            self.range_subject.setFromTuple((tuple[3], tuple[4], tuple[5]))
        else:
            # query given on the reverse strand: swap start and end of both ranges
            self.range_query.setFromTuple((tuple[0], tuple[2], tuple[1]))
            self.range_subject.setFromTuple((tuple[3], tuple[5], tuple[4]))
        self.e_value = float(tuple[6])
        self.score = float(tuple[7])
        self.identity = float(tuple[8])

    ## Reset
    #
    # @note both ranges are reset in place; E-value, score and identity are set to 0
    #
    def reset(self):
        self.range_query.reset()
        self.range_subject.reset()
        self.e_value = 0
        self.score = 0
        self.identity = 0

    ## Return the attributes as a formatted string
    #
    # @note tab-separated: query range, subject range, E-value, score (as an integer) and identity
    #
    def toString(self):
        fields = [self.range_query.toString(),
                  self.range_subject.toString(),
                  "%g\t%i\t%f" % (self.e_value, self.score, self.identity)]
        return "\t".join(fields)

    ## Return the attributes as a GFF-formatted string
    #
    # @param source: content of the 'source' column
    # @param type: content of the 'type' column
    # @param phase: content of the 'phase' column
    # @param ID: value of the 'ID' attribute (if empty, digits taken from the current time are used)
    # @param Parent: value of the 'Parent' attribute (omitted if empty)
    # @note the instance itself is reversed if its subject is not on the direct strand
    # @note the E-value is written in the 'score' column
    #
    def toStringAsGff(self, source="REPET", type="match", phase=".", ID="", Parent=""):
        if not self.isSubjectOnDirectStrand():
            self.reverse()
        if ID == "":
            # the digits are kept as a string, hence '%s' below ('%i' would fail on a string)
            ID = str(time.time())[-8:-1].replace(".", "")
        attributes = "ID=%s" % (ID)
        if Parent != "":
            attributes += ";Parent=%s" % (Parent)
        attributes += ";Target=%s %i %i" % (self.getSubjectName(), self.getSubjectStart(), self.getSubjectEnd())
        columns = ["%s" % (self.getQueryName()),
                   "%s" % (source),
                   "%s" % (type),
                   "%s" % (self.getQueryMin()),
                   "%s" % (self.getQueryMax()),
                   "%g" % (self.e_value),
                   "%s" % (self.getQueryStrand()),
                   "%s" % (phase),
                   attributes]
        return "\t".join(columns)

    ## Reverse query and subject
    #
    def reverse(self):
        self.range_query.reverse()
        self.range_subject.reverse()

    ## Show the attributes
    #
    def show(self):
        # single-argument call form: same output whether 'print' is a statement or a function
        print(self.toString())

    ## Write attributes into an Align file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def write(self, fileHandler):
        fileHandler.write("%s\n" % (self.toString()))

    ## Save attributes into an Align file
    #
    # @param file: name of the file being filled (opened in append mode)
    #
    def save(self, file):
        fileHandler = open(file, "a")
        try:
            self.write(fileHandler)
        finally:
            # close the file even if writing fails
            fileHandler.close()

    ## Return the score
    #
    def getScore(self):
        return self.score

    ## Return the identity
    #
    def getIdentity(self):
        return self.identity

    ## Return the E-value
    #
    def getEvalue(self):
        return self.e_value

    ## Return the length on the query
    #
    def getLengthOnQuery(self):
        return self.range_query.getLength()

    ## Return the name of the query
    #
    def getQueryName(self):
        return self.range_query.seqname

    ## Return the start of the query
    #
    def getQueryStart(self):
        return self.range_query.start

    ## Return the end of the query
    #
    def getQueryEnd(self):
        return self.range_query.end

    ## Return the min of the query
    #
    def getQueryMin(self):
        return self.range_query.getMin()

    ## Return the max of the query
    #
    def getQueryMax(self):
        return self.range_query.getMax()

    ## Return the strand of the query
    #
    def getQueryStrand(self):
        return self.range_query.getStrand()

    ## Return the length on the subject
    #
    def getLengthOnSubject(self):
        return self.range_subject.getLength()

    ## Return the name of the subject
    #
    def getSubjectName(self):
        return self.range_subject.seqname

    ## Return the start of the subject
    #
    def getSubjectStart(self):
        return self.range_subject.start

    ## Return the end of the subject
    #
    def getSubjectEnd(self):
        return self.range_subject.end

    ## Return the min of the subject
    #
    def getSubjectMin(self):
        return self.range_subject.getMin()

    ## Return the max of the subject
    #
    def getSubjectMax(self):
        return self.range_subject.getMax()

    ## Return the strand of the subject
    #
    def getSubjectStrand(self):
        return self.range_subject.getStrand()

    ## Return the query as a Range instance
    #
    # @note the Range held by the instance is returned, not a copy
    #
    def getQueryAsRange(self):
        return self.range_query

    ## Return the subject as a Range instance
    #
    # @note the Range held by the instance is returned, not a copy
    #
    def getSubjectAsRange(self):
        return self.range_subject

    ## Set the name of the query
    #
    def setQueryName(self, name):
        self.range_query.seqname = name

    ## Set the start of the query
    #
    def setQueryStart(self, start):
        self.range_query.start = start

    ## Set the end of the query
    #
    def setQueryEnd(self, end):
        self.range_query.end = end

    ## Set the name of the subject
    #
    def setSubjectName(self, name):
        self.range_subject.seqname = name

    ## Set the start of the subject
    #
    def setSubjectStart(self, start):
        self.range_subject.start = start

    ## Set the end of the subject
    #
    def setSubjectEnd(self, end):
        self.range_subject.end = end

    ## Merge the instance with another Align instance
    #
    # @param o an Align instance
    # @note nothing is done if the query names or the subject names differ
    # @note the highest score, the lowest E-value and the highest identity are kept
    #
    def merge(self, o):
        sameQuery = self.range_query.seqname == o.range_query.seqname
        sameSubject = self.range_subject.seqname == o.range_subject.seqname
        if not (sameQuery and sameSubject):
            return
        self.range_query.merge(o.range_query)
        self.range_subject.merge(o.range_subject)
        self.score = max(self.score, o.score)
        self.e_value = min(self.e_value, o.e_value)
        self.identity = max(self.identity, o.identity)

    ## Return a Map instance with the subject mapped on the query
    #
    # @note the Map is named after the subject and lies on the query sequence;
    #       query start and end are swapped if the subject is not on the direct strand
    #
    def getSubjectAsMapOfQuery(self):
        iMap = Map()
        iMap.name = self.range_subject.seqname
        iMap.seqname = self.range_query.seqname
        if self.range_subject.isOnDirectStrand():
            iMap.start = self.range_query.start
            iMap.end = self.range_query.end
        else:
            iMap.start = self.range_query.end
            iMap.end = self.range_query.start
        return iMap

    ## Return True if query is on direct strand
    #
    def isQueryOnDirectStrand(self):
        return self.range_query.isOnDirectStrand()

    ## Return True if subject is on direct strand
    #
    def isSubjectOnDirectStrand(self):
        return self.range_subject.isOnDirectStrand()

    ## Return True if query and subject are on the same strand, False otherwise
    #
    def areQrySbjOnSameStrand(self):
        return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()

    ## Return False if query and subject are on the same strand, True otherwise
    #
    def areQrySbjOnOppositeStrands(self):
        return not self.areQrySbjOnSameStrand()

    ## Set attributes from string
    #
    # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity
    # @param sep field separator
    # @note a single trailing newline is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple(string.split(sep))

    ## Return a first Map instance for the query and a second for the subject
    #
    # @note both Map instances are named "repet"
    #
    def getMapsOfQueryAndSubject(self):
        iMapQuery = Map(name="repet",
                        seqname=self.range_query.seqname,
                        start=self.range_query.start,
                        end=self.range_query.end)
        iMapSubject = Map(name="repet",
                          seqname=self.range_subject.seqname,
                          start=self.range_subject.start,
                          end=self.range_subject.end)
        return iMapQuery, iMapSubject

    ## Write the subject, mapped on the query, as a Map in a file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def writeSubjectAsMapOfQuery(self, fileHandler):
        iMap = self.getSubjectAsMapOfQuery()
        iMap.write(fileHandler)

    ## Return a bin for fast database access
    #
    # @note the bin is the one of the query range
    #
    def getBin(self):
        return self.range_query.getBin()

    ## Switch query and subject
    #
    # @note both ranges are then reversed if the new query is not on the direct strand
    #
    def switchQuerySubject(self):
        self.range_query, self.range_subject = self.range_subject, self.range_query
        if not self.isQueryOnDirectStrand():
            self.reverse()

    ## Return True if the query overlaps with the query of another Align instance, False otherwise
    #
    # @param iAlign an Align instance
    #
    def isQueryOverlapping(self, iAlign):
        return self.getQueryAsRange().isOverlapping(iAlign.getQueryAsRange())

    ## Return True if the subject overlaps with the subject of another Align instance, False otherwise
    #
    # @param iAlign an Align instance
    #
    def isSubjectOverlapping(self, iAlign):
        return self.getSubjectAsRange().isOverlapping(iAlign.getSubjectAsRange())

    ## Return True if the Align instance overlaps with another Align instance, False otherwise
    #
    # @param iAlign an Align instance
    # @note both the queries and the subjects have to overlap
    #
    def isOverlapping(self, iAlign):
        return bool(self.isQueryOverlapping(iAlign) and self.isSubjectOverlapping(iAlign))

    ## Update the score
    #
    # @note the new score is the length on the query times the percentage of identity
    #
    def updateScore(self):
        self.score = self.getLengthOnQuery() * self.getIdentity() / 100.0