Mercurial > repos > urgi-team > teiso
comparison TEisotools-1.0/commons/core/coord/Align.py @ 6:20ec0d14798e draft
Uploaded
| author | urgi-team |
|---|---|
| date | Wed, 20 Jul 2016 05:00:24 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 5:4093a2fb58be | 6:20ec0d14798e |
|---|---|
| 1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
| 2 # http://www.inra.fr | |
| 3 # http://urgi.versailles.inra.fr | |
| 4 # | |
| 5 # This software is governed by the CeCILL license under French law and | |
| 6 # abiding by the rules of distribution of free software. You can use, | |
| 7 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 9 # "http://www.cecill.info". | |
| 10 # | |
| 11 # As a counterpart to the access to the source code and rights to copy, | |
| 12 # modify and redistribute granted by the license, users are provided only | |
| 13 # with a limited warranty and the software's author, the holder of the | |
| 14 # economic rights, and the successive licensors have only limited | |
| 15 # liability. | |
| 16 # | |
| 17 # In this respect, the user's attention is drawn to the risks associated | |
| 18 # with loading, using, modifying and/or developing or reproducing the | |
| 19 # software by the user in light of its specific status of free software, | |
| 20 # that may mean that it is complicated to manipulate, and that also | |
| 21 # therefore means that it is reserved for developers and experienced | |
| 22 # professionals having in-depth computer knowledge. Users are therefore | |
| 23 # encouraged to load and test the software's suitability as regards their | |
| 24 # requirements in conditions enabling the security of their systems and/or | |
| 25 # data to be ensured and, more generally, to use and operate it in the | |
| 26 # same conditions as regards security. | |
| 27 # | |
| 28 # The fact that you are presently reading this means that you have had | |
| 29 # knowledge of the CeCILL license and that you accept its terms. | |
| 30 | |
| 31 import time | |
| 32 from commons.core.coord.Map import Map | |
| 33 from commons.core.coord.Range import Range | |
| 34 | |
| 35 ## Handle a match between two sequences, query and subject (pair of coordinates with E-value, score and identity) | |
| 36 # | |
class Align( object ):
    ## Match between two sequences, a query and a subject
    #
    # An instance stores one Range object per sequence (range_query and range_subject)
    # together with the E-value, the score and the identity percentage of the match.
    #
    # @note '__dict__' is listed in __slots__, hence instances still accept additional attributes
    #

    __slots__ = ("range_query", "range_subject", "e_value", "score", "identity", '__dict__')

    ## Constructor
    #
    # @param range_q: a Range instance for the query (a new Range() is created if None)
    # @param range_s: a Range instance for the subject (a new Range() is created if None)
    # @param e_value: E-value of the match
    # @param score: score of the match
    # @param identity: identity percentage of the match
    # @note the given Range instances are stored by reference, not copied
    #
    def __init__(self, range_q=None, range_s=None, e_value=0, score=0, identity=0):
        # The default ranges are built here, once per call. Writing Range() in the
        # signature would evaluate it a single time, and that one object would be
        # shared (and mutated through reset(), reverse(), merge() and the setters)
        # by every instance created without explicit ranges.
        if range_q is None:
            range_q = Range()
        if range_s is None:
            range_s = Range()
        self.range_query = range_q
        self.range_subject = range_s
        self.e_value = float(e_value)
        self.score = float(score)
        self.identity = float(identity)

    ## Return True if the instance is empty, False otherwise
    #
    # @note the instance is considered empty as soon as one of its two ranges is empty
    #
    def isEmpty(self):
        return self.range_query.isEmpty() or self.range_subject.isEmpty()

    ## Equal operator
    #
    # @param o the object to compare with
    # @return False if 'o' is not of exactly the same type, otherwise the comparison of both ranges, E-value, score and identity
    #
    def __eq__(self, o):
        # strict type comparison: an instance of a subclass is never equal to an Align
        if type(o) is not type(self):
            return False
        return self.range_query==o.range_query and self.range_subject==o.range_subject and \
               self.e_value==o.e_value and self.score==o.score and self.identity==o.identity

    ## Unequal operator
    #
    # @param o the object to compare with (usually an Align instance)
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Convert the object into a string
    #
    # @note used in 'print myObject'
    #
    def __str__( self ):
        return self.toString()

    ## Read attributes from an Align file
    #
    # @param fileHandler: file handler of the file being read
    # @return: 1 on success, 0 at the end of the file or if the line has less than 9 tab-separated fields
    # @note the instance is reset before the line is read
    #
    def read(self, fileHandler):
        self.reset()
        line = fileHandler.readline()
        if line == "":
            return 0
        tokens = line.split("\t")
        # setFromTuple() indexes nine fields (up to the identity): a shorter line
        # is rejected here instead of raising an IndexError there
        if len(tokens) < 9:
            return 0
        self.setFromTuple(tokens)
        return 1

    ## Set attributes from tuple
    #
    # @param tuple a tuple with (queryName,queryStart,queryEnd,subjectName,subjectStart,subjectEnd,E-value,score,identity)
    # @note data are loaded such that the query is always on the direct strand
    #
    def setFromTuple( self, tuple ):
        # New Range instances are bound instead of refilling the current ones:
        # the constructor stores the ranges it is given by reference, so the current
        # ones may still be used by the caller or by another Align instance.
        self.range_query = Range()
        self.range_subject = Range()
        if int(tuple[1]) < int(tuple[2]):
            queryCoords = ( tuple[0], tuple[1], tuple[2] )
            subjectCoords = ( tuple[3], tuple[4], tuple[5] )
        else:
            # query start is not lower than query end: swap the coordinates of
            # both sequences so that the query is stored start-first
            queryCoords = ( tuple[0], tuple[2], tuple[1] )
            subjectCoords = ( tuple[3], tuple[5], tuple[4] )
        self.range_query.setFromTuple( queryCoords )
        self.range_subject.setFromTuple( subjectCoords )
        self.e_value = float(tuple[6])
        self.score = float(tuple[7])
        self.identity = float(tuple[8])

    ## Reset
    #
    # @note both ranges are reset in place, then E-value, score and identity are set to 0
    #
    def reset( self ):
        self.range_query.reset()
        self.range_subject.reset()
        self.e_value = 0
        self.score = 0
        self.identity = 0

    ## Return the attributes as a formatted string
    #
    # @return tab-separated string: query range, subject range, E-value (%g), score (%i, i.e. truncated to an integer) and identity (%f)
    #
    def toString(self):
        return "%s\t%s\t%g\t%i\t%f" % ( self.range_query.toString(),
                                        self.range_subject.toString(),
                                        self.e_value, self.score, self.identity )

    ## Return the attributes as a GFF-formatted string
    #
    # @param source content of the 2nd column
    # @param type content of the 3rd column
    # @param phase content of the 8th column
    # @param ID value of the 'ID' attribute (if empty, digits taken from the current time are used)
    # @param Parent value of the 'Parent' attribute (omitted if empty)
    # @note the 6th column is filled with the E-value
    # @note side effect: if the subject is not on the direct strand, query and subject are first reversed in place
    #
    def toStringAsGff( self, source="REPET", type="match", phase=".", ID="", Parent="" ):
        if not self.isSubjectOnDirectStrand():
            self.reverse()
        if ID == "":
            # default identifier built from the last digits of the current time;
            # it is a string, hence formatted below with '%s' ('%i' raises a TypeError)
            ID = str(time.time())[-8:-1].replace(".","")
        attributes = "ID=%s" % ( ID )
        if Parent != "":
            attributes += ";Parent=%s" % ( Parent )
        attributes += ";Target=%s %i %i" % ( self.getSubjectName(), self.getSubjectStart(), self.getSubjectEnd() )
        return "%s\t%s\t%s\t%s\t%s\t%g\t%s\t%s\t%s" % ( self.getQueryName(),
                                                        source,
                                                        type,
                                                        self.getQueryMin(),
                                                        self.getQueryMax(),
                                                        self.e_value,
                                                        self.getQueryStrand(),
                                                        phase,
                                                        attributes )

    ## Reverse query and subject
    #
    def reverse(self):
        self.range_query.reverse()
        self.range_subject.reverse()

    ## Show the attributes
    #
    # @note print the string returned by toString() on the standard output
    #
    def show(self):
        # call form with a single argument: same output with Python 2, and also valid with Python 3
        print(self.toString())

    ## Write attributes into an Align file
    #
    # @param fileHandler: file handler of the file being filled
    #
    def write(self, fileHandler):
        fileHandler.write("%s\n" % (self.toString()))

    ## Save attributes into an Align file
    #
    # @param file: name of the file being filled (opened in append mode)
    #
    def save(self, file):
        # 'with' closes the file even if write() raises
        with open( file, "a" ) as fileHandler:
            self.write( fileHandler )

    ## Return the score
    #
    def getScore(self):
        return self.score

    ## Return the identity
    #
    def getIdentity(self):
        return self.identity

    ## Return the E-value
    #
    def getEvalue(self):
        return self.e_value

    ## Return the length on the query
    #
    def getLengthOnQuery(self):
        return self.range_query.getLength()

    ## Return the name of the query
    #
    def getQueryName( self ):
        return self.range_query.seqname

    ## Return the start of the query
    #
    def getQueryStart( self ):
        return self.range_query.start

    ## Return the end of the query
    #
    def getQueryEnd( self ):
        return self.range_query.end

    ## Return the min of the query
    #
    def getQueryMin( self ):
        return self.range_query.getMin()

    ## Return the max of the query
    #
    def getQueryMax( self ):
        return self.range_query.getMax()

    ## Return the strand of the query
    #
    def getQueryStrand( self ):
        return self.range_query.getStrand()

    ## Return the length on the subject
    #
    def getLengthOnSubject(self):
        return self.range_subject.getLength()

    ## Return the name of the subject
    #
    def getSubjectName( self ):
        return self.range_subject.seqname

    ## Return the start of the subject
    #
    def getSubjectStart( self ):
        return self.range_subject.start

    ## Return the end of the subject
    #
    def getSubjectEnd( self ):
        return self.range_subject.end

    ## Return the min of the subject
    #
    def getSubjectMin( self ):
        return self.range_subject.getMin()

    ## Return the max of the subject
    #
    def getSubjectMax( self ):
        return self.range_subject.getMax()

    ## Return the strand of the subject
    #
    def getSubjectStrand( self ):
        return self.range_subject.getStrand()

    ## Return the query as a Range instance
    #
    # @note the internal object is returned, not a copy
    #
    def getQueryAsRange( self ):
        return self.range_query

    ## Return the subject as a Range instance
    #
    # @note the internal object is returned, not a copy
    #
    def getSubjectAsRange( self ):
        return self.range_subject

    ## Set the name of the query
    #
    def setQueryName( self, name ):
        self.range_query.seqname = name

    ## Set the start of the query
    #
    def setQueryStart( self, start ):
        self.range_query.start = start

    ## Set the end of the query
    #
    def setQueryEnd( self, end ):
        self.range_query.end = end

    ## Set the name of the subject
    #
    def setSubjectName( self, name ):
        self.range_subject.seqname = name

    ## Set the start of the subject
    #
    def setSubjectStart( self, start ):
        self.range_subject.start = start

    ## Set the end of the subject
    #
    def setSubjectEnd( self, end ):
        self.range_subject.end = end

    ## Merge the instance with another Align instance
    #
    # @param o an Align instance
    # @note nothing is done if the query names or the subject names differ
    # @note after merging, the instance keeps the highest score, the lowest E-value and the highest identity
    #
    def merge(self, o):
        sameQuery = self.range_query.seqname == o.range_query.seqname
        sameSubject = self.range_subject.seqname == o.range_subject.seqname
        if not ( sameQuery and sameSubject ):
            return
        self.range_query.merge(o.range_query)
        self.range_subject.merge(o.range_subject)
        self.score = max(self.score,o.score)
        self.e_value = min(self.e_value,o.e_value)
        self.identity = max(self.identity,o.identity)

    ## Return a Map instance with the subject mapped on the query
    #
    # @note the Map is named after the subject and located on the query; its start and end are swapped if the subject is not on the direct strand
    #
    def getSubjectAsMapOfQuery(self):
        iMap = Map()
        iMap.name = self.range_subject.seqname
        iMap.seqname = self.range_query.seqname
        if self.range_subject.isOnDirectStrand():
            iMap.start = self.range_query.start
            iMap.end = self.range_query.end
        else:
            iMap.start = self.range_query.end
            iMap.end = self.range_query.start
        return iMap

    ## Return True if query is on direct strand
    #
    def isQueryOnDirectStrand( self ):
        return self.range_query.isOnDirectStrand()

    ## Return True if subject is on direct strand
    #
    def isSubjectOnDirectStrand( self ):
        return self.range_subject.isOnDirectStrand()

    ## Return True if query and subject are on the same strand, False otherwise
    #
    def areQrySbjOnSameStrand(self):
        return self.isQueryOnDirectStrand() == self.isSubjectOnDirectStrand()

    ## Return False if query and subject are on the same strand, True otherwise
    #
    def areQrySbjOnOppositeStrands(self):
        return not self.areQrySbjOnSameStrand()

    ## Set attributes from string
    #
    # @param string a string formatted like queryName queryStart queryEnd subjectName subjectStart subjectEnd E-value score identity
    # @param sep field separator
    # @note a single trailing newline, if any, is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        # endswith() also copes with an empty string, for which string[-1] raises an IndexError
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple( string.split(sep) )

    ## Return a first Map instance for the query and a second for the subject
    #
    # @note both Map instances are named "repet"
    #
    def getMapsOfQueryAndSubject(self):
        iMapQuery = Map( name="repet",
                         seqname=self.range_query.seqname,
                         start=self.range_query.start,
                         end=self.range_query.end )
        iMapSubject = Map( name="repet",
                           seqname=self.range_subject.seqname,
                           start=self.range_subject.start,
                           end=self.range_subject.end )
        return iMapQuery, iMapSubject

    ## Write query coordinates as Map in a file
    #
    # @param fileHandler: file handler of the file being filled
    # @note the Map being written is the one returned by getSubjectAsMapOfQuery()
    #
    def writeSubjectAsMapOfQuery( self, fileHandler ):
        iMap = self.getSubjectAsMapOfQuery()
        iMap.write( fileHandler )

    ## Return a bin for fast database access
    #
    # @note the bin is the one of the query range
    #
    def getBin(self):
        return self.range_query.getBin()

    ## Switch query and subject
    #
    # @note if the new query is not on the direct strand, both ranges are then reversed
    #
    def switchQuerySubject( self ):
        self.range_query, self.range_subject = self.range_subject, self.range_query
        if not self.isQueryOnDirectStrand():
            self.reverse()

    ## Return True if the query overlaps with the query of another Align instance, False otherwise
    #
    def isQueryOverlapping( self, iAlign ):
        return self.getQueryAsRange().isOverlapping( iAlign.getQueryAsRange() )

    ## Return True if the subject overlaps with the subject of another Align instance, False otherwise
    #
    def isSubjectOverlapping( self, iAlign ):
        return self.getSubjectAsRange().isOverlapping( iAlign.getSubjectAsRange() )

    ## Return True if the Align instance overlaps with another Align instance, False otherwise
    #
    # @note both the queries and the subjects have to overlap
    #
    def isOverlapping( self, iAlign ):
        # bool() keeps the strict True/False return value
        return bool( self.isQueryOverlapping( iAlign ) and self.isSubjectOverlapping( iAlign ) )

    ## Update the score
    #
    # @note the new score is the length on the query times the percentage of identity
    #
    def updateScore( self ):
        self.score = self.getLengthOnQuery() * self.getIdentity() / 100.0
