Mercurial > repos > yufei-luo > s_mart
comparison SMART/Java/Python/structure/Interval.py @ 6:769e306b7933
Change the repository level.
| author | yufei-luo |
|---|---|
| date | Fri, 18 Jan 2013 04:54:14 -0500 |
| parents | |
| children | 94ab73e8a190 |
comparison
equal
deleted
inserted
replaced
| 5:ea3082881bf8 | 6:769e306b7933 |
|---|---|
| 1 # | |
| 2 # Copyright INRA-URGI 2009-2010 | |
| 3 # | |
| 4 # This software is governed by the CeCILL license under French law and | |
| 5 # abiding by the rules of distribution of free software. You can use, | |
| 6 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 7 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 8 # "http://www.cecill.info". | |
| 9 # | |
| 10 # As a counterpart to the access to the source code and rights to copy, | |
| 11 # modify and redistribute granted by the license, users are provided only | |
| 12 # with a limited warranty and the software's author, the holder of the | |
| 13 # economic rights, and the successive licensors have only limited | |
| 14 # liability. | |
| 15 # | |
| 16 # In this respect, the user's attention is drawn to the risks associated | |
| 17 # with loading, using, modifying and/or developing or reproducing the | |
| 18 # software by the user in light of its specific status of free software, | |
| 19 # that may mean that it is complicated to manipulate, and that also | |
| 20 # therefore means that it is reserved for developers and experienced | |
| 21 # professionals having in-depth computer knowledge. Users are therefore | |
| 22 # encouraged to load and test the software's suitability as regards their | |
| 23 # requirements in conditions enabling the security of their systems and/or | |
| 24 # data to be ensured and, more generally, to use and operate it in the | |
| 25 # same conditions as regards security. | |
| 26 # | |
| 27 # The fact that you are presently reading this means that you have had | |
| 28 # knowledge of the CeCILL license and that you accept its terms. | |
| 29 # | |
| 30 | |
| 31 from SMART.Java.Python.structure.Bins import * | |
| 32 from commons.core.coord.Range import Range | |
| 33 | |
| 34 class Interval(Range): | |
| 35 """ | |
| 36 Store a genomic interval | |
| 37 @ivar name: name of the interval [optional] | |
| 38 @type name: string | |
| 39 @ivar id: id of the interval [optional] | |
| 40 @type id: int | |
| 41 @ivar bin: bin in which the interval should be if stored in a database [computed] | |
| 42 @type bin: int | |
| 43 @ival tags: information about the transcript [optional] | |
| 44 @type tags: dict | |
| 45 @ivar verbosity: verbosity | |
| 46 @type verbosity: int [default: 0] | |
| 47 """ | |
| 48 | |
| 49 def __init__(self, interval = None, verbosity = 0): | |
| 50 """ | |
| 51 Constructor | |
| 52 @param interval: interval to be copied | |
| 53 @type interval: class L{Interval<Interval>} | |
| 54 @param verbosity: verbosity | |
| 55 @type verbosity: int | |
| 56 """ | |
| 57 Range.__init__(self) | |
| 58 self.name = None | |
| 59 self.id = None | |
| 60 self.bin = None | |
| 61 self.verbosity = verbosity | |
| 62 self.tags = {} | |
| 63 if interval != None: | |
| 64 self.copy(interval) | |
| 65 | |
| 66 #!!!! Warning: two methods getStart() and getEnd() give the information maximum and minimum in interval.!!!!# | |
| 67 #In case strand = "+", start < end; strand = "-", start > end | |
| 68 def getStart(self): | |
| 69 if self.start == -1: | |
| 70 return -1 | |
| 71 if self.end == -1: | |
| 72 return self.start | |
| 73 return self.getMin() | |
| 74 | |
| 75 | |
| 76 def getEnd(self): | |
| 77 if self.end == -1: | |
| 78 return -1 | |
| 79 if self.start == -1: | |
| 80 return self.end | |
| 81 return self.getMax() | |
| 82 | |
| 83 | |
| 84 def getChromosome(self): | |
| 85 return self.getSeqname() | |
| 86 | |
| 87 | |
| 88 def getDirection(self): | |
| 89 return 1 if self.getStrand() == "+" else -1 | |
| 90 | |
| 91 | |
| 92 def getName(self): | |
| 93 return self.name | |
| 94 | |
| 95 | |
| 96 def isSet(self): | |
| 97 """ | |
| 98 Check if the interval is set | |
| 99 """ | |
| 100 return self.getStart() == None and self.getEnd() == None | |
| 101 | |
| 102 | |
| 103 def copy(self, interval): | |
| 104 """ | |
| 105 Copy method | |
| 106 @param interval: interval to be copied | |
| 107 @type interval: class L{Interval<Interval>} | |
| 108 """ | |
| 109 self.setStart(interval.getStart()) | |
| 110 self.setEnd(interval.getEnd()) | |
| 111 self.setChromosome(interval.getChromosome()) | |
| 112 self.setDirection(interval.getDirection()) | |
| 113 self.name = interval.name | |
| 114 self.id = interval.id | |
| 115 self.bin = interval.bin | |
| 116 self.tags = {} | |
| 117 for tag in interval.tags: | |
| 118 self.tags[tag] = interval.tags[tag] | |
| 119 self.verbosity = interval.verbosity | |
| 120 | |
| 121 | |
| 122 def setName(self, name): | |
| 123 """ | |
| 124 Set the name | |
| 125 @param name: name of the interval | |
| 126 @type name: string | |
| 127 """ | |
| 128 if len(name) > 100: | |
| 129 name = name[:100] | |
| 130 self.name = name | |
| 131 | |
| 132 | |
| 133 def setChromosome(self, chromosome=""): | |
| 134 """ | |
| 135 Set the chromosome | |
| 136 @param chromosome: chromosome on which the interval is | |
| 137 @type chromosome: string | |
| 138 """ | |
| 139 if not chromosome: | |
| 140 self.seqname = None | |
| 141 else: | |
| 142 self.seqname = chromosome.replace(".", "_").replace("|", "_") | |
| 143 | |
| 144 | |
| 145 def setStart(self, start): | |
| 146 """ | |
| 147 Set the start point | |
| 148 Possibly reset bin | |
| 149 @param start: start point of the interval | |
| 150 @type start: int | |
| 151 """ | |
| 152 self.bin = None | |
| 153 direction = self.getDirection() | |
| 154 if self.start == -1: | |
| 155 self.start = start | |
| 156 elif self.end == -1: | |
| 157 self.end = start | |
| 158 else: | |
| 159 if direction == 1: | |
| 160 self.start = start | |
| 161 else: | |
| 162 self.end = start | |
| 163 if direction == 1: | |
| 164 self.start, self.end = min(self.start, self.end), max(self.start, self.end) | |
| 165 else: | |
| 166 self.start, self.end = max(self.start, self.end), min(self.start, self.end) | |
| 167 | |
| 168 | |
| 169 def setEnd(self, end): | |
| 170 """ | |
| 171 Set the end point | |
| 172 Possibly reset bin | |
| 173 @param end: end point of the interval of the interval | |
| 174 @type end: int | |
| 175 """ | |
| 176 self.bin = None | |
| 177 direction = self.getDirection() | |
| 178 if self.end == -1: | |
| 179 self.end = end | |
| 180 elif self.start == -1: | |
| 181 self.start = end | |
| 182 else: | |
| 183 if direction == 1: | |
| 184 self.end = end | |
| 185 else: | |
| 186 self.start = end | |
| 187 if direction == 1: | |
| 188 self.start, self.end = min(self.start, self.end), max(self.start, self.end) | |
| 189 else: | |
| 190 self.start, self.end = max(self.start, self.end), min(self.start, self.end) | |
| 191 | |
| 192 | |
| 193 def setSize(self, size): | |
| 194 """ | |
| 195 Possibly modify the end point | |
| 196 @param size: size of the transcript | |
| 197 @type size: int | |
| 198 """ | |
| 199 if self.end == None and self.start != None: | |
| 200 self.setEnd(self.start + self.getSize() - 1) | |
| 201 elif self.start == None and self.end != None: | |
| 202 self.setStart(self.end - self.getSize() + 1) | |
| 203 | |
| 204 | |
| 205 def getSize(self): | |
| 206 """ | |
| 207 Get the size | |
| 208 """ | |
| 209 return self.getEnd() - self.getStart() + 1 | |
| 210 | |
| 211 | |
| 212 def _setDirection(self, direction): | |
| 213 """ | |
| 214 Set the direction of the interval (connection to Range) | |
| 215 @param direction: direction of the transcript (+ / -) | |
| 216 @type direction: int (1 or -1) | |
| 217 """ | |
| 218 if direction * self.getDirection() < 0: | |
| 219 self.reverse() | |
| 220 | |
| 221 | |
| 222 def setDirection(self, direction): | |
| 223 """ | |
| 224 Set the direction of the interval | |
| 225 Possibly parse different formats | |
| 226 @param direction: direction of the transcript (+ / -) | |
| 227 @type direction: int or string | |
| 228 """ | |
| 229 if type(direction).__name__ == 'int': | |
| 230 self._setDirection(direction / abs(direction)) | |
| 231 elif type(direction).__name__ == 'str': | |
| 232 if direction == "+": | |
| 233 self._setDirection(1) | |
| 234 elif direction == "-": | |
| 235 self._setDirection(-1) | |
| 236 elif direction == "1" or direction == "-1": | |
| 237 self._setDirection(int(direction)) | |
| 238 elif direction.lower() == "plus": | |
| 239 self._setDirection(1) | |
| 240 elif direction.lower() == "minus": | |
| 241 self._setDirection(-1) | |
| 242 else: | |
| 243 raise Exception("Cannot understand direction %s" % (direction)) | |
| 244 else: | |
| 245 raise Exception("Cannot understand direction %s" % (direction)) | |
| 246 | |
| 247 | |
| 248 def extendStart(self, size): | |
| 249 """ | |
| 250 Extend the interval by the 5' end | |
| 251 @param size: the size to be exended | |
| 252 @type size: int | |
| 253 """ | |
| 254 if self.getDirection() == 1: | |
| 255 self.setStart(max(0, self.getStart() - size)) | |
| 256 else: | |
| 257 self.setEnd(self.getEnd() + size) | |
| 258 self.bin = None | |
| 259 | |
| 260 | |
| 261 def extendEnd(self, size): | |
| 262 """ | |
| 263 Extend the interval by the 3' end | |
| 264 @param size: the size to be exended | |
| 265 @type size: int | |
| 266 """ | |
| 267 if self.getDirection() == 1: | |
| 268 self.setEnd(self.getEnd() + size) | |
| 269 else: | |
| 270 self.setStart(max(0, self.getStart() - size)) | |
| 271 self.bin = None | |
| 272 | |
| 273 | |
| 274 def restrictStart(self, size = 1): | |
| 275 """ | |
| 276 Restrict the interval by some nucleotides, start from its start position | |
| 277 Remove the exons | |
| 278 @param size: the size to be restricted to | |
| 279 @type size: int | |
| 280 """ | |
| 281 if self.getDirection() == 1: | |
| 282 self.setEnd(min(self.getEnd(), self.getStart() + size - 1)) | |
| 283 else: | |
| 284 self.setStart(max(self.getStart(), self.getEnd() - size + 1)) | |
| 285 self.bin = None | |
| 286 | |
| 287 | |
| 288 def restrictEnd(self, size = 1): | |
| 289 """ | |
| 290 Restrict the interval by some nucleotides, end from its end position | |
| 291 Remove the exons | |
| 292 @param size: the size to be restricted to | |
| 293 @type size: int | |
| 294 """ | |
| 295 if self.getDirection() == 1: | |
| 296 self.setStart(max(self.getStart(), self.getEnd() - size + 1)) | |
| 297 else: | |
| 298 self.setEnd(min(self.getEnd(), self.getStart() + size - 1)) | |
| 299 self.bin = None | |
| 300 | |
| 301 | |
| 302 | |
| 303 def setTagValue(self, name, value): | |
| 304 """ | |
| 305 Set a tag | |
| 306 @param name: name of the tag | |
| 307 @type name: string | |
| 308 @param value: value of the tag | |
| 309 @type value: int or string | |
| 310 """ | |
| 311 self.tags[name] = value | |
| 312 | |
| 313 | |
| 314 def getTagNames(self): | |
| 315 """ | |
| 316 Get all the names of the tags | |
| 317 """ | |
| 318 return self.tags.keys() | |
| 319 | |
| 320 | |
| 321 def getTagValue(self, tag): | |
| 322 """ | |
| 323 Get the value of a tag | |
| 324 @param tag: name of a tag | |
| 325 @type tag: string | |
| 326 """ | |
| 327 if tag not in self.tags: | |
| 328 return None | |
| 329 return self.tags[tag] | |
| 330 | |
| 331 | |
| 332 def getTagValues(self, tagSep = "; ", fieldSep = " ", surrounder = ""): | |
| 333 """ | |
| 334 Get the formatted tag values | |
| 335 @param tagSep: separator between tags | |
| 336 @type tagSep: string | |
| 337 @param fieldSep: separator between tag name and tag value | |
| 338 @type fieldSep: string | |
| 339 @param surrounder: string which optionally surround values | |
| 340 @type surrounder: string | |
| 341 """ | |
| 342 tags = [] | |
| 343 for name in self.tags: | |
| 344 value = self.tags[name] | |
| 345 if value == None: | |
| 346 continue | |
| 347 if isinstance(value, basestring): | |
| 348 tags.append("%s%s%s%s%s" % (name, fieldSep, surrounder, self.tags[name], surrounder)) | |
| 349 elif type(value) is int: | |
| 350 tags.append("%s%s%s%i%s" % (name, fieldSep, surrounder, self.tags[name], surrounder)) | |
| 351 elif type(value) is float: | |
| 352 tags.append("%s%s%s%f%s" % (name, fieldSep, surrounder, self.tags[name], surrounder)) | |
| 353 else: | |
| 354 raise Exception("Do not know how to print '" + value + "'.") | |
| 355 if self.getName() != None: | |
| 356 tags.append("%s%s%s%s%s" % ("Name", fieldSep, surrounder, self.getName(), surrounder)) | |
| 357 return tagSep.join(tags) | |
| 358 | |
| 359 | |
| 360 def setTagValues(self, tags, tagSep = "; ", fieldSep = " "): | |
| 361 """ | |
| 362 Set the tag values using given string | |
| 363 @param tags: the tags, concatenated | |
| 364 @type tags: string | |
| 365 @param tagSep: separator between tags | |
| 366 @type tagSep: string | |
| 367 @param fieldSep: separator between tag name and tag value | |
| 368 @type fieldSep: string | |
| 369 """ | |
| 370 if tags == "": | |
| 371 self.tags = {} | |
| 372 return | |
| 373 for splittedTag in tags.split(tagSep): | |
| 374 if fieldSep not in splittedTag: | |
| 375 raise Exception("Weird field '%s' in tags '%s'" % (splittedTag, tags)) | |
| 376 tag, value = splittedTag.split(fieldSep, 1) | |
| 377 if tag == "Name": | |
| 378 self.setName(value) | |
| 379 continue | |
| 380 try: | |
| 381 intValue = int(value) | |
| 382 self.tags[tag] = intValue | |
| 383 except ValueError: | |
| 384 try: | |
| 385 floatValue = float(value) | |
| 386 self.tags[tag] = floatValue | |
| 387 except ValueError: | |
| 388 self.tags[tag] = value | |
| 389 | |
| 390 | |
| 391 def deleteTag(self, tag): | |
| 392 """ | |
| 393 Remove a tag | |
| 394 @param tag: the tag to be removed | |
| 395 @type tag: string | |
| 396 """ | |
| 397 if tag in self.tags: | |
| 398 del self.tags[tag] | |
| 399 | |
| 400 | |
| 401 def setNbOccurrences(self, nbOccurrences): | |
| 402 """ | |
| 403 Set the number of occurrences of the interval | |
| 404 @param nbOccurrences: number of occurrences of the interval | |
| 405 @type nbOccurrences: int | |
| 406 """ | |
| 407 self.setTagValue("nbOccurrences", nbOccurrences) | |
| 408 | |
| 409 | |
| 410 def setOccurrence(self, occurrence): | |
| 411 """ | |
| 412 Set the occurrence of this interval | |
| 413 @param occurrence: an occurrence for this transcript | |
| 414 @type occurrence: int | |
| 415 """ | |
| 416 self.setTagValue("occurrence", occurrence) | |
| 417 | |
| 418 def __eq__(self, interval): | |
| 419 """ | |
| 420 Whether two intervals are equal (start and end at same position) | |
| 421 @param interval: object to be compared to | |
| 422 @type interval: class L{Interval<Interval>} | |
| 423 """ | |
| 424 if not interval: | |
| 425 return False | |
| 426 return self.getChromosome() == interval.getChromosome() and self.getStart() == interval.getStart() and self.getEnd() == interval.getEnd() and self.getDirection() == interval.getDirection() | |
| 427 | |
| 428 | |
| 429 def overlapWith(self, interval, nbNucleotides = 1): | |
| 430 """ | |
| 431 Whether two intervals overlap | |
| 432 @param interval: object to be compared to | |
| 433 @type interval: class L{Interval<Interval>} | |
| 434 @param nbNucleotides: minimum number of nucleotides to declare and overlap | |
| 435 @type nbNucleotides: int | |
| 436 """ | |
| 437 if self.getChromosome() != interval.getChromosome(): | |
| 438 return False | |
| 439 return (min(self.getEnd(), interval.getEnd()) - max(self.getStart(), interval.getStart()) + 1 >= nbNucleotides) | |
| 440 | |
| 441 def isIncludeIn(self, interval): | |
| 442 return interval.include(self) | |
| 443 | |
| 444 | |
| 445 def include(self, interval): | |
| 446 """ | |
| 447 Whether this interval includes the other one | |
| 448 @param interval: object to be compared to | |
| 449 @type interval: class L{Interval<Interval>} | |
| 450 """ | |
| 451 if self.getChromosome() != interval.getChromosome(): | |
| 452 return False | |
| 453 return ((self.getStart() <= interval.getStart()) and (self.getEnd() >= interval.getEnd())) | |
| 454 | |
| 455 | |
| 456 def getDifference(self, interval, sameStrand = False): | |
| 457 """ | |
| 458 Get the difference between this cluster and another one | |
| 459 @param interval: object to be compared to | |
| 460 @type interval: class L{Interval<Interval>} | |
| 461 @param sameStrand: do the comparison iff the intervals are on the same strand | |
| 462 @type sameStrand: boolean | |
| 463 @return: a (possibly empty) list of intervals | |
| 464 """ | |
| 465 newInterval = Interval() | |
| 466 newInterval.copy(self) | |
| 467 if self.getChromosome() != interval.getChromosome(): | |
| 468 return [newInterval] | |
| 469 if not self.overlapWith(interval): | |
| 470 return [newInterval] | |
| 471 if sameStrand and self.getDirection() != interval.getDirection(): | |
| 472 return [newInterval] | |
| 473 intervals = [] | |
| 474 if self.getStart() < interval.getStart(): | |
| 475 newInterval = Interval() | |
| 476 newInterval.copy(self) | |
| 477 newInterval.setEnd(min(self.getEnd(), interval.getStart() - 1)) | |
| 478 intervals.append(newInterval) | |
| 479 if self.getEnd() > interval.getEnd(): | |
| 480 newInterval = Interval() | |
| 481 newInterval.copy(self) | |
| 482 newInterval.setStart(max(self.getStart(), interval.getEnd() + 1)) | |
| 483 intervals.append(newInterval) | |
| 484 return intervals | |
| 485 | |
| 486 | |
| 487 def getIntersection(self, interval): | |
| 488 """ | |
| 489 Get the intersection between this interval and another one | |
| 490 @param interval: object to be compared to | |
| 491 @type interval: class L{Interval<Interval>} | |
| 492 @return: an other interval | |
| 493 """ | |
| 494 if not self.overlapWith(interval): | |
| 495 return None | |
| 496 newInterval = Interval() | |
| 497 newInterval.setChromosome(self.getChromosome()) | |
| 498 newInterval.setDirection(self.getDirection()) | |
| 499 newInterval.setName("%s_intersect_%s" % (self.getName(), interval.getName())) | |
| 500 newInterval.setStart(max(self.getStart(), interval.getStart())) | |
| 501 newInterval.setEnd(min(self.getEnd(), interval.getEnd())) | |
| 502 return newInterval | |
| 503 | |
| 504 | |
| 505 def getDistance(self, interval): | |
| 506 """ | |
| 507 Get the distance between two intervals (a non-negative value) | |
| 508 @param interval: another interval | |
| 509 @type interval: class L{Interval<Interval>} | |
| 510 """ | |
| 511 if self.overlapWith(interval): | |
| 512 return 0 | |
| 513 if self.getChromosome() != interval.getChromosome(): | |
| 514 raise Exception("Cannot get the distance between %s and %s" % (str(self), str(interval))) | |
| 515 return min(abs(self.getStart() - interval.getEnd()), abs(self.getEnd() - interval.getStart())) | |
| 516 | |
| 517 | |
| 518 def getRelativeDistance(self, interval): | |
| 519 """ | |
| 520 Get the distance between two intervals (negative if first interval is before) | |
| 521 @param interval: another interval | |
| 522 @type interval: class L{Interval<Interval>} | |
| 523 """ | |
| 524 if self.overlapWith(interval): | |
| 525 return 0 | |
| 526 if self.getChromosome() != interval.getChromosome(): | |
| 527 raise Exception("Cannot get the distance between %s and %s" % (str(self), str(interval))) | |
| 528 if self.getEnd() < interval.getStart(): | |
| 529 distance = interval.getStart() - self.getEnd() | |
| 530 else: | |
| 531 distance = interval.getEnd() - self.getStart() | |
| 532 distance *= self.getDirection() | |
| 533 return distance | |
| 534 | |
| 535 | |
| 536 def merge(self, interval, normalization = False): | |
| 537 """ | |
| 538 Merge two intervals | |
| 539 @param interval: another interval | |
| 540 @type interval: class L{Interval<Interval>} | |
| 541 @param normalization: whether the sum of the merge should be normalized wrt the number of mappings of each elements | |
| 542 @type normalization: boolean | |
| 543 """ | |
| 544 if self.getChromosome() != interval.getChromosome(): | |
| 545 raise Exception("Cannot merge '%s' and '%s' for they are on different chromosomes." % (str(self), str(interval))) | |
| 546 direction = None | |
| 547 if self.getStart() == self.getEnd(): | |
| 548 direction = interval.getDirection() | |
| 549 elif interval.getStart() == interval.getEnd(): | |
| 550 direction = self.getDirection() | |
| 551 elif self.getDirection() != interval.getDirection(): | |
| 552 raise Exception("Cannot merge '%s' and '%s' for they are on different strands." % (str(self), str(interval))) | |
| 553 self.setStart(min(self.getStart(), interval.getStart())) | |
| 554 self.setEnd(max(self.getEnd(), interval.getEnd())) | |
| 555 if direction != None: | |
| 556 self.setDirection(direction) | |
| 557 nbElements = 0.0 | |
| 558 for element in (self, interval): | |
| 559 for tagName in ("nbElements", "nbOccurrences"): | |
| 560 if tagName not in element.getTagNames(): | |
| 561 element.setTagValue(tagName, 1) | |
| 562 nbElements += float(element.getTagValue("nbElements")) / float(element.getTagValue("nbOccurrences")) if normalization else float(element.getTagValue("nbElements")) | |
| 563 self.setTagValue("nbElements", nbElements) | |
| 564 self.bin = None | |
| 565 for tagName in ("identity", "nbOccurrences", "occurrence", "nbMismatches", "nbGaps", "rank", "evalue", "bestRegion"): | |
| 566 if tagName in self.getTagNames(): | |
| 567 del self.tags[tagName] | |
| 568 | |
| 569 | |
| 570 def getBin(self): | |
| 571 """ | |
| 572 Get the bin of the interval | |
| 573 Computed on the fly | |
| 574 """ | |
| 575 if self.bin == None: | |
| 576 self.bin = getBin(self.getStart(), self.getEnd()) | |
| 577 return self.bin | |
| 578 | |
| 579 | |
| 580 def getBins(self): | |
| 581 """ | |
| 582 Get all the bin this interval could fall into | |
| 583 """ | |
| 584 return getOverlappingBins(self.getStart(), self.getEnd()) | |
| 585 | |
| 586 | |
| 587 def getSqlVariables(cls): | |
| 588 """ | |
| 589 Get the properties of the object that should be saved in a database | |
| 590 """ | |
| 591 variables = ["name", "chromosome", "start", "end", "direction", "tags", "bin"] | |
| 592 return variables | |
| 593 getSqlVariables = classmethod(getSqlVariables) | |
| 594 | |
| 595 | |
| 596 def setSqlValues(self, array): | |
| 597 """ | |
| 598 Set the values of the properties of this object as given by a results line of a SQL query | |
| 599 """ | |
| 600 self.id = array[0] | |
| 601 self.name = array[1].strip("'") | |
| 602 self.setChromosome(array[2].strip("'")) | |
| 603 self.setStart(array[3]) | |
| 604 self.setEnd(array[4]) | |
| 605 self.setDirection(array[5]) | |
| 606 self.setTagValues(array[6].strip("'"), ";", "=") | |
| 607 self.bin = array[7] | |
| 608 | |
| 609 | |
| 610 def getSqlValues(self): | |
| 611 """ | |
| 612 Get the values of the properties that should be saved in a database | |
| 613 """ | |
| 614 values = dict() | |
| 615 values["name"] = self.name | |
| 616 values["chromosome"] = self.getChromosome() | |
| 617 values["start"] = self.getStart() | |
| 618 values["end"] = self.getEnd() | |
| 619 values["direction"] = self.getDirection() | |
| 620 values["tags"] = self.getTagValues(";", "=") | |
| 621 values["bin"] = self.getBin() | |
| 622 return values | |
| 623 | |
| 624 | |
| 625 def getSqlTypes(cls): | |
| 626 """ | |
| 627 Get the values of the properties that should be saved in a database | |
| 628 """ | |
| 629 types = dict() | |
| 630 types["name"] = "varchar" | |
| 631 types["chromosome"] = "varchar" | |
| 632 types["start"] = "int" | |
| 633 types["end"] = "int" | |
| 634 types["direction"] = "tinyint" | |
| 635 types["tags"] = "varchar" | |
| 636 types["bin"] = "int" | |
| 637 return types | |
| 638 getSqlTypes = classmethod(getSqlTypes) | |
| 639 | |
| 640 | |
| 641 def getSqlSizes(cls): | |
| 642 """ | |
| 643 Get the sizes of the properties that should be saved in a database | |
| 644 """ | |
| 645 sizes = dict() | |
| 646 sizes["name"] = 255 | |
| 647 sizes["chromosome"] = 255 | |
| 648 sizes["start"] = 11 | |
| 649 sizes["end"] = 11 | |
| 650 sizes["direction"] = 4 | |
| 651 sizes["tags"] = 1023 | |
| 652 sizes["bin"] = 11 | |
| 653 return sizes | |
| 654 getSqlSizes = classmethod(getSqlSizes) | |
| 655 | |
| 656 | |
| 657 def printCoordinates(self): | |
| 658 """ | |
| 659 Print start and end positions (depending on the direction of the interval) | |
| 660 """ | |
| 661 if self.getDirection() == 1: | |
| 662 return "%d-%d" % (self.getStart(), self.getEnd()) | |
| 663 else: | |
| 664 return "%d-%d" % (self.getEnd(), self.getStart()) | |
| 665 | |
| 666 | |
| 667 def extractSequence(self, parser): | |
| 668 """ | |
| 669 Get the sequence corresponding to this interval | |
| 670 @param parser: a parser to a FASTA file | |
| 671 @type parser: class L{SequenceListParser<SequenceListParser>} | |
| 672 @return : a instance of L{Sequence<Sequence>} | |
| 673 """ | |
| 674 return parser.getSubSequence(self.getChromosome(), self.getStart(), self.getEnd(), self.getDirection(), self.name) | |
| 675 | |
| 676 | |
| 677 def extractWigData(self, parser): | |
| 678 """ | |
| 679 Get the data retrieved from a wig file | |
| 680 @param parser: a parser class to a WIG file | |
| 681 @type parser: class L{WigParser<WigParser>} | |
| 682 """ | |
| 683 data = parser.getRange(self.getChromosome(), self.getStart(), self.getEnd()) | |
| 684 if self.getDirection() == -1: | |
| 685 if parser.strands: | |
| 686 newData = {} | |
| 687 for strand in data: | |
| 688 data[strand].reverse() | |
| 689 newData[-strand] = data[strand] | |
| 690 data = newData | |
| 691 else: | |
| 692 data.reverse() | |
| 693 return data | |
| 694 | |
| 695 | |
| 696 def __str__(self): | |
| 697 """ | |
| 698 Output a simple representation of this interval | |
| 699 """ | |
| 700 direction = "+" | |
| 701 if self.getDirection() == -1: | |
| 702 direction = "-" | |
| 703 string = "%s:%d-%d (%s)" % (self.getChromosome(), self.getStart(), self.getEnd(), direction) | |
| 704 if self.name != "": | |
| 705 string = "(%s) %s" % (self.name, string) | |
| 706 return string | |
| 707 |
