36
|
1 #
|
|
2 # Copyright INRA-URGI 2009-2010
|
|
3 #
|
|
4 # This software is governed by the CeCILL license under French law and
|
|
5 # abiding by the rules of distribution of free software. You can use,
|
|
6 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
7 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
8 # "http://www.cecill.info".
|
|
9 #
|
|
10 # As a counterpart to the access to the source code and rights to copy,
|
|
11 # modify and redistribute granted by the license, users are provided only
|
|
12 # with a limited warranty and the software's author, the holder of the
|
|
13 # economic rights, and the successive licensors have only limited
|
|
14 # liability.
|
|
15 #
|
|
16 # In this respect, the user's attention is drawn to the risks associated
|
|
17 # with loading, using, modifying and/or developing or reproducing the
|
|
18 # software by the user in light of its specific status of free software,
|
|
19 # that may mean that it is complicated to manipulate, and that also
|
|
20 # therefore means that it is reserved for developers and experienced
|
|
21 # professionals having in-depth computer knowledge. Users are therefore
|
|
22 # encouraged to load and test the software's suitability as regards their
|
|
23 # requirements in conditions enabling the security of their systems and/or
|
|
24 # data to be ensured and, more generally, to use and operate it in the
|
|
25 # same conditions as regards security.
|
|
26 #
|
|
27 # The fact that you are presently reading this means that you have had
|
|
28 # knowledge of the CeCILL license and that you accept its terms.
|
|
29 #
|
|
30
|
|
31 from SMART.Java.Python.structure.Bins import *
|
|
32 from commons.core.coord.Range import Range
|
|
33
|
|
class Interval(Range):
    """
    Store a genomic interval
    @ivar name:      name of the interval [optional]
    @type name:      string
    @ivar id:        id of the interval [optional]
    @type id:        int
    @ivar bin:       bin in which the interval should be if stored in a database [computed]
    @type bin:       int
    @ivar tags:      information about the transcript [optional]
    @type tags:      dict
    @ivar verbosity: verbosity
    @type verbosity: int [default: 0]
    """

    # Every textual spelling of a strand accepted by setDirection(), keyed by
    # its lower-cased form.
    _DIRECTIONS = {"+": 1, "-": -1, "1": 1, "-1": -1, "plus": 1, "minus": -1}

    def __init__(self, interval = None, verbosity = 0):
        """
        Constructor
        @param interval:  interval to be copied
        @type  interval:  class L{Interval<Interval>}
        @param verbosity: verbosity
        @type  verbosity: int
        """
        Range.__init__(self)
        self.name = None
        self.id = None
        self.bin = None
        self.verbosity = verbosity
        self.tags = {}
        # Identity test: never route a comparison with None through __eq__/__ne__.
        if interval is not None:
            self.copy(interval)


    # Warning: getStart() and getEnd() return the minimum and the maximum
    # coordinate of the interval, whatever its strand.  The raw attributes are
    # stored in strand order instead:
    #   strand "+": self.start < self.end;  strand "-": self.start > self.end
    # A raw attribute equal to -1 is treated as "not set yet" by both accessors.
    def getStart(self):
        """
        Get the lowest coordinate of the interval
        @return: -1 if the start is unset, the raw start if only the end is unset, the minimum otherwise
        """
        if self.start == -1:
            return -1
        if self.end == -1:
            return self.start
        return self.getMin()


    def getEnd(self):
        """
        Get the highest coordinate of the interval
        @return: -1 if the end is unset, the raw end if only the start is unset, the maximum otherwise
        """
        if self.end == -1:
            return -1
        if self.start == -1:
            return self.end
        return self.getMax()


    def getChromosome(self):
        """
        Get the chromosome (the sequence name of the underlying range)
        """
        return self.getSeqname()


    def getDirection(self):
        """
        Get the direction of the interval
        @return: 1 if the strand is "+", -1 otherwise
        """
        return 1 if self.getStrand() == "+" else -1


    def getName(self):
        """
        Get the name of the interval
        """
        return self.name


    def isSet(self):
        """
        Check if the interval is set
        @return: True iff both end points hold a real coordinate (neither None nor the -1 "unset" marker)
        """
        # The previous test (start == None and end == None) was inverted: it
        # answered True only for an interval without any coordinate.
        unset = (None, -1)
        return self.getStart() not in unset and self.getEnd() not in unset


    def copy(self, interval):
        """
        Copy method
        @param interval: interval to be copied
        @type  interval: class L{Interval<Interval>}
        """
        # Coordinates first, direction afterwards: the setters order the
        # bounds according to the direction in force when they are called.
        self.setStart(interval.getStart())
        self.setEnd(interval.getEnd())
        self.setChromosome(interval.getChromosome())
        self.setDirection(interval.getDirection())
        self.name = interval.name
        self.id = interval.id
        self.bin = interval.bin
        # Shallow copy: the two intervals must not share the same dictionary.
        self.tags = dict(interval.tags)
        self.verbosity = interval.verbosity


    def setName(self, name):
        """
        Set the name (truncated to its first 100 characters)
        @param name: name of the interval
        @type  name: string
        """
        if len(name) > 100:
            name = name[:100]
        self.name = name


    def setChromosome(self, chromosome=""):
        """
        Set the chromosome
        Dots and pipes are replaced by underscores; an empty value resets the chromosome to None
        @param chromosome: chromosome on which the interval is
        @type  chromosome: string
        """
        if not chromosome:
            self.seqname = None
        else:
            self.seqname = chromosome.replace(".", "_").replace("|", "_")


    def _orderBounds(self, direction):
        """
        Store the two raw coordinates in the order required by the strand
        @param direction: direction of the interval
        @type  direction: int (1 or -1)
        """
        low, high = min(self.start, self.end), max(self.start, self.end)
        if direction == 1:
            self.start, self.end = low, high
        else:
            self.start, self.end = high, low


    def setStart(self, start):
        """
        Set the start point
        Possibly reset bin
        @param start: start point of the interval
        @type  start: int
        """
        self.bin = None
        # Read the direction before touching the coordinates.
        direction = self.getDirection()
        if self.start == -1:
            self.start = start
        elif self.end == -1:
            self.end = start
        elif direction == 1:
            self.start = start
        else:
            # Reverse strand: the lowest coordinate is kept in self.end.
            self.end = start
        self._orderBounds(direction)


    def setEnd(self, end):
        """
        Set the end point
        Possibly reset bin
        @param end: end point of the interval
        @type  end: int
        """
        self.bin = None
        # Read the direction before touching the coordinates.
        direction = self.getDirection()
        if self.end == -1:
            self.end = end
        elif self.start == -1:
            self.start = end
        elif direction == 1:
            self.end = end
        else:
            # Reverse strand: the highest coordinate is kept in self.start.
            self.start = end
        self._orderBounds(direction)


    def setSize(self, size):
        """
        Possibly set the missing end point so that the interval has the given size
        Nothing is done unless exactly one of the two raw coordinates is None
        @param size: size of the transcript
        @type  size: int
        """
        # The previous code ignored 'size' and used self.getSize(), which
        # cannot be computed while one end point is still missing.
        # NOTE(review): the accessors of this class use -1, not None, as the
        # "unset" marker -- confirm which one Range really stores.
        if self.end is None and self.start is not None:
            self.setEnd(self.start + size - 1)
        elif self.start is None and self.end is not None:
            self.setStart(self.end - size + 1)


    def getSize(self):
        """
        Get the size (number of positions, both end points included)
        """
        return self.getEnd() - self.getStart() + 1


    def _setDirection(self, direction):
        """
        Set the direction of the interval (connection to Range)
        @param direction: direction of the transcript (+ / -)
        @type  direction: int (1 or -1)
        """
        # A negative product means that the requested direction differs from
        # the current one.
        if direction * self.getDirection() < 0:
            self.reverse()


    def setDirection(self, direction):
        """
        Set the direction of the interval
        Possibly parse different formats: any non-zero int (only its sign is used), "+", "-", "1", "-1", "plus" or "minus" (the last two in any case)
        @param direction: direction of the transcript (+ / -)
        @type  direction: int or string
        @raise Exception: if the direction cannot be understood (0 included)
        """
        sign = None
        if isinstance(direction, int):
            # 0 has no sign; it used to end in a ZeroDivisionError.
            if direction != 0:
                sign = 1 if direction > 0 else -1
        elif isinstance(direction, str):
            sign = self._DIRECTIONS.get(direction.lower())
        if sign is None:
            raise Exception("Cannot understand direction %s" % (direction,))
        self._setDirection(sign)


    def extendStart(self, size):
        """
        Extend the interval by the 5' end
        On the direct strand the start never goes below 0
        @param size: the size to be extended
        @type  size: int
        """
        if self.getDirection() == 1:
            self.setStart(max(0, self.getStart() - size))
        else:
            self.setEnd(self.getEnd() + size)
        self.bin = None


    def extendEnd(self, size):
        """
        Extend the interval by the 3' end
        On the reverse strand the start never goes below 0
        @param size: the size to be extended
        @type  size: int
        """
        if self.getDirection() == 1:
            self.setEnd(self.getEnd() + size)
        else:
            self.setStart(max(0, self.getStart() - size))
        self.bin = None


    def restrictStart(self, size = 1):
        """
        Restrict the interval to at most some nucleotides, counted from its 5' end
        @param size: the size to be restricted to
        @type  size: int
        """
        if self.getDirection() == 1:
            self.setEnd(min(self.getEnd(), self.getStart() + size - 1))
        else:
            self.setStart(max(self.getStart(), self.getEnd() - size + 1))
        self.bin = None


    def restrictEnd(self, size = 1):
        """
        Restrict the interval to at most some nucleotides, counted from its 3' end
        @param size: the size to be restricted to
        @type  size: int
        """
        if self.getDirection() == 1:
            self.setStart(max(self.getStart(), self.getEnd() - size + 1))
        else:
            self.setEnd(min(self.getEnd(), self.getStart() + size - 1))
        self.bin = None


    def setTagValue(self, name, value):
        """
        Set a tag
        @param name:  name of the tag
        @type  name:  string
        @param value: value of the tag
        @type  value: int or string
        """
        self.tags[name] = value


    def getTagNames(self):
        """
        Get all the names of the tags
        """
        return self.tags.keys()


    def getTagValue(self, tag):
        """
        Get the value of a tag
        @param tag: name of a tag
        @type  tag: string
        @return:    the value, or None if the tag does not exist
        """
        if tag not in self.tags:
            return None
        return self.tags[tag]


    def getTagValues(self, tagSep = "; ", fieldSep = " ", surrounder = ""):
        """
        Get the formatted tag values
        Tags whose value is None are skipped; the name of the interval, if any, is appended as a "Name" tag
        @param tagSep:     separator between tags
        @type  tagSep:     string
        @param fieldSep:   separator between tag name and tag value
        @type  fieldSep:   string
        @param surrounder: string which optionally surround values
        @type  surrounder: string
        @raise Exception:  if a value is neither a string, an int nor a float
        """
        tags = []
        for name, value in self.tags.iteritems():
            if value is None:
                continue
            if isinstance(value, basestring):
                # Single quotes are backslash-escaped in string values.
                tags.append("%s%s%s%s%s" % (name, fieldSep, surrounder, value.replace("'", "\\'"), surrounder))
            elif type(value) is int:
                tags.append("%s%s%s%i%s" % (name, fieldSep, surrounder, value, surrounder))
            elif type(value) is float:
                tags.append("%s%s%s%f%s" % (name, fieldSep, surrounder, value, surrounder))
            else:
                # The value is not a string here: it has to be formatted, not
                # concatenated (concatenation raised a TypeError instead).
                raise Exception("Do not know how to print '%s'." % (value,))
        if self.getName() is not None:
            tags.append("%s%s%s%s%s" % ("Name", fieldSep, surrounder, self.getName(), surrounder))
        return tagSep.join(tags)


    def setTagValues(self, tags, tagSep = "; ", fieldSep = " "):
        """
        Set the tag values using given string
        An empty string removes every tag; otherwise the parsed tags are added to the existing ones
        A "Name" tag sets the name of the interval instead of a tag
        @param tags:      the tags, concatenated
        @type  tags:      string
        @param tagSep:    separator between tags
        @type  tagSep:    string
        @param fieldSep:  separator between tag name and tag value
        @type  fieldSep:  string
        @raise Exception: if a field does not contain the field separator
        """
        if tags == "":
            self.tags = {}
            return
        for splittedTag in tags.split(tagSep):
            if fieldSep not in splittedTag:
                raise Exception("Weird field '%s' in tags '%s'" % (splittedTag, tags))
            tag, value = splittedTag.split(fieldSep, 1)
            if tag == "Name":
                self.setName(value)
                continue
            # Store the most specific type: int, else float, else the raw string.
            for convert in (int, float):
                try:
                    value = convert(value)
                except ValueError:
                    continue
                break
            self.tags[tag] = value


    def deleteTag(self, tag):
        """
        Remove a tag (nothing happens if the tag does not exist)
        @param tag: the tag to be removed
        @type  tag: string
        """
        self.tags.pop(tag, None)


    def setNbOccurrences(self, nbOccurrences):
        """
        Set the number of occurrences of the interval
        @param nbOccurrences: number of occurrences of the interval
        @type  nbOccurrences: int
        """
        self.setTagValue("nbOccurrences", nbOccurrences)


    def setOccurrence(self, occurrence):
        """
        Set the occurrence of this interval
        @param occurrence: an occurrence for this transcript
        @type  occurrence: int
        """
        self.setTagValue("occurrence", occurrence)


    def __eq__(self, interval):
        """
        Whether two intervals are equal (same chromosome, start, end and direction)
        @param interval: object to be compared to
        @type  interval: class L{Interval<Interval>}
        """
        if not interval:
            return False
        return self.getChromosome() == interval.getChromosome() and self.getStart() == interval.getStart() and self.getEnd() == interval.getEnd() and self.getDirection() == interval.getDirection()


    def overlapWith(self, interval, nbNucleotides = 1):
        """
        Whether two intervals overlap
        @param interval:      object to be compared to
        @type  interval:      class L{Interval<Interval>}
        @param nbNucleotides: minimum number of nucleotides to declare an overlap
        @type  nbNucleotides: int
        """
        if self.getChromosome() != interval.getChromosome():
            return False
        sharedSize = min(self.getEnd(), interval.getEnd()) - max(self.getStart(), interval.getStart()) + 1
        return sharedSize >= nbNucleotides


    def isIncludeIn(self, interval):
        """
        Whether this interval is included in the other one
        @param interval: object to be compared to
        @type  interval: class L{Interval<Interval>}
        """
        return interval.include(self)


    def include(self, interval):
        """
        Whether this interval includes the other one
        @param interval: object to be compared to
        @type  interval: class L{Interval<Interval>}
        """
        if self.getChromosome() != interval.getChromosome():
            return False
        return ((self.getStart() <= interval.getStart()) and (self.getEnd() >= interval.getEnd()))


    def getDifference(self, interval, sameStrand = False):
        """
        Get the difference between this cluster and another one
        @param interval:   object to be compared to
        @type  interval:   class L{Interval<Interval>}
        @param sameStrand: do the comparison iff the intervals are on the same strand
        @type  sameStrand: boolean
        @return:           a (possibly empty) list of intervals
        """
        # Nothing to subtract: the answer is a single copy of this interval.
        untouched = (self.getChromosome() != interval.getChromosome()
                     or not self.overlapWith(interval)
                     or (sameStrand and self.getDirection() != interval.getDirection()))
        if untouched:
            newInterval = Interval()
            newInterval.copy(self)
            return [newInterval]
        intervals = []
        # Part of this interval located before the other one.
        if self.getStart() < interval.getStart():
            newInterval = Interval()
            newInterval.copy(self)
            newInterval.setEnd(min(self.getEnd(), interval.getStart() - 1))
            intervals.append(newInterval)
        # Part of this interval located after the other one.
        if self.getEnd() > interval.getEnd():
            newInterval = Interval()
            newInterval.copy(self)
            newInterval.setStart(max(self.getStart(), interval.getEnd() + 1))
            intervals.append(newInterval)
        return intervals


    def getIntersection(self, interval):
        """
        Get the intersection between this interval and another one
        The result takes the chromosome and the direction of this interval
        @param interval: object to be compared to
        @type  interval: class L{Interval<Interval>}
        @return:         an other interval, or None if the intervals do not overlap
        """
        if not self.overlapWith(interval):
            return None
        newInterval = Interval()
        newInterval.setChromosome(self.getChromosome())
        newInterval.setDirection(self.getDirection())
        newInterval.setName("%s_intersect_%s" % (self.getName(), interval.getName()))
        newInterval.setStart(max(self.getStart(), interval.getStart()))
        newInterval.setEnd(min(self.getEnd(), interval.getEnd()))
        return newInterval


    def getDistance(self, interval):
        """
        Get the distance between two intervals (a non-negative value)
        @param interval:  another interval
        @type  interval:  class L{Interval<Interval>}
        @return:          0 if the intervals overlap
        @raise Exception: if the intervals are on different chromosomes
        """
        if self.overlapWith(interval):
            return 0
        if self.getChromosome() != interval.getChromosome():
            raise Exception("Cannot get the distance between %s and %s" % (str(self), str(interval)))
        return min(abs(self.getStart() - interval.getEnd()), abs(self.getEnd() - interval.getStart()))


    def getRelativeDistance(self, interval):
        """
        Get the signed distance between two intervals
        On the direct strand, the value is positive if this interval ends before the other one starts, negative otherwise; the sign is flipped if this interval is on the reverse strand
        @param interval:  another interval
        @type  interval:  class L{Interval<Interval>}
        @return:          0 if the intervals overlap
        @raise Exception: if the intervals are on different chromosomes
        """
        if self.overlapWith(interval):
            return 0
        if self.getChromosome() != interval.getChromosome():
            raise Exception("Cannot get the distance between %s and %s" % (str(self), str(interval)))
        if self.getEnd() < interval.getStart():
            distance = interval.getStart() - self.getEnd()
        else:
            distance = interval.getEnd() - self.getStart()
        distance *= self.getDirection()
        return distance


    def merge(self, interval, normalization = False):
        """
        Merge two intervals: this interval is extended so that it also covers the other one
        The "nbElements" tag becomes the sum of the two counts, and the tags which describe a single mapping are removed
        Note that missing "nbElements" and "nbOccurrences" tags are created (with value 1) in the other interval too
        @param interval:      another interval
        @type  interval:      class L{Interval<Interval>}
        @param normalization: whether the sum of the merge should be normalized wrt the number of mappings of each elements
        @type  normalization: boolean
        @raise Exception:     if the intervals are on different chromosomes, or on different strands
        """
        if self.getChromosome() != interval.getChromosome():
            raise Exception("Cannot merge '%s' and '%s' for they are on different chromosomes." % (str(self), str(interval)))
        # An interval whose start equals its end is not used for the strand
        # test: the direction is taken from the other interval instead.
        direction = None
        if self.getStart() == self.getEnd():
            direction = interval.getDirection()
        elif interval.getStart() == interval.getEnd():
            direction = self.getDirection()
        elif self.getDirection() != interval.getDirection():
            raise Exception("Cannot merge '%s' and '%s' for they are on different strands." % (str(self), str(interval)))
        self.setStart(min(self.getStart(), interval.getStart()))
        self.setEnd(max(self.getEnd(), interval.getEnd()))
        if direction is not None:
            self.setDirection(direction)
        nbElements = 0.0
        for element in (self, interval):
            for tagName in ("nbElements", "nbOccurrences"):
                if tagName not in element.getTagNames():
                    element.setTagValue(tagName, 1)
            count = float(element.getTagValue("nbElements"))
            if normalization:
                count = count / float(element.getTagValue("nbOccurrences"))
            nbElements += count
        self.setTagValue("nbElements", nbElements)
        self.bin = None
        # These tags are dropped from the merged interval.
        for tagName in ("identity", "nbOccurrences", "occurrence", "nbMismatches", "nbGaps", "rank", "evalue", "bestRegion"):
            if tagName in self.getTagNames():
                del self.tags[tagName]


    def getBin(self):
        """
        Get the bin of the interval
        Computed on the fly, then kept until a coordinate changes
        """
        if self.bin is None:
            self.bin = getBin(self.getStart(), self.getEnd())
        return self.bin


    def getBins(self):
        """
        Get all the bin this interval could fall into
        """
        return getOverlappingBins(self.getStart(), self.getEnd())


    @classmethod
    def getSqlVariables(cls):
        """
        Get the properties of the object that should be saved in a database
        """
        variables = ["name", "chromosome", "start", "end", "direction", "tags", "bin"]
        return variables


    def setSqlValues(self, array):
        """
        Set the values of the properties of this object as given by a results line of a SQL query
        @param array: id, name, chromosome, start, end, direction, tags and bin, in this order
        @type  array: list
        """
        self.id = array[0]
        self.name = array[1].strip("'")
        self.setChromosome(array[2].strip("'"))
        self.setStart(array[3])
        self.setEnd(array[4])
        self.setDirection(array[5])
        self.setTagValues(array[6].strip("'"), ";", "=")
        # Set last: the coordinate setters reset the bin.
        self.bin = array[7]


    def getSqlValues(self):
        """
        Get the values of the properties that should be saved in a database
        @return: a dict, from the name of the property to its value
        """
        values = {
            "name":       self.name,
            "chromosome": self.getChromosome(),
            "start":      self.getStart(),
            "end":        self.getEnd(),
            "direction":  self.getDirection(),
            "tags":       self.getTagValues(";", "="),
            "bin":        self.getBin(),
        }
        return values


    @classmethod
    def getSqlTypes(cls):
        """
        Get the SQL types of the properties that should be saved in a database
        @return: a dict, from the name of the property to its SQL type
        """
        types = {
            "name":       "varchar",
            "chromosome": "varchar",
            "start":      "int",
            "end":        "int",
            "direction":  "tinyint",
            "tags":       "varchar",
            "bin":        "int",
        }
        return types


    @classmethod
    def getSqlSizes(cls):
        """
        Get the sizes of the properties that should be saved in a database
        @return: a dict, from the name of the property to its size
        """
        sizes = {
            "name":       255,
            "chromosome": 255,
            "start":      11,
            "end":        11,
            "direction":  4,
            "tags":       1023,
            "bin":        11,
        }
        return sizes


    def printCoordinates(self):
        """
        Print start and end positions (depending on the direction of the interval)
        @return: "start-end" on the direct strand, "end-start" on the reverse strand
        """
        if self.getDirection() == 1:
            return "%d-%d" % (self.getStart(), self.getEnd())
        else:
            return "%d-%d" % (self.getEnd(), self.getStart())


    def extractSequence(self, parser):
        """
        Get the sequence corresponding to this interval
        @param parser: a parser to a FASTA file
        @type  parser: class L{SequenceListParser<SequenceListParser>}
        @return:       a instance of L{Sequence<Sequence>}
        """
        return parser.getSubSequence(self.getChromosome(), self.getStart(), self.getEnd(), self.getDirection(), self.name)


    def extractWigData(self, parser):
        """
        Get the data retrieved from a wig file
        On the reverse strand, the values are given in reverse order (and, for a stranded parser, the strand keys are negated)
        @param parser: a parser class to a WIG file
        @type  parser: class L{WigParser<WigParser>}
        """
        data = parser.getRange(self.getChromosome(), self.getStart(), self.getEnd())
        if self.getDirection() == -1:
            if parser.strands:
                newData = {}
                for strand in data:
                    data[strand].reverse()
                    newData[-strand] = data[strand]
                data = newData
            else:
                data.reverse()
        return data


    def __str__(self):
        """
        Output a simple representation of this interval
        """
        direction = "+"
        if self.getDirection() == -1:
            direction = "-"
        string = "%s:%d-%d (%s)" % (self.getChromosome(), self.getStart(), self.getEnd(), direction)
        # Only an empty name is skipped: an interval which was never named
        # (name is None) is printed with a "(None)" prefix.
        if self.name != "":
            string = "(%s) %s" % (self.name, string)
        return string
|
|
706
|