comparison commons/core/coord/Range.py @ 38:2c0c0a89fad7

Uploaded
author m-zytnicki
date Thu, 02 May 2013 09:56:47 -0400
parents 769e306b7933
children
comparison
equal deleted inserted replaced
37:d22fadc825e3 38:2c0c0a89fad7
1 # Copyright INRA (Institut National de la Recherche Agronomique)
2 # http://www.inra.fr
3 # http://urgi.versailles.inra.fr
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30
31
## Record a region on a given sequence
#
# A region is a sequence name plus two coordinates. The order of the two
# coordinates encodes the strand: start <= end is the direct strand ('+'),
# start > end is the reverse strand ('-').
#
class Range(object):

    ## Constructor
    #
    # @param seqname the name of the sequence
    # @param start the start coordinate (converted with int())
    # @param end the end coordinate (converted with int())
    #
    def __init__(self, seqname="", start=-1, end=-1):
        self.seqname = seqname
        self.start = int(start)
        self.end = int(end)

    ## Equal operator
    #
    # @param o the object to compare with, normally a Range instance
    # @return True if seqname, start and end are all equal, False otherwise
    #
    # @note an object lacking one of these attributes (e.g. None) compares
    #       unequal instead of raising AttributeError
    #
    def __eq__(self, o):
        try:
            return (self.seqname == o.seqname
                    and self.start == o.start
                    and self.end == o.end)
        except AttributeError:
            return False

    ## Unequal operator
    #
    # @param o the object to compare with, normally a Range instance
    # @return the negation of __eq__
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Convert the object into a string
    #
    # @note used in 'print myObject'
    #
    def __str__(self):
        return self.toString()

    ## Convert the object into a string
    #
    # @note used in 'repr(myObject)' for debugging; same as toString() but
    #       with tabulations replaced by semicolons
    #
    def __repr__(self):
        return self.toString().replace("\t", ";")

    ## Set the start coordinate
    #
    # @param start the new start coordinate (stored as given, no conversion)
    #
    def setStart(self, start):
        self.start = start

    ## Set the end coordinate
    #
    # @param end the new end coordinate (stored as given, no conversion)
    #
    def setEnd(self, end):
        self.end = end

    ## Set the sequence name
    #
    # @param seqName the new sequence name
    #
    def setSeqName(self, seqName):
        self.seqname = seqName

    ## Reset the attributes to the constructor defaults ("", -1, -1)
    #
    def reset(self):
        self.seqname = ""
        self.start = -1
        self.end = -1

    ## Return the attributes as a formatted string
    #
    # @return 'seqname<tab>start<tab>end'
    #
    def toString(self):
        fields = ["%s" % (self.seqname), "%d" % (self.start), "%d" % (self.end)]
        return "\t".join(fields)

    ## Show the attributes
    #
    # @note writes toString() on the standard output
    #
    def show(self):
        # Single-argument call form: prints the same text on Python 2 and 3.
        print(self.toString())

    ## Return seqname
    #
    def getSeqname(self):
        return self.seqname

    ## Return the start coordinate
    #
    def getStart(self):
        return self.start

    ## Return the end coordinate
    #
    def getEnd(self):
        return self.end

    ## Return the lowest value between start and end coordinates
    #
    def getMin(self):
        return min(self.start, self.end)

    ## Return the greatest value between start and end attributes
    #
    def getMax(self):
        return max(self.start, self.end)

    ## Return True if the instance is on the direct strand, False otherwise
    #
    # @note start == end counts as the direct strand
    #
    def isOnDirectStrand(self):
        return self.start <= self.end

    ## Return True if the instance is on the reverse strand, False otherwise
    #
    def isOnReverseStrand(self):
        return not self.isOnDirectStrand()

    ## Return '+' if the instance is on the direct strand, '-' otherwise
    #
    def getStrand(self):
        if self.isOnDirectStrand():
            return '+'
        return '-'

    ## Exchange start and end coordinates
    #
    def reverse(self):
        self.start, self.end = self.end, self.start

    ## Return the length of the instance
    #
    # @warning old name is 'length'
    #
    # @note both coordinates are counted, so start == end gives 1
    #
    def getLength(self):
        return int(abs(self.start - self.end)) + 1

    ## Return True if the instance is empty, False otherwise
    #
    # @note empty means start == end == 0 or start == end == -1
    #
    def isEmpty(self):
        return self.start == self.end and self.start in (0, -1)

    ## Set attributes from tuple
    #
    # @param tuple a tuple with (name,start,end); start and end are
    #        converted with int()
    #
    def setFromTuple(self, tuple):
        # The parameter name shadows the builtin; it is kept because it is
        # part of the public signature.
        self.seqname = tuple[0]
        self.start = int(tuple[1])
        self.end = int(tuple[2])

    ## Set attributes from string
    #
    # @param string a string formatted like name<sep>start<sep>end
    # @param sep field separator
    #
    # @note a single trailing newline is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple(string.split(sep))

    ## Merge the instance with another Range instance
    #
    # @param o a Range instance
    #
    # @note nothing is done if the sequence names differ; otherwise the
    #       instance is extended to span both ranges and keeps its own strand
    #
    def merge(self, o):
        if self.seqname != o.seqname:
            return
        lowest = min(self.getMin(), o.getMin())
        greatest = max(self.getMax(), o.getMax())
        if self.isOnDirectStrand():
            self.start, self.end = lowest, greatest
        else:
            self.start, self.end = greatest, lowest

    ## Return True if the instance overlaps with another Range instance, False otherwise
    #
    # @param o a Range instance
    #
    # @note strands are ignored; sharing a single position is an overlap
    #
    def isOverlapping(self, o):
        if o.seqname != self.seqname:
            return False
        # Two closed intervals intersect iff each one begins before the
        # other one ends.
        return o.getMin() <= self.getMax() and o.getMax() >= self.getMin()

    ## Return the length of the overlap between the instance and another Range, 0 if no overlap
    #
    # @param o a Range instance
    #
    def getOverlapLength(self, o):
        if not self.isOverlapping(o):
            return 0
        # The shared region runs from the greatest min to the lowest max.
        sharedMin = max(self.getMin(), o.getMin())
        sharedMax = min(self.getMax(), o.getMax())
        return sharedMax - sharedMin + 1

    ## Return True if the instance is included within another Range, False otherwise
    #
    # @param o a Range instance
    #
    # @note the min (respectively max) coordinates can be equal
    #
    def isIncludedIn(self, o):
        if o.seqname != self.seqname:
            return False
        return self.getMin() >= o.getMin() and self.getMax() <= o.getMax()

    ## Return the distance between the facing ends of the instance and of another Range instance
    #
    # @param o a Range instance
    # @return 0 if both ranges overlap, -1 if they are on different strands,
    #         otherwise the difference between the two facing coordinates
    #         (adjacent ranges such as 1..10 and 11..20 give 1)
    #
    # @note sequence names are not compared here: ranges on different
    #       sequences are seen as non-overlapping and a coordinate
    #       difference is still returned
    #
    def getDistance(self, o):
        if self.isOnDirectStrand() != o.isOnDirectStrand():
            return -1
        if self.isOverlapping(o):
            return 0
        selfIsAfter = self.start > o.start
        if self.isOnDirectStrand():
            if selfIsAfter:
                return self.start - o.end
            return o.start - self.end
        # Reverse strand: 'end' is the lowest coordinate, 'start' the greatest.
        if selfIsAfter:
            return self.end - o.start
        return o.end - self.start

    ## Remove in the instance the region overlapping with another Range instance
    #
    # @param o a Range instance
    # @return a new Range on the same sequence; it holds the part located
    #         after 'o' when 'o' splits the instance in two, otherwise it is
    #         left unset (start = end = -1)
    #
    # @note the instance is modified in place and keeps its strand; if 'o'
    #       covers it entirely, its coordinates are both set to 0; if there
    #       is no overlap, it is left untouched
    #
    def diff(self, o):
        new_range = Range(self.seqname)
        if self.seqname != o.seqname or not self.isOverlapping(o):
            return new_range

        # Strand is read once, before any coordinate is modified.
        direct = self.isOnDirectStrand()
        imin, imax = self.getMin(), self.getMax()
        jmin, jmax = o.getMin(), o.getMax()

        # Order a (lowest, greatest) pair according to the instance strand.
        def oriented(lowest, greatest):
            if direct:
                return lowest, greatest
            return greatest, lowest

        remainsBefore = imin < jmin
        remainsAfter = imax > jmax
        if remainsBefore:
            self.start, self.end = oriented(imin, jmin - 1)
            if remainsAfter:
                new_range.start, new_range.end = oriented(jmax + 1, imax)
        elif remainsAfter:
            self.start, self.end = oriented(jmax + 1, imax)
        else:
            # 'o' covers the whole instance: nothing remains.
            self.start = 0
            self.end = 0
        return new_range

    ## Find the bin that contains the instance and compute its index
    #
    # @note Required for coordinate indexing via a hierarchical bin system
    #
    # @note levels 3, 4 and 5 are tried in turn with the module-level
    #       functions getBin() and getIdx(); level 6 is used when start and
    #       end never fall in the same bin
    #
    def findIdx(self):
        min_lvl = 3
        max_lvl = 6
        for bin_lvl in range(min_lvl, max_lvl):
            if getBin(self.start, bin_lvl) == getBin(self.end, bin_lvl):
                return getIdx(self.start, bin_lvl)
        return getIdx(self.start, max_lvl)

    ## Get a bin for fast database access
    #
    # @return bin number (float): the bin size plus the bin rank divided by 1e10
    #
    # @note bin sizes 10^3 to 10^7 are tried in turn and the first one where
    #       start and end share a bin is used; 10^8 is the fallback
    #
    def getBin(self):
        for exponent in range(3, 8):
            bin_lvl = pow(10, exponent)
            # Floor division keeps the Python 2 integer semantics of '/'.
            startRank = int(self.start // bin_lvl)
            if startRank == int(self.end // bin_lvl):
                return float(bin_lvl + (startRank / 1e10))
        bin_lvl = pow(10, 8)
        return float(bin_lvl + (int(self.start // bin_lvl) / 1e10))
344
345
346 # Functions
347
## Get the bin number of a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system
#
# @param val the coordinate
# @param bin_lvl the bin level; the bin size is 10 to the power bin_lvl
# @return the rank of the bin containing val (integer)
#
# @note floor division is used so that the result is the same on Python 2
#       and Python 3, including for negative coordinates
#
def getBin(val, bin_lvl):
    bin_size = pow(10, bin_lvl)
    return int(val // bin_size)
353
## Get an index from a coordinate according to the bin level. Required for coordinate indexing with hierarchical bin system
#
# @param val the coordinate
# @param bin_lvl the bin level, counted from 3 (lowest) to 6 (highest)
# @return the index (integer): (bin_lvl - 3 + 1) * 10^6, plus the bin number
#         of val (from getBin) when bin_lvl is below 6
#
def getIdx(val, bin_lvl):
    min_lvl = 3
    max_lvl = 6
    # Each level owns its own block of 10^max_lvl indexes.
    level_offset = (bin_lvl - min_lvl + 1) * pow(10, max_lvl)
    if bin_lvl >= max_lvl:
        # At the top level and above, the coordinate is not used.
        return int(level_offset)
    return int(level_offset + getBin(val, bin_lvl))