Mercurial > repos > yufei-luo > s_mart
comparison commons/core/coord/Range.py @ 38:2c0c0a89fad7
Uploaded
author | m-zytnicki |
---|---|
date | Thu, 02 May 2013 09:56:47 -0400 |
parents | 769e306b7933 |
children |
comparison
equal
deleted
inserted
replaced
37:d22fadc825e3 | 38:2c0c0a89fad7 |
---|---|
1 # Copyright INRA (Institut National de la Recherche Agronomique) | |
2 # http://www.inra.fr | |
3 # http://urgi.versailles.inra.fr | |
4 # | |
5 # This software is governed by the CeCILL license under French law and | |
6 # abiding by the rules of distribution of free software. You can use, | |
7 # modify and/ or redistribute the software under the terms of the CeCILL | |
8 # license as circulated by CEA, CNRS and INRIA at the following URL | |
9 # "http://www.cecill.info". | |
10 # | |
11 # As a counterpart to the access to the source code and rights to copy, | |
12 # modify and redistribute granted by the license, users are provided only | |
13 # with a limited warranty and the software's author, the holder of the | |
14 # economic rights, and the successive licensors have only limited | |
15 # liability. | |
16 # | |
17 # In this respect, the user's attention is drawn to the risks associated | |
18 # with loading, using, modifying and/or developing or reproducing the | |
19 # software by the user in light of its specific status of free software, | |
20 # that may mean that it is complicated to manipulate, and that also | |
21 # therefore means that it is reserved for developers and experienced | |
22 # professionals having in-depth computer knowledge. Users are therefore | |
23 # encouraged to load and test the software's suitability as regards their | |
24 # requirements in conditions enabling the security of their systems and/or | |
25 # data to be ensured and, more generally, to use and operate it in the | |
26 # same conditions as regards security. | |
27 # | |
28 # The fact that you are presently reading this means that you have had | |
29 # knowledge of the CeCILL license and that you accept its terms. | |
30 | |
31 | |
32 ## Record a region on a given sequence | |
33 # | |
class Range(object):
    """Region of a named sequence, delimited by a start and an end coordinate.

    The strand is encoded by the order of the coordinates: the instance is on
    the direct strand when start <= end, on the reverse strand otherwise.
    """

    ## Constructor
    #
    # @param seqname the name of the sequence
    # @param start the start coordinate (converted with int())
    # @param end the end coordinate (converted with int())
    #
    def __init__(self, seqname="", start=-1, end=-1):
        self.seqname = seqname
        self.start = int(start)
        self.end = int(end)

    ## Equal operator
    #
    # @param o a Range instance
    # @return True if seqname, start and end are all equal, False otherwise
    #
    # @note an operand without the seqname/start/end attributes (e.g. None)
    #       is simply not equal, instead of raising AttributeError
    #
    def __eq__(self, o):
        try:
            return (self.seqname == o.seqname
                    and self.start == o.start
                    and self.end == o.end)
        except AttributeError:
            return False

    ## Unequal operator
    #
    # @param o a Range instance
    # @return the negation of the equal operator
    #
    def __ne__(self, o):
        return not self.__eq__(o)

    ## Convert the object into a string
    #
    # @note used in 'print myObject'
    #
    def __str__(self):
        return self.toString()

    ## Convert the object into a string
    #
    # @note used in 'repr(myObject)' for debugging; same as toString() with
    #       the tabulations replaced by semicolons
    #
    def __repr__(self):
        return self.toString().replace("\t", ";")

    ## Set the start coordinate (stored as given, without conversion)
    #
    def setStart(self, start):
        self.start = start

    ## Set the end coordinate (stored as given, without conversion)
    #
    def setEnd(self, end):
        self.end = end

    ## Set the name of the sequence
    #
    def setSeqName(self, seqName):
        self.seqname = seqName

    ## Reset the attributes to the default values of the constructor
    #
    def reset(self):
        self.seqname = ""
        self.start = -1
        self.end = -1

    ## Set start and end from a (low, high) pair according to a strand
    #
    # @param low the lowest coordinate
    # @param high the greatest coordinate
    # @param direct True to store (low, high), False to store (high, low)
    #
    def _setOriented(self, low, high, direct):
        if direct:
            self.start, self.end = low, high
        else:
            self.start, self.end = high, low

    ## Return the attributes as a formatted string
    #
    # @return 'seqname<tab>start<tab>end'
    #
    def toString(self):
        return "%s\t%d\t%d" % (self.seqname, self.start, self.end)

    ## Show the attributes
    #
    # @note the parenthesised form prints the same text with the Python 2
    #       'print' statement and with the Python 3 'print' function
    #
    def show(self):
        print(self.toString())

    ## Return seqname
    #
    def getSeqname(self):
        return self.seqname

    ## Return the start coordinate
    #
    def getStart(self):
        return self.start

    ## Return the end coordinate
    #
    def getEnd(self):
        return self.end

    ## Return the lowest value between start and end coordinates
    #
    def getMin(self):
        return min(self.start, self.end)

    ## Return the greatest value between start and end attributes
    #
    def getMax(self):
        return max(self.start, self.end)

    ## Return True if the instance is on the direct strand, False otherwise
    #
    # @note start == end is considered as the direct strand
    #
    def isOnDirectStrand(self):
        return self.start <= self.end

    ## Return True if the instance is on the reverse strand, False otherwise
    #
    def isOnReverseStrand(self):
        return not self.isOnDirectStrand()

    ## Return '+' if the instance is on the direct strand, '-' otherwise
    #
    def getStrand(self):
        return '+' if self.isOnDirectStrand() else '-'

    ## Exchange start and end coordinates
    #
    def reverse(self):
        self.start, self.end = self.end, self.start

    ## Return the length of the instance
    #
    # @warning old name is 'length'
    #
    # @note both extremities are counted: start == end gives a length of 1
    #
    def getLength(self):
        return int(abs(self.start - self.end)) + 1

    ## Return True if the instance is empty, False otherwise
    #
    # @note empty means that start and end are both 0 or both -1
    #
    def isEmpty(self):
        return self.start == self.end and self.start in (0, -1)

    ## Set attributes from tuple
    #
    # @param tuple a tuple with (name,start,end)
    #
    def setFromTuple(self, tuple):
        self.seqname = tuple[0]
        self.start = int(tuple[1])
        self.end = int(tuple[2])

    ## Set attributes from string
    #
    # @param string a string formatted like name<sep>start<sep>end
    # @param sep field separator
    #
    # @note a single trailing newline, if any, is removed before splitting
    #
    def setFromString(self, string, sep="\t"):
        if string.endswith("\n"):
            string = string[:-1]
        self.setFromTuple(string.split(sep))

    ## Merge the instance with another Range instance
    #
    # @param o a Range instance
    #
    # @note nothing is done if the sequence names differ; otherwise the
    #       instance is extended to span both ranges and keeps its own strand
    #
    def merge(self, o):
        if self.seqname != o.seqname:
            return
        lowest = min(self.getMin(), o.getMin())
        greatest = max(self.getMax(), o.getMax())
        self._setOriented(lowest, greatest, self.isOnDirectStrand())

    ## Return True if the instance overlaps with another Range instance, False otherwise
    #
    # @param o a Range instance
    #
    # @note strands are ignored and sharing a single coordinate is an overlap
    #
    def isOverlapping(self, o):
        if o.seqname != self.seqname:
            return False
        # two closed intervals intersect when each one begins before the
        # other one ends
        return o.getMin() <= self.getMax() and self.getMin() <= o.getMax()

    ## Return the length of the overlap between the instance and another Range, 0 if no overlap
    #
    # @param o a Range instance
    #
    def getOverlapLength(self, o):
        if not self.isOverlapping(o):
            return 0
        # the shared region goes from the greatest of the two minima to the
        # lowest of the two maxima, both extremities included
        return min(self.getMax(), o.getMax()) - max(self.getMin(), o.getMin()) + 1

    ## Return True if the instance is included within another Range, False otherwise
    #
    # @param o a Range instance
    #
    # @note the min (respectively max) coordinates can be equal
    #
    def isIncludedIn(self, o):
        if o.seqname != self.seqname:
            return False
        return self.getMin() >= o.getMin() and self.getMax() <= o.getMax()

    ## Return the distance between the instance and another Range instance
    #
    # @param o a Range instance
    # @return -1 if the two ranges are on different strands, 0 if they
    #         overlap, otherwise the difference between the two facing
    #         extremities (beginning of the downstream range minus end of
    #         the upstream one)
    #
    # @note the sequence names are only compared through isOverlapping():
    #       ranges on different sequences are seen as non-overlapping and a
    #       coordinate difference is still returned
    #
    def getDistance(self, o):
        if self.isOnDirectStrand() != o.isOnDirectStrand():
            return -1
        if self.isOverlapping(o):
            return 0
        if self.isOnDirectStrand():
            if self.start > o.start:
                return self.start - o.end
            return o.start - self.end
        if self.start > o.start:
            return self.end - o.start
        return o.end - self.start

    ## Remove in the instance the region overlapping with another Range instance
    #
    # @param o a Range instance
    # @return a new Range: when 'o' lies strictly inside the instance, the
    #         instance keeps the part located before 'o' and the returned
    #         Range holds the part located after it; in every other case the
    #         returned Range keeps the default coordinates (-1, -1)
    #
    # @note the instance is modified in place and keeps its strand; it is
    #       set to (0, 0) when it is completely covered by 'o'
    #
    def diff(self, o):
        new_range = Range(self.seqname)
        # isOverlapping() is already False when the sequence names differ
        if not self.isOverlapping(o):
            return new_range

        # the strand has to be read before the coordinates are modified
        direct = self.isOnDirectStrand()
        istart, iend = self.getMin(), self.getMax()
        jstart, jend = o.getMin(), o.getMax()

        if istart < jstart:
            # the part located before 'o' always remains in the instance
            self._setOriented(istart, jstart - 1, direct)
            if iend > jend:
                # 'o' is strictly inside: the part after it is returned
                new_range._setOriented(jend + 1, iend, direct)
        elif iend <= jend:
            # the instance is completely covered by 'o'
            self.start = 0
            self.end = 0
        else:
            # only the part located after 'o' remains
            self._setOriented(jend + 1, iend, direct)
        return new_range

    ## Find the bin that contains the instance and compute its index
    #
    # @note Required for coordinate indexing via a hierarchical bin system
    #
    # @note relies on the module-level functions getBin(val, bin_lvl) and
    #       getIdx(val, bin_lvl), not on the getBin() method below
    #
    def findIdx(self):
        min_lvl = 3
        max_lvl = 6
        # use the first level whose bin contains both start and end
        for bin_lvl in range(min_lvl, max_lvl):
            if getBin(self.start, bin_lvl) == getBin(self.end, bin_lvl):
                return getIdx(self.start, bin_lvl)
        return getIdx(self.start, max_lvl)

    ## Get a bin for fast database access
    #
    # @return bin number (float): the size of the bin (a power of 10) plus
    #         the index of the bin divided by 1e10
    #
    # @note '//' is the explicit integer division, i.e. what '/' computes on
    #       integer coordinates with Python 2
    #
    def getBin(self):
        # smallest bin size, from 10**3 to 10**7, holding both coordinates
        for exponent in range(3, 8):
            bin_lvl = pow(10, exponent)
            start_bin = int(self.start // bin_lvl)
            if start_bin == int(self.end // bin_lvl):
                return float(bin_lvl + (start_bin / 1e10))
        # otherwise fall back on the largest bin size
        bin_lvl = pow(10, 8)
        return float(bin_lvl + (int(self.start // bin_lvl) / 1e10))
344 | |
345 | |
346 # Functions | |
347 | |
## Get the bin number of a coordinate according to the bin level
#
# Required for coordinate indexing with a hierarchical bin system.
#
# @param val the coordinate (expected to be an integer)
# @param bin_lvl the bin level: bins at this level have a size of 10**bin_lvl
# @return the number of the bin of size 10**bin_lvl that contains 'val'
#
# @note '//' is the explicit integer division: on integers it gives what '/'
#       gives with Python 2, and it keeps doing so with Python 3; int()
#       replaces the Python 2-only long() (int() switches to arbitrary
#       precision on its own)
#
def getBin(val, bin_lvl):
    bin_size = pow(10, bin_lvl)
    return int(val // bin_size)
353 | |
## Get an index from a coordinate according to the bin level
#
# Required for coordinate indexing with a hierarchical bin system.
#
# @param val the coordinate
# @param bin_lvl the bin level
# @return (bin_lvl - 2) * 10**6, plus the bin number of 'val' at this level
#         (as returned by getBin) when bin_lvl is lower than 6; from level 6
#         onwards the coordinate is not used
#
# @note int() replaces the Python 2-only long() (int() switches to arbitrary
#       precision on its own)
#
def getIdx(val, bin_lvl):
    min_lvl = 3
    max_lvl = 6
    # each level owns its own block of 10**max_lvl indexes
    lvl_offset = (bin_lvl - min_lvl + 1) * pow(10, max_lvl)
    if bin_lvl >= max_lvl:
        return int(lvl_offset)
    return int(lvl_offset + getBin(val, bin_lvl))