Mercurial > repos > davidmurphy > codonlogo
comparison corebio/utils/__init__.py @ 0:c55bdc2fb9fa
Uploaded
author | davidmurphy |
---|---|
date | Thu, 27 Oct 2011 12:09:09 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c55bdc2fb9fa |
---|---|
1 | |
2 | |
3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com> | |
4 # | |
5 # This software is distributed under the MIT Open Source License. | |
6 # <http://www.opensource.org/licenses/mit-license.html> | |
7 # | |
8 # Permission is hereby granted, free of charge, to any person obtaining a | |
9 # copy of this software and associated documentation files (the "Software"), | |
10 # to deal in the Software without restriction, including without limitation | |
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
12 # and/or sell copies of the Software, and to permit persons to whom the | |
13 # Software is furnished to do so, subject to the following conditions: | |
14 # | |
15 # The above copyright notice and this permission notice shall be included | |
16 # in all copies or substantial portions of the Software. | |
17 # | |
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
24 # THE SOFTWARE. | |
25 # | |
26 | |
27 | |
28 """Extra utilities and core classes not in standard python. | |
29 """ | |
30 | |
31 | |
32 __all__ = ('isblank', 'isfloat', 'isint', 'fcmp', 'remove_whitespace', | |
33 'invert_dict','update', 'stdrepr', 'Token', 'Struct', 'Reiterate', | |
34 'deoptparse', 'crc32', 'crc64', 'FileIndex', 'find_command', | |
35 'ArgumentError', 'frozendict') | |
36 | |
37 import os.path | |
38 import math | |
39 | |
def isblank(string):
    """Is this whitespace or an empty string?

    Args:
    - string -- The string to test.

    Returns:
        True if `string` is empty or consists solely of whitespace.
    """
    # str.isspace() is False for '', so the empty string is tested explicitly.
    return string == '' or string.isspace()
44 | |
def isfloat(s):
    """Does this object represent a floating point number?

    Args:
    - s -- Any object; typically a string or a number.

    Returns:
        True if float(s) succeeds, otherwise False.
    """
    try:
        float(s)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: an integer too large to be converted to a float.
        return False
    return True
52 | |
def isint(s):
    """Does this object represent an integer?

    Args:
    - s -- Any object; typically a string or a number.

    Returns:
        True if int(s) succeeds, otherwise False. Note that int() truncates
        finite floats, so isint(3.7) is True.
    """
    try:
        int(s)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int(float('inf')); ValueError also covers NaN.
        return False
    return True
60 | |
def fcmp(x, y, precision):
    """Three-way floating point comparison with an absolute tolerance.

    Args:
    - x, y -- The two numbers to compare.
    - precision -- Absolute tolerance. The values are considered equal when
        their difference is strictly smaller than this.

    Returns:
        0 if abs(x - y) < precision, -1 if x < y, otherwise 1.
    """
    # TODO: Default precision. Test
    if math.fabs(x - y) < precision:
        return 0
    if x < y:
        return -1
    return 1
69 | |
def remove_whitespace(astring):
    """Remove all whitespace from a string.

    Args:
    - astring -- The string to strip.

    Returns:
        A copy of `astring` with every run of whitespace (leading, trailing
        and interior) deleted.
    """
    # split() with no argument breaks on any whitespace run and drops the
    # empty leading/trailing pieces, so re-joining yields only the content.
    # TODO: Is this horribly slow?
    return "".join(astring.split())
74 | |
75 | |
def invert_dict(dictionary):
    """Constructs a new dictionary with inverted mappings so that keys become
    values and vice versa. If the values of the original dictionary are not
    unique then only one of the original keys will be included in the new
    dictionary.

    Args:
    - dictionary -- The mapping to invert. Its values must be hashable.

    Returns:
        A new dict mapping each original value to an original key.
    """
    # items() rather than iteritems() so this runs on Python 2 and 3 alike.
    return dict((value, key) for key, value in dictionary.items())
83 | |
84 | |
85 | |
def update(obj, **entries):
    """Update an instance with new values.

    If `obj` has an `update` method (e.g. a dict) that method is called with
    the entries; otherwise each entry is set as an attribute on `obj`.

    Args:
    - obj -- The object to modify in place.
    - entries -- The names and new values.

    Returns:
        The same `obj`, after modification.

    >>> update({'a': 1}, a=10, b=20) == {'a': 10, 'b': 20}
    True
    """
    if hasattr(obj, 'update'):
        obj.update(entries)
    else:
        # items() rather than iteritems() so this runs on Python 2 and 3.
        for k, v in entries.items():
            setattr(obj, k, v)
    return obj
98 | |
99 | |
100 | |
def stdrepr(obj, attributes=None, name=None):
    """Create a standard representation of an object.

    Args:
    - obj -- The object to represent.
    - attributes -- Names of the attributes to show. Defaults to the
        __slots__ of the object's class.
    - name -- The name to print before the parenthesis. Defaults to the
        name of the object's class.

    Returns:
        A multi-line string of the form 'Name(\\n attr=repr,\\n ...\\n)'.
    """
    if name is None:
        name = obj.__class__.__name__
    if attributes is None:
        attributes = obj.__class__.__slots__
    args = ['%s=%s' % (a, repr(getattr(obj, a))) for a in attributes]
    # Indent every line of the argument list, including the continuation
    # lines of nested multi-line reprs.
    # NOTE(review): the indent width in the two literals below is taken from
    # a copy of the source whose whitespace may have been collapsed -- confirm.
    args = ',\n'.join(args).replace('\n', '\n ')
    return '%s(\n %s\n)' % (name, args)
110 | |
111 | |
class Token(object):
    """Represents the items returned by a file scanner, normally processed
    by a parser.

    Attributes :
    o typeof -- a string describing the kind of token
    o data -- the value of the token
    o lineno -- the line of the file on which the data was found (if known)
    o offset -- the offset of the data within the line (if known)
    """
    __slots__ = ['typeof', 'data', 'lineno', 'offset']

    def __init__(self, typeof, data=None, lineno=-1, offset=-1):
        """Store the token fields; -1 marks an unknown lineno or offset."""
        self.typeof = typeof
        self.data = data
        self.lineno = lineno
        self.offset = offset

    def __repr__(self):
        return stdrepr(self)

    def __str__(self):
        """Return a one-line 'lineno:offset typeof : data' summary, with the
        location and type padded into fixed-width columns."""
        coord = str(self.lineno)
        if self.offset != -1:
            coord += ':' + str(self.offset)
        coord = coord.ljust(7)
        # Test against None explicitly so that falsy payloads such as 0 are
        # still printed rather than silently dropped.
        data = '' if self.data is None else str(self.data)
        return (coord + ' ' + self.typeof + ' : ').ljust(32) + data
137 | |
138 | |
139 | |
def Struct(**kwargs):
    """Create a new instance of an anonymous class with the supplied attributes
    and values.

    A fresh class named 'Struct' is built on every call; its __slots__ are
    exactly the supplied keyword names, so no other attributes can be added
    to the returned instance.

    >>> s = Struct(a=3,b=4)
    >>> s
    Struct(
     a=3,
     b=4
    )
    >>> s.a
    3

    """
    name = 'Struct'

    def _init(obj, **kwargs):
        # items() rather than iteritems() so this runs on Python 2 and 3.
        for k, v in kwargs.items():
            setattr(obj, k, v)

    def _repr(obj):
        return stdrepr(obj, obj.__slots__, name)

    adict = {
        # Snapshot the names as a tuple instead of keeping a reference to
        # whatever kwargs.keys() returns.
        '__slots__': tuple(kwargs),
        '__init__': _init,
        '__repr__': _repr,
    }

    return type(name, (object,), adict)(**kwargs)
169 | |
170 | |
class Reiterate(object):
    """ A flexible wrapper around a simple iterator.

    Adds push-back (push), look-ahead (peek, has_item), a running item count
    (index) and predicate search (filter) on top of the wrapped iterator.
    Wrapping an object that is already a Reiterate returns that same object.
    """
    def __new__(cls, iterator):
        # Re-wrapping is a no-op, so callers can always write Reiterate(x).
        if isinstance(iterator, cls):
            return iterator
        new = object.__new__(cls)
        new._iterator = iter(iterator)
        new._stack = []     # Pushed-back items; the top of the stack is next.
        new._index = 0
        return new

    def __init__(self, *args, **kw):
        # All state is set up in __new__, so that re-wrapping an existing
        # instance does not reset it.
        pass

    def __iter__(self):
        return self

    def next(self):
        """Return the next item in the iteration.

        Raises StopIteration when no items remain.
        """
        if self._stack:
            item = self._stack.pop()
        else:
            item = next(self._iterator)
        # Count the item only once it has actually been obtained, so that a
        # StopIteration (e.g. from peek() at the end) leaves index() intact.
        self._index += 1
        return item

    def __next__(self):
        """Python 3 spelling of the iterator protocol; delegates to next()."""
        return self.next()

    def index(self):
        """The number of items returned. Incremented by next(), Decremented
        by push(), unchanged by peek() """
        return self._index

    def push(self, item):
        """Push an item back onto the top of the iterator."""
        self._index -= 1
        self._stack.append(item)

    def peek(self):
        """Returns the next item, but does not advance the iteration.
        Returns None if no more items. (But may also return None as the
        next item.)"""
        try:
            item = self.next()
        except StopIteration:
            return None
        self.push(item)
        return item

    def has_item(self):
        """More items to return?"""
        try:
            item = self.next()
        except StopIteration:
            return False
        self.push(item)
        return True

    def filter(self, predicate):
        """Return the next item in the iteration that satisfies the
        predicate. Items that fail the predicate are consumed and discarded.

        Raises StopIteration if no remaining item satisfies the predicate.
        """
        item = self.next()
        while not predicate(item):
            item = self.next()
        return item
# End class Reiterate
234 | |
235 | |
236 | |
237 | |
238 | |
def crc32(string):
    """Return the standard CRC32 checksum as a hexadecimal string.

    Args:
    - string -- The data to checksum, as accepted by binascii.crc32 (a byte
        string).

    Returns:
        The checksum as eight upper-case hexadecimal digits.
    """
    import binascii
    # binascii.crc32 returns a signed value on Python 2; masking to 32 bits
    # gives the same unsigned result on every Python version.
    return "%08X" % (binascii.crc32(string) & 0xFFFFFFFF)
243 | |
_crc64_table = None


def crc64(string):
    """ Calculate ISO 3309 standard cyclic redundancy checksum.
    Used, for example, by SWISS-PROT.

    Args:
    - string -- The data to checksum: a text string (each character
        contributes the low 8 bits of its ordinal) or a byte string.

    Returns : The CRC as a hexadecimal string (16 upper-case digits).

    Reference:
    o W. H. Press, S. A. Teukolsky, W. T. Vetterling, and B. P. Flannery,
      "Numerical recipes in C", 2nd ed., Cambridge University Press. Pages 896ff.
    """
    # Adapted from biopython, which was adapted from bioperl
    global _crc64_table
    if _crc64_table is None:
        # Initialisation of CRC64 table. Built lazily on first use and then
        # cached at module level. Only the high 32-bit word of each entry is
        # kept; the low word is tracked solely while shifting.
        table = []
        for i in range(256):
            part_l = i
            part_h = 0
            for j in range(8):
                rflag = part_l & 1
                part_l >>= 1
                if part_h & 1:
                    part_l |= (1 << 31)
                part_h >>= 1
                if rflag:
                    part_h ^= 0xd8000000
            table.append(part_h)
        _crc64_table = tuple(table)

    # The 64-bit CRC is carried as two 32-bit halves (high, low).
    crcl = 0
    crch = 0
    for c in string:
        # Iterating a Python 3 bytes object yields ints; text yields
        # one-character strings.
        byte = c if isinstance(c, int) else ord(c)
        shr = (crch & 0xFF) << 24       # Bits moving from high into low half.
        temp1h = crch >> 8
        temp1l = (crcl >> 8) | shr
        idx = (crcl ^ byte) & 0xFF
        crch = temp1h ^ _crc64_table[idx]
        crcl = temp1l

    return "%08X%08X" % (crch, crcl)
# End crc64
285 | |
286 | |
class FileIndex(object):
    """Line based random access to a file. Quickly turn a file into a read-only
    database.

    Records can be fetched by integer position (fileindex[3]) or, when a
    `linekey` function was supplied, by string key (fileindex['name']).

    Attr:
    - indexedfile -- The file being indexed. Can be set to None and later
        replaced with a new file handle, for example, if you need to
        close and later reopen the file.

    Bugs:
        User must set the indexedfile to None before pickling this class.

    """
    __slots__ = ['indexedfile', '_parser', '_positions', '_keys', '_key_dict']

    def __init__(self, indexedfile, linekey=None, parser=None):
        """Scan `indexedfile` once from the start, recording line positions.

        Args:
        - indexedfile -- The file to index. Must support seek(), tell() and
            readline().
        - linekey -- An optional function. linekey() will be passed each line
            of the file in turn and should return a string to index the line,
            or None. If linekey() is supplied, then only lines that generate
            keys are indexed.
        - parser -- An optional parser. A function that reads from a file
            handle positioned at the start of a record and returns an object.
            The default parser returns a single line.
        """

        def default_parser(seekedfile):
            return seekedfile.readline()

        if parser is None:
            parser = default_parser
        self._parser = parser

        indexedfile.seek(0)
        positions = []
        keys = []

        while True:
            # Record the offset before reading, so it marks the line start.
            position = indexedfile.tell()
            line = indexedfile.readline()
            # An empty read means end of file, for text ('') and binary (b'')
            # files alike.
            if not line:
                break

            if linekey:
                k = linekey(line)
                if k is None:
                    continue
                keys.append(k)

            positions.append(position)

        self.indexedfile = indexedfile
        self._positions = tuple(positions)
        # Always defined (empty when no linekey was given), so that a lookup
        # by string fails with KeyError rather than AttributeError.
        self._keys = tuple(keys)
        self._key_dict = dict(zip(keys, positions))

    def tell(self, item):
        """Return the file offset of a record.

        Args:
        - item -- A string key, or an integer index into the indexed lines.

        Raises:
        - KeyError -- Unknown string key.
        - IndexError -- Integer index out of range.
        """
        if isinstance(item, str):
            p = self._key_dict[item]
        else:
            p = self._positions[item]
        return p

    def seek(self, item):
        """Seek the indexedfile to the position of item."""
        self.indexedfile.seek(self.tell(item))

    def __iter__(self):
        """Yield every indexed record, parsed, in file order."""
        for i in range(0, len(self)):
            yield self[i]

    def __len__(self):
        """The number of indexed lines."""
        return len(self._positions)

    def __getitem__(self, item):
        """Seek to the record for `item` and return the parser's result."""
        self.indexedfile.seek(self.tell(item))
        return self._parser(self.indexedfile)

    def __contains__(self, item):
        """Is `item` a known key or a valid index?"""
        try:
            self.tell(item)
        except (KeyError, IndexError):
            return False
        return True

# End class FileIndex
377 | |
378 | |
def find_command(command, path=None):
    """Return the full path to the first match of the given command on
    the path.

    Arguments:
    - command -- is the name of the executable to search for.
    - path -- is an optional alternate path list to search. The default is
        to use the COREBIOPATH environment variable, if it is set and
        non-empty, else to pass None and let the _which module choose its
        own default search path.

    Raises:
    - EnvironmentError -- If no match is found for the command.

    Author: Adapted from code by Trent Mick (TrentM@ActiveState.com)
    See: http://trentm.com/projects/which/
    """
    import _which
    if path is None:
        path = os.environ.get("COREBIOPATH", "").split(os.pathsep)
        # An unset or empty variable splits to [''], i.e. no usable path.
        if path == ['']:
            path = None

    try:
        # Take only the first match produced by the generator.
        match = next(_which.whichgen(command, path))
    except (StopIteration, _which.WhichError):
        # The parenthesised tuple catches both exception types. The old
        # 'except A, B:' spelling caught only StopIteration and rebound
        # _which.WhichError to the caught exception.
        raise EnvironmentError("Could not find '%s' on the path." % command)
    return match
409 | |
410 | |
411 | |
class ArgumentError(ValueError):
    """ A subclass of ValueError raised when a function receives an argument
    that has the right type but an inappropriate value, and the situation is not
    described by a more precise exception such as IndexError. The name of the
    argument or component at fault and (optionally) the value are also stored.

    Attributes:
    - key -- The name of the argument or component at fault.
    - value -- The offending value, or None if it was not supplied.
    """

    def __init__(self, message, key, value=None):
        """ Args:
        - message -- An error message.
        - key -- The name of the argument or component at fault.
        - value -- Optional value of the argument.
        """
        # Only the message is passed up, so str(error) is the bare message.
        ValueError.__init__(self, message)
        self.key = key
        self.value = value
# end class ArgumentError
429 | |
430 | |
class frozendict(dict):
    """A frozendict is a dictionary that cannot be modified after being created
    - but it is hashable and may serve as a member of a set or a key in a
    dictionary.

    The hash is computed from the keys only, because the values may be
    mutable and unhashable. Two frozendicts with the same keys but different
    values therefore share a hash while still comparing unequal.

    # Author: Adapted from code by Oren Tirosh
    """
    # See: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/414283

    def _blocked_attribute(obj):
        raise AttributeError("A frozendict cannot be modified.")
    # Wrapping the function in a property makes the mere lookup of a mutating
    # method raise, before any call can take place.
    _blocked_attribute = property(_blocked_attribute)

    __delitem__ = _blocked_attribute
    __setitem__ = _blocked_attribute
    clear = _blocked_attribute
    pop = _blocked_attribute
    popitem = _blocked_attribute
    setdefault = _blocked_attribute
    update = _blocked_attribute

    def __new__(cls, *args, **kw):
        # Fill the dictionary here, via dict.__init__ on the new instance;
        # __init__ itself is a no-op.
        new = dict.__new__(cls)
        dict.__init__(new, *args, **kw)
        return new

    def __init__(self, *args, **kw):
        # Deliberately empty: calling __init__ again must not alter contents.
        pass

    def __hash__(self):
        try:
            return self._cached_hash
        except AttributeError:
            # Hash keys, not items, since items can be mutable and unhashable.
            # A frozenset is order independent and, unlike sorting, does not
            # require the keys to be comparable with one another.
            h = self._cached_hash = hash(frozenset(self))
            return h

    def __repr__(self):
        return "frozendict(%s)" % dict.__repr__(self)
# end class frozendict
470 |