view planemo/lib/python3.7/site-packages/repoze/lru/__init__.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
line wrap: on
line source

""" LRU caching class and decorator """
from abc import abstractmethod
from abc import ABCMeta

import threading
import time
import uuid


_MARKER = object()
# By default, expire items after 2**60 seconds. This fits into 64 bit
# integers and is close enough to "never" for practical purposes.
_DEFAULT_TIMEOUT = 2 ** 60


class Cache(object):
    __metaclass__ = ABCMeta

    @abstractmethod
    def clear(self):
        """Remove all entries from the cache"""

    @abstractmethod
    def get(self, key, default=None):
        """Return value for key. If not in cache, return default"""

    @abstractmethod
    def put(self, key, val):
        """Add key to the cache with value val"""

    @abstractmethod
    def invalidate(self, key):
        """Remove key from the cache"""


class UnboundedCache(Cache):
    """
    a simple unbounded cache backed by a dictionary
    """

    def __init__(self):
        self._data = dict()

    def get(self, key, default=None):
        return self._data.get(key, default)

    def clear(self):
        self._data.clear()

    def invalidate(self, key):
        try:
            del self._data[key]
        except KeyError:
            pass

    def put(self, key, val):
        self._data[key] = val


class LRUCache(Cache):
    """ Implements a pseudo-LRU algorithm (CLOCK)

    The Clock algorithm is not kept strictly to improve performance, e.g. to
    allow get() and invalidate() to work without acquiring the lock.
    """
    def __init__(self, size):
        size = int(size)
        if size < 1:
            raise ValueError('size must be >0')
        self.size = size
        self.lock = threading.Lock()
        self.hand = 0
        self.maxpos = size - 1
        self.clock_keys = None
        self.clock_refs = None
        self.data = None
        self.evictions = 0
        self.hits = 0
        self.misses = 0
        self.lookups = 0
        self.clear()

    def clear(self):
        """Remove all entries from the cache"""
        with self.lock:
            # If really clear()ing a full cache, clean up self.data first to
            # give garbage collection a chance to reduce memorey usage.
            # Instantiating "[_MARKER] * size" will temporarily have 2 lists
            # in memory -> high peak memory usage for tiny amount of time.
            # With self.data already clear, that peak should not exceed what
            # we normally use.
            self.data = {}
            size = self.size
            self.clock_keys = [_MARKER] * size
            self.clock_refs = [False] * size
            self.hand = 0
            self.evictions = 0
            self.hits = 0
            self.misses = 0
            self.lookups = 0

    def get(self, key, default=None):
        """Return value for key. If not in cache, return default"""
        self.lookups += 1
        try:
            pos, val = self.data[key]
            self.hits += 1
        except KeyError:
            self.misses += 1
            return default
        self.clock_refs[pos] = True
        return val

    def put(self, key, val):
        """Add key to the cache with value val"""
        # These do not change or they are just references, no need for locking.
        maxpos = self.maxpos
        clock_refs = self.clock_refs
        clock_keys = self.clock_keys
        data = self.data

        with self.lock:
            entry = data.get(key)
            if entry is not None:
                # We already have key. Only make sure data is up to date and
                # to remember that it was used.
                pos, old_val = entry
                if old_val is not val:
                    data[key] = (pos, val)
                self.clock_refs[pos] = True
                return
            # else: key is not yet in cache. Search place to insert it.

            hand = self.hand
            count = 0
            max_count = 107
            while 1:
                ref = clock_refs[hand]
                if ref == True:
                    clock_refs[hand] = False
                    hand += 1
                    if hand > maxpos:
                        hand = 0

                    count += 1
                    if count >= max_count:
                        # We have been searching long enough. Force eviction of
                        # next entry, no matter what its status is.
                        clock_refs[hand] = False
                else:
                    oldkey = clock_keys[hand]
                    # Maybe oldkey was not in self.data to begin with. If it
                    # was, self.invalidate() in another thread might have
                    # already removed it. del() would raise KeyError, so pop().
                    oldentry = data.pop(oldkey, _MARKER)
                    if oldentry is not _MARKER:
                        self.evictions += 1
                    clock_keys[hand] = key
                    clock_refs[hand] = True
                    data[key] = (hand, val)
                    hand += 1
                    if hand > maxpos:
                        hand = 0
                    self.hand = hand
                    break

    def invalidate(self, key):
        """Remove key from the cache"""
        # pop with default arg will not raise KeyError
        entry = self.data.pop(key, _MARKER)
        if entry is not _MARKER:
            # We have no lock, but worst thing that can happen is that we
            # set another key's entry to False.
            self.clock_refs[entry[0]] = False
        # else: key was not in cache. Nothing to do.


class ExpiringLRUCache(Cache):
    """ Implements a pseudo-LRU algorithm (CLOCK) with expiration times

    The Clock algorithm is not kept strictly to improve performance, e.g. to
    allow get() and invalidate() to work without acquiring the lock.
    """
    def __init__(self, size, default_timeout=_DEFAULT_TIMEOUT):
        self.default_timeout = default_timeout
        size = int(size)
        if size < 1:
            raise ValueError('size must be >0')
        self.size = size
        self.lock = threading.Lock()
        self.hand = 0
        self.maxpos = size - 1
        self.clock_keys = None
        self.clock_refs = None
        self.data = None
        self.evictions = 0
        self.hits = 0
        self.misses = 0
        self.lookups = 0
        self.clear()

    def clear(self):
        """Remove all entries from the cache"""
        with self.lock:
            # If really clear()ing a full cache, clean up self.data first to
            # give garbage collection a chance to reduce memorey usage.
            # Instantiating "[_MARKER] * size" will temporarily have 2 lists
            # in memory -> high peak memory usage for tiny amount of time.
            # With self.data already clear, that peak should not exceed what
            # we normally use.
            # self.data contains (pos, val, expires) triplets
            self.data = {}
            size = self.size
            self.clock_keys = [_MARKER] * size
            self.clock_refs = [False] * size
            self.hand = 0
            self.evictions = 0
            self.hits = 0
            self.misses = 0
            self.lookups = 0

    def get(self, key, default=None):
        """Return value for key. If not in cache or expired, return default"""
        self.lookups += 1
        try:
            pos, val, expires = self.data[key]
        except KeyError:
            self.misses += 1
            return default
        if expires > time.time():
            # cache entry still valid
            self.hits += 1
            self.clock_refs[pos] = True
            return val
        else:
            # cache entry has expired. Make sure the space in the cache can
            # be recycled soon.
            self.misses += 1
            self.clock_refs[pos] = False
            return default

    def put(self, key, val, timeout=None):
        """Add key to the cache with value val

        key will expire in $timeout seconds. If key is already in cache, val
        and timeout will be updated.
        """
        # These do not change or they are just references, no need for locking.
        maxpos = self.maxpos
        clock_refs = self.clock_refs
        clock_keys = self.clock_keys
        data = self.data
        lock = self.lock
        if timeout is None:
            timeout = self.default_timeout

        with self.lock:
            entry = data.get(key)
            if entry is not None:
                # We already have key. Only make sure data is up to date and
                # to remember that it was used.
                pos = entry[0]
                data[key] = (pos, val, time.time() + timeout)
                clock_refs[pos] = True
                return
            # else: key is not yet in cache. Search place to insert it.

            hand = self.hand
            count = 0
            max_count = 107
            while 1:
                ref = clock_refs[hand]
                if ref == True:
                    clock_refs[hand] = False
                    hand += 1
                    if hand > maxpos:
                        hand = 0

                    count += 1
                    if count >= max_count:
                        # We have been searching long enough. Force eviction of
                        # next entry, no matter what its status is.
                        clock_refs[hand] = False
                else:
                    oldkey = clock_keys[hand]
                    # Maybe oldkey was not in self.data to begin with. If it
                    # was, self.invalidate() in another thread might have
                    # already removed it. del() would raise KeyError, so pop().
                    oldentry = data.pop(oldkey, _MARKER)
                    if oldentry is not _MARKER:
                        self.evictions += 1
                    clock_keys[hand] = key
                    clock_refs[hand] = True
                    data[key] = (hand, val, time.time() + timeout)
                    hand += 1
                    if hand > maxpos:
                        hand = 0
                    self.hand = hand
                    break

    def invalidate(self, key):
        """Remove key from the cache"""
        # pop with default arg will not raise KeyError
        entry = self.data.pop(key, _MARKER)
        if entry is not _MARKER:
            # We have no lock, but worst thing that can happen is that we
            # set another key's entry to False.
            self.clock_refs[entry[0]] = False
        # else: key was not in cache. Nothing to do.


class lru_cache(object):
    """ Decorator for LRU-cached function

    timeout parameter specifies after how many seconds a cached entry should
    be considered invalid.
    """
    def __init__(self,
                 maxsize,
                 cache=None, # cache is an arg to serve tests
                 timeout=None,
                 ignore_unhashable_args=False):
        if cache is None:
            if maxsize is None:
                cache = UnboundedCache()
            elif timeout is None:
                cache = LRUCache(maxsize)
            else:
                cache = ExpiringLRUCache(maxsize, default_timeout=timeout)
        self.cache = cache
        self._ignore_unhashable_args = ignore_unhashable_args

    def __call__(self, func):
        cache = self.cache
        marker = _MARKER

        def cached_wrapper(*args, **kwargs):
            try:
                key = (args, frozenset(kwargs.items())) if kwargs else args
            except TypeError as e:
                if self._ignore_unhashable_args:
                    return func(*args, **kwargs)
                else:
                    raise e
            else:
                val = cache.get(key, marker)
                if val is marker:
                    val = func(*args, **kwargs)
                    cache.put(key, val)
                return val

        def _maybe_copy(source, target, attr):
            value = getattr(source, attr, source)
            if value is not source:
                setattr(target, attr, value)

        _maybe_copy(func, cached_wrapper, '__module__')
        _maybe_copy(func, cached_wrapper, '__name__')
        _maybe_copy(func, cached_wrapper, '__doc__')
        cached_wrapper._cache = cache
        return cached_wrapper


class CacheMaker(object):
    """Generates decorators that can be cleared later
    """
    def __init__(self, maxsize=None, timeout=_DEFAULT_TIMEOUT):
        """Create cache decorator factory.

        - maxsize : the default size for created caches.

        - timeout : the defaut expiraiton time for created caches.
        """
        self._maxsize = maxsize
        self._timeout = timeout
        self._cache = {}

    def _resolve_setting(self, name=None, maxsize=None, timeout=None):
        if name is None:
            while True:
                name = str(uuid.uuid4())
                ## the probability of collision is so low ....
                if name not in self._cache:  # pragma: NO COVER
                    break

        if name in self._cache:
            raise KeyError("cache %s already in use" % name)

        if maxsize is None:
            maxsize = self._maxsize

        if maxsize is None:
            raise ValueError("Cache must have a maxsize set")

        if timeout is None:
            timeout = self._timeout

        return name, maxsize, timeout

    def memoized(self, name=None):
        name, maxsize, _ = self._resolve_setting(name, 0)
        cache = self._cache[name] = UnboundedCache()
        return lru_cache(None, cache)

    def lrucache(self, name=None, maxsize=None):
        """Named arguments:
        
        - name (optional) is a string, and should be unique amongst all caches

        - maxsize (optional) is an int, overriding any default value set by
          the constructor
        """
        name, maxsize, _ = self._resolve_setting(name, maxsize)
        cache = self._cache[name] = LRUCache(maxsize)
        return lru_cache(maxsize, cache)

    def expiring_lrucache(self, name=None, maxsize=None, timeout=None):
        """Named arguments:

        - name (optional) is a string, and should be unique amongst all caches

        - maxsize (optional) is an int, overriding any default value set by
          the constructor

        - timeout (optional) is an int, overriding any default value set by
          the constructor or the default value (%d seconds)
        """ % _DEFAULT_TIMEOUT
        name, maxsize, timeout = self._resolve_setting(name, maxsize, timeout)
        cache = self._cache[name] = ExpiringLRUCache(maxsize, timeout)
        return lru_cache(maxsize, cache, timeout)

    def clear(self, *names):
        """Clear the given cache(s).
        
        If no 'names' are passed, clear all caches.
        """
        if len(names) == 0:
            names = self._cache.keys()

        for name in names:
            self._cache[name].clear()