diff --git a/backports/functools_lru_cache.py b/backports/functools_lru_cache.py
index 1b83fe9..e372cff 100644
--- a/backports/functools_lru_cache.py
+++ b/backports/functools_lru_cache.py
@@ -26,6 +26,12 @@ def update_wrapper(
 
 
 class _HashedSeq(list):
+    """This class guarantees that hash() will be called no more than once
+    per element. This is important because the lru_cache() will hash
+    the key multiple times on a cache miss.
+
+    """
+
     __slots__ = 'hashvalue'
 
     def __init__(self, tup, hash=hash):
@@ -41,45 +47,57 @@ def _make_key(
     kwds,
     typed,
     kwd_mark=(object(),),
-    fasttypes=set([int, str, frozenset, type(None)]),
-    sorted=sorted,
+    fasttypes={int, str},
     tuple=tuple,
     type=type,
     len=len,
 ):
-    'Make a cache key from optionally typed positional and keyword arguments'
+    """Make a cache key from optionally typed positional and keyword arguments
+
+    The key is constructed in a way that is flat as possible rather than
+    as a nested structure that would take more memory.
+
+    If there is only a single argument and its data type is known to cache
+    its hash value, then that argument is returned without a wrapper. This
+    saves space and improves lookup speed.
+
+    """
+    # All of code below relies on kwds preserving the order input by the user.
+    # Formerly, we sorted() the kwds before looping. The new way is *much*
+    # faster; however, it means that f(x=1, y=2) will now be treated as a
+    # distinct call from f(y=2, x=1) which will be cached separately.
     key = args
     if kwds:
-        sorted_items = sorted(kwds.items())
         key += kwd_mark
-        for item in sorted_items:
+        for item in kwds.items():
             key += item
     if typed:
         key += tuple(type(v) for v in args)
         if kwds:
-            key += tuple(type(v) for k, v in sorted_items)
+            key += tuple(type(v) for v in kwds.values())
     elif len(key) == 1 and type(key[0]) in fasttypes:
         return key[0]
     return _HashedSeq(key)
 
 
-def lru_cache(maxsize=100, typed=False):  # noqa: C901
+def lru_cache(maxsize=128, typed=False):
     """Least-recently-used cache decorator.
 
     If *maxsize* is set to None, the LRU features are disabled and the cache
     can grow without bound.
 
     If *typed* is True, arguments of different types will be cached separately.
-    For example, f(3.0) and f(3) will be treated as distinct calls with
-    distinct results.
+    For example, f(decimal.Decimal("3.0")) and f(3.0) will be treated as
+    distinct calls with distinct results. Some types such as str and int may
+    be cached separately even when typed is false.
 
     Arguments to the cached function must be hashable.
 
-    View the cache statistics named tuple (hits, misses, maxsize, currsize) with
-    f.cache_info().  Clear the cache and statistics with f.cache_clear().
+    View the cache statistics named tuple (hits, misses, maxsize, currsize)
+    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
     Access the underlying function with f.__wrapped__.
 
-    See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
+    See:  https://en.wikipedia.org/wiki/Cache_replacement_policies#Least_recently_used_(LRU)
 
     """
 
@@ -88,108 +106,138 @@ def lru_cache(maxsize=100, typed=False):  # noqa: C901
     # The internals of the lru_cache are encapsulated for thread safety and
     # to allow the implementation to change (including a possible C version).
 
+    if isinstance(maxsize, int):
+        # Negative maxsize is treated as 0
+        if maxsize < 0:
+            maxsize = 0
+    elif callable(maxsize) and isinstance(typed, bool):
+        # The user_function was passed in directly via the maxsize argument
+        user_function, maxsize = maxsize, 128
+        wrapper = _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo)
+        wrapper.cache_parameters = lambda: {'maxsize': maxsize, 'typed': typed}
+        return update_wrapper(wrapper, user_function)
+    elif maxsize is not None:
+        raise TypeError('Expected first argument to be an integer, a callable, or None')
+
     def decorating_function(user_function):
-        cache = dict()
-        stats = [0, 0]  # make statistics updateable non-locally
-        HITS, MISSES = 0, 1  # names for the stats fields
-        make_key = _make_key
-        cache_get = cache.get  # bound method to lookup key or return None
-        _len = len  # localize the global len() function
-        lock = RLock()  # because linkedlist updates aren't threadsafe
-        root = []  # root of the circular doubly linked list
-        root[:] = [root, root, None, None]  # initialize by pointing to self
-        nonlocal_root = [root]  # make updateable non-locally
-        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3  # names for the link fields
-
-        if maxsize == 0:
-
-            def wrapper(*args, **kwds):
-                # no caching, just do a statistics update after a successful call
-                result = user_function(*args, **kwds)
-                stats[MISSES] += 1
-                return result
+        wrapper = _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo)
+        wrapper.cache_parameters = lambda: {'maxsize': maxsize, 'typed': typed}
+        return update_wrapper(wrapper, user_function)
 
-        elif maxsize is None:
+    return decorating_function
 
-            def wrapper(*args, **kwds):
-                # simple caching without ordering or size limit
-                key = make_key(args, kwds, typed)
-                result = cache_get(
-                    key, root
-                )  # root used here as a unique not-found sentinel
-                if result is not root:
-                    stats[HITS] += 1
-                    return result
-                result = user_function(*args, **kwds)
-                cache[key] = result
-                stats[MISSES] += 1
-                return result
 
-        else:
-
-            def wrapper(*args, **kwds):
-                # size limited caching that tracks accesses by recency
-                key = make_key(args, kwds, typed) if kwds or typed else args
-                with lock:
-                    link = cache_get(key)
-                    if link is not None:
-                        # record recent use of the key by moving it
-                        # to the front of the list
-                        (root,) = nonlocal_root
-                        link_prev, link_next, key, result = link
-                        link_prev[NEXT] = link_next
-                        link_next[PREV] = link_prev
-                        last = root[PREV]
-                        last[NEXT] = root[PREV] = link
-                        link[PREV] = last
-                        link[NEXT] = root
-                        stats[HITS] += 1
-                        return result
-                result = user_function(*args, **kwds)
-                with lock:
-                    (root,) = nonlocal_root
-                    if key in cache:
-                        # getting here means that this same key was added to the
-                        # cache while the lock was released.  since the link
-                        # update is already done, we need only return the
-                        # computed result and update the count of misses.
-                        pass
-                    elif _len(cache) >= maxsize:
-                        # use the old root to store the new key and result
-                        oldroot = root
-                        oldroot[KEY] = key
-                        oldroot[RESULT] = result
-                        # empty the oldest link and make it the new root
-                        root = nonlocal_root[0] = oldroot[NEXT]
-                        oldkey = root[KEY]
-                        root[KEY] = root[RESULT] = None
-                        # now update the cache dictionary for the new links
-                        del cache[oldkey]
-                        cache[key] = oldroot
-                    else:
-                        # put result in a new link at the front of the list
-                        last = root[PREV]
-                        link = [last, root, key, result]
-                        last[NEXT] = root[PREV] = cache[key] = link
-                    stats[MISSES] += 1
+def _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo):
+    # Constants shared by all lru cache instances:
+    sentinel = object()  # unique object used to signal cache misses
+    make_key = _make_key  # build a key from the function arguments
+    PREV, NEXT, KEY, RESULT = 0, 1, 2, 3  # names for the link fields
+
+    cache = {}
+    hits = misses = 0
+    full = False
+    cache_get = cache.get  # bound method to lookup a key or return None
+    cache_len = cache.__len__  # get cache size without calling len()
+    lock = RLock()  # because linkedlist updates aren't threadsafe
+    root = []  # root of the circular doubly linked list
+    root[:] = [root, root, None, None]  # initialize by pointing to self
+
+    if maxsize == 0:
+
+        def wrapper(*args, **kwds):
+            # No caching -- just a statistics update
+            nonlocal misses
+            misses += 1
+            result = user_function(*args, **kwds)
+            return result
+
+    elif maxsize is None:
+
+        def wrapper(*args, **kwds):
+            # Simple caching without ordering or size limit
+            nonlocal hits, misses
+            key = make_key(args, kwds, typed)
+            result = cache_get(key, sentinel)
+            if result is not sentinel:
+                hits += 1
                 return result
+            misses += 1
+            result = user_function(*args, **kwds)
+            cache[key] = result
+            return result
 
-        def cache_info():
-            """Report cache statistics"""
-            with lock:
-                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
+    else:
 
-        def cache_clear():
-            """Clear the cache and cache statistics"""
+        def wrapper(*args, **kwds):
+            # Size limited caching that tracks accesses by recency
+            nonlocal root, hits, misses, full
+            key = make_key(args, kwds, typed)
             with lock:
-                cache.clear()
-                root = nonlocal_root[0]
-                root[:] = [root, root, None, None]
-                stats[:] = [0, 0]
-
-        wrapper.__wrapped__ = user_function
-        wrapper.cache_info = cache_info
-        wrapper.cache_clear = cache_clear
-        return update_wrapper(wrapper, user_function)
-
-    return decorating_function
+                link = cache_get(key)
+                if link is not None:
+                    # Move the link to the front of the circular queue
+                    link_prev, link_next, _key, result = link
+                    link_prev[NEXT] = link_next
+                    link_next[PREV] = link_prev
+                    last = root[PREV]
+                    last[NEXT] = root[PREV] = link
+                    link[PREV] = last
+                    link[NEXT] = root
+                    hits += 1
+                    return result
+                misses += 1
+            result = user_function(*args, **kwds)
+            with lock:
+                if key in cache:
+                    # Getting here means that this same key was added to the
+                    # cache while the lock was released.  Since the link
+                    # update is already done, we need only return the
+                    # computed result and update the count of misses.
+                    pass
+                elif full:
+                    # Use the old root to store the new key and result.
+                    oldroot = root
+                    oldroot[KEY] = key
+                    oldroot[RESULT] = result
+                    # Empty the oldest link and make it the new root.
+                    # Keep a reference to the old key and old result to
+                    # prevent their ref counts from going to zero during the
+                    # update. That will prevent potentially arbitrary object
+                    # clean-up code (i.e. __del__) from running while we're
+                    # still adjusting the links.
+                    root = oldroot[NEXT]
+                    oldkey = root[KEY]
+                    root[KEY] = root[RESULT] = None
+                    # Now update the cache dictionary.
+                    del cache[oldkey]
+                    # Save the potentially reentrant cache[key] assignment
+                    # for last, after the root and links have been put in
+                    # a consistent state.
+                    cache[key] = oldroot
+                else:
+                    # Put result in a new link at the front of the queue.
+                    last = root[PREV]
+                    link = [last, root, key, result]
+                    last[NEXT] = root[PREV] = cache[key] = link
+                    # Use the cache_len bound method instead of the len() function
+                    # which could potentially be wrapped in an lru_cache itself.
+                    full = cache_len() >= maxsize
+            return result
+
+    def cache_info():
+        """Report cache statistics"""
+        with lock:
+            return _CacheInfo(hits, misses, maxsize, cache_len())
+
+    def cache_clear():
+        """Clear the cache and cache statistics"""
+        nonlocal hits, misses, full
+        with lock:
+            cache.clear()
+            root[:] = [root, root, None, None]
+            hits = misses = 0
+            full = False
+
+    wrapper.cache_info = cache_info
+    wrapper.cache_clear = cache_clear
+    return wrapper
diff --git a/newsfragments/+7e6fa2bb.feature.rst b/newsfragments/+7e6fa2bb.feature.rst
new file mode 100644
index 0000000..443c67d
--- /dev/null
+++ b/newsfragments/+7e6fa2bb.feature.rst
@@ -0,0 +1 @@
+Refreshed implementation from CPython.
\ No newline at end of file
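Usage sketch (not part of the patch): a minimal example of the refreshed decorator's observable behavior, assuming the package is importable as backports.functools_lru_cache (the path in the diff above). It exercises the new bare @lru_cache form, the cache_info()/cache_clear() API, the new cache_parameters attribute, and the keyword-order caveat documented in _make_key::

    from backports.functools_lru_cache import lru_cache

    @lru_cache  # bare decoration is accepted via the callable-maxsize branch; maxsize defaults to 128
    def square(x):
        return x * x

    square(4)  # a single int argument is returned unwrapped by _make_key (int is in fasttypes)

    @lru_cache(maxsize=2)
    def add(x, y):
        return x + y

    add(1, 2)      # miss: cached under the positional-args key
    add(1, 2)      # hit
    add(x=1, y=2)  # miss: keyword calls build a different key than positional ones
    add(y=2, x=1)  # miss: kwds are no longer sorted, so keyword order matters
    print(add.cache_info())        # CacheInfo(hits=1, misses=3, maxsize=2, currsize=2)
    print(add.cache_parameters())  # {'maxsize': 2, 'typed': False}
    add.cache_clear()
    print(add.cache_info())        # CacheInfo(hits=0, misses=0, maxsize=2, currsize=0)

The add(y=2, x=1) miss is the behavior change called out in the _make_key comment: callers that previously relied on keyword arguments being sorted into a canonical key will see extra misses after this refresh.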