[Python-checkins] cpython: Improve the memory utilization (and speed) of functools.lru_cache().

raymond.hettinger python-checkins at python.org
Fri Mar 16 09:19:07 CET 2012


http://hg.python.org/cpython/rev/3b2856d8614b
changeset:   75729:3b2856d8614b
parent:      75725:d2460ff173ff
user:        Raymond Hettinger <python at rcn.com>
date:        Fri Mar 16 01:16:31 2012 -0700
summary:
  Improve the memory utilization (and speed) of functools.lru_cache().

files:
  Lib/functools.py |  53 +++++++++++++++++++++--------------
  Misc/NEWS        |   2 +
  2 files changed, 34 insertions(+), 21 deletions(-)


diff --git a/Lib/functools.py b/Lib/functools.py
--- a/Lib/functools.py
+++ b/Lib/functools.py
@@ -12,7 +12,7 @@
            'total_ordering', 'cmp_to_key', 'lru_cache', 'reduce', 'partial']
 
 from _functools import partial, reduce
-from collections import OrderedDict, namedtuple
+from collections import namedtuple
 try:
     from _thread import allocate_lock as Lock
 except:
@@ -147,17 +147,20 @@
     # to allow the implementation to change (including a possible C version).
 
     def decorating_function(user_function,
-            *, tuple=tuple, sorted=sorted, map=map, len=len, type=type, KeyError=KeyError):
+            *, tuple=tuple, sorted=sorted, map=map, len=len, type=type):
 
+        cache = dict()
         hits = misses = 0
+        cache_get = cache.get           # bound method for fast lookup
         kwd_mark = (object(),)          # separates positional and keyword args
-        lock = Lock()                   # needed because OrderedDict isn't threadsafe
+        lock = Lock()                   # needed because linkedlist isn't threadsafe
+        root = []                       # root of circular doubly linked list
+        root[:] = [root, root, None, None]      # initialize by pointing to self
 
         if maxsize is None:
-            cache = dict()              # simple cache without ordering or size limit
-
             @wraps(user_function)
             def wrapper(*args, **kwds):
+                # simple caching without ordering or size limit
                 nonlocal hits, misses
                 key = args
                 if kwds:
@@ -167,23 +170,18 @@
                     key += tuple(map(type, args))
                     if kwds:
                         key += tuple(type(v) for k, v in sorted_items)
-                try:
-                    result = cache[key]
+                result = cache_get(key)
+                if result is not None:
                     hits += 1
                     return result
-                except KeyError:
-                    pass
                 result = user_function(*args, **kwds)
                 cache[key] = result
                 misses += 1
                 return result
         else:
-            cache = OrderedDict()           # ordered least recent to most recent
-            cache_popitem = cache.popitem
-            cache_renew = cache.move_to_end
-
             @wraps(user_function)
             def wrapper(*args, **kwds):
+                # size limited caching that tracks accesses by recency
                 nonlocal hits, misses
                 key = args
                 if kwds:
@@ -193,20 +191,33 @@
                     key += tuple(map(type, args))
                     if kwds:
                         key += tuple(type(v) for k, v in sorted_items)
+                PREV, NEXT = 0, 1               # names of link fields
                 with lock:
-                    try:
-                        result = cache[key]
-                        cache_renew(key)    # record recent use of this key
+                    link = cache_get(key)
+                    if link is not None:
+                        link = cache[key]
+                        # record recent use of the key by moving it to the front of the list
+                        link_prev, link_next, key, result = link
+                        link_prev[NEXT] = link_next
+                        link_next[PREV] = link_prev
+                        last = root[PREV]
+                        last[NEXT] = root[PREV] = link
+                        link[PREV] = last
+                        link[NEXT] = root
                         hits += 1
                         return result
-                    except KeyError:
-                        pass
                 result = user_function(*args, **kwds)
                 with lock:
-                    cache[key] = result     # record recent use of this key
+                    last = root[PREV]
+                    link = [last, root, key, result]
+                    cache[key] = last[NEXT] = root[PREV] = link
+                    if len(cache) > maxsize:
+                        # purge least recently used cache entry
+                        old_prev, old_next, old_key, old_result = root[NEXT]
+                        root[NEXT] = old_next
+                        old_next[PREV] = root
+                        del cache[old_key]
                     misses += 1
-                    if len(cache) > maxsize:
-                        cache_popitem(0)    # purge least recently used cache entry
                 return result
 
         def cache_info():
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -26,6 +26,8 @@
 
 - Issue #11199: Fix the with urllib which hangs on particular ftp urls.
 
+- Improve the memory utilization and speed of functools.lru_cache.
+
 - Issue #14222: Use the new time.steady() function instead of time.time() for
   timeout in queue and threading modules to not be affected of system time
   update.

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list