LRU cache
Weatherby,Gerard
gweatherby at uchc.edu
Thu Feb 16 21:40:21 EST 2023
I think this does the trick:
https://gist.github.com/Gerardwx/c60d200b4db8e7864cb3342dd19d41c9
#!/usr/bin/env python3
import collections
import random
from typing import Hashable, Any, Optional, Dict, Tuple
class LruCache:
"""Dictionary like storage of most recently inserted values"""
def __init__(self, size: int = 1000):
""":param size number of cached entries"""
assert isinstance(size, int)
self.size = size
self.insert_counter = 0
self.oldest = 0
self._data : Dict[Hashable,Tuple[Any,int]]= {} # store values and age index
self._lru: Dict[int, Hashable] = {} # age counter dictionary
def insert(self, key: Hashable, value: Any) -> None:
"""Insert into dictionary"""
existing = self._data.get(key, None)
self._data[key] = (value, self.insert_counter)
self._lru[self.insert_counter] = key
if existing is not None:
self._lru.pop(existing[1], None) # remove old counter value, if it exists
self.insert_counter += 1
if (sz := len(self._data)) > self.size: # is cache full?
assert sz == self.size + 1
while (
key := self._lru.get(self.oldest, None)) is None: # index may not be present, if value was reinserted
self.oldest += 1
del self._data[key] # remove oldest key / value from dictionary
del self._lru[self.oldest]
self.oldest += 1 # next oldest index
assert len(self._lru) == len(self._data)
def get(self, key: Hashable) -> Optional[Any]:
"""Get value or return None if not in cache"""
if (tpl := self._data.get(key, None)) is not None:
return tpl[0]
return None
if __name__ == "__main__":
CACHE_SIZE = 1000
TEST_SIZE = 1_000_000
cache = LruCache(size=CACHE_SIZE)
all = []
for i in range(TEST_SIZE):
all.append(random.randint(-5000, 5000))
summary = collections.defaultdict(int)
for value in all:
cache.insert(value, value * value)
summary[value] += 1
smallest = TEST_SIZE
largest = -TEST_SIZE
total = 0
for value, count in summary.items():
smallest = min(smallest, count)
largest = max(largest, count)
total += count
avg = total / len(summary)
print(f"{len(summary)} values occurrences range from {smallest} to {largest}, average {avg:.1f}")
recent = set() # recent most recent entries
for i in range(len(all) - 1, -1, -1): # loop backwards to get the most recent entries
value = all[i]
if len(recent) < CACHE_SIZE:
recent.add(value)
if value in recent:
if (r := cache.get(value)) != value * value:
raise ValueError(f"Cache missing recent {value} {r}")
else:
if cache.get(value) != None:
raise ValueError(f"Cache includes old {value}")
From: Python-list <python-list-bounces+gweatherby=uchc.edu at python.org> on behalf of Dino <dino at no.spam.ar>
Date: Wednesday, February 15, 2023 at 3:07 PM
To: python-list at python.org <python-list at python.org>
Subject: Re: LRU cache
*** Attention: This is an external email. Use caution responding, opening attachments or clicking on links. ***
Thank you Mats, Avi and Chris
btw, functools.lru_cache seems rather different from what I need, but
maybe I am missing something. I'll look closer.
On 2/14/2023 7:36 PM, Mats Wichmann wrote:
> On 2/14/23 15:07, Dino wrote:
>>
--
https://urldefense.com/v3/__https://mail.python.org/mailman/listinfo/python-list__;!!Cn_UX_p3!jb3Gr2BFAPLJ2YuI5rFdJUtalqWcijhxHAfdmCI3afnLFDdcekALxDYAQwpE1L_JlJBBJ-BB3BuLdoSE$<https://urldefense.com/v3/__https:/mail.python.org/mailman/listinfo/python-list__;!!Cn_UX_p3!jb3Gr2BFAPLJ2YuI5rFdJUtalqWcijhxHAfdmCI3afnLFDdcekALxDYAQwpE1L_JlJBBJ-BB3BuLdoSE$>
More information about the Python-list
mailing list