LRU cache

Weatherby,Gerard gweatherby at uchc.edu
Thu Feb 16 21:40:21 EST 2023


I think this does the trick:

https://gist.github.com/Gerardwx/c60d200b4db8e7864cb3342dd19d41c9


#!/usr/bin/env python3
import collections
import random
from typing import Hashable, Any, Optional, Dict, Tuple


class LruCache:
    """Dictionary like storage of most recently inserted values"""

    def __init__(self, size: int = 1000):
        """:param size number of cached entries"""
        assert isinstance(size, int)
        self.size = size
        self.insert_counter = 0
       self.oldest = 0
        self._data : Dict[Hashable,Tuple[Any,int]]= {} # store values and age index
        self._lru: Dict[int, Hashable] = {} # age counter dictionary

    def insert(self, key: Hashable, value: Any) -> None:
        """Insert into dictionary"""
        existing = self._data.get(key, None)
        self._data[key] = (value, self.insert_counter)
        self._lru[self.insert_counter] = key
        if existing is not None:
            self._lru.pop(existing[1], None)  # remove old counter value, if it exists
        self.insert_counter += 1
        if (sz := len(self._data)) > self.size:  # is cache full?
            assert sz == self.size + 1
            while (
            key := self._lru.get(self.oldest, None)) is None:  # index may not be present, if value was reinserted
                self.oldest += 1
            del self._data[key]  # remove oldest key / value from dictionary
            del self._lru[self.oldest]
            self.oldest += 1  # next oldest index
        assert len(self._lru) == len(self._data)

    def get(self, key: Hashable) -> Optional[Any]:
        """Get value or return None if not in cache"""
        if (tpl := self._data.get(key, None)) is not None:
            return tpl[0]
        return None


if __name__ == "__main__":
    CACHE_SIZE = 1000
    TEST_SIZE = 1_000_000
    cache = LruCache(size=CACHE_SIZE)

    all = []
    for i in range(TEST_SIZE):
        all.append(random.randint(-5000, 5000))

    summary = collections.defaultdict(int)
    for value in all:
        cache.insert(value, value * value)
        summary[value] += 1
    smallest = TEST_SIZE
    largest = -TEST_SIZE
    total = 0
    for value, count in summary.items():
        smallest = min(smallest, count)
        largest = max(largest, count)
        total += count
    avg = total / len(summary)
    print(f"{len(summary)} values occurrences range from {smallest} to {largest}, average {avg:.1f}")

    recent = set()  # recent most recent entries
    for i in range(len(all) - 1, -1, -1):  # loop backwards to get the most recent entries
        value = all[i]
        if len(recent) < CACHE_SIZE:
            recent.add(value)
        if value in recent:
            if (r := cache.get(value)) != value * value:
                raise ValueError(f"Cache missing recent {value} {r}")
        else:
            if cache.get(value) != None:
                raise ValueError(f"Cache includes old {value}")

From: Python-list <python-list-bounces+gweatherby=uchc.edu at python.org> on behalf of Dino <dino at no.spam.ar>
Date: Wednesday, February 15, 2023 at 3:07 PM
To: python-list at python.org <python-list at python.org>
Subject: Re: LRU cache
*** Attention: This is an external email. Use caution responding, opening attachments or clicking on links. ***

Thank you Mats, Avi and Chris

btw, functools.lru_cache seems rather different from what I need, but
maybe I am missing something. I'll look closer.

On 2/14/2023 7:36 PM, Mats Wichmann wrote:
> On 2/14/23 15:07, Dino wrote:
>>

--
https://urldefense.com/v3/__https://mail.python.org/mailman/listinfo/python-list__;!!Cn_UX_p3!jb3Gr2BFAPLJ2YuI5rFdJUtalqWcijhxHAfdmCI3afnLFDdcekALxDYAQwpE1L_JlJBBJ-BB3BuLdoSE$<https://urldefense.com/v3/__https:/mail.python.org/mailman/listinfo/python-list__;!!Cn_UX_p3!jb3Gr2BFAPLJ2YuI5rFdJUtalqWcijhxHAfdmCI3afnLFDdcekALxDYAQwpE1L_JlJBBJ-BB3BuLdoSE$>


More information about the Python-list mailing list