LRU cache
Dino
dino at no.spam.ar
Fri Feb 17 14:09:15 EST 2023
Thank you, Gerard. I really appreciate your help
Dino
On 2/16/2023 9:40 PM, Weatherby,Gerard wrote:
> I think this does the trick:
>
> https://gist.github.com/Gerardwx/c60d200b4db8e7864cb3342dd19d41c9
>
>
> #!/usr/bin/env python3
> import collections
> import random
> from typing import Hashable, Any, Optional, Dict, Tuple
>
>
> class LruCache:
> """Dictionary like storage of most recently inserted values"""
>
> def __init__(self, size: int = 1000):
> """:param size number of cached entries"""
> assert isinstance(size, int)
> self.size = size
> self.insert_counter = 0
> self.oldest = 0
> self._data : Dict[Hashable,Tuple[Any,int]]= {} # store values and age index
> self._lru: Dict[int, Hashable] = {} # age counter dictionary
>
> def insert(self, key: Hashable, value: Any) -> None:
> """Insert into dictionary"""
> existing = self._data.get(key, None)
> self._data[key] = (value, self.insert_counter)
> self._lru[self.insert_counter] = key
> if existing is not None:
> self._lru.pop(existing[1], None) # remove old counter value, if it exists
> self.insert_counter += 1
> if (sz := len(self._data)) > self.size: # is cache full?
> assert sz == self.size + 1
> while (
> key := self._lru.get(self.oldest, None)) is None: # index may not be present, if value was reinserted
> self.oldest += 1
> del self._data[key] # remove oldest key / value from dictionary
> del self._lru[self.oldest]
> self.oldest += 1 # next oldest index
> assert len(self._lru) == len(self._data)
>
> def get(self, key: Hashable) -> Optional[Any]:
> """Get value or return None if not in cache"""
> if (tpl := self._data.get(key, None)) is not None:
> return tpl[0]
> return None
>
>
> if __name__ == "__main__":
> CACHE_SIZE = 1000
> TEST_SIZE = 1_000_000
> cache = LruCache(size=CACHE_SIZE)
>
> all = []
> for i in range(TEST_SIZE):
> all.append(random.randint(-5000, 5000))
>
> summary = collections.defaultdict(int)
> for value in all:
> cache.insert(value, value * value)
> summary[value] += 1
> smallest = TEST_SIZE
> largest = -TEST_SIZE
> total = 0
> for value, count in summary.items():
> smallest = min(smallest, count)
> largest = max(largest, count)
> total += count
> avg = total / len(summary)
> print(f"{len(summary)} values occurrences range from {smallest} to {largest}, average {avg:.1f}")
>
> recent = set() # recent most recent entries
> for i in range(len(all) - 1, -1, -1): # loop backwards to get the most recent entries
> value = all[i]
> if len(recent) < CACHE_SIZE:
> recent.add(value)
> if value in recent:
> if (r := cache.get(value)) != value * value:
> raise ValueError(f"Cache missing recent {value} {r}")
> else:
> if cache.get(value) != None:
> raise ValueError(f"Cache includes old {value}")
>
> From: Python-list <python-list-bounces+gweatherby=uchc.edu at python.org> on behalf of Dino <dino at no.spam.ar>
> Date: Wednesday, February 15, 2023 at 3:07 PM
> To: python-list at python.org <python-list at python.org>
> Subject: Re: LRU cache
> *** Attention: This is an external email. Use caution responding, opening attachments or clicking on links. ***
>
> Thank you Mats, Avi and Chris
>
> btw, functools.lru_cache seems rather different from what I need, but
> maybe I am missing something. I'll look closer.
>
> On 2/14/2023 7:36 PM, Mats Wichmann wrote:
>> On 2/14/23 15:07, Dino wrote:
>>>
>
> --
> https://urldefense.com/v3/__https://mail.python.org/mailman/listinfo/python-list__;!!Cn_UX_p3!jb3Gr2BFAPLJ2YuI5rFdJUtalqWcijhxHAfdmCI3afnLFDdcekALxDYAQwpE1L_JlJBBJ-BB3BuLdoSE$<https://urldefense.com/v3/__https:/mail.python.org/mailman/listinfo/python-list__;!!Cn_UX_p3!jb3Gr2BFAPLJ2YuI5rFdJUtalqWcijhxHAfdmCI3afnLFDdcekALxDYAQwpE1L_JlJBBJ-BB3BuLdoSE$>
More information about the Python-list
mailing list