[Python-ideas] Contiguous-array-based ordering for OrderedDict
Franklin? Lee
leewangzhong+python at gmail.com
Tue Dec 15 10:00:20 EST 2015
On Tue, Dec 15, 2015 at 9:53 AM, Laura Creighton <lac at openend.se> wrote:
> In a message of Tue, 15 Dec 2015 04:09:55 -0500, "Franklin? Lee" writes:
>>I did. It is in a pastebin link in my original message, based on the
>>3.5.0 Python (not C) version of OrderedDict. I was hoping for guidance
>>on evaluating it. Maybe it wasn't seen because I used '-'*n to
>>separate it from my intro, or maybe pastebin is so disliked that
>>people couldn't see it. Here it is again: http://pastebin.com/LESRktJw
>
> That is the problem. We absolutely do not want links to things
> like pastebin. We want the code here, as part of the text. 5 years
> from now, when other people are scratching their heads saying,
> I wonder why Guido decided things the way he did, and whether that
> decision can and should be revisited, the first thing we will do is
> to go back and read all this discussion. And if the discussion is
> about code we can no longer see, because the pastebin has expired,
> then we won't be able to learn much.
>
> Anything that matters needs to be part of the archived discussion.
>
> Laura
I feel similarly about information history, which is why I always set
"Expire: Never" when I use pastebin :D.
But alright. The rest of this message is the code as I had it when I
sent the original message.
from collections.abc import *
from reprlib import recursive_repr as _recursive_repr
from collections import OrderedDict
class _ListDictKeysView(KeysView):
def __reversed__(self):
yield from reversed(self._mapping)
class _ListDictItemsView(ItemsView):
def __reversed__(self):
for key in reversed(self._mapping):
yield (key, self._mapping[key])
class _ListDictValuesView(ValuesView):
def __reversed__(self):
for key in reversed(self._mapping):
yield self._mapping[key]
_sentinel = object()
class ListDict(dict):
'Dictionary that remembers insertion order'
# An inherited dict maps keys to values.
# The inherited dict provides __getitem__, __len__, __contains__, and get.
# The remaining methods are order-aware.
# Big-O running times for all methods are the same as regular dictionaries.
def __init__(*args, **kwds):
'''Initialize an ordered dictionary. The signature is the same as
regular dictionaries, but keyword arguments are not recommended because
their insertion order is arbitrary.
'''
if not args:
raise TypeError("descriptor '__init__' of 'ListDict' object "
"needs an argument")
self, *args = args
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
try:
# self.__root
self.__list
except AttributeError:
self.__map = {}
self.__list = []
self.__size = 0
self.__update(*args, **kwds)
def __setitem__(self, key, value,
dict_setitem=dict.__setitem__, len=len):
'od.__setitem__(i, y) <==> od[i]=y'
# If it's a new key, we need to track it.
if key not in self:
self.__map[key] = len(self.__list)
self.__list.append(key)
self.__size += 1
dict_setitem(self, key, value)
def __delitem__(self, key, dict_delitem=dict.__delitem__,
sentinel=_sentinel):
'od.__delitem__(y) <==> del od[y]'
dict_delitem(self, key)
# Remove the tracking for this item
index = self.__map.pop(key)
self.__list[index] = sentinel
self.__size -= 1
self.__compact()
def __iter__(self, sentinel=_sentinel):
'od.__iter__() <==> iter(od)'
for key in self.__list:
if key is not sentinel:
yield key
def __reversed__(self, sentinel=_sentinel, reversed=reversed):
'od.__reversed__() <==> reversed(od)'
for key in reversed(self.__list):
if key is not sentinel:
yield key
def clear(self):
'od.clear() -> None. Remove all items from od.'
self.__list.clear()
self.__map.clear()
self.__size = 0
dict.clear(self) # dict.clear isn't cached?
def popitem(self, last=True, sentinel=_sentinel,
reversed=reversed, next=next):
'''od.popitem() -> (k, v), return and remove a (key, value) pair.
Pairs are returned in LIFO order if last is true or FIFO order if false.
'''
if not self:
raise KeyError('dictionary is empty')
if last:
lst = reversed(self.__list)
else:
lst = self.__list
# Could use the key lookup to find this, but... meh.
# Note that attempting to compact first might have helped.
index, key = next((i, k)
for i, k in enumerate(lst)
if k is not sentinel)
# We're calling dict.pop later, which won't handle
# the metadata.
del self.__map[key]
self.__list[index] = sentinel
self.__size -= 1
self.__compact()
value = dict.pop(self, key) # dict.pop isn't cached?
return key, value
def __compact(self, sentinel=_sentinel,
enumerate=enumerate, reversed=reversed):
''' Compact the order __list if necessary.
'''
# May need to use this a lot in the upcoming `else`.
lst = self.__list
if not lst:
return
if self.__size / len(lst) <= 0.5: #chosen at non-random
# Implementation 1: list comprehension
self.__list = [k for k in lst if k is not sentinel]
# Implementation 2:
# If only `list` had a `remove_all` method...
pass
''' Update all indices after a reordering.
Should only be done when full (because it shouldn't be
necessary otherwise).
'''
inner_map = self.__map
for index, key in enumerate(self.__list):
inner_map[key] = index
else:
# Even if the list isn't mostly empty,
# we can try to clear the back.
# TODO: How can this be more efficient?
# Note: There exists a non-sentinel because
# otherwise, .__size/positive == 0 < positive.
# # Implementation 1: Pop until it drops.
# while lst[-1] is sentinel:
# lst.pop()
# Implementation 2: Count the number of sentinels at the end.
emptys = next(i for i, k in enumerate(reversed(lst))
if k is not sentinel)
# guaranteed not to StopIteration since .__size > 0
del lst[:-emptys] #safe even if 0
def move_to_end(self, key, last=True, sentinel=_sentinel,
enumerate=enumerate, len=len):
'''Move an existing element to the end (or beginning if last==False).
Raises KeyError if the element does not exist.
When last=True, acts like a fast version of self[key]=self.pop(key).
'''
index = self.__map[key]
lst = self.__list
if last:
if index + 1 == len(lst):
# already last
# Not sure if this is the right path to optimize.
# But I think redundant move_to_ends shouldn't
# blow up the __list.
return
lst[index] = sentinel
if lst[-1] is sentinel:
# can just swap with last
lst[-1] = key
self.__map[key] = len(lst) - 1
else:
# append and maybe compact
lst[index] = sentinel
lst.append(key)
self.__map[key] = len(lst) - 1
self.__compact()
else:
# This is costly. But this shouldn't
# be a common case anyway, right?
# I mean, who repeatedly adds to the front
# of an OrderedDict?
# And this is basically the only costly
# operation I'm adding.
lst[index] = sentinel
# Propagate forward from the front.
for i, newkey in enumerate(lst):
self.__map[key] = i
lst[i], key = key, newkey
if key is sentinel:
break
def __sizeof__(self):
sizeof = _sys.getsizeof
size = sizeof(self.__dict__) # instance dictionary
size += sizeof(self.__map) * 2 # internal dict and
inherited dict
size += sizeof(self.__list)
size += sizeof(self.__size)
return size
update = __update = MutableMapping.update
def keys(self):
"D.keys() -> a set-like object providing a view on D's keys"
return _ListDictKeysView(self)
def items(self):
"D.items() -> a set-like object providing a view on D's items"
return _ListDictItemsView(self)
def values(self):
"D.values() -> an object providing a view on D's values"
return _ListDictValuesView(self)
__ne__ = MutableMapping.__ne__
__marker = object()
def pop(self, key, default=__marker):
'''od.pop(k[,d]) -> v, remove specified key and return the corresponding
value. If key is not found, d is returned if given, otherwise KeyError
is raised.
'''
if key in self:
result = self[key]
del self[key]
return result
if default is self.__marker:
raise KeyError(key)
return default
def setdefault(self, key, default=None):
'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
if key in self:
return self[key]
self[key] = default
return default
@_recursive_repr()
def __repr__(self):
'od.__repr__() <==> repr(od)'
if not self:
return '%s()' % (self.__class__.__name__,)
return '%s(%r)' % (self.__class__.__name__, list(self.items()))
def __reduce__(self):
'Return state information for pickling'
inst_dict = vars(self).copy()
for k in vars(ListDict()):
inst_dict.pop(k, None)
return self.__class__, (), inst_dict or None, None, iter(self.items())
def copy(self):
'od.copy() -> a shallow copy of od'
return self.__class__(self)
@classmethod
def fromkeys(cls, iterable, value=None):
'''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
If not specified, the value defaults to None.
'''
self = cls()
for key in iterable:
self[key] = value
return self
def __eq__(self, other):
'''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
while comparison to a regular mapping is order-insensitive.
'''
if isinstance(other, ListDict):
return dict.__eq__(self, other) and all(map(_eq, self, other))
return dict.__eq__(self, other)
More information about the Python-ideas
mailing list