[Python-ideas] Contiguous-array-based ordering for OrderedDict

Franklin? Lee leewangzhong+python at gmail.com
Tue Dec 15 10:00:20 EST 2015


On Tue, Dec 15, 2015 at 9:53 AM, Laura Creighton <lac at openend.se> wrote:
> In a message of Tue, 15 Dec 2015 04:09:55 -0500, "Franklin? Lee" writes:
>>I did. It is in a pastebin link in my original message, based on the
>>3.5.0 Python (not C) version of OrderedDict. I was hoping for guidance
>>on evaluating it. Maybe it wasn't seen because I used '-'*n to
>>separate it from my intro, or maybe pastebin is so disliked that
>>people couldn't see it. Here it is again: http://pastebin.com/LESRktJw
>
> That is the problem.  We absolutely do not want links to things
> like pastebin.  We want the code here, as part of the text.  5 years
> from now, when other people are scratching their heads saying,
> I wonder why Guido decided things the way he did, and whether that
> decision can and should be revisited, the first thing we will do is
> to go back and read all this discussion.  And if the discussion is
> about code we can no longer see, because the pastebin has expired,
> then we won't be able to learn much.
>
> Anything that matters needs to be part of the archived discussion.
>
> Laura

I feel similarly about information history, which is why I always set
"Expire: Never" when I use pastebin :D.

But alright. The rest of this message is the code as I had it when I
sent the original message.


from collections.abc import *
from reprlib import recursive_repr as _recursive_repr
from collections import OrderedDict

class _ListDictKeysView(KeysView):

    def __reversed__(self):
        yield from reversed(self._mapping)

class _ListDictItemsView(ItemsView):

    def __reversed__(self):
        for key in reversed(self._mapping):
            yield (key, self._mapping[key])

class _ListDictValuesView(ValuesView):

    def __reversed__(self):
        for key in reversed(self._mapping):
            yield self._mapping[key]

_sentinel = object()


class ListDict(dict):
    'Dictionary that remembers insertion order'
    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as regular dictionaries.

    def __init__(*args, **kwds):
        '''Initialize an ordered dictionary.  The signature is the same as
        regular dictionaries, but keyword arguments are not recommended because
        their insertion order is arbitrary.

        '''
        if not args:
            raise TypeError("descriptor '__init__' of 'ListDict' object "
                            "needs an argument")
        self, *args = args
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            # self.__root
            self.__list
        except AttributeError:
            self.__map = {}
            self.__list = []
            self.__size = 0
        self.__update(*args, **kwds)

    def __setitem__(self, key, value,
                    dict_setitem=dict.__setitem__, len=len):
        'od.__setitem__(i, y) <==> od[i]=y'
        # If it's a new key, we need to track it.
        if key not in self:
            self.__map[key] = len(self.__list)
            self.__list.append(key)
            self.__size += 1

        dict_setitem(self, key, value)

    def __delitem__(self, key, dict_delitem=dict.__delitem__,
                    sentinel=_sentinel):
        'od.__delitem__(y) <==> del od[y]'
        dict_delitem(self, key)

        # Remove the tracking for this item
        index = self.__map.pop(key)
        self.__list[index] = sentinel
        self.__size -= 1

        self.__compact()

    def __iter__(self, sentinel=_sentinel):
        'od.__iter__() <==> iter(od)'
        for key in self.__list:
            if key is not sentinel:
                yield key

    def __reversed__(self, sentinel=_sentinel, reversed=reversed):
        'od.__reversed__() <==> reversed(od)'
        for key in reversed(self.__list):
            if key is not sentinel:
                yield key

    def clear(self):
        'od.clear() -> None.  Remove all items from od.'
        self.__list.clear()
        self.__map.clear()
        self.__size = 0
        dict.clear(self) # dict.clear isn't cached?

    def popitem(self, last=True, sentinel=_sentinel,
                reversed=reversed, next=next):
        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
        Pairs are returned in LIFO order if last is true or FIFO order if false.

        '''
        if not self:
            raise KeyError('dictionary is empty')

        if last:
            lst = reversed(self.__list)
        else:
            lst = self.__list

        # Could use the key lookup to find this, but... meh.
        # Note that attempting to compact first might have helped.
        index, key = next((i, k)
                for i, k in enumerate(lst)
                if k is not sentinel)

        # We're calling dict.pop later, which won't handle
        # the metadata.
        del self.__map[key]
        self.__list[index] = sentinel
        self.__size -= 1

        self.__compact()

        value = dict.pop(self, key) # dict.pop isn't cached?
        return key, value

    def __compact(self, sentinel=_sentinel,
                  enumerate=enumerate, reversed=reversed):
        ''' Compact the order __list if necessary.
        '''
        # May need to use this a lot in the upcoming `else`.
        lst = self.__list
        if not lst:
            return

        if self.__size / len(lst) <= 0.5: #chosen at non-random
            # Implementation 1: list comprehension
            self.__list = [k for k in lst if k is not sentinel]

            # Implementation 2:
            #    If only `list` had a `remove_all` method...
            pass

            ''' Update all indices after a reordering.

            Should only be done when full (because it shouldn't be
necessary otherwise).
            '''
            inner_map = self.__map
            for index, key in enumerate(self.__list):
                inner_map[key] = index

        else:
            # Even if the list isn't mostly empty,
            # we can try to clear the back.
            # TODO: How can this be more efficient?

            # Note: There exists a non-sentinel because
            # otherwise, .__size/positive == 0 < positive.

            # # Implementation 1: Pop until it drops.
            # while lst[-1] is sentinel:
                # lst.pop()

            # Implementation 2: Count the number of sentinels at the end.
            emptys = next(i for i, k in enumerate(reversed(lst))
                            if k is not sentinel)
                     # guaranteed not to StopIteration since .__size > 0
            del lst[:-emptys] #safe even if 0

    def move_to_end(self, key, last=True, sentinel=_sentinel,
                    enumerate=enumerate, len=len):
        '''Move an existing element to the end (or beginning if last==False).

        Raises KeyError if the element does not exist.
        When last=True, acts like a fast version of self[key]=self.pop(key).
        '''

        index = self.__map[key]
        lst = self.__list
        if last:
            if index + 1 == len(lst):
                # already last
                # Not sure if this is the right path to optimize.
                # But I think redundant move_to_ends shouldn't
                # blow up the __list.
                return

            lst[index] = sentinel
            if lst[-1] is sentinel:
                # can just swap with last
                lst[-1] = key
                self.__map[key] = len(lst) - 1
            else:
                # append and maybe compact
                lst[index] = sentinel
                lst.append(key)
                self.__map[key] = len(lst) - 1
                self.__compact()
        else:
            # This is costly. But this shouldn't
            # be a common case anyway, right?
            # I mean, who repeatedly adds to the front
            # of an OrderedDict?
            # And this is basically the only costly
            # operation I'm adding.

            lst[index] = sentinel

            # Propagate forward from the front.
            for i, newkey in enumerate(lst):
                self.__map[key] = i
                lst[i], key = key, newkey
                if key is sentinel:
                    break

    def __sizeof__(self):
        sizeof = _sys.getsizeof
        size = sizeof(self.__dict__)            # instance dictionary
        size += sizeof(self.__map) * 2          # internal dict and
inherited dict

        size += sizeof(self.__list)
        size += sizeof(self.__size)
        return size

    update = __update = MutableMapping.update

    def keys(self):
        "D.keys() -> a set-like object providing a view on D's keys"
        return _ListDictKeysView(self)

    def items(self):
        "D.items() -> a set-like object providing a view on D's items"
        return _ListDictItemsView(self)

    def values(self):
        "D.values() -> an object providing a view on D's values"
        return _ListDictValuesView(self)

    __ne__ = MutableMapping.__ne__

    __marker = object()

    def pop(self, key, default=__marker):
        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
        value.  If key is not found, d is returned if given, otherwise KeyError
        is raised.

        '''
        if key in self:
            result = self[key]
            del self[key]
            return result
        if default is self.__marker:
            raise KeyError(key)
        return default

    def setdefault(self, key, default=None):
        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
        if key in self:
            return self[key]
        self[key] = default
        return default

    @_recursive_repr()
    def __repr__(self):
        'od.__repr__() <==> repr(od)'
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self.items()))

    def __reduce__(self):
        'Return state information for pickling'
        inst_dict = vars(self).copy()
        for k in vars(ListDict()):
            inst_dict.pop(k, None)
        return self.__class__, (), inst_dict or None, None, iter(self.items())

    def copy(self):
        'od.copy() -> a shallow copy of od'
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
        If not specified, the value defaults to None.

        '''
        self = cls()
        for key in iterable:
            self[key] = value
        return self

    def __eq__(self, other):
        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
        while comparison to a regular mapping is order-insensitive.

        '''
        if isinstance(other, ListDict):
            return dict.__eq__(self, other) and all(map(_eq, self, other))
        return dict.__eq__(self, other)


More information about the Python-ideas mailing list