Ordered Sets

pataphor pataphor at gmail.com
Mon Mar 30 18:27:11 CEST 2009


On Mon, 30 Mar 2009 03:30:04 -0500
Nick Craig-Wood <nick at craig-wood.com> wrote:

> >>> class Node(object):
> ...     __slots__ = ["prev", "next", "this"]
> ...     def __init__(self, prev, next, this):
> ...         self.prev = prev
> ...         self.next = next
> ...         self.this = this

[...]

> So the Node class actually takes less memory 38 Mbytes vs 53 Mbytes
> for the list.
 
Well OK, but if we're going to start optimizing, it seems we don't need
to store the key itself in the Node or the list. Here's a version of my
script that stores only prev and next. Another twist is that it is now
possible to arbitrarily set the starting point for the iteration. (Just
in case someone needed a cue for further ranting)

P.

import collections

class OrderedSet(collections.MutableSet):
    'Set that remembers the order elements were added'
    # Big-O running times for all methods are the same as for regular
sets. 
    def __init__(self, iterable=None):
        self.links = {}                     # key --> [prev,next]
        if iterable is not None:
            self |= iterable

    def __len__(self):
        return len(self.links)

    def __contains__(self, key):
        return key in self.links

    def add(self, key):
        if not self:
            self.start = key
            self.links = {key: [key,key]}
        elif key not in self.links:
            links = self.links
            start = self.start
            prev, next  = links[start]
            links[prev][1] = key
            links[start][0] = key
            links[key] = [prev,start]
    
    def discard(self, key):
        links = self.links
        if key in links:        
            prev,next = links.pop(key)
            if self.links:
                links[prev][1] = next
                links[next][0] = prev
                if key  == self.start:
                    self.start = next
            else:
                del self.start

    def __iter__(self):
        links = self.links
        start = self.start
        if links:
            yield start
            prev,next = links[start]
            while next != start:
                yield next
                prev,next = links[next]
            
    def __reversed__(self):
        links = self.links
        start = self.start
        if links:
            prev,next = links[start]
            while prev != start:
                yield prev
                prev,next = self.links[prev]
            yield start

    def pop(self, last=True):
        if not self:
            raise KeyError('set is empty')
        key = next(reversed(self)) if last else next(iter(self))
        self.discard(key)
        return key

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and list(self) == list(other)
        return not self.isdisjoint(other)
        
def test():
    D = OrderedSet('abcd')
    R = OrderedSet()
    for x in list(D):
        print(D,R)
        R.add(D.pop(last = False))
    print(D,R)
    print()
    R.start = 'c'
    print(list(reversed(R)))
    
if __name__ == '__main__':
    test()



More information about the Python-list mailing list