[pypy-commit] pypy set-strategies: optimized intersection_multiple some more

l.diekmann noreply at buildbot.pypy.org
Thu Nov 10 13:52:37 CET 2011


Author: Lukas Diekmann <lukas.diekmann at uni-duesseldorf.de>
Branch: set-strategies
Changeset: r49267:031e88af4605
Date: 2011-11-07 14:07 +0100
http://bitbucket.org/pypy/pypy/changeset/031e88af4605/

Log:	optimized intersection_multiple some more

diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py
--- a/pypy/objspace/std/setobject.py
+++ b/pypy/objspace/std/setobject.py
@@ -120,14 +120,6 @@
         """ Keeps only those elements found in both sets, removing all other elements. """
         return self.strategy.intersect_update(self, w_other)
 
-    def intersect_multiple(self, others_w):
-        """ Returns a new set of all elements that exist in all of the given iterables."""
-        return self.strategy.intersect_multiple(self, others_w)
-
-    def intersect_multiple_update(self, others_w):
-        """ Same as intersect_multiple but overwrites this set with the result. """
-        self.strategy.intersect_multiple_update(self, others_w)
-
     def issubset(self, w_other):
         """ Checks wether this set is a subset of w_other. W_other must be a set. """
         return self.strategy.issubset(self, w_other)
@@ -247,12 +239,6 @@
     def intersect_update(self, w_set, w_other):
         raise NotImplementedError
 
-    def intersect_multiple(self, w_set, others_w):
-        raise NotImplementedError
-
-    def intersect_multiple_update(self, w_set, others_w):
-        raise NotImplementedError
-
     def issubset(self, w_set, w_other):
         raise NotImplementedError
 
@@ -353,14 +339,6 @@
         self.check_for_unhashable_objects(w_other)
         return w_set.copy_real()
 
-    def intersect_multiple(self, w_set, others_w):
-        self.intersect_multiple_update(w_set, others_w)
-        return w_set.copy_real()
-
-    def intersect_multiple_update(self, w_set, others_w):
-        for w_other in others_w:
-            self.check_for_unhashable_objects(w_other)
-
     def isdisjoint(self, w_set, w_other):
         return True
 
@@ -625,45 +603,6 @@
         w_set.sstorage = storage
         return w_set
 
-    def intersect_multiple(self, w_set, others_w):
-        #XXX find smarter implementations
-        result = w_set.copy_real()
-
-        # find smallest set in others_w to reduce comparisons
-        # XXX maybe we can do this smarter
-        if len(others_w) > 1:
-            startset, startlength = None, 0
-            for w_other in others_w:
-                try:
-                    length = self.space.len(w_other)
-                except OperationError, e:
-                    if not e.match(self.space, self.space.w_TypeError):
-                        raise
-                    continue
-
-                if startset is None or self.space.is_true(self.space.lt(length, startlength)):
-                    startset = w_other
-                    startlength = length
-
-            others_w[others_w.index(startset)] = others_w[0]
-            others_w[0] = startset
-
-        for w_other in others_w:
-            if result.length() == 0:
-                break
-            if isinstance(w_other, W_BaseSetObject):
-                # optimization only
-                result.intersect_update(w_other)
-            else:
-                w_other_as_set = w_set._newobj(self.space, w_other)
-                result.intersect_update(w_other_as_set)
-        return result
-
-    def intersect_multiple_update(self, w_set, others_w):
-        result = self.intersect_multiple(w_set, others_w)
-        w_set.strategy = result.strategy
-        w_set.sstorage = result.sstorage
-
     def _issubset_unwrapped(self, w_set, w_other):
         d_other = self.unerase(w_other.sstorage)
         for item in self.unerase(w_set.sstorage):
@@ -1270,7 +1209,36 @@
 and__Frozenset_Frozenset = and__Set_Set
 
 def _intersection_multiple(space, w_left, others_w):
-    return w_left.intersect_multiple(others_w)
+    """ Returns a new set with the elements common to w_left and all of others_w. """
+    # work on a copy so that the caller's argument list is not modified
+    others_w = others_w + [w_left]
+    # start from the smallest operand with a known length to reduce comparisons;
+    # -1 means "no length seen yet", index 0 is the fallback if none has one
+    startindex, startlength = 0, -1
+    for i in range(len(others_w)):
+        w_other = others_w[i]
+        try:
+            length = space.int_w(space.len(w_other))
+        except OperationError, e:
+            if not e.match(space, space.w_TypeError):
+                raise
+            continue # len() raised TypeError: not a candidate for the start set
+        if startlength == -1 or length < startlength:
+            startindex = i
+            startlength = length
+
+    others_w[startindex], others_w[0] = others_w[0], others_w[startindex]
+
+    result = w_left._newobj(space, others_w[0])
+    for i in range(1, len(others_w)):
+        w_other = others_w[i]
+        if isinstance(w_other, W_BaseSetObject):
+            # optimization only
+            result.intersect_update(w_other)
+        else:
+            w_other_as_set = w_left._newobj(space, w_other)
+            result.intersect_update(w_other_as_set)
+    return result
 
 def set_intersection__Set(space, w_left, others_w):
     if len(others_w) == 0:
@@ -1281,7 +1249,9 @@
 frozenset_intersection__Frozenset = set_intersection__Set
 
 def set_intersection_update__Set(space, w_left, others_w):
-    w_left.intersect_multiple_update(others_w)
+    # compute the intersection, then adopt the result's strategy and storage in place
+    w_intersection = set_intersection__Set(space, w_left, others_w)
+    w_left.strategy, w_left.sstorage = w_intersection.strategy, w_intersection.sstorage
     return
 
 def inplace_and__Set_Set(space, w_left, w_other):
diff --git a/pypy/objspace/std/test/test_setobject.py b/pypy/objspace/std/test/test_setobject.py
--- a/pypy/objspace/std/test/test_setobject.py
+++ b/pypy/objspace/std/test/test_setobject.py
@@ -665,7 +665,7 @@
         assert e.isdisjoint(x) == True
         assert x.isdisjoint(e) == True
 
-    def test_empty_typeerror(self):
+    def test_empty_unhashable(self):
         s = set()
         raises(TypeError, s.difference, [[]])
         raises(TypeError, s.difference_update, [[]])


More information about the pypy-commit mailing list