[pypy-svn] r23430 - in pypy/dist/pypy/tool/algo: . test
arigo at codespeak.net
arigo at codespeak.net
Fri Feb 17 00:20:05 CET 2006
Author: arigo
Date: Fri Feb 17 00:20:02 2006
New Revision: 23430
Added:
pypy/dist/pypy/tool/algo/BB.sml
pypy/dist/pypy/tool/algo/fset.py (contents, props changed)
pypy/dist/pypy/tool/algo/test/test_fset.py (contents, props changed)
Log:
Check-in for reference. We'll see if using this really makes
pypy.jit.hintannotator faster on large inputs, or if we need
to be more clever.
Added: pypy/dist/pypy/tool/algo/BB.sml
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/tool/algo/BB.sml Fri Feb 17 00:20:02 2006
@@ -0,0 +1,287 @@
+(*
+ Copyright 1992-1996 Stephen Adams.
+
+ This software may be used freely provided that:
+ 1. This copyright notice is attached to any copy, derived work,
+ or work including all or part of this software.
+ 2. Any derived work must contain a prominent notice stating that
+ it has been altered from the original.
+
+*)
+
+(* Address: Electronics & Computer Science
+ University of Southampton
+ Southampton SO9 5NH
+ Great Britain
+ E-mail: sra at ecs.soton.ac.uk
+
+ Comments:
+
+ 1. The implementation is based on Binary search trees of Bounded
+ Balance, similar to Nievergelt & Reingold, SIAM J. Computing
+ 2(1), March 1973. The main advantage of these trees is that
+ they keep the size of the tree in the node, giving a constant
+ time size operation.
+
+ 2. The bounded balance criterion is simpler than N&R's alpha.
+ Simply, one subtree must not have more than `weight' times as
+ many elements as the opposite subtree. Rebalancing is
+ guaranteed to reinstate the criterion for weight>2.23, but
+ the occasional incorrect behaviour for weight=2 is not
+ detrimental to performance.
+
+ 3. There are two implementations of union. The default,
+ hedge_union, is much more complex and usually 20% faster. I
+ am not sure that the performance increase warrants the
+ complexity (and time it took to write), but I am leaving it
+ in for the competition. It is derived from the original
+ union by replacing the split_lt(gt) operations with a lazy
+ version. The `obvious' version is called old_union.
+*)
+
+structure B (*: INTSET*) =
+ struct
+
+ local
+
+ type T = int (* element type stored in the set *)
+ val lt : T*T->bool = op < (* strict ordering used for every comparison below *)
+
+ (* weight is a parameter to the rebalancing process. *)
+ val weight:int = 3
+
+ datatype Set = E | T of T * int * Set * Set (* E = empty; T(value, size, left, right) *)
+
+ fun size E = 0 (* the empty set has no elements *)
+ | size (T(_,n,_,_)) = n (* a node carries the element count of its subtree in its second field *)
+
+ (*fun N(v,l,r) = T(v,1+size(l)+size(r),l,r)*)
+ fun N(v,E, E) = T(v,1,E,E) (* N builds a node and fills in its size field; it performs no rebalancing *)
+ | N(v,E, r as T(_,n,_,_)) = T(v,n+1,E,r)
+ | N(v,l as T(_,n,_,_),E) = T(v,n+1,l,E)
+ | N(v,l as T(_,n,_,_),r as T(_,m,_,_)) = T(v,n+m+1,l,r) (* the four cases spell out the commented-out one-liner above *)
+
+ fun single_L (a,x,T(b,_,y,z)) = N(b,N(a,x,y),z) (* single left rotation: the right child's value b becomes the new root *)
+ | single_L _ = raise Match (* the right subtree is empty, so there is nothing to rotate *)
+ fun single_R (b,T(a,_,x,y),z) = N(a,x,N(b,y,z)) (* single right rotation: the left child's value a becomes the new root *)
+ | single_R _ = raise Match (* the left subtree is empty, so there is nothing to rotate *)
+ fun double_L (a,w,T(c,_,T(b,_,x,y),z)) = N(b,N(a,w,x),N(c,y,z)) (* double left rotation: b, the root of the right child's left subtree, becomes the new root *)
+ | double_L _ = raise Match (* requires a right child that itself has a left child *)
+ fun double_R (c,T(a,_,w,T(b,_,x,y)),z) = N(b,N(a,w,x),N(c,y,z)) (* double right rotation: b, the root of the left child's right subtree, becomes the new root *)
+ | double_R _ = raise Match (* requires a left child that itself has a right child *)
+
+ fun T' (v,E,E) = T(v,1,E,E) (* T' is the rebalancing constructor: builds a node from (value,left,right), rotating when one side is too heavy *)
+ | T' (v,E,r as T(_,_,E,E)) = T(v,2,E,r) (* a single leaf on one side needs no rotation *)
+ | T' (v,l as T(_,_,E,E),E) = T(v,2,l,E)
+
+ | T' (p as (_,E,T(_,_,T(_,_,_,_),E))) = double_L p (* empty left; right child has only a left child *)
+ | T' (p as (_,T(_,_,E,T(_,_,_,_)),E)) = double_R p (* mirror image of the case above *)
+
+ (* these cases almost never happen with small weight*)
+ | T' (p as (_,E,T(_,_,T(_,ln,_,_),T(_,rn,_,_)))) =
+ if ln<rn then single_L p else double_L p
+ | T' (p as (_,T(_,_,T(_,ln,_,_),T(_,rn,_,_)),E)) =
+ if ln>rn then single_R p else double_R p
+
+ | T' (p as (_,E,T(_,_,E,_))) = single_L p
+ | T' (p as (_,T(_,_,_,E),E)) = single_R p
+
+ | T' (p as (v,l as T(lv,ln,ll,lr),r as T(rv,rn,rl,rr))) = (* general case: both subtrees are non-empty *)
+ if rn>=weight*ln then (*right is too big*)
+ let val rln = size rl
+ val rrn = size rr
+ in
+ if rln < rrn then single_L p else double_L p
+ end
+
+ else if ln>=weight*rn then (*left is too big*)
+ let val lln = size ll
+ val lrn = size lr
+ in
+ if lrn < lln then single_R p else double_R p
+ end
+
+ else
+ T(v,ln+rn+1,l,r) (* neither side exceeds the weight bound: build the node directly *)
+
+ fun add (E,x) = T(x,1,E,E) (* add inserts x into a set, rebuilding the search path with T' so it is rebalanced *)
+ | add (set as T(v,_,l,r),x) =
+ if lt(x,v) then T'(v,add(l,x),r)
+ else if lt(v,x) then T'(v,l,add(r,x))
+ else set (* x is already present: the original set is returned unchanged *)
+
+ fun concat3 (E,v,r) = add(r,v) (* concat3 joins l, v and r into one tree; callers in this file pass l below v and r above v *)
+ | concat3 (l,v,E) = add(l,v)
+ | concat3 (l as T(v1,n1,l1,r1), v, r as T(v2,n2,l2,r2)) =
+ if weight*n1 < n2 then T'(v2,concat3(l,v,l2),r2) (* r is much larger: push l and v down r's left spine *)
+ else if weight*n2 < n1 then T'(v1,l1,concat3(r1,v,r)) (* l is much larger: push v and r down l's right spine *)
+ else N(v,l,r) (* sizes are comparable: v can be the root directly *)
+
+ fun split_lt (E,x) = E (* split_lt returns the set of elements strictly less than x *)
+ | split_lt (t as T(v,_,l,r),x) =
+ if lt(x,v) then split_lt(l,x)
+ else if lt(v,x) then concat3(l,v,split_lt(r,x)) (* v and all of l qualify; only r still needs splitting *)
+ else l (* v equals x: exactly the left subtree is smaller *)
+
+ fun split_gt (E,x) = E (* split_gt returns the set of elements strictly greater than x *)
+ | split_gt (t as T(v,_,l,r),x) =
+ if lt(v,x) then split_gt(r,x)
+ else if lt(x,v) then concat3(split_gt(l,x),v,r) (* v and all of r qualify; only l still needs splitting *)
+ else r (* v equals x: exactly the right subtree is greater *)
+
+ fun min (T(v,_,E,_)) = v (* min returns the leftmost value of a non-empty tree *)
+ | min (T(v,_,l,_)) = min l
+ | min _ = raise Match (* the empty set has no minimum *)
+
+ and delete' (E,r) = r (* delete' merges the two subtrees left over once their parent value is dropped *)
+ | delete' (l,E) = l
+ | delete' (l,r) = let val min_elt = min r in
+ T'(min_elt,l,delmin r) (* the smallest element of r becomes the new root *)
+ end
+ and delmin (T(_,_,E,r)) = r (* delmin returns the tree without its leftmost element *)
+ | delmin (T(v,_,l,r)) = T'(v,delmin l,r)
+ | delmin _ = raise Match (* nothing to remove from the empty set *)
+
+ fun concat (E, s2) = s2 (* concat joins two trees with no separating value; callers in this file pass s1 entirely below s2 *)
+ | concat (s1, E) = s1
+ | concat (t1 as T(v1,n1,l1,r1), t2 as T(v2,n2,l2,r2)) =
+ if weight*n1 < n2 then T'(v2,concat(t1,l2),r2)
+ else if weight*n2 < n1 then T'(v1,l1,concat(r1,t2))
+ else T'(min t2,t1, delmin t2) (* comparable sizes: the smallest element of t2 is moved up to be the root *)
+
+ fun fold(f,base,set) = (* folds f over the elements, visiting the right subtree first, then the node, then the left subtree *)
+ let fun fold'(base,E) = base
+ | fold'(base,T(v,_,l,r)) = fold'(f(v,fold'(base,r)),l)
+ in
+ fold'(base,set)
+ end
+
+ in
+
+ val empty = E (* the empty set *)
+
+ fun singleton x = T(x,1,E,E) (* a one-element set: a node of size 1 with two empty subtrees *)
+
+
+ local
+ fun trim (lo,hi,E) = E (* trim descends to the first subtree whose root lies strictly between lo and hi *)
+ | trim (lo,hi,s as T(v,_,l,r)) =
+ if lt(lo,v) then
+ if lt(v,hi) then s
+ else trim(lo,hi,l)
+ else trim(lo,hi,r)
+
+
+ fun uni_bd (s,E,lo,hi) = s (* uni_bd: union of s with the second tree, taking the bounds lo and hi into account *)
+ | uni_bd (E,T(v,_,l,r),lo,hi) =
+ concat3(split_gt(l,lo),v,split_lt(r,hi)) (* keep v plus the parts of l above lo and of r below hi *)
+ | uni_bd (T(v,_,l1,r1), s2 as T(v2,_,l2,r2),lo,hi) =
+ concat3(uni_bd(l1,trim(lo,v,s2),lo,v),
+ v,
+ uni_bd(r1,trim(v,hi,s2),v,hi))
+ (* inv: lo < v < hi *)
+
+ (*all the other versions of uni and trim are
+ specializations of the above two functions with
+ lo=-infinity and/or hi=+infinity *)
+
+ fun trim_lo (_ ,E) = E (* trim with only a lower bound *)
+ | trim_lo (lo,s as T(v,_,_,r)) =
+ if lt(lo,v) then s else trim_lo(lo,r)
+ fun trim_hi (_ ,E) = E (* trim with only an upper bound *)
+ | trim_hi (hi,s as T(v,_,l,_)) =
+ if lt(v,hi) then s else trim_hi(hi,l)
+
+ fun uni_hi (s,E,hi) = s (* uni_bd with only an upper bound *)
+ | uni_hi (E,T(v,_,l,r),hi) =
+ concat3(l,v,split_lt(r,hi))
+ | uni_hi (T(v,_,l1,r1), s2 as T(v2,_,l2,r2),hi) =
+ concat3(uni_hi(l1,trim_hi(v,s2),v),
+ v,
+ uni_bd(r1,trim(v,hi,s2),v,hi))
+
+ fun uni_lo (s,E,lo) = s (* uni_bd with only a lower bound *)
+ | uni_lo (E,T(v,_,l,r),lo) =
+ concat3(split_gt(l,lo),v,r)
+ | uni_lo (T(v,_,l1,r1), s2 as T(v2,_,l2,r2),lo) =
+ concat3(uni_bd(l1,trim(lo,v,s2),lo,v),
+ v,
+ uni_lo(r1,trim_lo(v,s2),v))
+
+ fun uni (s,E) = s (* entry point: no bounds on either side yet *)
+ | uni (E,s as T(v,_,l,r)) = s
+ | uni (T(v,_,l1,r1), s2 as T(v2,_,l2,r2)) =
+ concat3(uni_hi(l1,trim_hi(v,s2),v),
+ v,
+ uni_lo(r1,trim_lo(v,s2),v))
+
+ in
+ val hedge_union = uni (* only this name is visible outside the local block *)
+ end
+
+
+ fun old_union (E,s2) = s2 (* the straightforward union: split s2 around s1's root and recurse on both halves *)
+ | old_union (s1,E) = s1
+ | old_union (s1 as T(v,_,l,r),s2) =
+ let val l2 = split_lt(s2,v) (* elements of s2 below v *)
+ val r2 = split_gt(s2,v) (* elements of s2 above v; a copy of v inside s2 is dropped by both splits *)
+ in
+ concat3(old_union(l,l2),v,old_union(r,r2))
+ end
+
+ (* The old_union version is about 20% slower than
+ hedge_union in most cases *)
+
+ val union = hedge_union (* the exported union is the hedge version *)
+ (*val union = old_union*)
+
+ val add = add (* re-export the add defined in the local part above *)
+
+ fun difference (E,s) = E (* difference(a,b): the elements of a that are not in b *)
+ | difference (s,E) = s
+ | difference (s, T(v,_,l,r)) =
+ let val l2 = split_lt(s,v) (* both splits discard v itself, which is how v gets removed *)
+ val r2 = split_gt(s,v)
+ in
+ concat(difference(l2,l),difference(r2,r))
+ end
+
+ fun member (x,set) = (* membership test by ordinary search-tree descent using lt *)
+ let fun mem E = false
+ | mem (T(v,_,l,r)) =
+ if lt(x,v) then mem l else if lt(v,x) then mem r else true
+ in mem set end
+
+ (*fun intersection (a,b) = difference(a,difference(a,b))*)
+
+ fun intersection (E,_) = E (* intersection: the elements present in both arguments *)
+ | intersection (_,E) = E
+ | intersection (s, T(v,_,l,r)) =
+ let val l2 = split_lt(s,v)
+ val r2 = split_gt(s,v)
+ in
+ if member(v,s) then (* v is in both sets, so it is kept as the joining value *)
+ concat3(intersection(l2,l),v,intersection(r2,r))
+ else
+ concat(intersection(l2,l),intersection(r2,r))
+ end
+
+ fun members set = fold(op::,[],set) (* list of all elements, built by consing during the fold *)
+
+ fun cardinality E = 0 (* number of elements; read from the size field, so no traversal is needed *)
+ | cardinality (T(_,n,_,_)) = n
+
+ fun delete (E,x) = E (* delete removes x if present, rebuilding the search path with T' *)
+ | delete (set as T(v,_,l,r),x) =
+ if lt(x,v) then T'(v,delete(l,x),r)
+ else if lt(v,x) then T'(v,l,delete(r,x))
+ else delete'(l,r) (* found x: merge its two subtrees *)
+
+ fun fromList l = List.fold (fn(x,y)=>add(y,x)) l E (* adds the elements of l one by one, starting from the empty set E *)
+
+ type intset = Set (* public name for the set type *)
+
+ end
+ end
+
+structure IntSet : INTSET =B; (* exposes B as IntSet under signature INTSET, which is not defined in this file *)
Added: pypy/dist/pypy/tool/algo/fset.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/tool/algo/fset.py Fri Feb 17 00:20:02 2006
@@ -0,0 +1,244 @@
+__all__ = ['FSet', 'emptyset']
+
+# Reference:
+# "Implementing sets efficiently in a functional language"
+# http://swiss.csail.mit.edu/~adams/BB/
+# See BB.sml in the current directory.
+
+
+class FSet(object):
+ """Functional Set.
+ Behaves like a frozenset from Python 2.4 (incomplete, though).
+ This version is meant to have a better complexity than frozenset for
+ operations involving a lot of single-element adds and unions.
+ For example, a long chain of 'set.union([x]).union([y]).union([z])...'
+ takes quadratic time with frozensets, but only n*log(n) with FSets.
+ """
+ __slots__ = ['_left', '_value', '_right', '_count']  # subtrees, stored item, and element count of this subtree
+
+ def __new__(cls, items=()):  # build a set from any iterable; an FSet argument is returned as-is
+ if isinstance(items, FSet):
+ return items
+ items = list(items)
+ if len(items) == 1:
+ return node(emptyset, items[0], emptyset)
+ if not items:
+ return emptyset
+ items.sort()
+ any = items[0]  # NOTE(review): shadows the builtin any() inside this method
+ items = [x for i, x in enumerate(items) if x != items[i-1]]  # drop adjacent duplicates; for i == 0 this compares with items[-1]
+ if not items:  # the filter can remove everything (e.g. when all items are equal)
+ items.append(any)
+ def maketree(start, stop):  # build a tree from items[start:stop], using the middle item as the root
+ if start == stop:
+ return emptyset
+ else:
+ mid = (start+stop)//2
+ return node(maketree(start, mid), items[mid],
+ maketree(mid+1, stop))
+ return maketree(0, len(items))
+
+ def __len__(self):  # number of elements, read from the stored count
+ return self._count
+
+ def __repr__(self):  # '{a, b, ...}' listing the elements in iteration order
+ return '{%s}' % (', '.join([repr(n) for n in self]),)
+
+ def __iter__(self):  # iterate over the elements via treeiter()
+ return treeiter(self)
+
+ def union(self, other):  # accepts any iterable; it is converted with FSet() first
+ return uniontree(self, FSet(other))
+
+ def __or__(self, other):  # unlike union(), only another FSet is accepted
+ if not isinstance(other, FSet):
+ return NotImplemented
+ return uniontree(self, other)
+
+ def __eq__(self, other):  # equal when both sets hold the same elements, whatever the tree shapes
+ if not isinstance(other, FSet):
+ return NotImplemented
+ if self is other:
+ return True
+ if eqtree(self, other):
+ other._left = self._left  # the sets are equal: make other share self's structure
+ other._value = self._value
+ other._right = self._right
+ return True
+ return False
+
+ def __ne__(self, other):  # negation of __eq__, passing NotImplemented through
+ res = self.__eq__(other)
+ if res is NotImplemented:
+ return NotImplemented
+ return not res
+
+ def __hash__(self):  # hash of the element tuple in iteration order, xor-ed with a fixed constant
+ return hash(tuple(self)) ^ 1043498183
+
+ def __contains__(self, value):  # membership test via contains()
+ return contains(self, value)
+
+emptyset = object.__new__(FSet)  # the shared empty set, created without going through FSet.__new__
+emptyset._count = 0  # only _count is assigned here; the other slots stay unset
+
+# ____________________________________________________________
+# creation and balancing stuff
+
+WEIGHT = 3  # size ratio between sibling subtrees beyond which the balancing functions below restructure
+
+def node(left, value, right):  # plain node constructor: no balancing, just computes the count
+ result = object.__new__(FSet)  # bypass FSet.__new__, which expects an iterable
+ result._left = left
+ result._value = value
+ result._right = right
+ result._count = left._count + right._count + 1
+ return result
+
+def node_balance_fast(left, value, right):  # build a node, applying at most one single or double rotation
+ # used when an original tree was balanced, and changed by at most
+ # one element (as in adding or deleting one item).
+ ln = left._count
+ rn = right._count
+ if ln <= 1 and rn <= 1:  # tiny subtrees never need a rotation
+ return node(left, value, right)
+ elif rn > WEIGHT * ln: # right too big
+ if right._left._count < right._right._count:
+ return single_L(left, value, right)
+ else:
+ return double_L(left, value, right)
+ elif ln > WEIGHT * rn: # left too big
+ if left._right._count < left._left._count:
+ return single_R(left, value, right)
+ else:
+ return double_R(left, value, right)
+ else:
+ return node(left, value, right)
+
+def node_balance(left, value, right):  # join left, value and right even when the two sizes differ a lot
+ if left is emptyset:
+ return add(right, value)
+ elif right is emptyset:
+ return add(left, value)
+ elif WEIGHT * left._count < right._count:  # right is much bigger: descend along its left side
+ t = node_balance(left, value, right._left)
+ return node_balance_fast(t, right._value, right._right)
+ elif WEIGHT * right._count < left._count:  # left is much bigger: descend along its right side
+ t = node_balance(left._right, value, right)
+ return node_balance_fast(left._left, left._value, t)
+ else:
+ return node(left, value, right)
+
+def add(tree, value):  # return a tree containing value as well, rebalancing along the search path
+ if tree is emptyset:
+ return node(emptyset, value, emptyset)
+ elif value < tree._value:
+ t = add(tree._left, value)
+ return node_balance_fast(t, tree._value, tree._right)
+ elif value == tree._value:
+ return tree  # already present: the same tree object is returned
+ else:
+ t = add(tree._right, value)
+ return node_balance_fast(tree._left, tree._value, t)
+
+def single_L(left, value, right):  # single left rotation: right's value becomes the new root
+ return node(node(left, value, right._left), right._value, right._right)
+
+def single_R(left, value, right):  # single right rotation: left's value becomes the new root
+ return node(left._left, left._value, node(left._right, value, right))
+
+def double_L(left, value, right):  # double left rotation: the value of right._left becomes the new root
+ rl = right._left
+ n1 = node(left, value, rl._left)
+ n2 = node(rl._right, right._value, right._right)
+ return node(n1, rl._value, n2)
+
+def double_R(left, value, right):  # double right rotation: the value of left._right becomes the new root
+ lr = left._right
+ n1 = node(left._left, left._value, lr._left)
+ n2 = node(lr._right, value, right)
+ return node(n1, lr._value, n2)
+
+# ____________________________________________________________
+# union
+
+def uniontree(tree1, tree2):  # union of two trees; sets of size 0 or 1 are handled by a plain add()
+ if tree2._count <= 1:
+ if tree2 is emptyset:
+ return tree1
+ else:
+ return add(tree1, tree2._value)
+ elif tree1._count <= 1:
+ if tree1 is emptyset:
+ return tree2
+ else:
+ return add(tree2, tree1._value)
+ else:
+ left2, right2 = splittree(tree2, tree1._value)  # split tree2 around tree1's root value
+ return node_balance(uniontree(tree1._left, left2), tree1._value,
+ uniontree(tree1._right, right2))
+
+def splittree(tree, value):  # return (tree of items < value, tree of items > value); value itself is left out
+ if tree is emptyset:
+ return emptyset, emptyset
+ elif tree._value < value:
+ t1, t2 = splittree(tree._right, value)
+ return node_balance(tree._left, tree._value, t1), t2
+ elif tree._value == value:
+ return tree._left, tree._right
+ else:
+ t1, t2 = splittree(tree._left, value)
+ return t1, node_balance(t2, tree._value, tree._right)
+
+# ____________________________________________________________
+# utilities
+
+def treeiter(tree):  # generator: in-order traversal (left, node, right) using an explicit stack
+ if tree is emptyset:
+ return
+ path = []  # ancestors whose value and right subtree are still to be visited
+ while True:
+ while tree._left is not emptyset:
+ path.append(tree)
+ tree = tree._left
+ yield tree._value
+ tree = tree._right
+ while tree is emptyset:  # no right subtree: back up to the nearest pending ancestor
+ if not path:
+ return
+ tree = path.pop()
+ yield tree._value
+ tree = tree._right
+
+def eqtree(tree1, tree2):  # True when both trees hold the same elements, even with different shapes
+ if tree1 is tree2:
+ return True
+ if tree1._count != tree2._count:
+ return False
+ assert tree1 is not emptyset and tree2 is not emptyset  # equal counts and distinct objects, so neither should be the emptyset singleton
+ left2, right2 = splittree(tree2, tree1._value)  # split tree2 around tree1's root to compare half by half
+ if left2._count + right2._count == tree2._count:
+ return False # _value was not in tree2
+ return eqtree(tree1._left, left2) and eqtree(tree1._right, right2)
+
+def contains(tree, value):  # iterative search-tree lookup using < and ==
+ while tree is not emptyset:
+ if value < tree._value:
+ tree = tree._left
+ elif value == tree._value:
+ return True
+ else:
+ tree = tree._right
+ return False
+
+
+_no = object()  # sentinel meaning "no bound on this side"
+def checktree(tree, bmin=_no, bmax=_no):  # debugging aid: assert the ordering and the stored counts of every node
+ if tree is not emptyset:
+ if bmin is not _no:
+ assert bmin < tree._value
+ if bmax is not _no:
+ assert tree._value < bmax
+ assert tree._count == tree._left._count + tree._right._count + 1
+ checktree(tree._left, bmin, tree._value)  # everything on the left must stay below this value
+ checktree(tree._right, tree._value, bmax)  # everything on the right must stay above it
Added: pypy/dist/pypy/tool/algo/test/test_fset.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/tool/algo/test/test_fset.py Fri Feb 17 00:20:02 2006
@@ -0,0 +1,76 @@
+from pypy.tool.algo.fset import FSet, checktree, emptyset
+import random
+
+
+def test_empty():  # every way of building an empty FSet yields the emptyset singleton
+ assert FSet() is FSet([]) is emptyset
+ assert len(emptyset) == 0
+ assert list(emptyset) == []
+ checktree(emptyset)
+
+def test_iter():  # iteration yields the elements in ascending order
+ s = FSet(range(42))
+ assert len(s) == 42
+ assert list(s) == range(42)
+ checktree(s)
+
+def test_new():  # construction from unsorted input containing duplicates
+ s = FSet(range(6, 42) + range(13))  # two overlapping ranges concatenated
+ assert len(s) == 42
+ assert list(s) == range(42)
+ assert FSet(s) is s  # an existing FSet is returned unchanged
+ checktree(s)
+
+def test_union():  # union() with empty, overlapping, disjoint and identical arguments
+ s1 = FSet([1, 10, 100, 1000])
+ assert list(s1.union([])) == [1, 10, 100, 1000]
+ assert list(s1.union([100])) == [1, 10, 100, 1000]
+ assert list(s1.union([3, 4, 5])) == [1, 3, 4, 5, 10, 100, 1000]
+ assert list(s1.union([1000, 1200, 1400])) == [1, 10, 100, 1000, 1200, 1400]
+ assert list(s1.union(s1)) == [1, 10, 100, 1000]
+
+def test_or():  # the | operator between two FSets sharing one element
+ s1 = FSet([0, 3, 6])
+ s2 = FSet([1, 3])
+ assert list(s1 | s2) == [0, 1, 3, 6]
+
+def test_eq():  # == and != on equal and unequal sets
+ assert FSet([0, 3]) == FSet([0, 3])
+ assert FSet([]) == emptyset
+ assert FSet(range(42)) == FSet(range(42))
+ assert FSet([]) != FSet([5])
+ assert FSet(range(42)) != FSet(range(43))
+
+def test_hash():  # equal sets hash equally even when they were built differently
+ assert hash(emptyset) != hash(FSet([1])) != hash(FSet([1, 2]))  # chained !=: only neighbouring pairs are compared
+ assert hash(FSet([1, 2])) == hash(FSet([1]) | FSet([2]))
+
+def test_len():  # a shared element is counted once after a union
+ assert len(FSet([1, 2]) | FSet([2, 3])) == 3
+
+def test_reasonable_speed(N=1000):  # N single-element unions in ascending, descending and random order
+ d = emptyset
+ for i in range(N):
+ d |= FSet([i])
+ checktree(d)
+ assert list(d) == range(N)
+ d = emptyset
+ for i in range(N-1, -1, -1):  # same elements, inserted from largest to smallest
+ d |= FSet([i])
+ checktree(d)
+ assert list(d) == range(N)
+ d = emptyset
+ lst = range(N)
+ random.shuffle(lst)  # same elements, inserted in random order
+ for i in lst:
+ d |= FSet([i])
+ checktree(d)
+ assert list(d) == range(N)
+
+def test_contains():  # the 'in' operator agrees with list membership for present and absent values
+ assert 5 not in emptyset
+ lst = range(0, 20, 2)  # the even numbers below 20
+ random.shuffle(lst)
+ d = FSet(lst)
+ for x in range(20):
+ assert (x in d) == (x in lst)
More information about the Pypy-commit
mailing list