[pypy-commit] benchmarks default: add a btree benchmark with similar problems as skip-list (a bit better though)
Raemi
noreply at buildbot.pypy.org
Wed Apr 9 16:37:02 CEST 2014
Author: Remi Meier <remi.meier at inf.ethz.ch>
Branch:
Changeset: r248:a7f0f6c6e15e
Date: 2014-04-09 16:37 +0200
http://bitbucket.org/pypy/benchmarks/changeset/a7f0f6c6e15e/
Log: add a btree benchmark with similar problems as skip-list (a bit
better though)
diff --git a/multithread/btree/btree.py b/multithread/btree/btree.py
new file mode 100644
--- /dev/null
+++ b/multithread/btree/btree.py
@@ -0,0 +1,354 @@
+# https://github.com/MartinThoma/algorithms/tree/master/datastructures
+
+from common.abstract_threading import atomic, Future
+import time, threading
+
+import random
+
+thread_local = threading.local()
+
+import bisect
+
+
+class _BNode(object):
+ __slots__ = ["tree", "contents", "children"]
+
+ def __init__(self, tree, contents=None, children=None):
+ self.tree = tree
+ self.contents = contents or []
+ self.children = children or []
+ if self.children:
+ assert len(self.contents) + 1 == len(self.children), \
+ "one more child than data item required"
+
+ def __repr__(self):
+ name = getattr(self, "children", 0) and "Branch" or "Leaf"
+ return "<%s %s>" % (name, ", ".join(map(str, self.contents)))
+
+ def lateral(self, parent, parent_index, dest, dest_index):
+ if parent_index > dest_index:
+ dest.contents.append(parent.contents[dest_index])
+ parent.contents[dest_index] = self.contents.pop(0)
+ if self.children:
+ dest.children.append(self.children.pop(0))
+ else:
+ dest.contents.insert(0, parent.contents[parent_index])
+ parent.contents[parent_index] = self.contents.pop()
+ if self.children:
+ dest.children.insert(0, self.children.pop())
+
+ def shrink(self, ancestors):
+ parent = None
+
+ if ancestors:
+ parent, parent_index = ancestors.pop()
+ # try to lend to the left neighboring sibling
+ if parent_index:
+ left_sib = parent.children[parent_index - 1]
+ if len(left_sib.contents) < self.tree.order:
+ self.lateral(
+ parent, parent_index, left_sib, parent_index - 1)
+ return
+
+ # try the right neighbor
+ if parent_index + 1 < len(parent.children):
+ right_sib = parent.children[parent_index + 1]
+ if len(right_sib.contents) < self.tree.order:
+ self.lateral(
+ parent, parent_index, right_sib, parent_index + 1)
+ return
+
+ sibling, push = self.split()
+
+ if not parent:
+ parent, parent_index = self.tree.BRANCH(
+ self.tree, children=[self]), 0
+ self.tree._root = parent
+
+ # pass the median up to the parent
+ parent.contents.insert(parent_index, push)
+ parent.children.insert(parent_index + 1, sibling)
+ if len(parent.contents) > parent.tree.order:
+ parent.shrink(ancestors)
+
+ def grow(self, ancestors):
+ parent, parent_index = ancestors.pop()
+
+ minimum = self.tree.order // 2
+ left_sib = right_sib = None
+
+ # try to borrow from the right sibling
+ if parent_index + 1 < len(parent.children):
+ right_sib = parent.children[parent_index + 1]
+ if len(right_sib.contents) > minimum:
+ right_sib.lateral(parent, parent_index + 1, self, parent_index)
+ return
+
+ # try to borrow from the left sibling
+ if parent_index:
+ left_sib = parent.children[parent_index - 1]
+ if len(left_sib.contents) > minimum:
+ left_sib.lateral(parent, parent_index - 1, self, parent_index)
+ return
+
+ # consolidate with a sibling - try left first
+ if left_sib:
+ left_sib.contents.append(parent.contents[parent_index - 1])
+ left_sib.contents.extend(self.contents)
+ if self.children:
+ left_sib.children.extend(self.children)
+ parent.contents.pop(parent_index - 1)
+ parent.children.pop(parent_index)
+ else:
+ self.contents.append(parent.contents[parent_index])
+ self.contents.extend(right_sib.contents)
+ if self.children:
+ self.children.extend(right_sib.children)
+ parent.contents.pop(parent_index)
+ parent.children.pop(parent_index + 1)
+
+ if len(parent.contents) < minimum:
+ if ancestors:
+ # parent is not the root
+ parent.grow(ancestors)
+ elif not parent.contents:
+ # parent is root, and its now empty
+ self.tree._root = left_sib or self
+
+ def split(self):
+ center = len(self.contents) // 2
+ median = self.contents[center]
+ sibling = type(self)(
+ self.tree,
+ self.contents[center + 1:],
+ self.children[center + 1:])
+ self.contents = self.contents[:center]
+ self.children = self.children[:center + 1]
+ return sibling, median
+
+ def insert(self, index, item, ancestors):
+ self.contents.insert(index, item)
+ if len(self.contents) > self.tree.order:
+ self.shrink(ancestors)
+
+ def remove(self, index, ancestors):
+ minimum = self.tree.order // 2
+
+ if self.children:
+ # try promoting from the right subtree first,
+ # but only if it won't have to resize
+ additional_ancestors = [(self, index + 1)]
+ descendent = self.children[index + 1]
+ while descendent.children:
+ additional_ancestors.append((descendent, 0))
+ descendent = descendent.children[0]
+ if len(descendent.contents) > minimum:
+ ancestors.extend(additional_ancestors)
+ self.contents[index] = descendent.contents[0]
+ descendent.remove(0, ancestors)
+ return
+
+ # fall back to the left child
+ additional_ancestors = [(self, index)]
+ descendent = self.children[index]
+ while descendent.children:
+ additional_ancestors.append(
+ (descendent, len(descendent.children) - 1))
+ descendent = descendent.children[-1]
+ ancestors.extend(additional_ancestors)
+ self.contents[index] = descendent.contents[-1]
+ descendent.remove(len(descendent.children) - 1, ancestors)
+ else:
+ self.contents.pop(index)
+ if len(self.contents) < minimum and ancestors:
+ self.grow(ancestors)
+
+class BTree(object):
+ BRANCH = LEAF = _BNode
+
+ def __init__(self, order):
+ self.order = order
+ self._root = self._bottom = self.LEAF(self)
+
+ def _path_to(self, item):
+ current = self._root
+ ancestry = []
+
+ while getattr(current, "children", None):
+ index = bisect.bisect_left(current.contents, item)
+ ancestry.append((current, index))
+ if index < len(current.contents) \
+ and current.contents[index] == item:
+ return ancestry
+ current = current.children[index]
+
+ index = bisect.bisect_left(current.contents, item)
+ ancestry.append((current, index))
+ present = index < len(current.contents)
+ present = present and current.contents[index] == item
+
+ return ancestry
+
+ def _present(self, item, ancestors):
+ last, index = ancestors[-1]
+ return index < len(last.contents) and last.contents[index] == item
+
+ def insert(self, item):
+ ancestors = self._path_to(item)
+ node, index = ancestors[-1]
+ while getattr(node, "children", None):
+ node = node.children[index]
+ index = bisect.bisect_left(node.contents, item)
+ ancestors.append((node, index))
+ node, index = ancestors.pop()
+ node.insert(index, item, ancestors)
+
+ def remove(self, item):
+ ancestors = self._path_to(item)
+
+ if self._present(item, ancestors):
+ node, index = ancestors.pop()
+ node.remove(index, ancestors)
+ # else:
+ # raise ValueError("%r not in %s" % (item, self.__class__.__name__))
+
+ def __contains__(self, item):
+ return self._present(item, self._path_to(item))
+
+ def __iter__(self):
+ def _recurse(node):
+ if node.children:
+ for child, item in zip(node.children, node.contents):
+ for child_item in _recurse(child):
+ yield child_item
+ yield item
+ for child_item in _recurse(node.children[-1]):
+ yield child_item
+ else:
+ for item in node.contents:
+ yield item
+
+ for item in _recurse(self._root):
+ yield item
+
+ def __repr__(self):
+ def recurse(node, accum, depth):
+ accum.append((" " * depth) + repr(node))
+ for node in getattr(node, "children", []):
+ recurse(node, accum, depth + 1)
+
+ accum = []
+ recurse(self._root, accum, 0)
+ return "\n".join(accum)
+
+ @classmethod
+ def bulkload(cls, items, order):
+ tree = object.__new__(cls)
+ tree.order = order
+
+ leaves = tree._build_bulkloaded_leaves(items)
+ tree._build_bulkloaded_branches(leaves)
+
+ return tree
+
+ def _build_bulkloaded_leaves(self, items):
+ minimum = self.order // 2
+ leaves, seps = [[]], []
+
+ for item in items:
+ if len(leaves[-1]) < self.order:
+ leaves[-1].append(item)
+ else:
+ seps.append(item)
+ leaves.append([])
+
+ if len(leaves[-1]) < minimum and seps:
+ last_two = leaves[-2] + [seps.pop()] + leaves[-1]
+ leaves[-2] = last_two[:minimum]
+ leaves[-1] = last_two[minimum + 1:]
+ seps.append(last_two[minimum])
+
+ return [self.LEAF(self, contents=node) for node in leaves], seps
+
+ def _build_bulkloaded_branches(self, (leaves, seps)):
+ minimum = self.order // 2
+ levels = [leaves]
+
+ while len(seps) > self.order + 1:
+ items, nodes, seps = seps, [[]], []
+
+ for item in items:
+ if len(nodes[-1]) < self.order:
+ nodes[-1].append(item)
+ else:
+ seps.append(item)
+ nodes.append([])
+
+ if len(nodes[-1]) < minimum and seps:
+ last_two = nodes[-2] + [seps.pop()] + nodes[-1]
+ nodes[-2] = last_two[:minimum]
+ nodes[-1] = last_two[minimum + 1:]
+ seps.append(last_two[minimum])
+
+ offset = 0
+ for i, node in enumerate(nodes):
+ children = levels[-1][offset:offset + len(node) + 1]
+ nodes[i] = self.BRANCH(self, contents=node, children=children)
+ offset += len(node) + 1
+
+ levels.append(nodes)
+
+ self._root = self.BRANCH(self, contents=seps, children=levels[-1])
+
+
+OPS = [BTree.__contains__] * 98 + [BTree.insert, BTree.remove]
+
+
+def task(id, tree, ops):
+ print "start task with %s ops" % ops
+ r = random.Random()
+ r.seed(id)
+ thread_local.rnd = r
+
+ for _ in xrange(ops):
+ op = r.choice(OPS)
+ elem = r.randint(1, 10000)
+ with atomic:
+ op(tree, elem)
+
+ print "task ended"
+
+
+def chunks(l, n):
+ """ Yield successive n-sized chunks from l. """
+ for i in xrange(0, len(l), n):
+ yield l[i:i+n]
+
+
+
+def run(threads=2, operations=2000000):
+ threads = int(threads)
+ operations = int(operations)
+
+ thread_local.rnd = random
+
+ tree = BTree(20)
+ for _ in xrange(1000):
+ tree.insert(random.randint(1, 1000))
+
+ c_len = operations // threads
+ fs = []
+ for i in xrange(threads):
+ fs.append(Future(task, i, tree, c_len))
+ for f in fs:
+ f()
+
+ # print "tree:"
+ # print tree
+
+
+
+
+
+if __name__ == '__main__':
+ run()
More information about the pypy-commit
mailing list