[pypy-svn] r39908 - in pypy/branch/rope-branch/pypy/objspace/std: . test

cfbolz at codespeak.net cfbolz at codespeak.net
Sun Mar 4 18:28:08 CET 2007


Author: cfbolz
Date: Sun Mar  4 18:28:06 2007
New Revision: 39908

Modified:
   pypy/branch/rope-branch/pypy/objspace/std/rope.py
   pypy/branch/rope-branch/pypy/objspace/std/test/test_rope.py
Log:
Intermediate checkin: first steps in the direction of "composable hashes",
which compose the hash of a binary concatenation node out of the hashes of
its two subtrees.


Modified: pypy/branch/rope-branch/pypy/objspace/std/rope.py
==============================================================================
--- pypy/branch/rope-branch/pypy/objspace/std/rope.py	(original)
+++ pypy/branch/rope-branch/pypy/objspace/std/rope.py	Sun Mar  4 18:28:06 2007
@@ -1,6 +1,13 @@
 import py
 import sys
+from pypy.rlib.rarithmetic import intmask, _hash_string
+from pypy.rlib.objectmodel import we_are_translated
+import math
 
+LOG2 = math.log(2)  # natural log of 2; divisor that turns math.log() into a base-2 log
+NBITS = int(math.log(sys.maxint) / LOG2) + 2  # floor(log2(maxint)) + 2; presumably the word width in bits -- NOTE(review): float rounding near 2**63 may shift this by one, confirm
+
+# XXX should optimize the numbers
 NEW_NODE_WHEN_LENGTH = 16
 MAX_DEPTH = 32 # maybe should be smaller
 MIN_SLICE_LENGTH = 64
@@ -27,6 +34,9 @@
     def rebalance(self):
         return self
 
+    def hash_part(self):  # placeholder on the base class: always raises, nothing is computed here
+        raise NotImplementedError("base class")
+
     def flatten(self):
         return ''
 
@@ -69,6 +79,12 @@
     def flatten(self):
         return self.s
 
+    def hash_part(self):  # Horner-style polynomial hash of self.s with multiplier 1000003
+        acc = 0
+        for ch in self.s:
+            acc = ord(ch) + (1000003 * acc)  # earlier characters end up with higher powers
+        return intmask(acc)
+
     def getitem(self, index):
         return self.s[index]
 
@@ -129,6 +145,11 @@
         f = fringe(self)
         return "".join([node.flatten() for node in f])
  
+    def hash_part(self):  # hash of left+right, composed from the two subtree hashes
+        h1 = self.left.hash_part()  # subtree hashes give earlier characters the higher powers of 1000003
+        h2 = self.right.hash_part()
+        return intmask(h1 * (1000003 ** self.right.length()) + h2)  # so the LEFT hash is shifted by len(right)
+
     def rebalance(self):
         return rebalance([self], self.len)
 
@@ -168,6 +189,12 @@
     def flatten(self):
         return self.node.flatten()[self.start: self.stop]
 
+    def hash_part(self):  # polynomial hash over positions start..stop-1 of the underlying node
+        acc = 0
+        for pos in range(self.start, self.stop):
+            acc = ord(self.node.getitem(pos)) + (1000003 * acc)  # same multiplier/recurrence as a plain loop over chars
+        return intmask(acc)
+
     def dot(self, seen, toplevel=False):
         if self in seen:
             return
@@ -784,8 +811,6 @@
 
 
 def hash_rope(rope):
-    from pypy.rlib.rarithmetic import intmask, _hash_string
-    from pypy.rlib.objectmodel import we_are_translated
     length = rope.length()
     if length == 0:
         x = -1
@@ -802,11 +827,10 @@
         iter = CharIterator(rope)
         while 1:
             try:
-                x = (1000003*x) ^ ord(iter.next())
+                x = (1000003*x) + ord(iter.next())
             except StopIteration:
                 break
         x ^= length
         if x == 0:
             x = -1
-    return intmask(x)
-
+    return intmask(x) 

Modified: pypy/branch/rope-branch/pypy/objspace/std/test/test_rope.py
==============================================================================
--- pypy/branch/rope-branch/pypy/objspace/std/test/test_rope.py	(original)
+++ pypy/branch/rope-branch/pypy/objspace/std/test/test_rope.py	Sun Mar  4 18:28:06 2007
@@ -310,6 +310,18 @@
         rope = LiteralStringNode(st)
         assert hash_rope(rope) == _hash_string(st)
 
+def test_hash_part():  # every split point of the same string must give the same composed hash
+    a = "".join([chr(random.randrange(256)) for i in range(500)])  # 500 independent random chars, not one char repeated
+    h = None
+    for split in range(1, 499):
+        s1 = LiteralStringNode(a[:split])
+        s2 = LiteralStringNode(a[split:])
+        s = BinaryConcatNode(s1, s2)
+        if h is None:
+            h = s.hash_part()  # first split fixes the reference value
+        else:
+            assert s.hash_part() == h
+
 def test_equality():
     l = [make_random_string() for i in range(3)]
     l.append((LiteralStringNode(""), ""))



More information about the Pypy-commit mailing list