[pypy-svn] r47286 - in pypy/dist/pypy/objspace/std: . test

cfbolz at codespeak.net cfbolz at codespeak.net
Mon Oct 8 14:24:38 CEST 2007


Author: cfbolz
Date: Mon Oct  8 14:24:38 2007
New Revision: 47286

Modified:
   pypy/dist/pypy/objspace/std/rope.py
   pypy/dist/pypy/objspace/std/ropeobject.py
   pypy/dist/pypy/objspace/std/test/test_rope.py
Log:
clean up ropes a small bit:

 - add a reverse char iterator, which helps in some places to be more efficient
 - don't flatten the search string when doing find operations
 - have prebuilt interp-level rope objects of lengths 0 and 1 (not only
   prebuilt W_RopeObjects)


Modified: pypy/dist/pypy/objspace/std/rope.py
==============================================================================
--- pypy/dist/pypy/objspace/std/rope.py	(original)
+++ pypy/dist/pypy/objspace/std/rope.py	Mon Oct  8 14:24:38 2007
@@ -134,6 +134,9 @@
         yield ('"%s" [shape=box,label="length: %s\\n%s"];' % (
             id(self), len(self.s),
             repr(addinfo).replace('"', '').replace("\\", "\\\\")))
+LiteralStringNode.EMPTY = LiteralStringNode("")
+LiteralStringNode.PREBUILT = [LiteralStringNode(chr(i)) for i in range(256)]
+del i
 
 
 class BinaryConcatNode(StringNode):
@@ -395,7 +398,7 @@
 
 def multiply(node, times):
     if times <= 0:
-        return LiteralStringNode("")
+        return LiteralStringNode.EMPTY
     if times == 1:
         return node
     end_length = node.length() * times
@@ -443,7 +446,7 @@
         for node in nodelist:
             sizehint += node.length()
     if sizehint == 0:
-        return LiteralStringNode("")
+        return LiteralStringNode.EMPTY
 
     # this code is based on the Fibonacci identity:
     #   sum(fib(i) for i in range(n+1)) == fib(n+2)
@@ -563,18 +566,17 @@
 def find(node, subnode, start=0, stop=-1):
 
     len1 = node.length()
+    len2 = subnode.length()
     if stop > len1 or stop == -1:
         stop = len1
-    substring = subnode.flatten() # XXX stressful to do it as a node
-    len2 = len(substring)
     if len2 == 1:
-        return find_char(node, substring[0], start, stop)
+        return find_char(node, subnode.getitem(0), start, stop)
     if len2 == 0:
         if (stop - start) < 0:
             return -1
         return start
-    restart = construct_restart_positions(substring)
-    return _find(node, substring, start, stop, restart)
+    restart = construct_restart_positions_node(subnode)
+    return _find_node(node, subnode, start, stop, restart)
 
 def _find(node, substring, start, stop, restart):
     len2 = len(substring)
@@ -608,6 +610,43 @@
                 i = e
     return -1
 
+def _find_node(node, subnode, start, stop, restart):
+    len2 = subnode.length()
+    m = start
+    iter = SeekableCharIterator(node)
+    iter.seekforward(start)
+    c = iter.next()
+    i = 0
+    subiter = SeekableCharIterator(subnode)
+    d = subiter.next()
+    while m + i < stop:
+        if c == d:
+            i += 1
+            if i == len2:
+                return m
+            d = subiter.next()
+            if m + i < stop:
+                c = iter.next()
+        else:
+            # mismatch, go back to the last possible starting pos
+            if i == 0:
+                m += 1
+                if m + i < stop:
+                    c = iter.next()
+            else:
+                e = restart[i - 1]
+                new_m = m + i - e
+                assert new_m <= m + i
+                seek = m + i - new_m
+                if seek:
+                    iter.seekback(m + i - new_m)
+                    c = iter.next()
+                m = new_m
+                subiter.seekback(i - e + 1)
+                d = subiter.next()
+                i = e
+    return -1
+
 def construct_restart_positions(s):
     l = len(s)
     restart = [0] * l
@@ -710,29 +749,36 @@
         except StopIteration:
             return result
 
-class SeekableFringeIterator(object):
-    # XXX allow to seek in bigger character steps
+
+class ReverseFringeIterator(object):
     def __init__(self, node):
         self.stack = [node]
-        self.fringestack = []
-        self.fringe = []
 
     def next(self):
-        if self.fringestack:
-            result = self.fringestack.pop()
-            self.fringe.append(result)
-            return result
         while self.stack:
             curr = self.stack.pop()
             while 1:
                 if isinstance(curr, BinaryConcatNode):
-                    self.stack.append(curr.right)
-                    curr = curr.left
+                    self.stack.append(curr.left)
+                    curr = curr.right
                 else:
-                    self.fringe.append(curr)
                     return curr
         raise StopIteration
 
+class SeekableFringeIterator(FringeIterator):
+    def __init__(self, node):
+        FringeIterator.__init__(self, node)
+        self.fringestack = []
+        self.fringe = []
+
+    def next(self):
+        if self.fringestack:
+            result = self.fringestack.pop()
+        else:
+            result = FringeIterator.next(self)
+        self.fringe.append(result)
+        return result
+
     def seekback(self):
         result = self.fringe.pop()
         self.fringestack.append(result)
@@ -756,13 +802,36 @@
                     break
             self.index = 0
         index = self.index
-        result = self.node.getitem(index)
-        if self.index == self.nodelength - 1:
+        result = node.getitem(index)
+        if index == self.nodelength - 1:
             self.node = None
         else:
             self.index = index + 1
         return result
 
+class ReverseCharIterator(object):
+    def __init__(self, node):
+        self.iter = ReverseFringeIterator(node)
+        self.node = None
+        self.index = 0
+
+    def next(self):
+        node = self.node
+        index = self.index
+        if node is None:
+            while 1:
+                node = self.node = self.iter.next()
+                index = self.index = node.length() - 1
+                if index != -1:
+                    break
+        result = node.getitem(index)
+        if index == 0:
+            self.node = None
+        else:
+            self.index = index - 1
+        return result
+
+
 class SeekableCharIterator(object):
     def __init__(self, node):
         self.iter = SeekableFringeIterator(node)
@@ -904,10 +973,7 @@
     if not len2:
         return 1
 
-    if len1 < len2:
-        cmplen = len1
-    else:
-        cmplen = len2
+    cmplen = min(len1, len2)
     i = 0
     iter1 = CharIterator(node1)
     iter2 = CharIterator(node2)

Modified: pypy/dist/pypy/objspace/std/ropeobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/ropeobject.py	(original)
+++ pypy/dist/pypy/objspace/std/ropeobject.py	Mon Oct  8 14:24:38 2007
@@ -31,8 +31,8 @@
             return w_self
         return W_RopeObject(w_self._node)
 
-W_RopeObject.EMPTY = W_RopeObject(rope.LiteralStringNode(""))
-W_RopeObject.PREBUILT = [W_RopeObject(rope.LiteralStringNode(chr(i)))
+W_RopeObject.EMPTY = W_RopeObject(rope.LiteralStringNode.EMPTY)
+W_RopeObject.PREBUILT = [W_RopeObject(rope.LiteralStringNode.PREBUILT[i])
                              for i in range(256)]
 del i
 
@@ -393,8 +393,10 @@
     d = u_arg - selfnode.length()
     if d > 0:
         fillchar = fillchar[0]    # annotator hint: it's a single character
-        resultnode = rope.concatenate(rope.LiteralStringNode(d * fillchar),
-                                      selfnode)
+        resultnode = rope.concatenate(
+                rope.multiply(rope.LiteralStringNode.PREBUILT[ord(fillchar)],
+                              d),
+                selfnode)
         return W_RopeObject(resultnode)
     else:
         return W_RopeObject(selfnode)
@@ -410,8 +412,10 @@
     d = u_arg - selfnode.length()
     if d > 0:
         fillchar = fillchar[0]    # annotator hint: it's a single character
-        resultnode = rope.concatenate(selfnode,
-                                     rope.LiteralStringNode(d * fillchar))
+        resultnode = rope.concatenate(
+                selfnode,
+                rope.multiply(rope.LiteralStringNode.PREBUILT[ord(fillchar)],
+                              d))
         return W_RopeObject(resultnode)
     else:
         return W_RopeObject(selfnode)
@@ -525,7 +529,7 @@
         substrings = [by]
         iter = rope.CharIterator(node)
         for i in range(upper):
-            substrings.append(rope.LiteralStringNode(iter.next()))
+            substrings.append(rope.LiteralStringNode.PREBUILT[ord(iter.next())])
             substrings.append(by)
         substrings.append(rope.getslice_one(node, upper, length))
         try:
@@ -571,8 +575,8 @@
            lpos += 1
        
     if right:
-        # XXX improve this
-        while rpos > lpos and node.getitem(rpos - 1) in u_chars:
+        iter = rope.ReverseCharIterator(node)
+        while rpos > lpos and iter.next() in u_chars:
            rpos -= 1
        
     return W_RopeObject(rope.getslice_one(node, lpos, rpos))
@@ -592,8 +596,8 @@
            lpos += 1
        
     if right:
-        # XXX fix this
-        while rpos > lpos and node.getitem(rpos - 1).isspace():
+        iter = rope.ReverseCharIterator(node)
+        while rpos > lpos and iter.next().isspace():
            rpos -= 1
        
     assert rpos >= lpos    # annotator hint, don't remove
@@ -632,7 +636,7 @@
     d = arg - length
     if d>0:
         offset = d//2
-        fillcharnode = rope.LiteralStringNode(fillchar)
+        fillcharnode = rope.LiteralStringNode.PREBUILT[ord(fillchar)]
         pre = rope.multiply(fillcharnode, offset)
         post = rope.multiply(fillcharnode, (d - offset))
         centered = rope.rebalance([pre, node, post])
@@ -724,23 +728,22 @@
 
 def _tabindent(node, tabsize):
     "calculates distance after the token to the next tabstop"
-    # XXX implement reverse char iterator
     length = node.length()
     distance = tabsize
     if length:
         distance = 0
-        offset = length
+        iter = rope.ReverseCharIterator(node)
 
         while 1:
             # no sophisticated linebreak support now
             # '\r' just for passing adapted CPython test
-            char = node.getitem(offset - 1)
+            try:
+                char = iter.next()
+            except StopIteration:
+                break
             if char == "\n" or char == "\r":
                 break
             distance += 1
-            offset -= 1
-            if offset == 0:
-                break
                 
         #the same like distance = len(u_token) - (offset + 1)
         distance = (tabsize - distance) % tabsize
@@ -758,7 +761,7 @@
     tabsize  = space.int_w(w_tabsize)
     
     expanded = []
-    iter = rope.FindIterator(node, rope.LiteralStringNode("\t"))
+    iter = rope.FindIterator(node, rope.LiteralStringNode.PREBUILT[ord("\t")])
     #split = u_self.split("\t")
     #u_expanded = oldtoken = split.pop(0)
 
@@ -774,7 +777,7 @@
         return w_self.create_if_subclassed()
     expanded.append(last)
     while 1:
-        expanded.append(rope.multiply(rope.LiteralStringNode(" "),
+        expanded.append(rope.multiply(rope.LiteralStringNode.PREBUILT[ord(" ")],
                                       _tabindent(last, tabsize)))
         try:
             next = iter.next()
@@ -845,7 +848,7 @@
 
     if length >= width:
         return w_self.create_if_subclassed()
-    zero = rope.LiteralStringNode("0")
+    zero = rope.LiteralStringNode.PREBUILT[ord("0")]
     if length == 0:
         return W_RopeObject(rope.multiply(zero, width))
 
@@ -853,7 +856,7 @@
     firstchar = node.getitem(0)
     if length > 0 and (firstchar == '+' or firstchar == '-'):
         return W_RopeObject(rope.rebalance(
-            [rope.LiteralStringNode(firstchar),
+            [rope.LiteralStringNode.PREBUILT[ord(firstchar)],
              rope.multiply(zero, middle),
              rope.getslice_one(node, 1, length)]))
     else:

Modified: pypy/dist/pypy/objspace/std/test/test_rope.py
==============================================================================
--- pypy/dist/pypy/objspace/std/test/test_rope.py	(original)
+++ pypy/dist/pypy/objspace/std/test/test_rope.py	Mon Oct  8 14:24:38 2007
@@ -1,3 +1,4 @@
+import py
 import random
 from pypy.objspace.std.rope import *
 
@@ -154,6 +155,14 @@
         assert c2 == c
     py.test.raises(StopIteration, iter.next)
 
+def test_reverse_iteration():
+    rope, real_st = make_random_string(200)
+    iter = ReverseCharIterator(rope)
+    for c in py.builtin.reversed(real_st):
+        c2 = iter.next()
+        assert c2 == c
+    py.test.raises(StopIteration, iter.next)
+
 def test_multiply():
     strs = [(LiteralStringNode("a"), "a"), (LiteralStringNode("abc"), "abc"),
             make_random_string(500)]
@@ -218,6 +227,51 @@
     assert c2 == "i"
     py.test.raises(StopIteration, iter.next)
 
+def test_fringe_iterator():
+    ABC = LiteralStringNode("abc")
+    DEF = LiteralStringNode("def")
+    GHI = LiteralStringNode("ghi")
+    rope = BinaryConcatNode(BinaryConcatNode(ABC, DEF), GHI)
+    iter = FringeIterator(rope)
+    n = iter.next()
+    assert n is ABC
+    n = iter.next()
+    assert n is DEF
+    n = iter.next()
+    assert n is GHI
+    py.test.raises(StopIteration, iter.next)
+
+def test_seekable_fringe_iterator():
+    ABC = LiteralStringNode("abc")
+    DEF = LiteralStringNode("def")
+    GHI = LiteralStringNode("ghi")
+    rope = BinaryConcatNode(BinaryConcatNode(ABC, DEF), GHI)
+    iter = SeekableFringeIterator(rope)
+    n = iter.next()
+    assert n is ABC
+    n = iter.seekback()
+    assert n is ABC
+    n = iter.next()
+    assert n is ABC
+    n = iter.next()
+    assert n is DEF
+    n = iter.next()
+    assert n is GHI
+    n = iter.seekback()
+    assert n is GHI
+    n = iter.seekback()
+    assert n is DEF
+    n = iter.seekback()
+    assert n is ABC
+    n = iter.next()
+    assert n is ABC
+    n = iter.next()
+    assert n is DEF
+    n = iter.next()
+    assert n is GHI
+    py.test.raises(StopIteration, iter.next)
+
+
 def test_seekforward():
     rope = BinaryConcatNode(BinaryConcatNode(LiteralStringNode("abc"),
                                              LiteralStringNode("def")),



More information about the Pypy-commit mailing list