[pypy-svn] r47286 - in pypy/dist/pypy/objspace/std: . test
cfbolz at codespeak.net
cfbolz at codespeak.net
Mon Oct 8 14:24:38 CEST 2007
Author: cfbolz
Date: Mon Oct 8 14:24:38 2007
New Revision: 47286
Modified:
pypy/dist/pypy/objspace/std/rope.py
pypy/dist/pypy/objspace/std/ropeobject.py
pypy/dist/pypy/objspace/std/test/test_rope.py
Log:
clean up ropes a small bit:
- add a reverse char iterator, which helps in some places to be more efficient
- don't flatten the search string when doing find operations
- have prebuilt interp-level rope objects of lengths 0 and 1 (not only
prebuilt W_RopeObjects)
Modified: pypy/dist/pypy/objspace/std/rope.py
==============================================================================
--- pypy/dist/pypy/objspace/std/rope.py (original)
+++ pypy/dist/pypy/objspace/std/rope.py Mon Oct 8 14:24:38 2007
@@ -134,6 +134,9 @@
yield ('"%s" [shape=box,label="length: %s\\n%s"];' % (
id(self), len(self.s),
repr(addinfo).replace('"', '').replace("\\", "\\\\")))
+LiteralStringNode.EMPTY = LiteralStringNode("")
+LiteralStringNode.PREBUILT = [LiteralStringNode(chr(i)) for i in range(256)]
+del i
class BinaryConcatNode(StringNode):
@@ -395,7 +398,7 @@
def multiply(node, times):
if times <= 0:
- return LiteralStringNode("")
+ return LiteralStringNode.EMPTY
if times == 1:
return node
end_length = node.length() * times
@@ -443,7 +446,7 @@
for node in nodelist:
sizehint += node.length()
if sizehint == 0:
- return LiteralStringNode("")
+ return LiteralStringNode.EMPTY
# this code is based on the Fibonacci identity:
# sum(fib(i) for i in range(n+1)) == fib(n+2)
@@ -563,18 +566,17 @@
def find(node, subnode, start=0, stop=-1):
len1 = node.length()
+ len2 = subnode.length()
if stop > len1 or stop == -1:
stop = len1
- substring = subnode.flatten() # XXX stressful to do it as a node
- len2 = len(substring)
if len2 == 1:
- return find_char(node, substring[0], start, stop)
+ return find_char(node, subnode.getitem(0), start, stop)
if len2 == 0:
if (stop - start) < 0:
return -1
return start
- restart = construct_restart_positions(substring)
- return _find(node, substring, start, stop, restart)
+ restart = construct_restart_positions_node(subnode)
+ return _find_node(node, subnode, start, stop, restart)
def _find(node, substring, start, stop, restart):
len2 = len(substring)
@@ -608,6 +610,43 @@
i = e
return -1
+def _find_node(node, subnode, start, stop, restart):
+ len2 = subnode.length()
+ m = start
+ iter = SeekableCharIterator(node)
+ iter.seekforward(start)
+ c = iter.next()
+ i = 0
+ subiter = SeekableCharIterator(subnode)
+ d = subiter.next()
+ while m + i < stop:
+ if c == d:
+ i += 1
+ if i == len2:
+ return m
+ d = subiter.next()
+ if m + i < stop:
+ c = iter.next()
+ else:
+ # mismatch, go back to the last possible starting pos
+ if i == 0:
+ m += 1
+ if m + i < stop:
+ c = iter.next()
+ else:
+ e = restart[i - 1]
+ new_m = m + i - e
+ assert new_m <= m + i
+ seek = m + i - new_m
+ if seek:
+ iter.seekback(m + i - new_m)
+ c = iter.next()
+ m = new_m
+ subiter.seekback(i - e + 1)
+ d = subiter.next()
+ i = e
+ return -1
+
def construct_restart_positions(s):
l = len(s)
restart = [0] * l
@@ -710,29 +749,36 @@
except StopIteration:
return result
-class SeekableFringeIterator(object):
- # XXX allow to seek in bigger character steps
+
+class ReverseFringeIterator(object):
def __init__(self, node):
self.stack = [node]
- self.fringestack = []
- self.fringe = []
def next(self):
- if self.fringestack:
- result = self.fringestack.pop()
- self.fringe.append(result)
- return result
while self.stack:
curr = self.stack.pop()
while 1:
if isinstance(curr, BinaryConcatNode):
- self.stack.append(curr.right)
- curr = curr.left
+ self.stack.append(curr.left)
+ curr = curr.right
else:
- self.fringe.append(curr)
return curr
raise StopIteration
+class SeekableFringeIterator(FringeIterator):
+ def __init__(self, node):
+ FringeIterator.__init__(self, node)
+ self.fringestack = []
+ self.fringe = []
+
+ def next(self):
+ if self.fringestack:
+ result = self.fringestack.pop()
+ else:
+ result = FringeIterator.next(self)
+ self.fringe.append(result)
+ return result
+
def seekback(self):
result = self.fringe.pop()
self.fringestack.append(result)
@@ -756,13 +802,36 @@
break
self.index = 0
index = self.index
- result = self.node.getitem(index)
- if self.index == self.nodelength - 1:
+ result = node.getitem(index)
+ if index == self.nodelength - 1:
self.node = None
else:
self.index = index + 1
return result
+class ReverseCharIterator(object):
+ def __init__(self, node):
+ self.iter = ReverseFringeIterator(node)
+ self.node = None
+ self.index = 0
+
+ def next(self):
+ node = self.node
+ index = self.index
+ if node is None:
+ while 1:
+ node = self.node = self.iter.next()
+ index = self.index = node.length() - 1
+ if index != -1:
+ break
+ result = node.getitem(index)
+ if index == 0:
+ self.node = None
+ else:
+ self.index = index - 1
+ return result
+
+
class SeekableCharIterator(object):
def __init__(self, node):
self.iter = SeekableFringeIterator(node)
@@ -904,10 +973,7 @@
if not len2:
return 1
- if len1 < len2:
- cmplen = len1
- else:
- cmplen = len2
+ cmplen = min(len1, len2)
i = 0
iter1 = CharIterator(node1)
iter2 = CharIterator(node2)
Modified: pypy/dist/pypy/objspace/std/ropeobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/ropeobject.py (original)
+++ pypy/dist/pypy/objspace/std/ropeobject.py Mon Oct 8 14:24:38 2007
@@ -31,8 +31,8 @@
return w_self
return W_RopeObject(w_self._node)
-W_RopeObject.EMPTY = W_RopeObject(rope.LiteralStringNode(""))
-W_RopeObject.PREBUILT = [W_RopeObject(rope.LiteralStringNode(chr(i)))
+W_RopeObject.EMPTY = W_RopeObject(rope.LiteralStringNode.EMPTY)
+W_RopeObject.PREBUILT = [W_RopeObject(rope.LiteralStringNode.PREBUILT[i])
for i in range(256)]
del i
@@ -393,8 +393,10 @@
d = u_arg - selfnode.length()
if d > 0:
fillchar = fillchar[0] # annotator hint: it's a single character
- resultnode = rope.concatenate(rope.LiteralStringNode(d * fillchar),
- selfnode)
+ resultnode = rope.concatenate(
+ rope.multiply(rope.LiteralStringNode.PREBUILT[ord(fillchar)],
+ d),
+ selfnode)
return W_RopeObject(resultnode)
else:
return W_RopeObject(selfnode)
@@ -410,8 +412,10 @@
d = u_arg - selfnode.length()
if d > 0:
fillchar = fillchar[0] # annotator hint: it's a single character
- resultnode = rope.concatenate(selfnode,
- rope.LiteralStringNode(d * fillchar))
+ resultnode = rope.concatenate(
+ selfnode,
+ rope.multiply(rope.LiteralStringNode.PREBUILT[ord(fillchar)],
+ d))
return W_RopeObject(resultnode)
else:
return W_RopeObject(selfnode)
@@ -525,7 +529,7 @@
substrings = [by]
iter = rope.CharIterator(node)
for i in range(upper):
- substrings.append(rope.LiteralStringNode(iter.next()))
+ substrings.append(rope.LiteralStringNode.PREBUILT[ord(iter.next())])
substrings.append(by)
substrings.append(rope.getslice_one(node, upper, length))
try:
@@ -571,8 +575,8 @@
lpos += 1
if right:
- # XXX improve this
- while rpos > lpos and node.getitem(rpos - 1) in u_chars:
+ iter = rope.ReverseCharIterator(node)
+ while rpos > lpos and iter.next() in u_chars:
rpos -= 1
return W_RopeObject(rope.getslice_one(node, lpos, rpos))
@@ -592,8 +596,8 @@
lpos += 1
if right:
- # XXX fix this
- while rpos > lpos and node.getitem(rpos - 1).isspace():
+ iter = rope.ReverseCharIterator(node)
+ while rpos > lpos and iter.next().isspace():
rpos -= 1
assert rpos >= lpos # annotator hint, don't remove
@@ -632,7 +636,7 @@
d = arg - length
if d>0:
offset = d//2
- fillcharnode = rope.LiteralStringNode(fillchar)
+ fillcharnode = rope.LiteralStringNode.PREBUILT[ord(fillchar)]
pre = rope.multiply(fillcharnode, offset)
post = rope.multiply(fillcharnode, (d - offset))
centered = rope.rebalance([pre, node, post])
@@ -724,23 +728,22 @@
def _tabindent(node, tabsize):
"calculates distance after the token to the next tabstop"
- # XXX implement reverse char iterator
length = node.length()
distance = tabsize
if length:
distance = 0
- offset = length
+ iter = rope.ReverseCharIterator(node)
while 1:
# no sophisticated linebreak support now
# '\r' just for passing adapted CPython test
- char = node.getitem(offset - 1)
+ try:
+ char = iter.next()
+ except StopIteration:
+ break
if char == "\n" or char == "\r":
break
distance += 1
- offset -= 1
- if offset == 0:
- break
#the same like distance = len(u_token) - (offset + 1)
distance = (tabsize - distance) % tabsize
@@ -758,7 +761,7 @@
tabsize = space.int_w(w_tabsize)
expanded = []
- iter = rope.FindIterator(node, rope.LiteralStringNode("\t"))
+ iter = rope.FindIterator(node, rope.LiteralStringNode.PREBUILT[ord("\t")])
#split = u_self.split("\t")
#u_expanded = oldtoken = split.pop(0)
@@ -774,7 +777,7 @@
return w_self.create_if_subclassed()
expanded.append(last)
while 1:
- expanded.append(rope.multiply(rope.LiteralStringNode(" "),
+ expanded.append(rope.multiply(rope.LiteralStringNode.PREBUILT[ord(" ")],
_tabindent(last, tabsize)))
try:
next = iter.next()
@@ -845,7 +848,7 @@
if length >= width:
return w_self.create_if_subclassed()
- zero = rope.LiteralStringNode("0")
+ zero = rope.LiteralStringNode.PREBUILT[ord("0")]
if length == 0:
return W_RopeObject(rope.multiply(zero, width))
@@ -853,7 +856,7 @@
firstchar = node.getitem(0)
if length > 0 and (firstchar == '+' or firstchar == '-'):
return W_RopeObject(rope.rebalance(
- [rope.LiteralStringNode(firstchar),
+ [rope.LiteralStringNode.PREBUILT[ord(firstchar)],
rope.multiply(zero, middle),
rope.getslice_one(node, 1, length)]))
else:
Modified: pypy/dist/pypy/objspace/std/test/test_rope.py
==============================================================================
--- pypy/dist/pypy/objspace/std/test/test_rope.py (original)
+++ pypy/dist/pypy/objspace/std/test/test_rope.py Mon Oct 8 14:24:38 2007
@@ -1,3 +1,4 @@
+import py
import random
from pypy.objspace.std.rope import *
@@ -154,6 +155,14 @@
assert c2 == c
py.test.raises(StopIteration, iter.next)
+def test_reverse_iteration():
+ rope, real_st = make_random_string(200)
+ iter = ReverseCharIterator(rope)
+ for c in py.builtin.reversed(real_st):
+ c2 = iter.next()
+ assert c2 == c
+ py.test.raises(StopIteration, iter.next)
+
def test_multiply():
strs = [(LiteralStringNode("a"), "a"), (LiteralStringNode("abc"), "abc"),
make_random_string(500)]
@@ -218,6 +227,51 @@
assert c2 == "i"
py.test.raises(StopIteration, iter.next)
+def test_fringe_iterator():
+ ABC = LiteralStringNode("abc")
+ DEF = LiteralStringNode("def")
+ GHI = LiteralStringNode("ghi")
+ rope = BinaryConcatNode(BinaryConcatNode(ABC, DEF), GHI)
+ iter = FringeIterator(rope)
+ n = iter.next()
+ assert n is ABC
+ n = iter.next()
+ assert n is DEF
+ n = iter.next()
+ assert n is GHI
+ py.test.raises(StopIteration, iter.next)
+
+def test_seekable_fringe_iterator():
+ ABC = LiteralStringNode("abc")
+ DEF = LiteralStringNode("def")
+ GHI = LiteralStringNode("ghi")
+ rope = BinaryConcatNode(BinaryConcatNode(ABC, DEF), GHI)
+ iter = SeekableFringeIterator(rope)
+ n = iter.next()
+ assert n is ABC
+ n = iter.seekback()
+ assert n is ABC
+ n = iter.next()
+ assert n is ABC
+ n = iter.next()
+ assert n is DEF
+ n = iter.next()
+ assert n is GHI
+ n = iter.seekback()
+ assert n is GHI
+ n = iter.seekback()
+ assert n is DEF
+ n = iter.seekback()
+ assert n is ABC
+ n = iter.next()
+ assert n is ABC
+ n = iter.next()
+ assert n is DEF
+ n = iter.next()
+ assert n is GHI
+ py.test.raises(StopIteration, iter.next)
+
+
def test_seekforward():
rope = BinaryConcatNode(BinaryConcatNode(LiteralStringNode("abc"),
LiteralStringNode("def")),
More information about the Pypy-commit
mailing list