[pypy-svn] pypy fast-forward: optimize unicode.join to use a UnicodeBuilder
amauryfa
commits-noreply at bitbucket.org
Thu Jan 6 18:40:52 CET 2011
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: fast-forward
Changeset: r40420:3c25108a2274
Date: 2011-01-06 18:18 +0100
http://bitbucket.org/pypy/pypy/changeset/3c25108a2274/
Log: Optimize unicode.join to use a UnicodeBuilder. Also optimize str.join
when there is only one item in the list (this also fixes a test for
identity in the CPython test suite).
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -11,7 +11,7 @@
from pypy.objspace.std.tupleobject import W_TupleObject
from pypy.rlib.rarithmetic import intmask, ovfcheck
from pypy.rlib.objectmodel import compute_hash
-from pypy.rlib.rstring import string_repeat
+from pypy.rlib.rstring import UnicodeBuilder, string_repeat
from pypy.rlib.runicode import unicode_encode_unicode_escape
from pypy.module.unicodedata import unicodedb
from pypy.tool.sourcetools import func_with_new_name
@@ -182,28 +182,35 @@
return space.newbool(container.find(item) != -1)
def unicode_join__Unicode_ANY(space, w_self, w_list):
- l = space.unpackiterable(w_list)
- delim = w_self._value
- totlen = 0
- if len(l) == 0:
+ list_w = space.unpackiterable(w_list)
+ size = len(list_w)
+
+ if size == 0:
return W_UnicodeObject.EMPTY
- if (len(l) == 1 and
- space.is_w(space.type(l[0]), space.w_unicode)):
- return l[0]
-
- values_list = [None] * len(l)
- for i in range(len(l)):
- item = l[i]
- if isinstance(item, W_UnicodeObject):
- # shortcut for performane
- item = item._value
- elif space.is_true(space.isinstance(item, space.w_str)):
- item = space.unicode_w(item)
+
+ if size == 1:
+ w_s = list_w[0]
+ if space.is_w(space.type(w_s), space.w_unicode):
+ return w_s
+
+ self = w_self._value
+ sb = UnicodeBuilder()
+ for i in range(size):
+ if self and i != 0:
+ sb.append(self)
+ w_s = list_w[i]
+ if isinstance(w_s, W_UnicodeObject):
+ # shortcut for performance
+ sb.append(w_s._value)
else:
- raise operationerrfmt(space.w_TypeError,
- "sequence item %d: expected string or Unicode", i)
- values_list[i] = item
- return W_UnicodeObject(w_self._value.join(values_list))
+ try:
+ sb.append(space.unicode_w(w_s))
+ except OperationError, e:
+ if not e.match(space, space.w_TypeError):
+ raise
+ raise operationerrfmt(space.w_TypeError,
+ "sequence item %d: expected string or Unicode", i)
+ return space.wrap(sb.build())
def hash__Unicode(space, w_uni):
s = w_uni._value
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -326,33 +326,42 @@
def str_join__String_ANY(space, w_self, w_list):
list_w = space.listview(w_list)
- if list_w:
- self = w_self._value
- reslen = 0
- for i in range(len(list_w)):
- w_s = list_w[i]
- if not space.is_true(space.isinstance(w_s, space.w_str)):
- if space.is_true(space.isinstance(w_s, space.w_unicode)):
- # we need to rebuild w_list here, because the original
- # w_list might be an iterable which we already consumed
- w_list = space.newlist(list_w)
- w_u = space.call_function(space.w_unicode, w_self)
- return space.call_method(w_u, "join", w_list)
- raise operationerrfmt(
- space.w_TypeError,
- "sequence item %d: expected string, %s "
- "found", i, space.type(w_s).getname(space))
- reslen += len(space.str_w(w_s))
- reslen += len(self) * (len(list_w) - 1)
- sb = StringBuilder(reslen)
- for i in range(len(list_w)):
- if self and i != 0:
- sb.append(self)
- sb.append(space.str_w(list_w[i]))
- return space.wrap(sb.build())
- else:
+ size = len(list_w)
+
+ if size == 0:
return W_StringObject.EMPTY
+ if size == 1:
+ w_s = list_w[0]
+ # only one item, return it if it's not a subclass of str
+ if (space.is_w(space.type(w_s), space.w_str) or
+ space.is_w(space.type(w_s), space.w_unicode)):
+ return w_s
+
+ self = w_self._value
+ reslen = len(self) * (size - 1)
+ for i in range(size):
+ w_s = list_w[i]
+ if not space.is_true(space.isinstance(w_s, space.w_str)):
+ if space.is_true(space.isinstance(w_s, space.w_unicode)):
+ # we need to rebuild w_list here, because the original
+ # w_list might be an iterable which we already consumed
+ w_list = space.newlist(list_w)
+ w_u = space.call_function(space.w_unicode, w_self)
+ return space.call_method(w_u, "join", w_list)
+ raise operationerrfmt(
+ space.w_TypeError,
+ "sequence item %d: expected string, %s "
+ "found", i, space.type(w_s).getname(space))
+ reslen += len(space.str_w(w_s))
+
+ sb = StringBuilder(reslen)
+ for i in range(size):
+ if self and i != 0:
+ sb.append(self)
+ sb.append(space.str_w(list_w[i]))
+ return space.wrap(sb.build())
+
def str_rjust__String_ANY_ANY(space, w_self, w_arg, w_fillchar):
u_arg = space.int_w(w_arg)
u_self = w_self._value
diff --git a/pypy/objspace/std/test/test_stringobject.py b/pypy/objspace/std/test/test_stringobject.py
--- a/pypy/objspace/std/test/test_stringobject.py
+++ b/pypy/objspace/std/test/test_stringobject.py
@@ -494,6 +494,8 @@
assert ", ".join(['a', 'b', 'c']) == "a, b, c"
assert "".join([]) == ""
assert "-".join(['a', 'b']) == 'a-b'
+ text = 'text'
+ assert "".join([text]) is text
raises(TypeError, ''.join, 1)
raises(TypeError, ''.join, [1])
raises(TypeError, ''.join, [[1]])
More information about the Pypy-commit
mailing list