[pypy-commit] pypy fix-strbuf: (fijal, arigo)
arigo
noreply at buildbot.pypy.org
Wed Jul 29 15:16:17 CEST 2015
Author: Armin Rigo <arigo at tunes.org>
Branch: fix-strbuf
Changeset: r78709:735443d28cec
Date: 2015-07-26 19:47 +0200
http://bitbucket.org/pypy/pypy/changeset/735443d28cec/
Log: (fijal, arigo)
Adding the W_UnicodeBufferObject
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -47,6 +47,9 @@
value = value[:]
return W_BytearrayObject(value)
+ def _new_concat(self, space, value1, value2):
+ return self._new(value1 + value2)
+
def _new_from_buffer(self, buffer):
return W_BytearrayObject([buffer[i] for i in range(len(buffer))])
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -482,6 +482,13 @@
"""
raise NotImplementedError
+ def buffer_w(self, space, flags):
+ space.check_buf_flags(flags, True)
+ return StringBuffer(self.str_w(space))
+
+ def readbuf_w(self, space):
+ return StringBuffer(self.str_w(space))
+
def writebuf_w(self, space):
raise OperationError(space.w_TypeError, space.wrap(
"Cannot use string as modifiable buffer"))
@@ -499,12 +506,12 @@
def descr_formatter_parser(self, space):
from pypy.objspace.std.newformat import str_template_formatter
- tformat = str_template_formatter(space, space.str_w(self))
+ tformat = str_template_formatter(space, self.str_w(space))
return tformat.formatter_parser()
def descr_formatter_field_name_split(self, space):
from pypy.objspace.std.newformat import str_template_formatter
- tformat = str_template_formatter(space, space.str_w(self))
+ tformat = str_template_formatter(space, self.str_w(space))
return tformat.formatter_field_name_split()
@@ -526,19 +533,21 @@
def str_w(self, space):
return self._value
- def buffer_w(self, space, flags):
- space.check_buf_flags(flags, True)
- return StringBuffer(self._value)
-
- def readbuf_w(self, space):
- return StringBuffer(self._value)
-
def listview_bytes(self):
return _create_list_from_bytes(self._value)
def _new(self, value):
return W_BytesObject(value)
+ def _new_concat(self, space, value1, value2):
+ if space.config.objspace.std.withstrbuf:
+ from pypy.objspace.std.strbufobject import W_StringBufferObject
+ builder = StringBuilder(len(value1) + len(value2))
+ builder.append(value1)
+ builder.append(value2)
+ return W_StringBufferObject(builder)
+ return self._new(value1 + value2)
+
def _new_from_list(self, value):
return W_BytesObject(''.join(value))
@@ -726,18 +735,6 @@
from .bytearrayobject import W_BytearrayObject, _make_data
self_as_bytearray = W_BytearrayObject(_make_data(self._value))
return space.add(self_as_bytearray, w_other)
- if space.config.objspace.std.withstrbuf:
- from pypy.objspace.std.strbufobject import W_StringBufferObject
- try:
- other = self._op_val(space, w_other)
- except OperationError as e:
- if e.match(space, space.w_TypeError):
- return space.w_NotImplemented
- raise
- builder = StringBuilder()
- builder.append(self._value)
- builder.append(other)
- return W_StringBufferObject(builder)
return self._StringMethods_descr_add(space, w_other)
_StringMethods__startswith = _startswith
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -31,7 +31,7 @@
from pypy.objspace.std.sliceobject import W_SliceObject
from pypy.objspace.std.tupleobject import W_AbstractTupleObject, W_TupleObject
from pypy.objspace.std.typeobject import W_TypeObject, TypeCache
-from pypy.objspace.std.unicodeobject import W_UnicodeObject, wrapunicode
+from pypy.objspace.std.unicodeobject import W_AbstractUnicodeObject, W_UnicodeObject, wrapunicode
class StdObjSpace(ObjSpace):
@@ -82,6 +82,8 @@
}
if self.config.objspace.std.withstrbuf:
builtin_type_classes[W_BytesObject.typedef] = W_AbstractBytesObject
+ builtin_type_classes[W_UnicodeObject.typedef] = (
+ W_AbstractUnicodeObject)
self.builtin_types = {}
self._interplevel_classes = {}
diff --git a/pypy/objspace/std/strbufobject.py b/pypy/objspace/std/strbufobject.py
--- a/pypy/objspace/std/strbufobject.py
+++ b/pypy/objspace/std/strbufobject.py
@@ -1,10 +1,5 @@
-import inspect
-
-import py
-
-from pypy.objspace.std.bytesobject import (W_AbstractBytesObject,
- W_BytesObject, StringBuffer)
-from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.objspace.std.bytesobject import W_AbstractBytesObject
+from pypy.objspace.std.bytesobject import W_BytesObject
from pypy.interpreter.error import OperationError
from rpython.rlib.rstring import StringBuilder
@@ -37,29 +32,22 @@
def str_w(self, space):
return self.force()
- def buffer_w(self, space, flags):
- return StringBuffer(self.force())
-
- def readbuf_w(self, space):
- return StringBuffer(self.force())
-
def descr_len(self, space):
return space.wrap(self.length)
def descr_add(self, space, w_other):
- try:
- other = W_BytesObject._op_val(space, w_other)
- except OperationError as e:
- if e.match(space, space.w_TypeError):
- return space.w_NotImplemented
- raise
- if self.builder.getlength() != self.length:
- builder = StringBuilder()
- builder.append(self.force())
+ if isinstance(w_other, W_AbstractBytesObject):
+ other = w_other.str_w(space)
+ if self.builder.getlength() != self.length:
+ builder = StringBuilder()
+ builder.append(self.force())
+ else:
+ builder = self.builder
+ builder.append(other)
+ return W_StringBufferObject(builder)
else:
- builder = self.builder
- builder.append(other)
- return W_StringBufferObject(builder)
+ self.force()
+ return self.w_str.descr_add(space, w_other)
def descr_str(self, space):
# you cannot get subclasses of W_StringBufferObject here
@@ -67,32 +55,42 @@
return self
-for key, value in W_BytesObject.typedef.rawdict.iteritems():
- if not isinstance(value, interp2app):
- continue
- if key in ('__len__', '__add__', '__str__'):
- continue
+def copy_from_base_class(baseclass, bufclass, attr_name):
+ import inspect
+ import py
+ from pypy.interpreter.gateway import interp2app, unwrap_spec
- func = value._code._bltin
- args = inspect.getargs(func.func_code)
- if args.varargs or args.keywords:
- raise TypeError("Varargs and keywords not supported in unwrap_spec")
- argspec = ', '.join([arg for arg in args.args[1:]])
- func_code = py.code.Source("""
- def f(self, %(args)s):
- self.force()
- return self.w_str.%(func_name)s(%(args)s)
- """ % {'args': argspec, 'func_name': func.func_name})
- d = {}
- exec func_code.compile() in d
- f = d['f']
- f.func_defaults = func.func_defaults
- f.__module__ = func.__module__
- # necessary for unique identifiers for pickling
- f.func_name = func.func_name
- unwrap_spec_ = getattr(func, 'unwrap_spec', None)
- if unwrap_spec_ is not None:
- f = unwrap_spec(**unwrap_spec_)(f)
- setattr(W_StringBufferObject, func.func_name, f)
+ for key, value in baseclass.typedef.rawdict.iteritems():
+ if not isinstance(value, interp2app):
+ continue
-W_StringBufferObject.typedef = W_BytesObject.typedef
+ func = value._code._bltin
+ if func.func_name in bufclass.__dict__:
+ assert key in ('__len__', '__add__', '__str__', '__unicode__')
+ continue
+
+ args = inspect.getargs(func.func_code)
+ if args.varargs or args.keywords:
+ raise TypeError("Varargs and keywords not supported in unwrap_spec")
+ argspec = ', '.join([arg for arg in args.args[1:]])
+ func_code = py.code.Source("""
+ def f(self, %(args)s):
+ self.force()
+ return self.%(attr_name)s.%(func_name)s(%(args)s)
+ """ % {'args': argspec, 'func_name': func.func_name,
+ 'attr_name': attr_name})
+ d = {}
+ exec func_code.compile() in d
+ f = d['f']
+ f.func_defaults = func.func_defaults
+ f.__module__ = func.__module__
+ # necessary for unique identifiers for pickling
+ f.func_name = func.func_name
+ unwrap_spec_ = getattr(func, 'unwrap_spec', None)
+ if unwrap_spec_ is not None:
+ f = unwrap_spec(**unwrap_spec_)(f)
+ setattr(bufclass, func.func_name, f)
+
+ bufclass.typedef = baseclass.typedef
+
+copy_from_base_class(W_BytesObject, W_StringBufferObject, 'w_str')
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -62,7 +62,7 @@
if e.match(space, space.w_TypeError):
return space.w_NotImplemented
raise
- return self._new(self._val(space) + other)
+ return self._new_concat(space, self._val(space), other)
# Bytearray overrides this method, CPython doesn't support concatenating
# buffers and strs, and unicodes are always handled above
diff --git a/pypy/objspace/std/test/test_strbufobject.py b/pypy/objspace/std/test/test_strbufobject.py
--- a/pypy/objspace/std/test/test_strbufobject.py
+++ b/pypy/objspace/std/test/test_strbufobject.py
@@ -84,6 +84,13 @@
a += 'b'
raises(TypeError, "a += 5")
+ def test_add_unicode(self):
+ a = 'a'
+ a += 'b'
+ a += u'\u1234'
+ assert a == u'ab\u1234'
+ assert isinstance(a, unicode)
+
def test_mix_strings_format(self):
a = 'a'
a += 'b'
@@ -99,3 +106,9 @@
a = 'abc'
a += 'bc'
assert list(a._formatter_parser()) == [('abcbc', None, None, None)]
+
+ def test_startswith_u(self):
+ a = 'abc'
+ a += 'bc'
+ assert a.startswith(u'abcb')
+ assert not a.startswith(u'\u1234')
diff --git a/pypy/objspace/std/test/test_unibufobject.py b/pypy/objspace/std/test/test_unibufobject.py
new file mode 100644
--- /dev/null
+++ b/pypy/objspace/std/test/test_unibufobject.py
@@ -0,0 +1,110 @@
+import py
+
+from pypy.objspace.std.test import test_unicodeobject
+
+class AppTestUnicodeObject(test_unicodeobject.AppTestUnicodeString):
+ spaceconfig = test_unicodeobject.AppTestUnicodeString.spaceconfig.copy()
+ spaceconfig.update({"objspace.std.withstrbuf": True})
+
+ def test_basic(self):
+ import __pypy__
+ # cannot do "Hello, " + "World!" because cpy2.5 optimises this
+ # away on AST level
+ s = u"Hello, ".__add__(u"World!")
+ assert type(s) is unicode
+ assert 'W_UnicodeBufferObject' in __pypy__.internal_repr(s)
+
+ def test_add_twice(self):
+ x = u"a".__add__(u"b")
+ y = x + u"c"
+ c = x + u"d"
+ assert y == u"abc"
+ assert c == u"abd"
+
+ def test_add(self):
+ import __pypy__
+ all = ""
+ for i in range(20):
+ all += unicode(i)
+ assert 'W_UnicodeBufferObject' in __pypy__.internal_repr(all)
+ assert all == u"012345678910111213141516171819"
+
+ def test_hash(self):
+ import __pypy__
+ def join(s): return s[:len(s) // 2] + s[len(s) // 2:]
+ t = u'a' * 101
+ s = join(t)
+ assert 'W_UnicodeBufferObject' in __pypy__.internal_repr(s)
+ assert hash(s) == hash(t)
+
+ def test_len(self):
+ s = u"a".__add__(u"b")
+ r = u"c".__add__(u"d")
+ t = s + r
+ assert len(s) == 2
+ assert len(r) == 2
+ assert len(t) == 4
+
+ def test_add_strbuf(self):
+ # make three strbuf objects
+ s = u'a'.__add__(u'b')
+ t = u'x'.__add__(u'c')
+ u = u'y'.__add__(u'd')
+
+ # add two different strbufs to the same string
+ v = s + t
+ w = s + u
+
+ # check that insanity hasn't resulted.
+ assert v == u"abxc"
+ assert w == u"abyd"
+
+ def test_more_adding_fun(self):
+ s = u'a'.__add__(u'b') # s is a strbuf now
+ t = s + u'c'
+ u = s + u'd'
+ v = s + u'e'
+ assert v == u'abe'
+ assert u == u'abd'
+ assert t == u'abc'
+
+ def test_buh_even_more(self):
+ a = u'a'.__add__(u'b')
+ b = a + u'c'
+ c = u'0'.__add__(u'1')
+ x = c + a
+ assert x == u'01ab'
+
+ def test_add_non_string(self):
+ a = u'a'
+ a += u'b'
+ raises(TypeError, "a += 5")
+
+ def test_add_plain_string(self):
+ a = u'a'
+ a += u'\u1234'
+ a += 'b'
+ assert a == u'a\u1234b'
+ assert isinstance(a, unicode)
+
+ def test_mix_strings_format(self):
+ a = u'a'
+ a += u'b'
+ assert u'foo%s' % a == u'fooab'
+ assert (a + u'%s') % (u'foo',) == u'abfoo'
+
+ def test_print(self):
+ a = u'abc'
+ a += u'bc'
+ print a
+
+ def test_formatter_parser(self):
+ a = u'abc'
+ a += u'bc'
+ assert list(a._formatter_parser()) == [(u'abcbc', None, None, None)]
+
+ def test_startswith_s(self):
+ a = u'abc'
+ a += u'bc'
+ assert a.startswith('abcb')
+ assert not a.startswith('1234')
diff --git a/pypy/objspace/std/unibufobject.py b/pypy/objspace/std/unibufobject.py
new file mode 100644
--- /dev/null
+++ b/pypy/objspace/std/unibufobject.py
@@ -0,0 +1,67 @@
+from pypy.objspace.std.unicodeobject import W_AbstractUnicodeObject
+from pypy.objspace.std.unicodeobject import W_UnicodeObject, unicode_from_string
+from pypy.objspace.std.strbufobject import copy_from_base_class
+from pypy.interpreter.error import OperationError
+from rpython.rlib.rstring import UnicodeBuilder
+
+
+class W_UnicodeBufferObject(W_AbstractUnicodeObject):
+ w_unicode = None
+
+ def __init__(self, builder):
+ self.builder = builder # UnicodeBuilder
+ self.length = builder.getlength()
+
+ def force(self):
+ if self.w_unicode is None:
+ s = self.builder.build()
+ if self.length < len(s):
+ s = s[:self.length]
+ self.w_unicode = W_UnicodeObject(s)
+ return s
+ else:
+ return self.w_unicode._value
+
+ def __repr__(w_self):
+ """ representation for debugging purposes """
+ return "%s(%r[:%d])" % (
+ w_self.__class__.__name__, w_self.builder, w_self.length)
+
+ def unwrap(self, space):
+ return self.force()
+
+ def unicode_w(self, space):
+ return self.force()
+
+ def descr_len(self, space):
+ return space.wrap(self.length)
+
+ def _new_concat_buffer(self, other):
+ if self.builder.getlength() != self.length:
+ builder = UnicodeBuilder()
+ builder.append(self.force())
+ else:
+ builder = self.builder
+ builder.append(other)
+ return W_UnicodeBufferObject(builder)
+
+ def descr_add(self, space, w_other):
+ from pypy.objspace.std.bytesobject import W_AbstractBytesObject
+
+ if isinstance(w_other, W_AbstractUnicodeObject):
+ other = w_other.unicode_w(space)
+ return self._new_concat_buffer(other)
+ elif isinstance(w_other, W_AbstractBytesObject):
+ other = unicode_from_string(space, w_other)._value
+ return self._new_concat_buffer(other)
+ else:
+ self.force()
+ return self.w_unicode.descr_add(space, w_other)
+
+ def descr_unicode(self, space):
+ # you cannot get subclasses of W_UnicodeBufferObject here
+ assert type(self) is W_UnicodeBufferObject
+ return self
+
+
+copy_from_base_class(W_UnicodeObject, W_UnicodeBufferObject, 'w_unicode')
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -495,14 +495,22 @@
of the specified width. The string S is never truncated.
"""
+ def readbuf_w(self, space):
+ from rpython.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
+ value = self.unicode_w(space)
+ builder = StringBuilder(len(value) * UNICODE_SIZE)
+ for unich in value:
+ pack_unichar(unich, builder)
+ return StringBuffer(builder.build())
+
def descr_formatter_parser(self, space):
from pypy.objspace.std.newformat import unicode_template_formatter
- tformat = unicode_template_formatter(space, space.unicode_w(self))
+ tformat = unicode_template_formatter(space, self.unicode_w(space))
return tformat.formatter_parser()
def descr_formatter_field_name_split(self, space):
from pypy.objspace.std.newformat import unicode_template_formatter
- tformat = unicode_template_formatter(space, space.unicode_w(self))
+ tformat = unicode_template_formatter(space, self.unicode_w(space))
return tformat.formatter_field_name_split()
@@ -530,13 +538,6 @@
def unicode_w(self, space):
return self._value
- def readbuf_w(self, space):
- from rpython.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
- builder = StringBuilder(len(self._value) * UNICODE_SIZE)
- for unich in self._value:
- pack_unichar(unich, builder)
- return StringBuffer(builder.build())
-
def writebuf_w(self, space):
raise OperationError(space.w_TypeError, space.wrap(
"cannot use unicode as modifiable buffer"))
@@ -554,6 +555,15 @@
def _new(self, value):
return W_UnicodeObject(value)
+ def _new_concat(self, space, value1, value2):
+ if space.config.objspace.std.withstrbuf:
+ from pypy.objspace.std.unibufobject import W_UnicodeBufferObject
+ builder = UnicodeBuilder(len(value1) + len(value2))
+ builder.append(value1)
+ builder.append(value2)
+ return W_UnicodeBufferObject(builder)
+ return self._new(value1 + value2)
+
def _new_from_list(self, value):
return W_UnicodeObject(u''.join(value))
@@ -573,9 +583,11 @@
@staticmethod
def _op_val(space, w_other):
- if isinstance(w_other, W_UnicodeObject):
- return w_other._value
- if space.isinstance_w(w_other, space.w_str):
+ from pypy.objspace.std.bytesobject import W_AbstractBytesObject
+
+ if isinstance(w_other, W_AbstractUnicodeObject):
+ return w_other.unicode_w(space)
+ if isinstance(w_other, W_AbstractBytesObject):
return unicode_from_string(space, w_other)._value
return unicode_from_encoded_object(
space, w_other, None, "strict")._value
@@ -664,9 +676,9 @@
if space.is_w(w_unicodetype, space.w_unicode):
return w_value
- assert isinstance(w_value, W_UnicodeObject)
+ value = w_value.unicode_w(space)
w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype)
- W_UnicodeObject.__init__(w_newobj, w_value._value)
+ W_UnicodeObject.__init__(w_newobj, value)
return w_newobj
def descr_repr(self, space):
@@ -1035,7 +1047,7 @@
__add__ = interpindirect2app(W_AbstractUnicodeObject.descr_add),
__mul__ = interpindirect2app(W_AbstractUnicodeObject.descr_mul),
- __rmul__ = interpindirect2app(W_AbstractUnicodeObject.descr_mul),
+ __rmul__ = interpindirect2app(W_AbstractUnicodeObject.descr_rmul),
__getitem__ = interpindirect2app(W_AbstractUnicodeObject.descr_getitem),
__getslice__ = interpindirect2app(W_AbstractUnicodeObject.descr_getslice),
More information about the pypy-commit
mailing list