[pypy-commit] pypy fix-strbuf: (fijal, arigo)

arigo noreply at buildbot.pypy.org
Wed Jul 29 15:16:17 CEST 2015


Author: Armin Rigo <arigo at tunes.org>
Branch: fix-strbuf
Changeset: r78709:735443d28cec
Date: 2015-07-26 19:47 +0200
http://bitbucket.org/pypy/pypy/changeset/735443d28cec/

Log:	(fijal, arigo)

	Adding the W_UnicodeBufferObject

diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -47,6 +47,9 @@
             value = value[:]
         return W_BytearrayObject(value)
 
+    def _new_concat(self, space, value1, value2):
+        return self._new(value1 + value2)
+
     def _new_from_buffer(self, buffer):
         return W_BytearrayObject([buffer[i] for i in range(len(buffer))])
 
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -482,6 +482,13 @@
         """
         raise NotImplementedError
 
+    def buffer_w(self, space, flags):
+        space.check_buf_flags(flags, True)
+        return StringBuffer(self.str_w(space))
+
+    def readbuf_w(self, space):
+        return StringBuffer(self.str_w(space))
+
     def writebuf_w(self, space):
         raise OperationError(space.w_TypeError, space.wrap(
             "Cannot use string as modifiable buffer"))
@@ -499,12 +506,12 @@
 
     def descr_formatter_parser(self, space):
         from pypy.objspace.std.newformat import str_template_formatter
-        tformat = str_template_formatter(space, space.str_w(self))
+        tformat = str_template_formatter(space, self.str_w(space))
         return tformat.formatter_parser()
 
     def descr_formatter_field_name_split(self, space):
         from pypy.objspace.std.newformat import str_template_formatter
-        tformat = str_template_formatter(space, space.str_w(self))
+        tformat = str_template_formatter(space, self.str_w(space))
         return tformat.formatter_field_name_split()
 
 
@@ -526,19 +533,21 @@
     def str_w(self, space):
         return self._value
 
-    def buffer_w(self, space, flags):
-        space.check_buf_flags(flags, True)
-        return StringBuffer(self._value)
-
-    def readbuf_w(self, space):
-        return StringBuffer(self._value)
-
     def listview_bytes(self):
         return _create_list_from_bytes(self._value)
 
     def _new(self, value):
         return W_BytesObject(value)
 
+    def _new_concat(self, space, value1, value2):
+        if space.config.objspace.std.withstrbuf:
+            from pypy.objspace.std.strbufobject import W_StringBufferObject
+            builder = StringBuilder(len(value1) + len(value2))
+            builder.append(value1)
+            builder.append(value2)
+            return W_StringBufferObject(builder)
+        return self._new(value1 + value2)
+
     def _new_from_list(self, value):
         return W_BytesObject(''.join(value))
 
@@ -726,18 +735,6 @@
             from .bytearrayobject import W_BytearrayObject, _make_data
             self_as_bytearray = W_BytearrayObject(_make_data(self._value))
             return space.add(self_as_bytearray, w_other)
-        if space.config.objspace.std.withstrbuf:
-            from pypy.objspace.std.strbufobject import W_StringBufferObject
-            try:
-                other = self._op_val(space, w_other)
-            except OperationError as e:
-                if e.match(space, space.w_TypeError):
-                    return space.w_NotImplemented
-                raise
-            builder = StringBuilder()
-            builder.append(self._value)
-            builder.append(other)
-            return W_StringBufferObject(builder)
         return self._StringMethods_descr_add(space, w_other)
 
     _StringMethods__startswith = _startswith
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -31,7 +31,7 @@
 from pypy.objspace.std.sliceobject import W_SliceObject
 from pypy.objspace.std.tupleobject import W_AbstractTupleObject, W_TupleObject
 from pypy.objspace.std.typeobject import W_TypeObject, TypeCache
-from pypy.objspace.std.unicodeobject import W_UnicodeObject, wrapunicode
+from pypy.objspace.std.unicodeobject import W_AbstractUnicodeObject, W_UnicodeObject, wrapunicode
 
 
 class StdObjSpace(ObjSpace):
@@ -82,6 +82,8 @@
         }
         if self.config.objspace.std.withstrbuf:
             builtin_type_classes[W_BytesObject.typedef] = W_AbstractBytesObject
+            builtin_type_classes[W_UnicodeObject.typedef] = (
+                W_AbstractUnicodeObject)
 
         self.builtin_types = {}
         self._interplevel_classes = {}
diff --git a/pypy/objspace/std/strbufobject.py b/pypy/objspace/std/strbufobject.py
--- a/pypy/objspace/std/strbufobject.py
+++ b/pypy/objspace/std/strbufobject.py
@@ -1,10 +1,5 @@
-import inspect
-
-import py
-
-from pypy.objspace.std.bytesobject import (W_AbstractBytesObject,
-    W_BytesObject, StringBuffer)
-from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.objspace.std.bytesobject import W_AbstractBytesObject
+from pypy.objspace.std.bytesobject import W_BytesObject
 from pypy.interpreter.error import OperationError
 from rpython.rlib.rstring import StringBuilder
 
@@ -37,29 +32,22 @@
     def str_w(self, space):
         return self.force()
 
-    def buffer_w(self, space, flags):
-        return StringBuffer(self.force())
-
-    def readbuf_w(self, space):
-        return StringBuffer(self.force())
-
     def descr_len(self, space):
         return space.wrap(self.length)
 
     def descr_add(self, space, w_other):
-        try:
-            other = W_BytesObject._op_val(space, w_other)
-        except OperationError as e:
-            if e.match(space, space.w_TypeError):
-                return space.w_NotImplemented
-            raise
-        if self.builder.getlength() != self.length:
-            builder = StringBuilder()
-            builder.append(self.force())
+        if isinstance(w_other, W_AbstractBytesObject):
+            other = w_other.str_w(space)
+            if self.builder.getlength() != self.length:
+                builder = StringBuilder()
+                builder.append(self.force())
+            else:
+                builder = self.builder
+            builder.append(other)
+            return W_StringBufferObject(builder)
         else:
-            builder = self.builder
-        builder.append(other)
-        return W_StringBufferObject(builder)
+            self.force()
+            return self.w_str.descr_add(space, w_other)
 
     def descr_str(self, space):
         # you cannot get subclasses of W_StringBufferObject here
@@ -67,32 +55,42 @@
         return self
 
 
-for key, value in W_BytesObject.typedef.rawdict.iteritems():
-    if not isinstance(value, interp2app):
-        continue
-    if key in ('__len__', '__add__', '__str__'):
-        continue
+def copy_from_base_class(baseclass, bufclass, attr_name):
+    import inspect
+    import py
+    from pypy.interpreter.gateway import interp2app, unwrap_spec
 
-    func = value._code._bltin
-    args = inspect.getargs(func.func_code)
-    if args.varargs or args.keywords:
-        raise TypeError("Varargs and keywords not supported in unwrap_spec")
-    argspec = ', '.join([arg for arg in args.args[1:]])
-    func_code = py.code.Source("""
-    def f(self, %(args)s):
-        self.force()
-        return self.w_str.%(func_name)s(%(args)s)
-    """ % {'args': argspec, 'func_name': func.func_name})
-    d = {}
-    exec func_code.compile() in d
-    f = d['f']
-    f.func_defaults = func.func_defaults
-    f.__module__ = func.__module__
-    # necessary for unique identifiers for pickling
-    f.func_name = func.func_name
-    unwrap_spec_ = getattr(func, 'unwrap_spec', None)
-    if unwrap_spec_ is not None:
-        f = unwrap_spec(**unwrap_spec_)(f)
-    setattr(W_StringBufferObject, func.func_name, f)
+    for key, value in baseclass.typedef.rawdict.iteritems():
+        if not isinstance(value, interp2app):
+            continue
 
-W_StringBufferObject.typedef = W_BytesObject.typedef
+        func = value._code._bltin
+        if func.func_name in bufclass.__dict__:
+            assert key in ('__len__', '__add__', '__str__', '__unicode__')
+            continue
+
+        args = inspect.getargs(func.func_code)
+        if args.varargs or args.keywords:
+            raise TypeError("Varargs and keywords not supported in unwrap_spec")
+        argspec = ', '.join([arg for arg in args.args[1:]])
+        func_code = py.code.Source("""
+        def f(self, %(args)s):
+            self.force()
+            return self.%(attr_name)s.%(func_name)s(%(args)s)
+        """ % {'args': argspec, 'func_name': func.func_name,
+               'attr_name': attr_name})
+        d = {}
+        exec func_code.compile() in d
+        f = d['f']
+        f.func_defaults = func.func_defaults
+        f.__module__ = func.__module__
+        # necessary for unique identifiers for pickling
+        f.func_name = func.func_name
+        unwrap_spec_ = getattr(func, 'unwrap_spec', None)
+        if unwrap_spec_ is not None:
+            f = unwrap_spec(**unwrap_spec_)(f)
+        setattr(bufclass, func.func_name, f)
+
+    bufclass.typedef = baseclass.typedef
+
+copy_from_base_class(W_BytesObject, W_StringBufferObject, 'w_str')
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -62,7 +62,7 @@
                 if e.match(space, space.w_TypeError):
                     return space.w_NotImplemented
                 raise
-            return self._new(self._val(space) + other)
+            return self._new_concat(space, self._val(space), other)
 
         # Bytearray overrides this method, CPython doesn't support
         # concatenating buffers and strs, and unicodes are always handled above
diff --git a/pypy/objspace/std/test/test_strbufobject.py b/pypy/objspace/std/test/test_strbufobject.py
--- a/pypy/objspace/std/test/test_strbufobject.py
+++ b/pypy/objspace/std/test/test_strbufobject.py
@@ -84,6 +84,13 @@
         a += 'b'
         raises(TypeError, "a += 5")
 
+    def test_add_unicode(self):
+        a = 'a'
+        a += 'b'
+        a += u'\u1234'
+        assert a == u'ab\u1234'
+        assert isinstance(a, unicode)
+
     def test_mix_strings_format(self):
         a = 'a'
         a += 'b'
@@ -99,3 +106,9 @@
         a = 'abc'
         a += 'bc'
         assert list(a._formatter_parser()) == [('abcbc', None, None, None)]
+
+    def test_startswith_u(self):
+        a = 'abc'
+        a += 'bc'
+        assert a.startswith(u'abcb')
+        assert not a.startswith(u'\u1234')
diff --git a/pypy/objspace/std/test/test_unibufobject.py b/pypy/objspace/std/test/test_unibufobject.py
new file mode 100644
--- /dev/null
+++ b/pypy/objspace/std/test/test_unibufobject.py
@@ -0,0 +1,110 @@
+import py
+
+from pypy.objspace.std.test import test_unicodeobject
+
+class AppTestUnicodeObject(test_unicodeobject.AppTestUnicodeString):
+    spaceconfig = test_unicodeobject.AppTestUnicodeString.spaceconfig.copy()
+    spaceconfig.update({"objspace.std.withstrbuf": True})
+
+    def test_basic(self):
+        import __pypy__
+        # cannot do "Hello, " + "World!" because cpy2.5 optimises this
+        # away on AST level
+        s = u"Hello, ".__add__(u"World!")
+        assert type(s) is unicode
+        assert 'W_UnicodeBufferObject' in __pypy__.internal_repr(s)
+
+    def test_add_twice(self):
+        x = u"a".__add__(u"b")
+        y = x + u"c"
+        c = x + u"d"
+        assert y == u"abc"
+        assert c == u"abd"
+
+    def test_add(self):
+        import __pypy__
+        all = ""
+        for i in range(20):
+            all += unicode(i)
+        assert 'W_UnicodeBufferObject' in __pypy__.internal_repr(all)
+        assert all == u"012345678910111213141516171819"
+
+    def test_hash(self):
+        import __pypy__
+        def join(s): return s[:len(s) // 2] + s[len(s) // 2:]
+        t = u'a' * 101
+        s = join(t)
+        assert 'W_UnicodeBufferObject' in __pypy__.internal_repr(s)
+        assert hash(s) == hash(t)
+
+    def test_len(self):
+        s = u"a".__add__(u"b")
+        r = u"c".__add__(u"d")
+        t = s + r
+        assert len(s) == 2
+        assert len(r) == 2
+        assert len(t) == 4
+
+    def test_add_strbuf(self):
+        # make three strbuf objects
+        s = u'a'.__add__(u'b')
+        t = u'x'.__add__(u'c')
+        u = u'y'.__add__(u'd')
+
+        # add two different strbufs to the same string
+        v = s + t
+        w = s + u
+
+        # check that insanity hasn't resulted.
+        assert v == u"abxc"
+        assert w == u"abyd"
+
+    def test_more_adding_fun(self):
+        s = u'a'.__add__(u'b') # s is a strbuf now
+        t = s + u'c'
+        u = s + u'd'
+        v = s + u'e'
+        assert v == u'abe'
+        assert u == u'abd'
+        assert t == u'abc'
+
+    def test_buh_even_more(self):
+        a = u'a'.__add__(u'b')
+        b = a + u'c'
+        c = u'0'.__add__(u'1')
+        x = c + a
+        assert x == u'01ab'
+
+    def test_add_non_string(self):
+        a = u'a'
+        a += u'b'
+        raises(TypeError, "a += 5")
+
+    def test_add_plain_string(self):
+        a = u'a'
+        a += u'\u1234'
+        a += 'b'
+        assert a == u'a\u1234b'
+        assert isinstance(a, unicode)
+
+    def test_mix_strings_format(self):
+        a = u'a'
+        a += u'b'
+        assert u'foo%s' % a == u'fooab'
+        assert (a + u'%s') % (u'foo',) == u'abfoo'
+
+    def test_print(self):
+        a = u'abc'
+        a += u'bc'
+        print a
+
+    def test_formatter_parser(self):
+        a = u'abc'
+        a += u'bc'
+        assert list(a._formatter_parser()) == [(u'abcbc', None, None, None)]
+
+    def test_startswith_s(self):
+        a = u'abc'
+        a += u'bc'
+        assert a.startswith('abcb')
+        assert not a.startswith('1234')
diff --git a/pypy/objspace/std/unibufobject.py b/pypy/objspace/std/unibufobject.py
new file mode 100644
--- /dev/null
+++ b/pypy/objspace/std/unibufobject.py
@@ -0,0 +1,67 @@
+from pypy.objspace.std.unicodeobject import W_AbstractUnicodeObject
+from pypy.objspace.std.unicodeobject import W_UnicodeObject, unicode_from_string
+from pypy.objspace.std.strbufobject import copy_from_base_class
+from pypy.interpreter.error import OperationError
+from rpython.rlib.rstring import UnicodeBuilder
+
+
+class W_UnicodeBufferObject(W_AbstractUnicodeObject):
+    w_unicode = None
+
+    def __init__(self, builder):
+        self.builder = builder             # UnicodeBuilder
+        self.length = builder.getlength()
+
+    def force(self):
+        if self.w_unicode is None:
+            s = self.builder.build()
+            if self.length < len(s):
+                s = s[:self.length]
+            self.w_unicode = W_UnicodeObject(s)
+            return s
+        else:
+            return self.w_unicode._value
+
+    def __repr__(w_self):
+        """ representation for debugging purposes """
+        return "%s(%r[:%d])" % (
+            w_self.__class__.__name__, w_self.builder, w_self.length)
+
+    def unwrap(self, space):
+        return self.force()
+
+    def unicode_w(self, space):
+        return self.force()
+
+    def descr_len(self, space):
+        return space.wrap(self.length)
+
+    def _new_concat_buffer(self, other):
+        if self.builder.getlength() != self.length:
+            builder = UnicodeBuilder()
+            builder.append(self.force())
+        else:
+            builder = self.builder
+        builder.append(other)
+        return W_UnicodeBufferObject(builder)
+
+    def descr_add(self, space, w_other):
+        from pypy.objspace.std.bytesobject import W_AbstractBytesObject
+
+        if isinstance(w_other, W_AbstractUnicodeObject):
+            other = w_other.unicode_w(space)
+            return self._new_concat_buffer(other)
+        elif isinstance(w_other, W_AbstractBytesObject):
+            other = unicode_from_string(space, w_other)._value
+            return self._new_concat_buffer(other)
+        else:
+            self.force()
+            return self.w_unicode.descr_add(space, w_other)
+
+    def descr_unicode(self, space):
+        # you cannot get subclasses of W_UnicodeBufferObject here
+        assert type(self) is W_UnicodeBufferObject
+        return self
+
+
+copy_from_base_class(W_UnicodeObject, W_UnicodeBufferObject, 'w_unicode')
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -495,14 +495,22 @@
         of the specified width. The string S is never truncated.
         """
 
+    def readbuf_w(self, space):
+        from rpython.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
+        value = self.unicode_w(space)
+        builder = StringBuilder(len(value) * UNICODE_SIZE)
+        for unich in value:
+            pack_unichar(unich, builder)
+        return StringBuffer(builder.build())
+
     def descr_formatter_parser(self, space):
         from pypy.objspace.std.newformat import unicode_template_formatter
-        tformat = unicode_template_formatter(space, space.unicode_w(self))
+        tformat = unicode_template_formatter(space, self.unicode_w(space))
         return tformat.formatter_parser()
 
     def descr_formatter_field_name_split(self, space):
         from pypy.objspace.std.newformat import unicode_template_formatter
-        tformat = unicode_template_formatter(space, space.unicode_w(self))
+        tformat = unicode_template_formatter(space, self.unicode_w(space))
         return tformat.formatter_field_name_split()
 
 
@@ -530,13 +538,6 @@
     def unicode_w(self, space):
         return self._value
 
-    def readbuf_w(self, space):
-        from rpython.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
-        builder = StringBuilder(len(self._value) * UNICODE_SIZE)
-        for unich in self._value:
-            pack_unichar(unich, builder)
-        return StringBuffer(builder.build())
-
     def writebuf_w(self, space):
         raise OperationError(space.w_TypeError, space.wrap(
             "cannot use unicode as modifiable buffer"))
@@ -554,6 +555,15 @@
     def _new(self, value):
         return W_UnicodeObject(value)
 
+    def _new_concat(self, space, value1, value2):
+        if space.config.objspace.std.withstrbuf:
+            from pypy.objspace.std.unibufobject import W_UnicodeBufferObject
+            builder = UnicodeBuilder(len(value1) + len(value2))
+            builder.append(value1)
+            builder.append(value2)
+            return W_UnicodeBufferObject(builder)
+        return self._new(value1 + value2)
+
     def _new_from_list(self, value):
         return W_UnicodeObject(u''.join(value))
 
@@ -573,9 +583,11 @@
 
     @staticmethod
     def _op_val(space, w_other):
-        if isinstance(w_other, W_UnicodeObject):
-            return w_other._value
-        if space.isinstance_w(w_other, space.w_str):
+        from pypy.objspace.std.bytesobject import W_AbstractBytesObject
+
+        if isinstance(w_other, W_AbstractUnicodeObject):
+            return w_other.unicode_w(space)
+        if isinstance(w_other, W_AbstractBytesObject):
             return unicode_from_string(space, w_other)._value
         return unicode_from_encoded_object(
             space, w_other, None, "strict")._value
@@ -664,9 +676,9 @@
             if space.is_w(w_unicodetype, space.w_unicode):
                 return w_value
 
-        assert isinstance(w_value, W_UnicodeObject)
+        value = w_value.unicode_w(space)
         w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype)
-        W_UnicodeObject.__init__(w_newobj, w_value._value)
+        W_UnicodeObject.__init__(w_newobj, value)
         return w_newobj
 
     def descr_repr(self, space):
@@ -1035,7 +1047,7 @@
 
     __add__ = interpindirect2app(W_AbstractUnicodeObject.descr_add),
     __mul__ = interpindirect2app(W_AbstractUnicodeObject.descr_mul),
-    __rmul__ = interpindirect2app(W_AbstractUnicodeObject.descr_mul),
+    __rmul__ = interpindirect2app(W_AbstractUnicodeObject.descr_rmul),
 
     __getitem__ = interpindirect2app(W_AbstractUnicodeObject.descr_getitem),
     __getslice__ = interpindirect2app(W_AbstractUnicodeObject.descr_getslice),


More information about the pypy-commit mailing list