[pypy-commit] pypy utf8-unicode2: Some non-ascii Utf8Str's were being marked as ascii

waedt noreply at buildbot.pypy.org
Fri Aug 8 09:22:41 CEST 2014


Author: Tyler Wade <wayedt at gmail.com>
Branch: utf8-unicode2
Changeset: r72718:f324ba27ece1
Date: 2014-08-06 21:10 -0500
http://bitbucket.org/pypy/pypy/changeset/f324ba27ece1/

Log:	Some non-ASCII Utf8Str instances were being marked as ASCII

diff --git a/pypy/interpreter/test/test_utf8_codecs.py b/pypy/interpreter/test/test_utf8_codecs.py
--- a/pypy/interpreter/test/test_utf8_codecs.py
+++ b/pypy/interpreter/test/test_utf8_codecs.py
@@ -746,6 +746,12 @@
         u = Utf8Str.from_unicode(unicode(s, 'raw-unicode-escape'))
         assert decoder(s, len(s), 'strict')[0] == u
 
+    def test_decode_unicode_escape(self):
+        decoder = self.getdecoder('unicode-escape')
+        s = '\\\xff'
+        u = Utf8Str.from_unicode(unicode(s, 'unicode-escape'))
+        assert decoder(s, len(s), 'strict')[0] == u
+
 
 class TestTranslation(object):
     def test_utf8(self):
diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py
--- a/pypy/interpreter/utf8.py
+++ b/pypy/interpreter/utf8.py
@@ -478,7 +478,7 @@
                 for s in other:
                     if not s._is_ascii:
                         is_ascii = False
-                    break
+                        break
             return Utf8Str(self.bytes.join([s.bytes for s in other]), is_ascii)
         else:
             assert isinstance(other[0], str)
@@ -678,6 +678,8 @@
         elif isinstance(s, Utf8Str):
             self._builder.append_slice(s.bytes, s.index_of_char(start),
                                        s.index_of_char(end))
+            if not s._is_ascii:
+                self._is_ascii = False
         else:
             raise TypeError("Invalid type '%s' for Utf8Str.append_slice" %
                             type(s))
diff --git a/pypy/interpreter/utf8_codecs.py b/pypy/interpreter/utf8_codecs.py
--- a/pypy/interpreter/utf8_codecs.py
+++ b/pypy/interpreter/utf8_codecs.py
@@ -134,7 +134,7 @@
                 builder.append(res)
         else:
             builder.append('\\')
-            builder.append(ch)
+            builder.append(ord(ch))
 
     return builder.build(), pos
 


More information about the pypy-commit mailing list