[pypy-commit] pypy utf8-unicode2: Some non-ascii Utf8Str's were being marked as ascii
waedt
noreply at buildbot.pypy.org
Fri Aug 8 09:22:41 CEST 2014
Author: Tyler Wade <wayedt at gmail.com>
Branch: utf8-unicode2
Changeset: r72718:f324ba27ece1
Date: 2014-08-06 21:10 -0500
http://bitbucket.org/pypy/pypy/changeset/f324ba27ece1/
Log: Some non-ascii Utf8Str's were being marked as ascii
diff --git a/pypy/interpreter/test/test_utf8_codecs.py b/pypy/interpreter/test/test_utf8_codecs.py
--- a/pypy/interpreter/test/test_utf8_codecs.py
+++ b/pypy/interpreter/test/test_utf8_codecs.py
@@ -746,6 +746,12 @@
u = Utf8Str.from_unicode(unicode(s, 'raw-unicode-escape'))
assert decoder(s, len(s), 'strict')[0] == u
+ def test_decode_unicode_escape(self):
+ decoder = self.getdecoder('unicode-escape')
+ s = '\\\xff'
+ u = Utf8Str.from_unicode(unicode(s, 'unicode-escape'))
+ assert decoder(s, len(s), 'strict')[0] == u
+
class TestTranslation(object):
def test_utf8(self):
diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py
--- a/pypy/interpreter/utf8.py
+++ b/pypy/interpreter/utf8.py
@@ -478,7 +478,7 @@
 for s in other:
     if not s._is_ascii:
         is_ascii = False
-    break
+        break
return Utf8Str(self.bytes.join([s.bytes for s in other]), is_ascii)
else:
assert isinstance(other[0], str)
@@ -678,6 +678,8 @@
 elif isinstance(s, Utf8Str):
     self._builder.append_slice(s.bytes, s.index_of_char(start),
                                s.index_of_char(end))
+    if not s._is_ascii:
+        self._is_ascii = False
 else:
     raise TypeError("Invalid type '%s' for Utf8Str.append_slice" %
                     type(s))
diff --git a/pypy/interpreter/utf8_codecs.py b/pypy/interpreter/utf8_codecs.py
--- a/pypy/interpreter/utf8_codecs.py
+++ b/pypy/interpreter/utf8_codecs.py
@@ -134,7 +134,7 @@
builder.append(res)
else:
builder.append('\\')
- builder.append(ch)
+ builder.append(ord(ch))
return builder.build(), pos
More information about the pypy-commit mailing list