[pypy-commit] pypy unicode-utf8-py3: special case surrogateescape if already handled, this code needs refactoring
mattip
pypy.commits at gmail.com
Tue Nov 13 03:19:56 EST 2018
Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r95307:88ab1ff0c4e8
Date: 2018-11-12 19:25 -0800
http://bitbucket.org/pypy/pypy/changeset/88ab1ff0c4e8/
Log: Special-case surrogateescape if already handled; this code needs
refactoring.
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1206,10 +1206,13 @@
from pypy.module._codecs.interp_codecs import encode_text, CodecState
utf8 = space.utf8_w(w_object)
if not allow_surrogates:
- utf8 = space.utf8_w(w_object)
if errors is None:
errors = 'strict'
pos = rutf8.surrogate_in_utf8(utf8)
+ if pos >= 0:
+ handled_error = True
+ else:
+ handled_error = False
state = space.fromcache(CodecState)
eh = state.encode_error_handler
while pos >= 0:
@@ -1224,7 +1227,7 @@
# surrogatepass?
break
pos = _pos
- if errors == 'surrogateescape':
+ if errors == 'surrogateescape' and handled_error:
#escape
return space.newbytes(utf8)
w_object = space.newtext(utf8)
More information about the pypy-commit mailing list