[pypy-commit] pypy default: Performance tweaks: may return the unicode object passed in as argument
arigo
noreply at buildbot.pypy.org
Sat Aug 30 08:44:31 CEST 2014
Author: Armin Rigo <arigo at tunes.org>
Branch: default
Changeset: r73198:c71b5f944ccf
Date: 2014-08-30 08:29 +0200
http://bitbucket.org/pypy/pypy/changeset/c71b5f944ccf/
Log: Performance tweaks: may return the unicode object passed in as
argument if it needs no encoding at all.
diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -1,4 +1,6 @@
from rpython.rlib.rstring import StringBuilder
+from rpython.rlib.runicode import str_decode_utf_8
+from pypy.interpreter import unicodehelper
HEX = '0123456789abcdef'
@@ -17,20 +19,39 @@
def raw_encode_basestring_ascii(space, w_string):
if space.isinstance_w(w_string, space.w_str):
s = space.str_w(w_string)
- for c in s:
+ for i in range(len(s)):
+ c = s[i]
if c >= ' ' and c <= '~' and c != '"' and c != '\\':
pass
else:
+ first = i
break
else:
# the input is a string with only non-special ascii chars
return w_string
- w_string = space.call_method(w_string, 'decode', space.wrap('utf-8'))
+ eh = unicodehelper.decode_error_handler(space)
+ u = str_decode_utf_8(
+ s, len(s), None, final=True, errorhandler=eh,
+ allow_surrogates=True)[0]
+ sb = StringBuilder(len(u))
+ sb.append_slice(s, 0, first)
+ else:
+ u = space.unicode_w(w_string)
+ for i in range(len(u)):
+ c = u[i]
+ if c >= u' ' and c <= u'~' and c != u'"' and c != u'\\':
+ pass
+ else:
+ break
+ else:
+ # the input is a unicode with only non-special ascii chars
+ return w_string
+ sb = StringBuilder(len(u))
+ first = 0
- u = space.unicode_w(w_string)
- sb = StringBuilder(len(u))
- for c in u:
+ for i in range(first, len(u)):
+ c = u[i]
if c <= u'~':
if c == u'"' or c == u'\\':
sb.append('\\')
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -192,14 +192,14 @@
def test_raw_encode_basestring_ascii(self):
import _pypyjson
- def check(s):
+ def check(s, expected_type=str):
s = _pypyjson.raw_encode_basestring_ascii(s)
- assert type(s) is str
+ assert type(s) is expected_type
return s
assert check("") == ""
- assert check(u"") == ""
+ assert check(u"", expected_type=unicode) == u""
assert check("abc ") == "abc "
- assert check(u"abc ") == "abc "
+ assert check(u"abc ", expected_type=unicode) == u"abc "
raises(UnicodeDecodeError, check, "\xc0")
assert check("\xc2\x84") == "\\u0084"
assert check("\xf0\x92\x8d\x85") == "\\ud808\\udf45"
More information about the pypy-commit
mailing list