[pypy-commit] pypy default: Performance tweaks: may return the unicode object passed in as argument

arigo noreply at buildbot.pypy.org
Sat Aug 30 08:44:31 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r73198:c71b5f944ccf
Date: 2014-08-30 08:29 +0200
http://bitbucket.org/pypy/pypy/changeset/c71b5f944ccf/

Log:	Performance tweaks: raw_encode_basestring_ascii() may now return the
	unicode object passed in as argument, unchanged, if it contains only
	non-special ASCII characters and so needs no encoding at all.

diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -1,4 +1,6 @@
 from rpython.rlib.rstring import StringBuilder
+from rpython.rlib.runicode import str_decode_utf_8
+from pypy.interpreter import unicodehelper
 
 
 HEX = '0123456789abcdef'
@@ -17,20 +19,39 @@
 def raw_encode_basestring_ascii(space, w_string):
     if space.isinstance_w(w_string, space.w_str):
         s = space.str_w(w_string)
-        for c in s:
+        for i in range(len(s)):
+            c = s[i]
             if c >= ' ' and c <= '~' and c != '"' and c != '\\':
                 pass
             else:
+                first = i
                 break
         else:
             # the input is a string with only non-special ascii chars
             return w_string
 
-        w_string = space.call_method(w_string, 'decode', space.wrap('utf-8'))
+        eh = unicodehelper.decode_error_handler(space)
+        u = str_decode_utf_8(
+                s, len(s), None, final=True, errorhandler=eh,
+                allow_surrogates=True)[0]
+        sb = StringBuilder(len(u))
+        sb.append_slice(s, 0, first)
+    else:
+        u = space.unicode_w(w_string)
+        for i in range(len(u)):
+            c = u[i]
+            if c >= u' ' and c <= u'~' and c != u'"' and c != u'\\':
+                pass
+            else:
+                break
+        else:
+            # the input is a unicode with only non-special ascii chars
+            return w_string
+        sb = StringBuilder(len(u))
+        first = 0
 
-    u = space.unicode_w(w_string)
-    sb = StringBuilder(len(u))
-    for c in u:
+    for i in range(first, len(u)):
+        c = u[i]
         if c <= u'~':
             if c == u'"' or c == u'\\':
                 sb.append('\\')
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -192,14 +192,14 @@
 
     def test_raw_encode_basestring_ascii(self):
         import _pypyjson
-        def check(s):
+        def check(s, expected_type=str):
             s = _pypyjson.raw_encode_basestring_ascii(s)
-            assert type(s) is str
+            assert type(s) is expected_type
             return s
         assert check("") == ""
-        assert check(u"") == ""
+        assert check(u"", expected_type=unicode) == u""
         assert check("abc ") == "abc "
-        assert check(u"abc ") == "abc "
+        assert check(u"abc ", expected_type=unicode) == u"abc "
         raises(UnicodeDecodeError, check, "\xc0")
         assert check("\xc2\x84") == "\\u0084"
         assert check("\xf0\x92\x8d\x85") == "\\ud808\\udf45"


More information about the pypy-commit mailing list