[pypy-commit] pypy default: removed CPython-oriented code in json and added KeyValueBuilder(s) to speed up JSON decoding
Stefano Parmesan
noreply at buildbot.pypy.org
Wed Feb 8 10:57:59 CET 2012
Author: Stefano Parmesan <stefanop at ahref.eu>
Branch:
Changeset: r52215:de5504a0f4f0
Date: 2012-01-27 15:06 +0100
http://bitbucket.org/pypy/pypy/changeset/de5504a0f4f0/
Log: removed CPython-oriented code in json and added KeyValueBuilder(s)
to speed up JSON decoding
diff --git a/lib-python/modified-2.7/json/decoder.py b/lib-python/modified-2.7/json/decoder.py
--- a/lib-python/modified-2.7/json/decoder.py
+++ b/lib-python/modified-2.7/json/decoder.py
@@ -5,15 +5,47 @@
import struct
from json.scanner import make_scanner
-try:
- from _json import scanstring as c_scanstring
-except ImportError:
- c_scanstring = None
__all__ = ['JSONDecoder']
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
+
class KeyValueElement(object):
    """Slotted record holding one ``key`` together with its ``value``."""

    __slots__ = ['key', 'value']

    def __init__(self, key, value):
        # Both attributes are stored exactly as given; no copying or
        # validation is performed.
        self.key, self.value = key, value
+
+
class KeyValueAbstractBuilder(object):
    """Base class for accumulating key/value pairs into a container.

    Concrete subclasses set the class attribute ``base_type`` to the
    container type to instantiate and override ``append`` to store one
    pair in ``self.elements``.
    """

    # ``base_type`` is intentionally NOT a slot: it is a class attribute
    # supplied by each concrete subclass, not per-instance state.
    __slots__ = ['elements']

    def __init__(self):
        # Raises AttributeError if the class does not define ``base_type``.
        self.elements = self.base_type()

    def append(self, key, value):
        """Store one ``key``/``value`` pair; must be overridden.

        Raises NotImplementedError so that a subclass which forgets to
        override this method fails loudly instead of silently dropping
        every pair.
        """
        raise NotImplementedError(
            "%s must override append()" % type(self).__name__)

    def build(self):
        """Return the container holding everything appended so far."""
        return self.elements


class KeyValueListBuilder(KeyValueAbstractBuilder):
    """Collects pairs, in insertion order, as a list of (key, value) tuples."""

    # Empty slots keep instances free of a per-instance ``__dict__``.
    __slots__ = ()
    base_type = list

    def append(self, key, value):
        """Append ``(key, value)`` to the list; duplicate keys are all kept."""
        self.elements.append((key, value))


class KeyValueDictBuilder(KeyValueAbstractBuilder):
    """Collects pairs into a dict; a repeated key keeps its last value."""

    # Empty slots keep instances free of a per-instance ``__dict__``.
    __slots__ = ()
    base_type = dict

    def append(self, key, value):
        """Set ``elements[key] = value``, overwriting any earlier value."""
        self.elements[key] = value
+
+
def _floatconstants():
_BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
if sys.byteorder != 'big':
@@ -62,7 +94,7 @@
DEFAULT_ENCODING = "utf-8"
-def py_scanstring(s, end, encoding=None, strict=True,
+def scanstring(s, end, encoding=None, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
@@ -75,7 +107,6 @@
if encoding is None:
encoding = DEFAULT_ENCODING
chunks = []
- _append = chunks.append
begin = end - 1
while 1:
chunk = _m(s, end)
@@ -84,11 +115,13 @@
errmsg("Unterminated string starting at", s, begin))
end = chunk.end()
content, terminator = chunk.groups()
+ del chunk
# Content contains zero or more unescaped string characters
if content:
if not isinstance(content, unicode):
content = unicode(content, encoding)
- _append(content)
+ chunks.append(content)
+ del content
# Terminator is the end of string, a literal control character,
# or a backslash denoting that an escape sequence follows
if terminator == '"':
@@ -99,7 +132,8 @@
msg = "Invalid control character {0!r} at".format(terminator)
raise ValueError(errmsg(msg, s, end))
else:
- _append(terminator)
+ chunks.append(terminator)
+ del terminator
continue
try:
esc = s[end]
@@ -136,21 +170,16 @@
char = unichr(uni)
end = next_end
# Append the unescaped character
- _append(char)
+ chunks.append(char)
return u''.join(chunks), end
-# Use speedup if available
-scanstring = c_scanstring or py_scanstring
-
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
s, end = s_and_end
- pairs = []
- pairs_append = pairs.append
# Use a slice to prevent IndexError from being raised, the following
# check will raise a more specific ValueError if the string is empty
nextchar = s[end:end + 1]
@@ -162,7 +191,7 @@
# Trivial empty object
if nextchar == '}':
if object_pairs_hook is not None:
- result = object_pairs_hook(pairs)
+ result = object_pairs_hook([])
return result, end
pairs = {}
if object_hook is not None:
@@ -171,7 +200,13 @@
elif nextchar != '"':
raise ValueError(errmsg("Expecting property name", s, end))
end += 1
- while True:
+
+ if object_pairs_hook is not None:
+ pairs = KeyValueListBuilder()
+ else:
+ pairs = KeyValueDictBuilder()
+
+ while 1:
key, end = scanstring(s, end, encoding, strict)
# To skip some function call overhead we optimize the fast paths where
@@ -195,7 +230,7 @@
value, end = scan_once(s, end)
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
- pairs_append((key, value))
+ pairs.append(key, value)
try:
nextchar = s[end]
@@ -227,9 +262,9 @@
raise ValueError(errmsg("Expecting property name", s, end - 1))
if object_pairs_hook is not None:
- result = object_pairs_hook(pairs)
+ result = object_pairs_hook(pairs.build()) # to list
return result, end
- pairs = dict(pairs)
+ pairs = pairs.build() # to dict
if object_hook is not None:
pairs = object_hook(pairs)
return pairs, end
@@ -244,13 +279,12 @@
# Look-ahead for trivial empty array
if nextchar == ']':
return values, end + 1
- _append = values.append
- while True:
+ while 1:
try:
value, end = scan_once(s, end)
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
- _append(value)
+ values.append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
end = _w(s, end + 1).end()
diff --git a/lib-python/modified-2.7/json/scanner.py b/lib-python/modified-2.7/json/scanner.py
--- a/lib-python/modified-2.7/json/scanner.py
+++ b/lib-python/modified-2.7/json/scanner.py
@@ -1,10 +1,6 @@
"""JSON token scanner
"""
import re
-try:
- from _json import make_scanner as c_make_scanner
-except ImportError:
- c_make_scanner = None
__all__ = ['make_scanner']
@@ -12,19 +8,7 @@
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
(re.VERBOSE | re.MULTILINE | re.DOTALL))
-def py_make_scanner(context):
- parse_object = context.parse_object
- parse_array = context.parse_array
- parse_string = context.parse_string
- match_number = NUMBER_RE.match
- encoding = context.encoding
- strict = context.strict
- parse_float = context.parse_float
- parse_int = context.parse_int
- parse_constant = context.parse_constant
- object_hook = context.object_hook
- object_pairs_hook = context.object_pairs_hook
-
+def make_scanner(context):
def _scan_once(string, idx):
try:
nextchar = string[idx]
@@ -32,12 +16,12 @@
raise StopIteration
if nextchar == '"':
- return parse_string(string, idx + 1, encoding, strict)
+ return context.parse_string(string, idx + 1, context.encoding, context.strict)
elif nextchar == '{':
- return parse_object((string, idx + 1), encoding, strict,
- _scan_once, object_hook, object_pairs_hook)
+ return context.parse_object((string, idx + 1), context.encoding, context.strict,
+ _scan_once, context.object_hook, context.object_pairs_hook)
elif nextchar == '[':
- return parse_array((string, idx + 1), _scan_once)
+ return context.parse_array((string, idx + 1), _scan_once)
elif nextchar == 'n' and string[idx:idx + 4] == 'null':
return None, idx + 4
elif nextchar == 't' and string[idx:idx + 4] == 'true':
@@ -45,23 +29,21 @@
elif nextchar == 'f' and string[idx:idx + 5] == 'false':
return False, idx + 5
- m = match_number(string, idx)
+ m = NUMBER_RE.match(string, idx)
if m is not None:
integer, frac, exp = m.groups()
if frac or exp:
- res = parse_float(integer + (frac or '') + (exp or ''))
+ res = context.parse_float(integer + (frac or '') + (exp or ''))
else:
- res = parse_int(integer)
+ res = context.parse_int(integer)
return res, m.end()
elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
- return parse_constant('NaN'), idx + 3
+ return context.parse_constant('NaN'), idx + 3
elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
- return parse_constant('Infinity'), idx + 8
+ return context.parse_constant('Infinity'), idx + 8
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
- return parse_constant('-Infinity'), idx + 9
+ return context.parse_constant('-Infinity'), idx + 9
else:
raise StopIteration
return _scan_once
-
-make_scanner = c_make_scanner or py_make_scanner
More information about the pypy-commit
mailing list