[pypy-commit] pypy default: fix #3108: the map based parser didn't deal with json dicts with repeated keys
cfbolz
pypy.commits at gmail.com
Thu Oct 31 16:14:24 EDT 2019
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch:
Changeset: r97924:86da6cb357f1
Date: 2019-10-31 21:07 +0100
http://bitbucket.org/pypy/pypy/changeset/86da6cb357f1/
Log: fix #3108: the map-based parser didn't deal with JSON dicts with
repeated keys correctly
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -342,7 +342,14 @@
currmap = self.startmap
while True:
# parse a key: value
- currmap = self.decode_key_map(i, currmap)
+ newmap = self.decode_key_map(i, currmap)
+ if newmap is None:
+ # We've seen a repeated key, switch to dict-based storage.
+ dict_w = self._switch_to_dict(currmap, values_w, nextindex)
+ # We re-parse the last key, to get the correct overwriting
+ # effect. Pointless to care for performance here.
+ return self.decode_object_dict(i, start, dict_w)
+ currmap = newmap
i = self.skip_whitespace(self.pos)
ch = self.ll_chars[i]
if ch != ':':
@@ -610,6 +617,8 @@
""" Given the current map currmap of an object, decode the next key at
position i. This returns the new map of the object. """
newmap = self._decode_key_map(i, currmap)
+ if newmap is None:
+ return None
currmap.observe_transition(newmap, self.startmap)
return newmap
@@ -789,6 +798,11 @@
self.nextmap_first._check_invariants()
def get_next(self, w_key, string, start, stop, terminator):
+ """ Returns the next map, given a wrapped key w_key, the json input
+ string with positions start and stop, as well as a terminator.
+
+ Returns None if the key already appears somewhere in the map chain.
+ """
from pypy.objspace.std.dictmultiobject import unicode_hash, unicode_eq
if isinstance(self, JSONMap):
assert not self.state == MapBase.BLOCKED
@@ -803,6 +817,8 @@
if nextmap_first is None:
# first transition ever seen, don't initialize nextmap_all
next = self._make_next_map(w_key, string[start:stop])
+ if next is None:
+ return None
self.nextmap_first = next
else:
if self.nextmap_all is None:
@@ -817,6 +833,8 @@
# if we are at this point we didn't find the transition yet, so
# create a new one
next = self._make_next_map(w_key, string[start:stop])
+ if next is None:
+ return None
self.nextmap_all[w_key] = next
# one new leaf has been created
@@ -859,6 +877,14 @@
self.mark_useful(terminator)
def _make_next_map(self, w_key, key_repr):
+ # Check whether w_key is already part of the self.prev chain
+ # to prevent strangeness in the json dict implementation.
+ # This is slow, but it should be rare to call this function.
+ check = self
+ while isinstance(check, JSONMap):
+ if check.w_key._utf8 == w_key._utf8:
+ return None
+ check = check.prev
return JSONMap(self.space, self, w_key, key_repr)
def fill_dict(self, dict_w, values_w):
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -74,6 +74,17 @@
m3.fill_dict(d, [space.w_None, space.w_None, space.w_None])
assert list(d) == [w_a, w_b, w_c]
+ def test_repeated_key_get_next(self):
+ m = Terminator(self.space)
+ w_a = self.space.newutf8("a", 1)
+ w_b = self.space.newutf8("b", 1)
+ w_c = self.space.newutf8("c", 1)
+ m1 = m.get_next(w_a, '"a"', 0, 3, m)
+ m1 = m1.get_next(w_b, '"b"', 0, 3, m)
+ m1 = m1.get_next(w_c, '"c"', 0, 3, m)
+ m2 = m1.get_next(w_a, '"a"', 0, 3, m)
+ assert m2 is None
+
def test_decode_key_map(self):
m = Terminator(self.space)
@@ -519,3 +530,11 @@
exc = raises(ValueError, _pypyjson.loads, inputtext)
assert str(exc.value) == errmsg
+ def test_repeated_key(self):
+ import _pypyjson
+ a = '{"abc": "4", "k": 1, "k": 2}'
+ d = _pypyjson.loads(a)
+ assert d == {u"abc": u"4", u"k": 2}
+ a = '{"abc": "4", "k": 1, "k": 1.5, "c": null, "k": 2}'
+ d = _pypyjson.loads(a)
+ assert d == {u"abc": u"4", u"c": None, u"k": 2}
More information about the pypy-commit
mailing list