[pypy-commit] pypy default: fix #3108: the map based parser didn't deal with json dicts with repeated keys

cfbolz pypy.commits at gmail.com
Thu Oct 31 16:14:24 EDT 2019


Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: 
Changeset: r97924:86da6cb357f1
Date: 2019-10-31 21:07 +0100
http://bitbucket.org/pypy/pypy/changeset/86da6cb357f1/

Log:	fix #3108: the map based parser didn't deal with json dicts with
	repeated keys correctly

diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
--- a/pypy/module/_pypyjson/interp_decoder.py
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -342,7 +342,14 @@
         currmap = self.startmap
         while True:
             # parse a key: value
-            currmap = self.decode_key_map(i, currmap)
+            newmap = self.decode_key_map(i, currmap)
+            if newmap is None:
+                # We've seen a repeated key, switch to dict-based storage.
+                dict_w = self._switch_to_dict(currmap, values_w, nextindex)
+                # We re-parse the last key, to get the correct overwriting
+                # effect. Pointless to care for performance here.
+                return self.decode_object_dict(i, start, dict_w)
+            currmap = newmap
             i = self.skip_whitespace(self.pos)
             ch = self.ll_chars[i]
             if ch != ':':
@@ -610,6 +617,8 @@
         """ Given the current map currmap of an object, decode the next key at
         position i. This returns the new map of the object. """
         newmap = self._decode_key_map(i, currmap)
+        if newmap is None:
+            return None
         currmap.observe_transition(newmap, self.startmap)
         return newmap
 
@@ -789,6 +798,11 @@
             self.nextmap_first._check_invariants()
 
     def get_next(self, w_key, string, start, stop, terminator):
+        """ Returns the next map, given a wrapped key w_key, the json input
+        string with positions start and stop, as well as a terminator.
+
+        Returns None if the key already appears somewhere in the map chain.
+        """
         from pypy.objspace.std.dictmultiobject import unicode_hash, unicode_eq
         if isinstance(self, JSONMap):
             assert not self.state == MapBase.BLOCKED
@@ -803,6 +817,8 @@
         if nextmap_first is None:
             # first transition ever seen, don't initialize nextmap_all
             next = self._make_next_map(w_key, string[start:stop])
+            if next is None:
+                return None
             self.nextmap_first = next
         else:
             if self.nextmap_all is None:
@@ -817,6 +833,8 @@
             # if we are at this point we didn't find the transition yet, so
             # create a new one
             next = self._make_next_map(w_key, string[start:stop])
+            if next is None:
+                return None
             self.nextmap_all[w_key] = next
 
             # one new leaf has been created
@@ -859,6 +877,14 @@
                 self.mark_useful(terminator)
 
     def _make_next_map(self, w_key, key_repr):
+        # Check whether w_key already occurs in the self.prev chain and
+        # return None if so, to prevent strangeness in the json dict
+        # implementation. Slow, but this function should rarely be called.
+        check = self
+        while isinstance(check, JSONMap):
+            if check.w_key._utf8 == w_key._utf8:
+                return None
+            check = check.prev
         return JSONMap(self.space, self, w_key, key_repr)
 
     def fill_dict(self, dict_w, values_w):
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -74,6 +74,17 @@
         m3.fill_dict(d, [space.w_None, space.w_None, space.w_None])
         assert list(d) == [w_a, w_b, w_c]
 
+    def test_repeated_key_get_next(self):
+        m = Terminator(self.space)
+        w_a = self.space.newutf8("a", 1)
+        w_b = self.space.newutf8("b", 1)
+        w_c = self.space.newutf8("c", 1)
+        m1 = m.get_next(w_a, '"a"', 0, 3, m)
+        m1 = m1.get_next(w_b, '"b"', 0, 3, m)
+        m1 = m1.get_next(w_c, '"c"', 0, 3, m)
+        m2 = m1.get_next(w_a, '"a"', 0, 3, m)
+        assert m2 is None
+
 
     def test_decode_key_map(self):
         m = Terminator(self.space)
@@ -519,3 +530,11 @@
             exc = raises(ValueError, _pypyjson.loads, inputtext)
             assert str(exc.value) == errmsg
 
+    def test_repeated_key(self):
+        import _pypyjson
+        a = '{"abc": "4", "k": 1, "k": 2}'
+        d = _pypyjson.loads(a)
+        assert d == {u"abc": u"4", u"k": 2}
+        a = '{"abc": "4", "k": 1, "k": 1.5, "c": null, "k": 2}'
+        d = _pypyjson.loads(a)
+        assert d == {u"abc": u"4", u"c": None, u"k": 2}


More information about the pypy-commit mailing list