bpo-39681: Fix C pickle regression with minimal file-like objects (GH-18592) (#18630)
https://github.com/python/cpython/commit/b19f7ecfa3adc6ba1544225317b94736498... commit: b19f7ecfa3adc6ba1544225317b9473649815b38 branch: 3.8 author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> committer: GitHub <noreply@github.com> date: 2020-02-23T23:53:24+01:00 summary: bpo-39681: Fix C pickle regression with minimal file-like objects (GH-18592) (#18630) Fix a regression where the C pickle module wouldn't allow unpickling from a file-like object that doesn't expose a readinto() method. (cherry picked from commit 9f37872e307734666a7169f7be6e3370d3068282) Co-authored-by: Antoine Pitrou <antoine@python.org> Co-authored-by: Antoine Pitrou <pitrou@free.fr> files: A Misc/NEWS.d/next/Library/2020-02-21-13-58-40.bpo-39681.zN8hf0.rst M Lib/test/pickletester.py M Modules/_pickle.c diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 3a8aee4320c65..7c8383f3eaac9 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -73,6 +73,18 @@ def tell(self): raise io.UnsupportedOperation +class MinimalIO(object): + """ + A file-like object that doesn't support readinto(). + """ + def __init__(self, *args): + self._bio = io.BytesIO(*args) + self.getvalue = self._bio.getvalue + self.read = self._bio.read + self.readline = self._bio.readline + self.write = self._bio.write + + # We can't very well test the extension registry without putting known stuff # in it, but we have to be careful to restore its original state. Code # should do this: @@ -3361,7 +3373,7 @@ def test_reusing_unpickler_objects(self): f.seek(0) self.assertEqual(unpickler.load(), data2) - def _check_multiple_unpicklings(self, ioclass): + def _check_multiple_unpicklings(self, ioclass, *, seekable=True): for proto in protocols: with self.subTest(proto=proto): data1 = [(x, str(x)) for x in range(2000)] + [b"abcde", len] @@ -3374,10 +3386,10 @@ def _check_multiple_unpicklings(self, ioclass): f = ioclass(pickled * N) unpickler = self.unpickler_class(f) for i in range(N): - if f.seekable(): + if seekable: pos = f.tell() self.assertEqual(unpickler.load(), data1) - if f.seekable(): + if seekable: self.assertEqual(f.tell(), pos + len(pickled)) self.assertRaises(EOFError, unpickler.load) @@ -3385,7 +3397,12 @@ def test_multiple_unpicklings_seekable(self): self._check_multiple_unpicklings(io.BytesIO) def test_multiple_unpicklings_unseekable(self): - self._check_multiple_unpicklings(UnseekableIO) + self._check_multiple_unpicklings(UnseekableIO, seekable=False) + + def test_multiple_unpicklings_minimal(self): + # File-like object that doesn't support peek() and readinto() + # (bpo-39681) + self._check_multiple_unpicklings(MinimalIO, seekable=False) def test_unpickling_buffering_readline(self): # Issue #12687: the unpickler's buffering logic could fail with diff --git a/Misc/NEWS.d/next/Library/2020-02-21-13-58-40.bpo-39681.zN8hf0.rst b/Misc/NEWS.d/next/Library/2020-02-21-13-58-40.bpo-39681.zN8hf0.rst new file mode 100644 index 0000000000000..c10e2fd7a4b6d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-02-21-13-58-40.bpo-39681.zN8hf0.rst @@ -0,0 +1,2 @@ +Fix a regression where the C pickle module wouldn't allow unpickling from a +file-like object that doesn't expose a readinto() method. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 9f6e66f70a546..55e2734ca2f31 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1371,13 +1371,42 @@ _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n) } /* Read from file */ - if (!self->readinto) { + if (!self->read) { + /* We're unpickling memory, this means the input is truncated */ return bad_readline(); } if (_Unpickler_SkipConsumed(self) < 0) { return -1; } + if (!self->readinto) { + /* readinto() not supported on file-like object, fall back to read() + * and copy into destination buffer (bpo-39681) */ + PyObject* len = PyLong_FromSsize_t(n); + if (len == NULL) { + return -1; + } + PyObject* data = _Pickle_FastCall(self->read, len); + if (data == NULL) { + return -1; + } + if (!PyBytes_Check(data)) { + PyErr_Format(PyExc_ValueError, + "read() returned non-bytes object (%R)", + Py_TYPE(data)); + Py_DECREF(data); + return -1; + } + Py_ssize_t read_size = PyBytes_GET_SIZE(data); + if (read_size < n) { + Py_DECREF(data); + return bad_readline(); + } + memcpy(buf, PyBytes_AS_STRING(data), n); + Py_DECREF(data); + return n; + } + /* Call readinto() into user buffer */ PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE); if (buf_obj == NULL) { @@ -1606,17 +1635,19 @@ _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file) _Py_IDENTIFIER(readinto); _Py_IDENTIFIER(readline); + /* Optional file methods */ if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) { return -1; } + if (_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto) < 0) { + return -1; + } (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read); - (void)_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto); (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline); - if (!self->readline || !self->readinto || !self->read) { + if (!self->readline || !self->read) { if (!PyErr_Occurred()) { PyErr_SetString(PyExc_TypeError, - "file must have 'read', 'readinto' and " - "'readline' attributes"); + "file must have 'read' and 'readline' attributes"); } Py_CLEAR(self->read); Py_CLEAR(self->readinto);
participants (1)
-
Miss Islington (bot)