[Python-checkins] bpo-41180: Replace marshal code.__new__ audit event with marshal.load[s] and marshal.dumps (GH-26971)
zooba
webhook-mailer at python.org
Wed Jun 30 13:52:43 EDT 2021
https://github.com/python/cpython/commit/863e3d5c7e037b24b8294b041ed7686b522973d8
commit: 863e3d5c7e037b24b8294b041ed7686b522973d8
branch: 3.9
author: Steve Dower <steve.dower at python.org>
committer: zooba <steve.dower at microsoft.com>
date: 2021-06-30T18:52:39+01:00
summary:
bpo-41180: Replace marshal code.__new__ audit event with marshal.load[s] and marshal.dumps (GH-26971)
files:
A Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst
M Doc/library/marshal.rst
M Lib/test/audit-tests.py
M Lib/test/test_audit.py
M Python/marshal.c
diff --git a/Doc/library/marshal.rst b/Doc/library/marshal.rst
index d65afc20041133..b38ba54b3c3bc6 100644
--- a/Doc/library/marshal.rst
+++ b/Doc/library/marshal.rst
@@ -66,6 +66,8 @@ The module defines these functions:
The *version* argument indicates the data format that ``dump`` should use
(see below).
+ .. audit-event:: marshal.dumps value,version marshal.dump
+
.. function:: load(file)
@@ -74,11 +76,18 @@ The module defines these functions:
format), raise :exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`. The
file must be a readable :term:`binary file`.
+ .. audit-event:: marshal.load "" marshal.load
+
.. note::
If an object containing an unsupported type was marshalled with :func:`dump`,
:func:`load` will substitute ``None`` for the unmarshallable type.
+ .. versionchanged:: 3.9.7
+
+ This call used to raise a ``code.__new__`` audit event for each code object. Now
+ it raises a single ``marshal.load`` event for the entire load operation.
+
.. function:: dumps(value[, version])
@@ -89,6 +98,8 @@ The module defines these functions:
The *version* argument indicates the data format that ``dumps`` should use
(see below).
+ .. audit-event:: marshal.dumps value,version marshal.dump
+
.. function:: loads(bytes)
@@ -96,6 +107,13 @@ The module defines these functions:
:exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`. Extra bytes in the
input are ignored.
+ .. audit-event:: marshal.loads bytes marshal.load
+
+ .. versionchanged:: 3.9.7
+
+ This call used to raise a ``code.__new__`` audit event for each code object. Now
+ it raises a single ``marshal.loads`` event for the entire load operation.
+
In addition, the following constants are defined:
diff --git a/Lib/test/audit-tests.py b/Lib/test/audit-tests.py
index 8e66594e52429b..95216bcc48253c 100644
--- a/Lib/test/audit-tests.py
+++ b/Lib/test/audit-tests.py
@@ -6,6 +6,7 @@
"""
import contextlib
+import os
import sys
@@ -106,6 +107,32 @@ def test_block_add_hook_baseexception():
pass
+def test_marshal():
+ import marshal
+ o = ("a", "b", "c", 1, 2, 3)
+ payload = marshal.dumps(o)
+
+ with TestHook() as hook:
+ assertEqual(o, marshal.loads(marshal.dumps(o)))
+
+ try:
+ with open("test-marshal.bin", "wb") as f:
+ marshal.dump(o, f)
+ with open("test-marshal.bin", "rb") as f:
+ assertEqual(o, marshal.load(f))
+ finally:
+ os.unlink("test-marshal.bin")
+
+ actual = [(a[0], a[1]) for e, a in hook.seen if e == "marshal.dumps"]
+ assertSequenceEqual(actual, [(o, marshal.version)] * 2)
+
+ actual = [a[0] for e, a in hook.seen if e == "marshal.loads"]
+ assertSequenceEqual(actual, [payload])
+
+ actual = [e for e, a in hook.seen if e == "marshal.load"]
+ assertSequenceEqual(actual, ["marshal.load"])
+
+
def test_pickle():
import pickle
diff --git a/Lib/test/test_audit.py b/Lib/test/test_audit.py
index a9ac6fee446f87..387a31229a2f16 100644
--- a/Lib/test/test_audit.py
+++ b/Lib/test/test_audit.py
@@ -51,6 +51,11 @@ def test_block_add_hook(self):
def test_block_add_hook_baseexception(self):
self.do_test("test_block_add_hook_baseexception")
+ def test_marshal(self):
+ support.import_module("marshal")
+
+ self.do_test("test_marshal")
+
def test_pickle(self):
support.import_module("pickle")
diff --git a/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst b/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst
new file mode 100644
index 00000000000000..88b70c7cea2610
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst
@@ -0,0 +1,5 @@
+Add auditing events to the :mod:`marshal` module, and stop raising
+``code.__new__`` events for every unmarshalled code object. Directly
+instantiated code objects will continue to raise an event, and audit event
+handlers should inspect or collect the raw marshal data. This reduces a
+significant performance overhead when loading from ``.pyc`` files.
diff --git a/Python/marshal.c b/Python/marshal.c
index c4538bd373a82e..baafa3ecfbf1de 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -596,14 +596,18 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
{
char buf[BUFSIZ];
WFILE wf;
+ if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
+ return; /* caller must check PyErr_Occurred() */
+ }
memset(&wf, 0, sizeof(wf));
wf.fp = fp;
wf.ptr = wf.buf = buf;
wf.end = wf.ptr + sizeof(buf);
wf.error = WFERR_OK;
wf.version = version;
- if (w_init_refs(&wf, version))
- return; /* caller mush check PyErr_Occurred() */
+ if (w_init_refs(&wf, version)) {
+ return; /* caller must check PyErr_Occurred() */
+ }
w_object(x, &wf);
w_clear_refs(&wf);
w_flush(&wf);
@@ -1371,12 +1375,6 @@ r_object(RFILE *p)
if (lnotab == NULL)
goto code_error;
- if (PySys_Audit("code.__new__", "OOOiiiiii",
- code, filename, name, argcount, posonlyargcount,
- kwonlyargcount, nlocals, stacksize, flags) < 0) {
- goto code_error;
- }
-
v = (PyObject *) PyCode_NewWithPosOnlyArgs(
argcount, posonlyargcount, kwonlyargcount,
nlocals, stacksize, flags,
@@ -1435,6 +1433,15 @@ read_object(RFILE *p)
fprintf(stderr, "XXX readobject called with exception set\n");
return NULL;
}
+ if (p->ptr && p->end) {
+ if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
+ return NULL;
+ }
+ } else if (p->fp || p->readable) {
+ if (PySys_Audit("marshal.load", NULL) < 0) {
+ return NULL;
+ }
+ }
v = r_object(p);
if (v == NULL && !PyErr_Occurred())
PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
@@ -1531,7 +1538,7 @@ PyMarshal_ReadObjectFromFile(FILE *fp)
rf.refs = PyList_New(0);
if (rf.refs == NULL)
return NULL;
- result = r_object(&rf);
+ result = read_object(&rf);
Py_DECREF(rf.refs);
if (rf.buf != NULL)
PyMem_FREE(rf.buf);
@@ -1552,7 +1559,7 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
rf.refs = PyList_New(0);
if (rf.refs == NULL)
return NULL;
- result = r_object(&rf);
+ result = read_object(&rf);
Py_DECREF(rf.refs);
if (rf.buf != NULL)
PyMem_FREE(rf.buf);
@@ -1564,6 +1571,9 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
{
WFILE wf;
+ if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
+ return NULL;
+ }
memset(&wf, 0, sizeof(wf));
wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
if (wf.str == NULL)
More information about the Python-checkins
mailing list