[Python-checkins] bpo-41180: Replace marshal code.__new__ audit event with marshal.load[s] and marshal.dumps (GH-26971)

zooba webhook-mailer at python.org
Wed Jun 30 13:52:43 EDT 2021


https://github.com/python/cpython/commit/863e3d5c7e037b24b8294b041ed7686b522973d8
commit: 863e3d5c7e037b24b8294b041ed7686b522973d8
branch: 3.9
author: Steve Dower <steve.dower at python.org>
committer: zooba <steve.dower at microsoft.com>
date: 2021-06-30T18:52:39+01:00
summary:

bpo-41180: Replace marshal code.__new__ audit event with marshal.load[s] and marshal.dumps (GH-26971)

files:
A Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst
M Doc/library/marshal.rst
M Lib/test/audit-tests.py
M Lib/test/test_audit.py
M Python/marshal.c

diff --git a/Doc/library/marshal.rst b/Doc/library/marshal.rst
index d65afc20041133..b38ba54b3c3bc6 100644
--- a/Doc/library/marshal.rst
+++ b/Doc/library/marshal.rst
@@ -66,6 +66,8 @@ The module defines these functions:
    The *version* argument indicates the data format that ``dump`` should use
    (see below).
 
+   .. audit-event:: marshal.dumps value,version marshal.dump
+
 
 .. function:: load(file)
 
@@ -74,11 +76,18 @@ The module defines these functions:
    format), raise :exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`.  The
    file must be a readable :term:`binary file`.
 
+   .. audit-event:: marshal.load "" marshal.load
+
    .. note::
 
       If an object containing an unsupported type was marshalled with :func:`dump`,
       :func:`load` will substitute ``None`` for the unmarshallable type.
 
+   .. versionchanged:: 3.9.7
+
+      This call used to raise a ``code.__new__`` audit event for each code object. Now
+      it raises a single ``marshal.load`` event for the entire load operation.
+
 
 .. function:: dumps(value[, version])
 
@@ -89,6 +98,8 @@ The module defines these functions:
    The *version* argument indicates the data format that ``dumps`` should use
    (see below).
 
+   .. audit-event:: marshal.dumps value,version marshal.dumps
+
 
 .. function:: loads(bytes)
 
@@ -96,6 +107,13 @@ The module defines these functions:
    :exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`.  Extra bytes in the
    input are ignored.
 
+   .. audit-event:: marshal.loads bytes marshal.loads
+
+   .. versionchanged:: 3.9.7
+
+      This call used to raise a ``code.__new__`` audit event for each code object. Now
+      it raises a single ``marshal.loads`` event for the entire load operation.
+
 
 In addition, the following constants are defined:
 
diff --git a/Lib/test/audit-tests.py b/Lib/test/audit-tests.py
index 8e66594e52429b..95216bcc48253c 100644
--- a/Lib/test/audit-tests.py
+++ b/Lib/test/audit-tests.py
@@ -6,6 +6,7 @@
 """
 
 import contextlib
+import os
 import sys
 
 
@@ -106,6 +107,32 @@ def test_block_add_hook_baseexception():
                 pass
 
 
+def test_marshal():
+    import marshal
+    o = ("a", "b", "c", 1, 2, 3)
+    payload = marshal.dumps(o)
+
+    with TestHook() as hook:
+        assertEqual(o, marshal.loads(marshal.dumps(o)))
+
+        try:
+            with open("test-marshal.bin", "wb") as f:
+                marshal.dump(o, f)
+            with open("test-marshal.bin", "rb") as f:
+                assertEqual(o, marshal.load(f))
+        finally:
+            os.unlink("test-marshal.bin")
+
+    actual = [(a[0], a[1]) for e, a in hook.seen if e == "marshal.dumps"]
+    assertSequenceEqual(actual, [(o, marshal.version)] * 2)
+
+    actual = [a[0] for e, a in hook.seen if e == "marshal.loads"]
+    assertSequenceEqual(actual, [payload])
+
+    actual = [e for e, a in hook.seen if e == "marshal.load"]
+    assertSequenceEqual(actual, ["marshal.load"])
+
+
 def test_pickle():
     import pickle
 
diff --git a/Lib/test/test_audit.py b/Lib/test/test_audit.py
index a9ac6fee446f87..387a31229a2f16 100644
--- a/Lib/test/test_audit.py
+++ b/Lib/test/test_audit.py
@@ -51,6 +51,11 @@ def test_block_add_hook(self):
     def test_block_add_hook_baseexception(self):
         self.do_test("test_block_add_hook_baseexception")
 
+    def test_marshal(self):
+        support.import_module("marshal")
+
+        self.do_test("test_marshal")
+
     def test_pickle(self):
         support.import_module("pickle")
 
diff --git a/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst b/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst
new file mode 100644
index 00000000000000..88b70c7cea2610
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst
@@ -0,0 +1,5 @@
+Add auditing events to the :mod:`marshal` module, and stop raising
+``code.__new__`` events for every unmarshalled code object. Directly
+instantiated code objects will continue to raise an event, and audit event
+handlers should inspect or collect the raw marshal data. This reduces a
+significant performance overhead when loading from ``.pyc`` files.
diff --git a/Python/marshal.c b/Python/marshal.c
index c4538bd373a82e..baafa3ecfbf1de 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -596,14 +596,18 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
 {
     char buf[BUFSIZ];
     WFILE wf;
+    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
+        return; /* caller must check PyErr_Occurred() */
+    }
     memset(&wf, 0, sizeof(wf));
     wf.fp = fp;
     wf.ptr = wf.buf = buf;
     wf.end = wf.ptr + sizeof(buf);
     wf.error = WFERR_OK;
     wf.version = version;
-    if (w_init_refs(&wf, version))
-        return; /* caller mush check PyErr_Occurred() */
+    if (w_init_refs(&wf, version)) {
+        return; /* caller must check PyErr_Occurred() */
+    }
     w_object(x, &wf);
     w_clear_refs(&wf);
     w_flush(&wf);
@@ -1371,12 +1375,6 @@ r_object(RFILE *p)
             if (lnotab == NULL)
                 goto code_error;
 
-            if (PySys_Audit("code.__new__", "OOOiiiiii",
-                            code, filename, name, argcount, posonlyargcount,
-                            kwonlyargcount, nlocals, stacksize, flags) < 0) {
-                goto code_error;
-            }
-
             v = (PyObject *) PyCode_NewWithPosOnlyArgs(
                             argcount, posonlyargcount, kwonlyargcount,
                             nlocals, stacksize, flags,
@@ -1435,6 +1433,15 @@ read_object(RFILE *p)
         fprintf(stderr, "XXX readobject called with exception set\n");
         return NULL;
     }
+    if (p->ptr && p->end) {
+        if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
+            return NULL;
+        }
+    } else if (p->fp || p->readable) {
+        if (PySys_Audit("marshal.load", NULL) < 0) {
+            return NULL;
+        }
+    }
     v = r_object(p);
     if (v == NULL && !PyErr_Occurred())
         PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
@@ -1531,7 +1538,7 @@ PyMarshal_ReadObjectFromFile(FILE *fp)
     rf.refs = PyList_New(0);
     if (rf.refs == NULL)
         return NULL;
-    result = r_object(&rf);
+    result = read_object(&rf);
     Py_DECREF(rf.refs);
     if (rf.buf != NULL)
         PyMem_FREE(rf.buf);
@@ -1552,7 +1559,7 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
     rf.refs = PyList_New(0);
     if (rf.refs == NULL)
         return NULL;
-    result = r_object(&rf);
+    result = read_object(&rf);
     Py_DECREF(rf.refs);
     if (rf.buf != NULL)
         PyMem_FREE(rf.buf);
@@ -1564,6 +1571,9 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
 {
     WFILE wf;
 
+    if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
+        return NULL;
+    }
     memset(&wf, 0, sizeof(wf));
     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
     if (wf.str == NULL)



More information about the Python-checkins mailing list