[Python-checkins] gh-78214: marshal: Stabilize FLAG_REF usage (GH-8226)

methane webhook-mailer at python.org
Tue May 3 21:01:21 EDT 2022


https://github.com/python/cpython/commit/6dcfd6c5e3cb46543e82dc3f7234546adf4bb04a
commit: 6dcfd6c5e3cb46543e82dc3f7234546adf4bb04a
branch: main
author: Inada Naoki <songofacandy at gmail.com>
committer: methane <songofacandy at gmail.com>
date: 2022-05-04T10:01:15+09:00
summary:

gh-78214: marshal: Stabilize FLAG_REF usage (GH-8226)

Use FLAG_REF always for interned strings.

Refcounts of interned strings are very unstable.
When compiling the same source, the refcount of an interned string in the output may be 1 or >1.
This makes FLAG_REF usage unstable.

To help reproducible builds, use FLAG_REF for interned strings even if refcnt(obj)==1.

files:
A Misc/NEWS.d/next/Core and Builtins/2018-07-14-16-58-00.bpo-34093.WaVD-f.rst
M Programs/test_frozenmain.h
M Python/marshal.c

diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-07-14-16-58-00.bpo-34093.WaVD-f.rst b/Misc/NEWS.d/next/Core and Builtins/2018-07-14-16-58-00.bpo-34093.WaVD-f.rst
new file mode 100644
index 0000000000000..383daaccded6c
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-07-14-16-58-00.bpo-34093.WaVD-f.rst	
@@ -0,0 +1,2 @@
+``marshal.dumps()`` uses ``FLAG_REF`` for all interned strings.  This makes
+output more deterministic and helps reproducible builds.
diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h
index 3034927d7a12b..1c279134e94dc 100644
--- a/Programs/test_frozenmain.h
+++ b/Programs/test_frozenmain.h
@@ -15,19 +15,19 @@ unsigned char M_test_frozenmain[] = {
     0,0,1,0,140,26,100,1,83,0,41,8,233,0,0,0,
     0,78,122,18,70,114,111,122,101,110,32,72,101,108,108,111,
     32,87,111,114,108,100,122,8,115,121,115,46,97,114,103,118,
-    218,6,99,111,110,102,105,103,41,5,90,12,112,114,111,103,
+    218,6,99,111,110,102,105,103,41,5,218,12,112,114,111,103,
     114,97,109,95,110,97,109,101,218,10,101,120,101,99,117,116,
-    97,98,108,101,90,15,117,115,101,95,101,110,118,105,114,111,
-    110,109,101,110,116,90,17,99,111,110,102,105,103,117,114,101,
-    95,99,95,115,116,100,105,111,90,14,98,117,102,102,101,114,
+    97,98,108,101,218,15,117,115,101,95,101,110,118,105,114,111,
+    110,109,101,110,116,218,17,99,111,110,102,105,103,117,114,101,
+    95,99,95,115,116,100,105,111,218,14,98,117,102,102,101,114,
     101,100,95,115,116,100,105,111,122,7,99,111,110,102,105,103,
-    32,122,2,58,32,41,7,218,3,115,121,115,90,17,95,116,
+    32,122,2,58,32,41,7,218,3,115,121,115,218,17,95,116,
     101,115,116,105,110,116,101,114,110,97,108,99,97,112,105,218,
-    5,112,114,105,110,116,218,4,97,114,103,118,90,11,103,101,
+    5,112,114,105,110,116,218,4,97,114,103,118,218,11,103,101,
     116,95,99,111,110,102,105,103,115,114,2,0,0,0,218,3,
     107,101,121,169,0,243,0,0,0,0,250,18,116,101,115,116,
     95,102,114,111,122,101,110,109,97,105,110,46,112,121,250,8,
-    60,109,111,100,117,108,101,62,114,11,0,0,0,1,0,0,
+    60,109,111,100,117,108,101,62,114,17,0,0,0,1,0,0,
     0,115,152,0,0,0,248,240,6,0,1,11,128,10,128,10,
     128,10,216,0,24,208,0,24,208,0,24,208,0,24,224,0,
     5,128,5,208,6,26,209,0,27,212,0,27,208,0,27,216,
@@ -37,6 +37,6 @@ unsigned char M_test_frozenmain[] = {
     7,1,42,240,0,7,1,42,128,67,240,14,0,5,10,128,
     69,208,10,40,144,67,208,10,40,208,10,40,152,54,160,35,
     156,59,208,10,40,208,10,40,209,4,41,212,4,41,208,4,
-    41,208,4,41,240,15,7,1,42,240,0,7,1,42,114,9,
+    41,208,4,41,240,15,7,1,42,240,0,7,1,42,114,15,
     0,0,0,
 };
diff --git a/Python/marshal.c b/Python/marshal.c
index bbe67e3379fd9..90a4405091800 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -298,9 +298,14 @@ w_ref(PyObject *v, char *flag, WFILE *p)
     if (p->version < 3 || p->hashtable == NULL)
         return 0; /* not writing object references */
 
-    /* if it has only one reference, it definitely isn't shared */
-    if (Py_REFCNT(v) == 1)
+    /* If it has only one reference, it definitely isn't shared.
+     * But we always use TYPE_REF for interned strings, to keep PYC files as
+     * stable as possible.
+     */
+    if (Py_REFCNT(v) == 1 &&
+            !(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
         return 0;
+    }
 
     entry = _Py_hashtable_get_entry(p->hashtable, v);
     if (entry != NULL) {



More information about the Python-checkins mailing list