gh-78214: marshal: Stabilize FLAG_REF usage (GH-8226)

Use FLAG_REF always for interned strings.

Refcounts of interned strings are very unstable.
When compiling the same source, the refcount of an interned string in the output may be 1 or >1.
This makes FLAG_REF usage unstable.

To help reproducible builds, use FLAG_REF for interned strings even if refcnt(obj)==1.
This commit is contained in:
Inada Naoki 2022-05-04 10:01:15 +09:00 committed by GitHub
parent dfb1b9da8a
commit 6dcfd6c5e3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 17 additions and 10 deletions

View file

@ -0,0 +1,2 @@
``marshal.dumps()`` uses ``FLAG_REF`` for all interned strings. This makes
the output more deterministic and helps reproducible builds.

View file

@ -15,19 +15,19 @@ unsigned char M_test_frozenmain[] = {
0,0,1,0,140,26,100,1,83,0,41,8,233,0,0,0,
0,78,122,18,70,114,111,122,101,110,32,72,101,108,108,111,
32,87,111,114,108,100,122,8,115,121,115,46,97,114,103,118,
218,6,99,111,110,102,105,103,41,5,90,12,112,114,111,103,
218,6,99,111,110,102,105,103,41,5,218,12,112,114,111,103,
114,97,109,95,110,97,109,101,218,10,101,120,101,99,117,116,
97,98,108,101,90,15,117,115,101,95,101,110,118,105,114,111,
110,109,101,110,116,90,17,99,111,110,102,105,103,117,114,101,
95,99,95,115,116,100,105,111,90,14,98,117,102,102,101,114,
97,98,108,101,218,15,117,115,101,95,101,110,118,105,114,111,
110,109,101,110,116,218,17,99,111,110,102,105,103,117,114,101,
95,99,95,115,116,100,105,111,218,14,98,117,102,102,101,114,
101,100,95,115,116,100,105,111,122,7,99,111,110,102,105,103,
32,122,2,58,32,41,7,218,3,115,121,115,90,17,95,116,
32,122,2,58,32,41,7,218,3,115,121,115,218,17,95,116,
101,115,116,105,110,116,101,114,110,97,108,99,97,112,105,218,
5,112,114,105,110,116,218,4,97,114,103,118,90,11,103,101,
5,112,114,105,110,116,218,4,97,114,103,118,218,11,103,101,
116,95,99,111,110,102,105,103,115,114,2,0,0,0,218,3,
107,101,121,169,0,243,0,0,0,0,250,18,116,101,115,116,
95,102,114,111,122,101,110,109,97,105,110,46,112,121,250,8,
60,109,111,100,117,108,101,62,114,11,0,0,0,1,0,0,
60,109,111,100,117,108,101,62,114,17,0,0,0,1,0,0,
0,115,152,0,0,0,248,240,6,0,1,11,128,10,128,10,
128,10,216,0,24,208,0,24,208,0,24,208,0,24,224,0,
5,128,5,208,6,26,209,0,27,212,0,27,208,0,27,216,
@ -37,6 +37,6 @@ unsigned char M_test_frozenmain[] = {
7,1,42,240,0,7,1,42,128,67,240,14,0,5,10,128,
69,208,10,40,144,67,208,10,40,208,10,40,152,54,160,35,
156,59,208,10,40,208,10,40,209,4,41,212,4,41,208,4,
41,208,4,41,240,15,7,1,42,240,0,7,1,42,114,9,
41,208,4,41,240,15,7,1,42,240,0,7,1,42,114,15,
0,0,0,
};

View file

@ -298,9 +298,14 @@ w_ref(PyObject *v, char *flag, WFILE *p)
if (p->version < 3 || p->hashtable == NULL)
return 0; /* not writing object references */
/* if it has only one reference, it definitely isn't shared */
if (Py_REFCNT(v) == 1)
/* If it has only one reference, it definitely isn't shared.
* But we always use TYPE_REF for interned strings, to keep the PYC file
* as stable as possible.
*/
if (Py_REFCNT(v) == 1 &&
!(PyUnicode_CheckExact(v) && PyUnicode_CHECK_INTERNED(v))) {
return 0;
}
entry = _Py_hashtable_get_entry(p->hashtable, v);
if (entry != NULL) {