GH-89987: Shrink the BINARY_SUBSCR caches (GH-103022)

This commit is contained in:
Brandt Bucher 2023-03-29 15:53:30 -07:00 committed by GitHub
parent e647dbaded
commit 121057aa36
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 272 additions and 250 deletions

View file

@ -234,7 +234,18 @@ struct _typeobject {
* It should be treated as an opaque blob
* by code other than the specializer and interpreter. */
// Per-heap-type scratch space for the specializer: it holds data that would
// otherwise have to live in the inline bytecode caches of specialized
// instructions.
struct _specialization_cache {
// In order to avoid bloating the bytecode with lots of inline caches, the
// members of this structure have a somewhat unique contract. They are set
// by the specialization machinery, and are invalidated by PyType_Modified.
// The rules for using them are as follows:
// - If getitem is non-NULL, then it is the same Python function that
// PyType_Lookup(cls, "__getitem__") would return.
// - If getitem is NULL, then getitem_version is meaningless.
// - If getitem->func_version == getitem_version, then getitem can be called
// with two positional arguments and no keyword arguments, and has neither
// *args nor **kwargs (as required by BINARY_SUBSCR_GETITEM):
//
// Readers must therefore check getitem for NULL first, and only then compare
// getitem->func_version against getitem_version before calling it.

// Cached __getitem__ function, or NULL when nothing is cached. Reset to NULL
// whenever the type is modified; getitem_version is left untouched then.
PyObject *getitem;
// Function version stored alongside getitem when the specializer fills in
// the cache; only meaningful while getitem is non-NULL.
uint32_t getitem_version;
};
/* The *real* layout of a type object when allocated on the heap */

View file

@ -47,8 +47,6 @@ typedef struct {
typedef struct {
uint16_t counter;
uint16_t type_version[2];
uint16_t func_version;
} _PyBinarySubscrCache;
#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)

View file

@ -41,7 +41,7 @@ static const uint32_t _PyOpcode_Jump[9] = {
};
const uint8_t _PyOpcode_Caches[256] = {
[BINARY_SUBSCR] = 4,
[BINARY_SUBSCR] = 1,
[STORE_SUBSCR] = 1,
[UNPACK_SEQUENCE] = 1,
[FOR_ITER] = 1,

View file

@ -435,7 +435,9 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.12a6 3519 (Modify SEND instruction)
# Python 3.12a6 3520 (Remove PREP_RERAISE_STAR, add CALL_INTRINSIC_2)
# Python 3.12a7 3521 (Shrink the LOAD_GLOBAL caches)
# Python 3.12a7 3522 (Removed JUMP_IF_FALSE_OR_POP/JUMP_IF_TRUE_OR_POP)
# Python 3.12a7 3523 (Convert COMPARE_AND_BRANCH back to COMPARE_OP)
# Python 3.12a7 3524 (Shrink the BINARY_SUBSCR caches)
# Python 3.13 will start with 3550
@ -452,7 +454,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.
MAGIC_NUMBER = (3523).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3524).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

View file

@ -392,8 +392,6 @@ def pseudo_op(name, op, real_ops):
},
"BINARY_SUBSCR": {
"counter": 1,
"type_version": 2,
"func_version": 1,
},
"FOR_ITER": {
"counter": 1,

View file

@ -1108,7 +1108,7 @@ def test_binary_specialize(self):
1 2 LOAD_NAME 0 (a)
4 LOAD_CONST 0 (0)
6 %s
16 RETURN_VALUE
10 RETURN_VALUE
"""
co_list = compile('a[0]', "<list>", "eval")
self.code_quicken(lambda: exec(co_list, {}, {'a': [0]}))

View file

@ -1556,7 +1556,7 @@ def delx(self): del self.__x
'10P' # PySequenceMethods
'2P' # PyBufferProcs
'6P'
'1P' # Specializer cache
'1PI' # Specializer cache
)
class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries

View file

@ -0,0 +1,2 @@
Reduce the number of inline :opcode:`CACHE` entries for
:opcode:`BINARY_SUBSCR`.

View file

@ -510,6 +510,11 @@ PyType_Modified(PyTypeObject *type)
type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
type->tp_version_tag = 0; /* 0 is not a valid version tag */
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
// This field *must* be invalidated if the type is modified (see the
// comment on struct _specialization_cache):
((PyHeapTypeObject *)type)->_spec_cache.getitem = NULL;
}
}
static void
@ -563,6 +568,11 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
clear:
type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG;
type->tp_version_tag = 0; /* 0 is not a valid version tag */
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
// This field *must* be invalidated if the type is modified (see the
// comment on struct _specialization_cache):
((PyHeapTypeObject *)type)->_spec_cache.getitem = NULL;
}
}
static int

View file

@ -1,39 +1,38 @@
// Auto-generated by Programs/freeze_test_frozenmain.py
unsigned char M_test_frozenmain[] = {
227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,
0,0,0,0,0,243,182,0,0,0,151,0,100,0,100,1,
0,0,0,0,0,243,170,0,0,0,151,0,100,0,100,1,
108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2,
100,2,171,1,0,0,0,0,0,0,0,0,1,0,2,0,
101,2,100,3,101,0,106,6,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,171,2,0,0,0,0,
0,0,0,0,1,0,2,0,101,1,106,8,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,171,0,
0,0,0,0,0,0,0,0,100,4,25,0,0,0,0,0,
0,0,0,0,90,5,100,5,68,0,93,23,0,0,90,6,
2,0,101,2,100,6,101,6,155,0,100,7,101,5,101,6,
25,0,0,0,0,0,0,0,0,0,155,0,157,4,171,1,
0,0,0,0,0,0,0,0,1,0,140,25,4,0,121,1,
41,8,233,0,0,0,0,78,122,18,70,114,111,122,101,110,
32,72,101,108,108,111,32,87,111,114,108,100,122,8,115,121,
115,46,97,114,103,118,218,6,99,111,110,102,105,103,41,5,
218,12,112,114,111,103,114,97,109,95,110,97,109,101,218,10,
101,120,101,99,117,116,97,98,108,101,218,15,117,115,101,95,
101,110,118,105,114,111,110,109,101,110,116,218,17,99,111,110,
102,105,103,117,114,101,95,99,95,115,116,100,105,111,218,14,
98,117,102,102,101,114,101,100,95,115,116,100,105,111,122,7,
99,111,110,102,105,103,32,122,2,58,32,41,7,218,3,115,
121,115,218,17,95,116,101,115,116,105,110,116,101,114,110,97,
108,99,97,112,105,218,5,112,114,105,110,116,218,4,97,114,
103,118,218,11,103,101,116,95,99,111,110,102,105,103,115,114,
3,0,0,0,218,3,107,101,121,169,0,243,0,0,0,0,
250,18,116,101,115,116,95,102,114,111,122,101,110,109,97,105,
110,46,112,121,250,8,60,109,111,100,117,108,101,62,114,18,
0,0,0,1,0,0,0,115,100,0,0,0,240,3,1,1,
1,243,8,0,1,11,219,0,24,225,0,5,208,6,26,213,
0,27,217,0,5,128,106,144,35,151,40,145,40,213,0,27,
216,9,38,208,9,26,215,9,38,209,9,38,212,9,40,168,
24,212,9,50,128,6,240,2,6,12,2,242,0,7,1,42,
128,67,241,14,0,5,10,208,10,40,144,67,209,10,40,152,
54,160,35,156,59,209,10,40,214,4,41,241,15,7,1,42,
114,16,0,0,0,
0,0,0,0,0,0,0,0,100,4,25,0,0,0,90,5,
100,5,68,0,93,20,0,0,90,6,2,0,101,2,100,6,
101,6,155,0,100,7,101,5,101,6,25,0,0,0,155,0,
157,4,171,1,0,0,0,0,0,0,0,0,1,0,140,22,
4,0,121,1,41,8,233,0,0,0,0,78,122,18,70,114,
111,122,101,110,32,72,101,108,108,111,32,87,111,114,108,100,
122,8,115,121,115,46,97,114,103,118,218,6,99,111,110,102,
105,103,41,5,218,12,112,114,111,103,114,97,109,95,110,97,
109,101,218,10,101,120,101,99,117,116,97,98,108,101,218,15,
117,115,101,95,101,110,118,105,114,111,110,109,101,110,116,218,
17,99,111,110,102,105,103,117,114,101,95,99,95,115,116,100,
105,111,218,14,98,117,102,102,101,114,101,100,95,115,116,100,
105,111,122,7,99,111,110,102,105,103,32,122,2,58,32,41,
7,218,3,115,121,115,218,17,95,116,101,115,116,105,110,116,
101,114,110,97,108,99,97,112,105,218,5,112,114,105,110,116,
218,4,97,114,103,118,218,11,103,101,116,95,99,111,110,102,
105,103,115,114,3,0,0,0,218,3,107,101,121,169,0,243,
0,0,0,0,250,18,116,101,115,116,95,102,114,111,122,101,
110,109,97,105,110,46,112,121,250,8,60,109,111,100,117,108,
101,62,114,18,0,0,0,1,0,0,0,115,100,0,0,0,
240,3,1,1,1,243,8,0,1,11,219,0,24,225,0,5,
208,6,26,213,0,27,217,0,5,128,106,144,35,151,40,145,
40,213,0,27,216,9,38,208,9,26,215,9,38,209,9,38,
212,9,40,168,24,209,9,50,128,6,240,2,6,12,2,242,
0,7,1,42,128,67,241,14,0,5,10,208,10,40,144,67,
209,10,40,152,54,160,35,153,59,209,10,40,214,4,41,241,
15,7,1,42,114,16,0,0,0,
};

View file

@ -292,7 +292,7 @@ dummy_func(
BINARY_SUBSCR_TUPLE_INT,
};
inst(BINARY_SUBSCR, (unused/4, container, sub -- res)) {
inst(BINARY_SUBSCR, (unused/1, container, sub -- res)) {
#if ENABLE_SPECIALIZATION
_PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
@ -339,7 +339,7 @@ dummy_func(
ERROR_IF(err, error);
}
inst(BINARY_SUBSCR_LIST_INT, (unused/4, list, sub -- res)) {
inst(BINARY_SUBSCR_LIST_INT, (unused/1, list, sub -- res)) {
assert(cframe.use_tracing == 0);
DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR);
@ -356,7 +356,7 @@ dummy_func(
Py_DECREF(list);
}
inst(BINARY_SUBSCR_TUPLE_INT, (unused/4, tuple, sub -- res)) {
inst(BINARY_SUBSCR_TUPLE_INT, (unused/1, tuple, sub -- res)) {
assert(cframe.use_tracing == 0);
DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR);
@ -373,7 +373,7 @@ dummy_func(
Py_DECREF(tuple);
}
inst(BINARY_SUBSCR_DICT, (unused/4, dict, sub -- res)) {
inst(BINARY_SUBSCR_DICT, (unused/1, dict, sub -- res)) {
assert(cframe.use_tracing == 0);
DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR);
STAT_INC(BINARY_SUBSCR, hit);
@ -389,14 +389,16 @@ dummy_func(
DECREF_INPUTS();
}
inst(BINARY_SUBSCR_GETITEM, (unused/1, type_version/2, func_version/1, container, sub -- unused)) {
inst(BINARY_SUBSCR_GETITEM, (unused/1, container, sub -- unused)) {
PyTypeObject *tp = Py_TYPE(container);
DEOPT_IF(tp->tp_version_tag != type_version, BINARY_SUBSCR);
assert(tp->tp_flags & Py_TPFLAGS_HEAPTYPE);
PyObject *cached = ((PyHeapTypeObject *)tp)->_spec_cache.getitem;
DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR);
PyHeapTypeObject *ht = (PyHeapTypeObject *)tp;
PyObject *cached = ht->_spec_cache.getitem;
DEOPT_IF(cached == NULL, BINARY_SUBSCR);
assert(PyFunction_Check(cached));
PyFunctionObject *getitem = (PyFunctionObject *)cached;
DEOPT_IF(getitem->func_version != func_version, BINARY_SUBSCR);
uint32_t cached_version = ht->_spec_cache.getitem_version;
DEOPT_IF(getitem->func_version != cached_version, BINARY_SUBSCR);
PyCodeObject *code = (PyCodeObject *)getitem->func_code;
assert(code->co_argcount == 2);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR);

File diff suppressed because it is too large Load diff

View file

@ -731,13 +731,13 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
[BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IX },
[BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC },
[BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC },
[BINARY_SUBSCR] = { true, INSTR_FMT_IXC000 },
[BINARY_SUBSCR] = { true, INSTR_FMT_IXC },
[BINARY_SLICE] = { true, INSTR_FMT_IX },
[STORE_SLICE] = { true, INSTR_FMT_IX },
[BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC000 },
[BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC000 },
[BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC000 },
[BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC000 },
[BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC },
[BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC },
[BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC },
[BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC },
[LIST_APPEND] = { true, INSTR_FMT_IB },
[SET_ADD] = { true, INSTR_FMT_IB },
[STORE_SUBSCR] = { true, INSTR_FMT_IXC },

View file

@ -1330,16 +1330,16 @@ _Py_Specialize_BinarySubscr(
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
goto fail;
}
assert(cls->tp_version_tag != 0);
write_u32(cache->type_version, cls->tp_version_tag);
int version = _PyFunction_GetVersionForCurrentState(func);
if (version == 0 || version != (uint16_t)version) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, version == 0 ?
SPEC_FAIL_OUT_OF_VERSIONS : SPEC_FAIL_OUT_OF_RANGE);
uint32_t version = _PyFunction_GetVersionForCurrentState(func);
if (version == 0) {
SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS);
goto fail;
}
cache->func_version = version;
((PyHeapTypeObject *)container_type)->_spec_cache.getitem = descriptor;
PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type;
// This pointer is invalidated by PyType_Modified (see the comment on
// struct _specialization_cache):
ht->_spec_cache.getitem = descriptor;
ht->_spec_cache.getitem_version = version;
instr->op.code = BINARY_SUBSCR_GETITEM;
goto success;
}