bpo-45107: Specialize LOAD_METHOD for instances with dict. (GH-31531)

This commit is contained in:
Mark Shannon 2022-02-24 19:34:57 +00:00 committed by GitHub
parent 4dc746310b
commit 2a6ece572c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 143 additions and 66 deletions

View file

@ -227,6 +227,8 @@ static inline PyObject **_PyObject_ManagedDictPointer(PyObject *obj)
return ((PyObject **)obj)-3;
}
/* Byte offset, relative to the object pointer, of the managed dict slot:
 * three pointer-sized words before the object (negative), matching the
 * `((PyObject **)obj)-3` arithmetic in _PyObject_ManagedDictPointer(). */
#define MANAGED_DICT_OFFSET (((int)sizeof(PyObject *))*-3)
extern PyObject ** _PyObject_DictPointer(PyObject *);
extern int _PyObject_VisitInstanceAttributes(PyObject *self, visitproc visit, void *arg);
extern void _PyObject_ClearInstanceAttributes(PyObject *self);

71
Include/opcode.h generated
View file

@ -147,41 +147,42 @@ extern "C" {
#define LOAD_GLOBAL_MODULE 45
#define LOAD_GLOBAL_BUILTIN 46
#define LOAD_METHOD_ADAPTIVE 47
#define LOAD_METHOD_CACHED 48
#define LOAD_METHOD_CLASS 55
#define LOAD_METHOD_MODULE 56
#define LOAD_METHOD_NO_DICT 57
#define PRECALL_ADAPTIVE 58
#define PRECALL_BUILTIN_CLASS 59
#define PRECALL_NO_KW_BUILTIN_O 62
#define PRECALL_NO_KW_BUILTIN_FAST 63
#define PRECALL_BUILTIN_FAST_WITH_KEYWORDS 64
#define PRECALL_NO_KW_LEN 65
#define PRECALL_NO_KW_ISINSTANCE 66
#define PRECALL_NO_KW_LIST_APPEND 67
#define PRECALL_NO_KW_METHOD_DESCRIPTOR_O 72
#define PRECALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 76
#define PRECALL_NO_KW_STR_1 77
#define PRECALL_NO_KW_TUPLE_1 78
#define PRECALL_NO_KW_TYPE_1 79
#define PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST 80
#define PRECALL_BOUND_METHOD 81
#define PRECALL_PYFUNC 131
#define RESUME_QUICK 140
#define STORE_ATTR_ADAPTIVE 141
#define STORE_ATTR_INSTANCE_VALUE 143
#define STORE_ATTR_SLOT 150
#define STORE_ATTR_WITH_HINT 153
#define UNPACK_SEQUENCE_ADAPTIVE 154
#define UNPACK_SEQUENCE_LIST 158
#define UNPACK_SEQUENCE_TUPLE 159
#define UNPACK_SEQUENCE_TWO_TUPLE 161
#define LOAD_FAST__LOAD_FAST 167
#define STORE_FAST__LOAD_FAST 168
#define LOAD_FAST__LOAD_CONST 169
#define LOAD_CONST__LOAD_FAST 170
#define STORE_FAST__STORE_FAST 173
#define LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE 174
#define LOAD_METHOD_CLASS 48
#define LOAD_METHOD_MODULE 55
#define LOAD_METHOD_NO_DICT 56
#define LOAD_METHOD_WITH_DICT 57
#define LOAD_METHOD_WITH_VALUES 58
#define PRECALL_ADAPTIVE 59
#define PRECALL_BUILTIN_CLASS 62
#define PRECALL_NO_KW_BUILTIN_O 63
#define PRECALL_NO_KW_BUILTIN_FAST 64
#define PRECALL_BUILTIN_FAST_WITH_KEYWORDS 65
#define PRECALL_NO_KW_LEN 66
#define PRECALL_NO_KW_ISINSTANCE 67
#define PRECALL_NO_KW_LIST_APPEND 72
#define PRECALL_NO_KW_METHOD_DESCRIPTOR_O 76
#define PRECALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 77
#define PRECALL_NO_KW_STR_1 78
#define PRECALL_NO_KW_TUPLE_1 79
#define PRECALL_NO_KW_TYPE_1 80
#define PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST 81
#define PRECALL_BOUND_METHOD 131
#define PRECALL_PYFUNC 140
#define RESUME_QUICK 141
#define STORE_ATTR_ADAPTIVE 143
#define STORE_ATTR_INSTANCE_VALUE 150
#define STORE_ATTR_SLOT 153
#define STORE_ATTR_WITH_HINT 154
#define UNPACK_SEQUENCE_ADAPTIVE 158
#define UNPACK_SEQUENCE_LIST 159
#define UNPACK_SEQUENCE_TUPLE 161
#define UNPACK_SEQUENCE_TWO_TUPLE 167
#define LOAD_FAST__LOAD_FAST 168
#define STORE_FAST__LOAD_FAST 169
#define LOAD_FAST__LOAD_CONST 170
#define LOAD_CONST__LOAD_FAST 173
#define STORE_FAST__STORE_FAST 174
#define LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE 175
#define DO_TRACING 255
#ifdef NEED_OPCODE_JUMP_TABLES
static uint32_t _PyOpcode_RelativeJump[8] = {

View file

@ -260,10 +260,11 @@ def jabs_op(name, op):
"LOAD_GLOBAL_MODULE",
"LOAD_GLOBAL_BUILTIN",
"LOAD_METHOD_ADAPTIVE",
"LOAD_METHOD_CACHED",
"LOAD_METHOD_CLASS",
"LOAD_METHOD_MODULE",
"LOAD_METHOD_NO_DICT",
"LOAD_METHOD_WITH_DICT",
"LOAD_METHOD_WITH_VALUES",
"PRECALL_ADAPTIVE",
"PRECALL_BUILTIN_CLASS",
"PRECALL_NO_KW_BUILTIN_O",

View file

@ -0,0 +1 @@
Specialize ``LOAD_METHOD`` for instances with a dict.

View file

@ -4424,7 +4424,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
}
}
TARGET(LOAD_METHOD_CACHED) {
TARGET(LOAD_METHOD_WITH_VALUES) {
/* LOAD_METHOD, with cached method object */
assert(cframe.use_tracing == 0);
PyObject *self = TOP();
@ -4432,7 +4432,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
SpecializedCacheEntry *caches = GET_CACHE();
_PyAttrCache *cache1 = &caches[-1].attr;
_PyObjectCache *cache2 = &caches[-2].obj;
assert(cache1->tp_version != 0);
DEOPT_IF(self_cls->tp_version_tag != cache1->tp_version, LOAD_METHOD);
assert(self_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT);
PyDictObject *dict = *(PyDictObject**)_PyObject_ManagedDictPointer(self);
@ -4448,6 +4448,38 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
NOTRACE_DISPATCH();
}
TARGET(LOAD_METHOD_WITH_DICT) {
/* LOAD_METHOD, with a dict.
 * The instance dict can be either a managed dict (found at
 * MANAGED_DICT_OFFSET) or a dict stored at the type's tp_dictoffset.
 * Each DEOPT_IF below is a guard that bails out to LOAD_METHOD when
 * its condition is true. On a hit, the object cached in cache2
 * replaces TOP and `self` is pushed above it. */
assert(cframe.use_tracing == 0);
PyObject *self = TOP();
PyTypeObject *self_cls = Py_TYPE(self);
SpecializedCacheEntry *caches = GET_CACHE();
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
_PyAttrCache *cache1 = &caches[-1].attr;
_PyObjectCache *cache2 = &caches[-2].obj;
/* Guard: the type must still have the version tag that was cached. */
DEOPT_IF(self_cls->tp_version_tag != cache1->tp_version, LOAD_METHOD);
/* Treat index as a signed 16 bit value: the stored offset is negative
 * for a managed dict (MANAGED_DICT_OFFSET) and positive for a
 * tp_dictoffset dict. */
int dictoffset = *(int16_t *)&cache0->index;
/* Address of the dict slot, computed as a byte offset from `self`. */
PyDictObject **dictptr = (PyDictObject**)(((char *)self)+dictoffset);
/* The cached offset must be one of the two supported layouts. */
assert(
dictoffset == MANAGED_DICT_OFFSET ||
(dictoffset == self_cls->tp_dictoffset && dictoffset > 0)
);
PyDictObject *dict = *dictptr;
/* Guard: this specialization requires the dict to exist. */
DEOPT_IF(dict == NULL, LOAD_METHOD);
/* Guard: the dict's keys object must have the version that was cached. */
DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version, LOAD_METHOD);
STAT_INC(LOAD_METHOD, hit);
PyObject *res = cache2->obj;
assert(res != NULL);
assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR));
/* Take a new reference to the cached object before placing it on the stack. */
Py_INCREF(res);
/* Resulting stack layout: [..., res, self]. */
SET_TOP(res);
PUSH(self);
NOTRACE_DISPATCH();
}
TARGET(LOAD_METHOD_NO_DICT) {
assert(cframe.use_tracing == 0);
PyObject *self = TOP();

View file

@ -47,40 +47,40 @@ static void *opcode_targets[256] = {
&&TARGET_LOAD_GLOBAL_MODULE,
&&TARGET_LOAD_GLOBAL_BUILTIN,
&&TARGET_LOAD_METHOD_ADAPTIVE,
&&TARGET_LOAD_METHOD_CACHED,
&&TARGET_LOAD_METHOD_CLASS,
&&TARGET_WITH_EXCEPT_START,
&&TARGET_GET_AITER,
&&TARGET_GET_ANEXT,
&&TARGET_BEFORE_ASYNC_WITH,
&&TARGET_BEFORE_WITH,
&&TARGET_END_ASYNC_FOR,
&&TARGET_LOAD_METHOD_CLASS,
&&TARGET_LOAD_METHOD_MODULE,
&&TARGET_LOAD_METHOD_NO_DICT,
&&TARGET_LOAD_METHOD_WITH_DICT,
&&TARGET_LOAD_METHOD_WITH_VALUES,
&&TARGET_PRECALL_ADAPTIVE,
&&TARGET_PRECALL_BUILTIN_CLASS,
&&TARGET_STORE_SUBSCR,
&&TARGET_DELETE_SUBSCR,
&&TARGET_PRECALL_BUILTIN_CLASS,
&&TARGET_PRECALL_NO_KW_BUILTIN_O,
&&TARGET_PRECALL_NO_KW_BUILTIN_FAST,
&&TARGET_PRECALL_BUILTIN_FAST_WITH_KEYWORDS,
&&TARGET_PRECALL_NO_KW_LEN,
&&TARGET_PRECALL_NO_KW_ISINSTANCE,
&&TARGET_PRECALL_NO_KW_LIST_APPEND,
&&TARGET_GET_ITER,
&&TARGET_GET_YIELD_FROM_ITER,
&&TARGET_PRINT_EXPR,
&&TARGET_LOAD_BUILD_CLASS,
&&TARGET_PRECALL_NO_KW_METHOD_DESCRIPTOR_O,
&&TARGET_PRECALL_NO_KW_LIST_APPEND,
&&TARGET_GET_AWAITABLE,
&&TARGET_LOAD_ASSERTION_ERROR,
&&TARGET_RETURN_GENERATOR,
&&TARGET_PRECALL_NO_KW_METHOD_DESCRIPTOR_O,
&&TARGET_PRECALL_NO_KW_METHOD_DESCRIPTOR_NOARGS,
&&TARGET_PRECALL_NO_KW_STR_1,
&&TARGET_PRECALL_NO_KW_TUPLE_1,
&&TARGET_PRECALL_NO_KW_TYPE_1,
&&TARGET_PRECALL_NO_KW_METHOD_DESCRIPTOR_FAST,
&&TARGET_PRECALL_BOUND_METHOD,
&&TARGET_LIST_TO_TUPLE,
&&TARGET_RETURN_VALUE,
&&TARGET_IMPORT_STAR,
@ -130,7 +130,7 @@ static void *opcode_targets[256] = {
&&TARGET_POP_JUMP_IF_NOT_NONE,
&&TARGET_POP_JUMP_IF_NONE,
&&TARGET_RAISE_VARARGS,
&&TARGET_PRECALL_PYFUNC,
&&TARGET_PRECALL_BOUND_METHOD,
&&TARGET_MAKE_FUNCTION,
&&TARGET_BUILD_SLICE,
&&TARGET_JUMP_NO_INTERRUPT,
@ -139,39 +139,40 @@ static void *opcode_targets[256] = {
&&TARGET_LOAD_DEREF,
&&TARGET_STORE_DEREF,
&&TARGET_DELETE_DEREF,
&&TARGET_PRECALL_PYFUNC,
&&TARGET_RESUME_QUICK,
&&TARGET_STORE_ATTR_ADAPTIVE,
&&TARGET_CALL_FUNCTION_EX,
&&TARGET_STORE_ATTR_INSTANCE_VALUE,
&&TARGET_STORE_ATTR_ADAPTIVE,
&&TARGET_EXTENDED_ARG,
&&TARGET_LIST_APPEND,
&&TARGET_SET_ADD,
&&TARGET_MAP_ADD,
&&TARGET_LOAD_CLASSDEREF,
&&TARGET_COPY_FREE_VARS,
&&TARGET_STORE_ATTR_SLOT,
&&TARGET_STORE_ATTR_INSTANCE_VALUE,
&&TARGET_RESUME,
&&TARGET_MATCH_CLASS,
&&TARGET_STORE_ATTR_SLOT,
&&TARGET_STORE_ATTR_WITH_HINT,
&&TARGET_UNPACK_SEQUENCE_ADAPTIVE,
&&TARGET_FORMAT_VALUE,
&&TARGET_BUILD_CONST_KEY_MAP,
&&TARGET_BUILD_STRING,
&&TARGET_UNPACK_SEQUENCE_ADAPTIVE,
&&TARGET_UNPACK_SEQUENCE_LIST,
&&TARGET_UNPACK_SEQUENCE_TUPLE,
&&TARGET_LOAD_METHOD,
&&TARGET_UNPACK_SEQUENCE_TWO_TUPLE,
&&TARGET_UNPACK_SEQUENCE_TUPLE,
&&TARGET_LIST_EXTEND,
&&TARGET_SET_UPDATE,
&&TARGET_DICT_MERGE,
&&TARGET_DICT_UPDATE,
&&TARGET_PRECALL,
&&TARGET_UNPACK_SEQUENCE_TWO_TUPLE,
&&TARGET_LOAD_FAST__LOAD_FAST,
&&TARGET_STORE_FAST__LOAD_FAST,
&&TARGET_LOAD_FAST__LOAD_CONST,
&&TARGET_LOAD_CONST__LOAD_FAST,
&&TARGET_CALL,
&&TARGET_KW_NAMES,
&&TARGET_LOAD_CONST__LOAD_FAST,
&&TARGET_STORE_FAST__STORE_FAST,
&&TARGET_LOAD_FAST__LOAD_ATTR_INSTANCE_VALUE,
&&_unknown_opcode,
@ -253,6 +254,5 @@ static void *opcode_targets[256] = {
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&TARGET_DO_TRACING
};

View file

@ -1062,6 +1062,13 @@ specialize_class_load_method(PyObject *owner, _Py_CODEUNIT *instr, PyObject *nam
}
}
/* Where the owner's instance attributes live, as classified by
 * _Py_Specialize_LoadMethod in order to pick a LOAD_METHOD specialization. */
typedef enum {
MANAGED_VALUES = 1, /* Type has Py_TPFLAGS_MANAGED_DICT and the managed dict pointer is NULL; selects LOAD_METHOD_WITH_VALUES. */
MANAGED_DICT = 2, /* Type has Py_TPFLAGS_MANAGED_DICT and the managed dict pointer is non-NULL; selects LOAD_METHOD_WITH_DICT. */
OFFSET_DICT = 3, /* No managed dict; tp_dictoffset > 0 and the dict stored there is non-NULL; selects LOAD_METHOD_WITH_DICT. */
NO_DICT = 4 /* No managed dict and tp_dictoffset == 0; selects LOAD_METHOD_NO_DICT. */
} ObjectDictKind;
// Please collect stats carefully before and after modifying. A subtle change
// can cause a significant drop in cache hits. A possible test is
// python.exe -m test test_typing test_re test_dis test_zlib.
@ -1071,8 +1078,8 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name,
_PyAdaptiveEntry *cache0 = &cache->adaptive;
_PyAttrCache *cache1 = &cache[-1].attr;
_PyObjectCache *cache2 = &cache[-2].obj;
PyTypeObject *owner_cls = Py_TYPE(owner);
if (PyModule_CheckExact(owner)) {
int err = specialize_module_load_attr(owner, instr, name, cache0,
LOAD_METHOD, LOAD_METHOD_MODULE);
@ -1102,13 +1109,39 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name,
SPECIALIZATION_FAIL(LOAD_METHOD, load_method_fail_kind(kind));
goto fail;
}
ObjectDictKind dictkind;
PyDictKeysObject *keys;
if (owner_cls->tp_flags & Py_TPFLAGS_MANAGED_DICT) {
PyObject **owner_dictptr = _PyObject_ManagedDictPointer(owner);
if (*owner_dictptr) {
SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_LOAD_METHOD_HAS_MANAGED_DICT);
PyObject *dict = *_PyObject_ManagedDictPointer(owner);
keys = ((PyHeapTypeObject *)owner_cls)->ht_cached_keys;
if (dict == NULL) {
dictkind = MANAGED_VALUES;
}
else {
dictkind = MANAGED_DICT;
}
}
else {
Py_ssize_t dictoffset = owner_cls->tp_dictoffset;
if (dictoffset < 0 || dictoffset > INT16_MAX) {
SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_OUT_OF_RANGE);
goto fail;
}
PyDictKeysObject *keys = ((PyHeapTypeObject *)owner_cls)->ht_cached_keys;
if (dictoffset == 0) {
dictkind = NO_DICT;
keys = NULL;
}
else {
PyObject *dict = *(PyObject **) ((char *)owner + dictoffset);
if (dict == NULL) {
SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_NO_DICT);
goto fail;
}
keys = ((PyDictObject *)dict)->ma_keys;
dictkind = OFFSET_DICT;
}
}
if (dictkind != NO_DICT) {
Py_ssize_t index = _PyDictKeys_StringLookup(keys, name);
if (index != DKIX_EMPTY) {
SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_LOAD_METHOD_IS_ATTR);
@ -1120,16 +1153,23 @@ _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name,
goto fail;
}
cache1->dk_version = keys_version;
*instr = _Py_MAKECODEUNIT(LOAD_METHOD_CACHED, _Py_OPARG(*instr));
}
else {
if (owner_cls->tp_dictoffset == 0) {
switch(dictkind) {
case NO_DICT:
*instr = _Py_MAKECODEUNIT(LOAD_METHOD_NO_DICT, _Py_OPARG(*instr));
}
else {
SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_LOAD_METHOD_HAS_DICT);
goto fail;
}
break;
case MANAGED_VALUES:
*instr = _Py_MAKECODEUNIT(LOAD_METHOD_WITH_VALUES, _Py_OPARG(*instr));
break;
case MANAGED_DICT:
*(int16_t *)&cache0->index = (int16_t)MANAGED_DICT_OFFSET;
*instr = _Py_MAKECODEUNIT(LOAD_METHOD_WITH_DICT, _Py_OPARG(*instr));
break;
case OFFSET_DICT:
assert(owner_cls->tp_dictoffset > 0 && owner_cls->tp_dictoffset <= INT16_MAX);
cache0->index = (uint16_t)owner_cls->tp_dictoffset;
*instr = _Py_MAKECODEUNIT(LOAD_METHOD_WITH_DICT, _Py_OPARG(*instr));
break;
}
/* `descr` is borrowed. This is safe for methods (even inherited ones from
* super classes!) as long as tp_version_tag is validated for two main reasons: