bpo-44525: Specialize simple Python calls. (GH-29033)

This commit is contained in:
Mark Shannon 2021-10-20 16:08:28 +01:00 committed by GitHub
parent 8d6740f489
commit 8863a0fcc5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 176 additions and 60 deletions

View file

@ -35,6 +35,12 @@ typedef struct {
PyObject *obj;
} _PyObjectCache;
/* Cache entry used by the specialized call of a Python function
 * (CALL_FUNCTION_PY_SIMPLE). The three fields occupy 4 + 2 + 2 bytes. */
typedef struct {
uint32_t func_version;     /* function version seen at specialization time;
                              compared against func->func_version on each call */
uint16_t defaults_start;   /* index in func_defaults of the first default to copy */
uint16_t defaults_len;     /* number of defaults to copy after the passed arguments */
} _PyCallCache;
/* Add specialized versions of entries to this union.
*
* Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
@ -51,6 +57,7 @@ typedef union {
_PyAttrCache attr;
_PyLoadGlobalCache load_global;
_PyObjectCache obj;
_PyCallCache call;
} SpecializedCacheEntry;
#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))

47
Include/opcode.h generated
View file

@ -153,29 +153,30 @@ extern "C" {
#define CALL_FUNCTION_BUILTIN_FAST 45
#define CALL_FUNCTION_LEN 46
#define CALL_FUNCTION_ISINSTANCE 47
#define JUMP_ABSOLUTE_QUICK 48
#define LOAD_ATTR_ADAPTIVE 58
#define LOAD_ATTR_INSTANCE_VALUE 80
#define LOAD_ATTR_WITH_HINT 81
#define LOAD_ATTR_SLOT 87
#define LOAD_ATTR_MODULE 88
#define LOAD_GLOBAL_ADAPTIVE 120
#define LOAD_GLOBAL_MODULE 122
#define LOAD_GLOBAL_BUILTIN 123
#define LOAD_METHOD_ADAPTIVE 127
#define LOAD_METHOD_CACHED 128
#define LOAD_METHOD_CLASS 134
#define LOAD_METHOD_MODULE 140
#define LOAD_METHOD_NO_DICT 143
#define STORE_ATTR_ADAPTIVE 149
#define STORE_ATTR_INSTANCE_VALUE 150
#define STORE_ATTR_SLOT 151
#define STORE_ATTR_WITH_HINT 153
#define LOAD_FAST__LOAD_FAST 154
#define STORE_FAST__LOAD_FAST 158
#define LOAD_FAST__LOAD_CONST 159
#define LOAD_CONST__LOAD_FAST 167
#define STORE_FAST__STORE_FAST 168
#define CALL_FUNCTION_PY_SIMPLE 48
#define JUMP_ABSOLUTE_QUICK 58
#define LOAD_ATTR_ADAPTIVE 80
#define LOAD_ATTR_INSTANCE_VALUE 81
#define LOAD_ATTR_WITH_HINT 87
#define LOAD_ATTR_SLOT 88
#define LOAD_ATTR_MODULE 120
#define LOAD_GLOBAL_ADAPTIVE 122
#define LOAD_GLOBAL_MODULE 123
#define LOAD_GLOBAL_BUILTIN 127
#define LOAD_METHOD_ADAPTIVE 128
#define LOAD_METHOD_CACHED 134
#define LOAD_METHOD_CLASS 140
#define LOAD_METHOD_MODULE 143
#define LOAD_METHOD_NO_DICT 149
#define STORE_ATTR_ADAPTIVE 150
#define STORE_ATTR_INSTANCE_VALUE 151
#define STORE_ATTR_SLOT 153
#define STORE_ATTR_WITH_HINT 154
#define LOAD_FAST__LOAD_FAST 158
#define STORE_FAST__LOAD_FAST 159
#define LOAD_FAST__LOAD_CONST 167
#define LOAD_CONST__LOAD_FAST 168
#define STORE_FAST__STORE_FAST 169
#define DO_TRACING 255
#ifdef NEED_OPCODE_JUMP_TABLES
static uint32_t _PyOpcode_RelativeJump[8] = {

View file

@ -237,6 +237,7 @@ def jabs_op(name, op):
"CALL_FUNCTION_BUILTIN_FAST",
"CALL_FUNCTION_LEN",
"CALL_FUNCTION_ISINSTANCE",
"CALL_FUNCTION_PY_SIMPLE",
"JUMP_ABSOLUTE_QUICK",
"LOAD_ATTR_ADAPTIVE",
"LOAD_ATTR_INSTANCE_VALUE",

View file

@ -0,0 +1 @@
Specialize simple calls to Python functions (no starargs, keyword dict, or closure).

View file

@ -4720,9 +4720,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
TARGET(CALL_FUNCTION_ADAPTIVE) {
SpecializedCacheEntry *cache = GET_CACHE();
nargs = cache->adaptive.original_oparg;
if (cache->adaptive.counter == 0) {
next_instr--;
int nargs = cache->adaptive.original_oparg;
if (_Py_Specialize_CallFunction(
PEEK(nargs + 1), next_instr, nargs, cache, BUILTINS()) < 0) {
goto error;
@ -4732,11 +4732,50 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr
else {
STAT_INC(CALL_FUNCTION, deferred);
cache->adaptive.counter--;
oparg = cache->adaptive.original_oparg;
JUMP_TO_INSTRUCTION(CALL_FUNCTION);
oparg = nargs;
kwnames = NULL;
postcall_shrink = 1;
goto call_function;
}
}
/* Specialized CALL_FUNCTION for a "simple" Python function: pushes a new
 * InterpreterFrame, moves the arguments and any needed defaults into it,
 * makes it the current frame and jumps to start_frame.
 * Falls back to CALL_FUNCTION (via DEOPT_IF) when any guard fails. */
TARGET(CALL_FUNCTION_PY_SIMPLE) {
SpecializedCacheEntry *caches = GET_CACHE();
/* caches[0] is the adaptive entry (holds the original oparg, i.e. the
 * number of arguments); caches[-1] is the _PyCallCache for this call. */
_PyAdaptiveEntry *cache0 = &caches[0].adaptive;
int argcount = cache0->original_oparg;
_PyCallCache *cache1 = &caches[-1].call;
/* The callable sits on the stack just below the argcount arguments. */
PyObject *callable = PEEK(argcount+1);
DEOPT_IF(!PyFunction_Check(callable), CALL_FUNCTION);
PyFunctionObject *func = (PyFunctionObject *)callable;
/* The cached defaults_start/defaults_len are only used when the function's
 * version still equals the one recorded in the cache. */
DEOPT_IF(func->func_version != cache1->func_version, CALL_FUNCTION);
/* PEP 523 */
/* Deoptimize when interp->eval_frame is set (non-NULL). */
DEOPT_IF(tstate->interp->eval_frame != NULL, CALL_FUNCTION);
STAT_INC(CALL_FUNCTION, hit);
record_cache_hit(cache0);
InterpreterFrame *new_frame = _PyThreadState_PushFrame(
tstate, PyFunction_AS_FRAME_CONSTRUCTOR(func), NULL);
if (new_frame == NULL) {
goto error;
}
/* After shrinking, stack_pointer[0..argcount-1] are the arguments. Their
 * pointers are copied into the new frame's locals with no INCREF/DECREF. */
STACK_SHRINK(argcount);
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = stack_pointer[i];
}
/* Fill the locals following the passed arguments from func_defaults,
 * taking a new reference to each default. */
int deflen = cache1->defaults_len;
for (int i = 0; i < deflen; i++) {
PyObject *def = PyTuple_GET_ITEM(func->func_defaults, cache1->defaults_start+i);
Py_INCREF(def);
new_frame->localsplus[argcount+i] = def;
}
/* Pop the callable itself and drop the reference the stack held. */
STACK_SHRINK(1);
Py_DECREF(func);
/* Save the caller's stack pointer into its frame before switching. */
_PyFrame_SetStackPointer(frame, stack_pointer);
/* Link the new frame behind the current one and make it current. */
new_frame->previous = tstate->frame;
new_frame->depth = frame->depth + 1;
tstate->frame = frame = new_frame;
goto start_frame;
}
TARGET(CALL_FUNCTION_BUILTIN_O) {
assert(cframe.use_tracing == 0);
/* Builtin METH_O functions */

View file

@ -47,7 +47,7 @@ static void *opcode_targets[256] = {
&&TARGET_CALL_FUNCTION_BUILTIN_FAST,
&&TARGET_CALL_FUNCTION_LEN,
&&TARGET_CALL_FUNCTION_ISINSTANCE,
&&TARGET_JUMP_ABSOLUTE_QUICK,
&&TARGET_CALL_FUNCTION_PY_SIMPLE,
&&TARGET_WITH_EXCEPT_START,
&&TARGET_GET_AITER,
&&TARGET_GET_ANEXT,
@ -57,7 +57,7 @@ static void *opcode_targets[256] = {
&&TARGET_INPLACE_ADD,
&&TARGET_INPLACE_SUBTRACT,
&&TARGET_INPLACE_MULTIPLY,
&&TARGET_LOAD_ATTR_ADAPTIVE,
&&TARGET_JUMP_ABSOLUTE_QUICK,
&&TARGET_INPLACE_MODULO,
&&TARGET_STORE_SUBSCR,
&&TARGET_DELETE_SUBSCR,
@ -79,15 +79,15 @@ static void *opcode_targets[256] = {
&&TARGET_INPLACE_AND,
&&TARGET_INPLACE_XOR,
&&TARGET_INPLACE_OR,
&&TARGET_LOAD_ATTR_ADAPTIVE,
&&TARGET_LOAD_ATTR_INSTANCE_VALUE,
&&TARGET_LOAD_ATTR_WITH_HINT,
&&TARGET_LIST_TO_TUPLE,
&&TARGET_RETURN_VALUE,
&&TARGET_IMPORT_STAR,
&&TARGET_SETUP_ANNOTATIONS,
&&TARGET_YIELD_VALUE,
&&TARGET_LOAD_ATTR_WITH_HINT,
&&TARGET_LOAD_ATTR_SLOT,
&&TARGET_LOAD_ATTR_MODULE,
&&TARGET_POP_EXCEPT,
&&TARGET_STORE_NAME,
&&TARGET_DELETE_NAME,
@ -119,46 +119,46 @@ static void *opcode_targets[256] = {
&&TARGET_IS_OP,
&&TARGET_CONTAINS_OP,
&&TARGET_RERAISE,
&&TARGET_LOAD_GLOBAL_ADAPTIVE,
&&TARGET_LOAD_ATTR_MODULE,
&&TARGET_JUMP_IF_NOT_EXC_MATCH,
&&TARGET_LOAD_GLOBAL_ADAPTIVE,
&&TARGET_LOAD_GLOBAL_MODULE,
&&TARGET_LOAD_GLOBAL_BUILTIN,
&&TARGET_LOAD_FAST,
&&TARGET_STORE_FAST,
&&TARGET_DELETE_FAST,
&&TARGET_LOAD_GLOBAL_BUILTIN,
&&TARGET_LOAD_METHOD_ADAPTIVE,
&&TARGET_LOAD_METHOD_CACHED,
&&TARGET_GEN_START,
&&TARGET_RAISE_VARARGS,
&&TARGET_CALL_FUNCTION,
&&TARGET_MAKE_FUNCTION,
&&TARGET_BUILD_SLICE,
&&TARGET_LOAD_METHOD_CLASS,
&&TARGET_LOAD_METHOD_CACHED,
&&TARGET_MAKE_CELL,
&&TARGET_LOAD_CLOSURE,
&&TARGET_LOAD_DEREF,
&&TARGET_STORE_DEREF,
&&TARGET_DELETE_DEREF,
&&TARGET_LOAD_METHOD_MODULE,
&&TARGET_LOAD_METHOD_CLASS,
&&TARGET_CALL_FUNCTION_KW,
&&TARGET_CALL_FUNCTION_EX,
&&TARGET_LOAD_METHOD_NO_DICT,
&&TARGET_LOAD_METHOD_MODULE,
&&TARGET_EXTENDED_ARG,
&&TARGET_LIST_APPEND,
&&TARGET_SET_ADD,
&&TARGET_MAP_ADD,
&&TARGET_LOAD_CLASSDEREF,
&&TARGET_LOAD_METHOD_NO_DICT,
&&TARGET_STORE_ATTR_ADAPTIVE,
&&TARGET_STORE_ATTR_INSTANCE_VALUE,
&&TARGET_STORE_ATTR_SLOT,
&&TARGET_MATCH_CLASS,
&&TARGET_STORE_ATTR_SLOT,
&&TARGET_STORE_ATTR_WITH_HINT,
&&TARGET_LOAD_FAST__LOAD_FAST,
&&TARGET_FORMAT_VALUE,
&&TARGET_BUILD_CONST_KEY_MAP,
&&TARGET_BUILD_STRING,
&&TARGET_LOAD_FAST__LOAD_FAST,
&&TARGET_STORE_FAST__LOAD_FAST,
&&TARGET_LOAD_FAST__LOAD_CONST,
&&TARGET_LOAD_METHOD,
&&TARGET_CALL_METHOD,
&&TARGET_LIST_EXTEND,
@ -166,6 +166,7 @@ static void *opcode_targets[256] = {
&&TARGET_DICT_MERGE,
&&TARGET_DICT_UPDATE,
&&TARGET_CALL_METHOD_KW,
&&TARGET_LOAD_FAST__LOAD_CONST,
&&TARGET_LOAD_CONST__LOAD_FAST,
&&TARGET_STORE_FAST__STORE_FAST,
&&_unknown_opcode,
@ -253,6 +254,5 @@ static void *opcode_targets[256] = {
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&_unknown_opcode,
&&TARGET_DO_TRACING
};

View file

@ -249,7 +249,7 @@ static uint8_t cache_requirements[256] = {
[BINARY_ADD] = 0,
[BINARY_MULTIPLY] = 0,
[BINARY_SUBSCR] = 0,
[CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache */
[CALL_FUNCTION] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
[STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
};
@ -461,15 +461,20 @@ _Py_Quicken(PyCodeObject *code) {
#define SPEC_FAIL_NON_FUNCTION_SCOPE 11
#define SPEC_FAIL_DIFFERENT_TYPES 12
/* Call function */
/* Calls */
#define SPEC_FAIL_GENERATOR 7
#define SPEC_FAIL_COMPLEX_PARAMETERS 8
#define SPEC_FAIL_WRONG_NUMBER_ARGUMENTS 9
#define SPEC_FAIL_CO_NOT_OPTIMIZED 10
/* SPEC_FAIL_METHOD defined as 11 above */
#define SPEC_FAIL_FREE_VARS 12
#define SPEC_FAIL_PYCFUNCTION 13
#define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 14
#define SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS 15
#define SPEC_FAIL_PYCFUNCTION_NOARGS 16
#define SPEC_FAIL_BAD_CALL_FLAGS 17
#define SPEC_FAIL_CLASS 18
#define SPEC_FAIL_PYCFUNCTION 10
#define SPEC_FAIL_PYCFUNCTION_WITH_KEYWORDS 13
#define SPEC_FAIL_PYCFUNCTION_FAST_WITH_KEYWORDS 14
#define SPEC_FAIL_PYCFUNCTION_NOARGS 15
#define SPEC_FAIL_BAD_CALL_FLAGS 16
#define SPEC_FAIL_PYTHON_FUNCTION 17
#define SPEC_FAIL_IMMUTABLE_CLASS 18
static int
specialize_module_load_attr(
@ -1236,6 +1241,69 @@ _Py_Specialize_BinaryMultiply(PyObject *left, PyObject *right, _Py_CODEUNIT *ins
return 0;
}
/* Specialization hook for calls whose callable is a type.
 * No specialization is performed here: the function records
 * SPEC_FAIL_CLASS for CALL_FUNCTION and always returns -1.
 * None of the parameters are used by the body. */
static int
specialize_class_call(
PyObject *callable, _Py_CODEUNIT *instr,
int nargs, SpecializedCacheEntry *cache)
{
SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CLASS);
return -1;
}
/* Try to specialize a CALL_FUNCTION whose callable is a Python function.
 *
 * func   - the function being called.
 * instr  - the instruction to rewrite on success.
 * nargs  - number of positional arguments passed at this call site.
 * cache  - adaptive cache entry of the instruction; the _PyCallCache that
 *          is filled in lives at cache[-1].
 *
 * On success, stores the function version and the slice of func_defaults
 * needed for the parameters the call does not pass, rewrites *instr to
 * CALL_FUNCTION_PY_SIMPLE (keeping its oparg) and returns 0.
 * On failure, records the reason with SPECIALIZATION_FAIL and returns -1
 * without modifying *instr or the cache.
 */
static int
specialize_py_call(
    PyFunctionObject *func, _Py_CODEUNIT *instr,
    int nargs, SpecializedCacheEntry *cache)
{
    _PyCallCache *cache1 = &cache[-1].call;
    /* Exclude generator or coroutines for now */
    PyCodeObject *code = (PyCodeObject *)func->func_code;
    int flags = code->co_flags;
    if (flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_GENERATOR);
        return -1;
    }
    /* *args, **kwargs and keyword-only parameters are not handled. */
    if ((flags & (CO_VARKEYWORDS | CO_VARARGS)) || code->co_kwonlyargcount) {
        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_COMPLEX_PARAMETERS);
        return -1;
    }
    if ((flags & CO_OPTIMIZED) == 0) {
        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_CO_NOT_OPTIMIZED);
        return -1;
    }
    if (code->co_nfreevars) {
        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_FREE_VARS);
        return -1;
    }
    int argcount = code->co_argcount;
    int defcount = func->func_defaults == NULL ? 0 : (int)PyTuple_GET_SIZE(func->func_defaults);
    /* Do not assert defcount <= argcount: __defaults__ can be assigned a
     * tuple longer than co_argcount from Python code, so min_args may be
     * negative. The arithmetic below is still correct in that case:
     * parameter j takes default number (j - min_args), which skips the
     * surplus leading defaults. */
    int min_args = argcount-defcount;
    if (nargs > argcount || nargs < min_args) {
        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
        return -1;
    }
    assert(nargs <= argcount && nargs >= min_args);
    /* Parameters nargs..argcount-1 are filled from
     * func_defaults[defstart .. defstart+deflen-1]. */
    int defstart = nargs - min_args;
    int deflen = argcount - nargs;
    assert(defstart >= 0 && deflen >= 0);
    assert(deflen == 0 || func->func_defaults != NULL);
    /* Both values are stored in uint16_t cache fields. */
    if (defstart > 0xffff || deflen > 0xffff) {
        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_RANGE);
        return -1;
    }
    /* Held as uint32_t to match cache1->func_version and avoid a round trip
     * through a signed int. A version of 0 means no version is available. */
    uint32_t version = _PyFunction_GetVersionForCurrentState(func);
    if (version == 0) {
        SPECIALIZATION_FAIL(CALL_FUNCTION, SPEC_FAIL_OUT_OF_VERSIONS);
        return -1;
    }
    cache1->func_version = version;
    cache1->defaults_start = defstart;
    cache1->defaults_len = deflen;
    *instr = _Py_MAKECODEUNIT(CALL_FUNCTION_PY_SIMPLE, _Py_OPARG(*instr));
    return 0;
}
#if COLLECT_SPECIALIZATION_STATS_DETAILED
static int
builtin_call_fail_kind(int ml_flags)
@ -1315,11 +1383,7 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
static int
call_fail_kind(PyObject *callable)
{
if (PyFunction_Check(callable)) {
return SPEC_FAIL_PYTHON_FUNCTION;
}
// new-style bound methods
else if (PyInstanceMethod_Check(callable)) {
if (PyInstanceMethod_Check(callable)) {
return SPEC_FAIL_METHOD;
}
else if (PyMethod_Check(callable)) {
@ -1330,17 +1394,14 @@ call_fail_kind(PyObject *callable)
return SPEC_FAIL_METHOD;
}
else if (PyType_Check(callable)) {
PyTypeObject *type = Py_TYPE(callable);
return PyType_HasFeature(type, Py_TPFLAGS_IMMUTABLETYPE) ?
SPEC_FAIL_IMMUTABLE_CLASS : SPEC_FAIL_MUTABLE_CLASS;
return SPEC_FAIL_CLASS;
}
return SPEC_FAIL_OTHER;
}
#endif
/* TODO:
- Specialize calling types.
- Specialize python function calls.
- Specialize calling classes.
*/
int
_Py_Specialize_CallFunction(
@ -1352,9 +1413,15 @@ _Py_Specialize_CallFunction(
if (PyCFunction_CheckExact(callable)) {
fail = specialize_c_call(callable, instr, nargs, cache, builtins);
}
else if (PyFunction_Check(callable)) {
fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, cache);
}
else if (PyType_Check(callable)) {
fail = specialize_class_call(callable, instr, nargs, cache);
}
else {
SPECIALIZATION_FAIL(CALL_FUNCTION, call_fail_kind(callable));
fail = 1;
fail = -1;
}
_PyAdaptiveEntry *cache0 = &cache->adaptive;
if (fail) {