gh-117045: Add code object to function version cache (#117028)

Changes to the function version cache:

- In addition to the function object, also store the code object,
  and allow the latter to be retrieved even if the function has been evicted.
- Stop assigning new function versions after a critical attribute (e.g. `__code__`)
  has been modified; the version is permanently reset to zero in this case.
- Changes to `__annotations__` are no longer considered critical. (This fixes gh-109998.)

Changes to the Tier 2 optimization machinery:

- If we cannot map a function version to a function, but it is still mapped to a code object,
  we continue projecting the trace.
  The operand of the `_PUSH_FRAME` and `_POP_FRAME` opcodes can be either NULL,
  a function object, or a code object with the lowest bit set.

This allows us to trace through code that calls an ephemeral function,
i.e., a function that may not be alive when we are constructing the executor,
e.g. a generator expression or certain nested functions.
We will lose globals removal inside such functions,
but we can still do other peephole operations
(and even possibly [call inlining](https://github.com/python/cpython/pull/116290),
if we decide to do it), which only need the code object.
As before, if we cannot retrieve the code object from the cache, we stop projecting.
This commit is contained in:
Guido van Rossum 2024-03-21 12:37:41 -07:00 committed by GitHub
parent c85d84166a
commit 570a82d46a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 208 additions and 95 deletions

View file

@ -55,7 +55,7 @@ enum _frameowner {
};
typedef struct _PyInterpreterFrame {
PyObject *f_executable; /* Strong reference */
PyObject *f_executable; /* Strong reference (code object or None) */
struct _PyInterpreterFrame *previous;
PyObject *f_funcobj; /* Strong reference. Only valid if not on C stack */
PyObject *f_globals; /* Borrowed reference. Only valid if not on C stack */

View file

@ -17,20 +17,27 @@ extern PyObject* _PyFunction_Vectorcall(
#define FUNC_MAX_WATCHERS 8
#define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */
// One slot of the function version cache. A slot pairs the function object
// and the code object registered under a given version number; the slot
// index is the version modulo FUNC_VERSION_CACHE_SIZE.
struct _func_version_cache_item {
// Function registered for this slot, or NULL. It can be cleared while
// `code` is kept, so the code object stays retrievable on its own.
PyFunctionObject *func;
// Code object registered for this slot, or NULL. Stored as PyObject *;
// users check it with PyCode_Check() before casting to PyCodeObject *.
PyObject *code;
};
struct _py_func_state {
uint32_t next_version;
// Borrowed references to function objects whose
// Borrowed references to function and code objects whose
// func_version % FUNC_VERSION_CACHE_SIZE
// once was equal to the index in the table.
// They are cleared when the function is deallocated.
PyFunctionObject *func_version_cache[FUNC_VERSION_CACHE_SIZE];
// They are cleared when the function or code object is deallocated.
struct _func_version_cache_item func_version_cache[FUNC_VERSION_CACHE_SIZE];
};
extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr);
extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func);
PyAPI_FUNC(void) _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version);
PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version);
void _PyFunction_ClearCodeByVersion(uint32_t version);
PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code);
extern PyObject *_Py_set_function_type_params(
PyThreadState* unused, PyObject *func, PyObject *type_params);

View file

@ -1710,6 +1710,7 @@ code_dealloc(PyCodeObject *co)
}
Py_SET_REFCNT(co, 0);
_PyFunction_ClearCodeByVersion(co->co_version);
if (co->co_extra != NULL) {
PyInterpreterState *interp = _PyInterpreterState_GET();
_PyCodeObjectExtra *co_extra = co->co_extra;

View file

@ -218,43 +218,61 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname
}
/*
Function versions
-----------------
(This is purely internal documentation. There are no public APIs here.)
Function versions are used to detect when a function object has been
updated, invalidating inline cache data used by the `CALL` bytecode
(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations).
Function (and code) versions
----------------------------
They are also used by the Tier 2 superblock creation code to find
the function being called (and from there the code object).
The Tier 1 specializer generates CALL variants that can be invalidated
by changes to critical function attributes:
How does a function's `func_version` field get initialized?
- __code__
- __defaults__
- __kwdefaults__
- __closure__
- `PyFunction_New` and friends initialize it to 0.
- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`.
- It is reset to 0 when various attributes like `__code__` are set.
- A new version is allocated by `_PyFunction_GetVersionForCurrentState`
when the specializer needs a version and the version is 0.
For this purpose function objects have a 32-bit func_version member
that the specializer writes to the specialized instruction's inline
cache and which is checked by a guard on the specialized instructions.
The latter allocates versions using a counter in the interpreter state,
`interp->func_state.next_version`.
When the counter wraps around to 0, no more versions are allocated.
There is one other special case: functions with a non-standard
`vectorcall` field are not given a version.
The MAKE_FUNCTION bytecode sets func_version from the code object's
co_version field. The latter is initialized from a counter in the
interpreter state (interp->func_state.next_version) and never changes.
When this counter overflows, it remains zero and the specializer loses
the ability to specialize calls to new functions.
When the function version is 0, the `CALL` bytecode is not specialized.
The func_version is reset to zero when any of the critical attributes
is modified; after this point the specializer will no longer specialize
calls to this function, and the guard will always fail.
Code object versions
--------------------
The function and code version cache
-----------------------------------
So where do code objects get their `co_version`?
They share the same counter, `interp->func_state.next_version`.
The Tier 2 optimizer now has a problem, since it needs to find the
function and code objects given only the version number from the inline
cache. Our solution is to maintain a cache mapping version numbers to
function and code objects. To limit the cache size we could hash
the version number, but for now we simply use it modulo the table size.
There are some corner cases (e.g. generator expressions) where we will
be unable to find the function object in the cache but we can still
find the code object. For this reason the cache stores both the
function object and the code object.
The cache doesn't contain strong references; cache entries are
invalidated whenever the function or code object is deallocated.
Invariants
----------
These should hold at any time except when one of the cache-mutating
functions is running.
- For any slot s at index i:
- s->func == NULL or s->func->func_version % FUNC_VERSION_CACHE_SIZE == i
- s->code == NULL or s->code->co_version % FUNC_VERSION_CACHE_SIZE == i
- if s->func != NULL, then s->func->func_code == s->code
Code objects get a new `co_version` allocated from this counter upon
creation. Since code objects are nominally immutable, `co_version` can
not be invalidated. The only way it can be 0 is when 2**32 or more
code objects have been created during the process's lifetime.
(The counter isn't reset by `fork()`, extending the lifetime.)
*/
void
@ -262,28 +280,61 @@ _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (func->func_version != 0) {
PyFunctionObject **slot =
struct _func_version_cache_item *slot =
interp->func_state.func_version_cache
+ (func->func_version % FUNC_VERSION_CACHE_SIZE);
if (*slot == func) {
*slot = NULL;
if (slot->func == func) {
slot->func = NULL;
// Leave slot->code alone, there may be use for it.
}
}
func->func_version = version;
if (version != 0) {
interp->func_state.func_version_cache[
version % FUNC_VERSION_CACHE_SIZE] = func;
struct _func_version_cache_item *slot =
interp->func_state.func_version_cache
+ (version % FUNC_VERSION_CACHE_SIZE);
slot->func = func;
slot->code = func->func_code;
}
}
// Invalidate the version-cache entry belonging to a code object.
//
// `version` is the code object's co_version (code_dealloc passes
// co->co_version). The slot at index `version % FUNC_VERSION_CACHE_SIZE`
// in the current interpreter's cache is examined; it is cleared only if the
// code object stored there carries exactly this version. A slot that is
// empty, or that holds a different code object sharing the same index,
// is left untouched.
void
_PyFunction_ClearCodeByVersion(uint32_t version)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _func_version_cache_item *slot =
interp->func_state.func_version_cache
+ (version % FUNC_VERSION_CACHE_SIZE);
if (slot->code) {
assert(PyCode_Check(slot->code));
PyCodeObject *code = (PyCodeObject *)slot->code;
// Several versions map to the same index, so confirm the occupant
// really is the code object for this version before clearing.
if (code->co_version == version) {
slot->code = NULL;
// Clear the function too: a slot's function, when set, must
// have the slot's code object as its func_code.
slot->func = NULL;
}
}
}
PyFunctionObject *
_PyFunction_LookupByVersion(uint32_t version)
_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
PyFunctionObject *func = interp->func_state.func_version_cache[
version % FUNC_VERSION_CACHE_SIZE];
if (func != NULL && func->func_version == version) {
return func;
struct _func_version_cache_item *slot =
interp->func_state.func_version_cache
+ (version % FUNC_VERSION_CACHE_SIZE);
if (slot->code) {
assert(PyCode_Check(slot->code));
PyCodeObject *code = (PyCodeObject *)slot->code;
if (code->co_version == version) {
*p_code = slot->code;
}
}
else {
*p_code = NULL;
}
if (slot->func && slot->func->func_version == version) {
assert(slot->func->func_code == slot->code);
return slot->func;
}
return NULL;
}
@ -291,19 +342,7 @@ _PyFunction_LookupByVersion(uint32_t version)
uint32_t
_PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
{
if (func->func_version != 0) {
return func->func_version;
}
if (func->vectorcall != _PyFunction_Vectorcall) {
return 0;
}
PyInterpreterState *interp = _PyInterpreterState_GET();
if (interp->func_state.next_version == 0) {
return 0;
}
uint32_t v = interp->func_state.next_version++;
_PyFunction_SetVersion(func, v);
return v;
return func->func_version;
}
PyObject *
@ -507,7 +546,6 @@ PyFunction_SetAnnotations(PyObject *op, PyObject *annotations)
"non-dict annotations");
return -1;
}
_PyFunction_SetVersion((PyFunctionObject *)op, 0);
Py_XSETREF(((PyFunctionObject *)op)->func_annotations, annotations);
return 0;
}
@ -731,7 +769,6 @@ func_set_annotations(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(igno
"__annotations__ must be set to a dict object");
return -1;
}
_PyFunction_SetVersion(op, 0);
Py_XSETREF(op->func_annotations, Py_XNewRef(value));
return 0;
}

View file

@ -211,7 +211,7 @@ _PyOptimizer_Optimize(
_PyInterpreterFrame *frame, _Py_CODEUNIT *start,
PyObject **stack_pointer, _PyExecutorObject **executor_ptr)
{
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
PyCodeObject *code = _PyFrame_GetCode(frame);
assert(PyCode_Check(code));
PyInterpreterState *interp = _PyInterpreterState_GET();
if (!has_space_for_executor(code, start)) {
@ -479,8 +479,9 @@ BRANCH_TO_GUARD[4][2] = {
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \
goto done; \
} \
assert(func->func_code == (PyObject *)code); \
assert(func == NULL || func->func_code == (PyObject *)code); \
trace_stack[trace_stack_depth].func = func; \
trace_stack[trace_stack_depth].code = code; \
trace_stack[trace_stack_depth].instr = instr; \
trace_stack_depth++;
#define TRACE_STACK_POP() \
@ -489,7 +490,8 @@ BRANCH_TO_GUARD[4][2] = {
} \
trace_stack_depth--; \
func = trace_stack[trace_stack_depth].func; \
code = (PyCodeObject *)trace_stack[trace_stack_depth].func->func_code; \
code = trace_stack[trace_stack_depth].code; \
assert(func == NULL || func->func_code == (PyObject *)code); \
instr = trace_stack[trace_stack_depth].instr;
/* Returns 1 on success,
@ -505,7 +507,7 @@ translate_bytecode_to_trace(
_PyBloomFilter *dependencies)
{
bool progress_needed = true;
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
PyCodeObject *code = _PyFrame_GetCode(frame);
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
assert(PyFunction_Check(func));
PyCodeObject *initial_code = code;
@ -515,6 +517,7 @@ translate_bytecode_to_trace(
int max_length = buffer_size;
struct {
PyFunctionObject *func;
PyCodeObject *code;
_Py_CODEUNIT *instr;
} trace_stack[TRACE_STACK_SIZE];
int trace_stack_depth = 0;
@ -719,9 +722,19 @@ translate_bytecode_to_trace(
if (uop == _POP_FRAME) {
TRACE_STACK_POP();
/* Set the operand to the function object returned to,
* to assist optimization passes */
ADD_TO_TRACE(uop, oparg, (uintptr_t)func, target);
/* Set the operand to the function or code object returned to,
* to assist optimization passes. (See _PUSH_FRAME below.)
*/
if (func != NULL) {
operand = (uintptr_t)func;
}
else if (code != NULL) {
operand = (uintptr_t)code | 1;
}
else {
operand = 0;
}
ADD_TO_TRACE(uop, oparg, operand, target);
DPRINTF(2,
"Returning to %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
@ -738,10 +751,12 @@ translate_bytecode_to_trace(
// Add one to account for the actual opcode/oparg pair:
+ 1;
uint32_t func_version = read_u32(&instr[func_version_offset].cache);
PyFunctionObject *new_func = _PyFunction_LookupByVersion(func_version);
DPRINTF(2, "Function: version=%#x; object=%p\n", (int)func_version, new_func);
if (new_func != NULL) {
PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(new_func);
PyCodeObject *new_code = NULL;
PyFunctionObject *new_func =
_PyFunction_LookupByVersion(func_version, (PyObject **) &new_code);
DPRINTF(2, "Function: version=%#x; new_func=%p, new_code=%p\n",
(int)func_version, new_func, new_code);
if (new_code != NULL) {
if (new_code == code) {
// Recursive call, bail (we could be here forever).
DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
@ -766,9 +781,22 @@ translate_bytecode_to_trace(
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
TRACE_STACK_PUSH();
_Py_BloomFilter_Add(dependencies, new_code);
/* Set the operand to the callee's function object,
* to assist optimization passes */
ADD_TO_TRACE(uop, oparg, (uintptr_t)new_func, target);
/* Set the operand to the callee's function or code object,
* to assist optimization passes.
* We prefer setting it to the function (for remove_globals())
* but if that's not available but the code is available,
* use the code, setting the low bit so the optimizer knows.
*/
if (new_func != NULL) {
operand = (uintptr_t)new_func;
}
else if (new_code != NULL) {
operand = (uintptr_t)new_code | 1;
}
else {
operand = 0;
}
ADD_TO_TRACE(uop, oparg, operand, target);
code = new_code;
func = new_func;
instr = _PyCode_CODE(code);
@ -780,8 +808,8 @@ translate_bytecode_to_trace(
2 * INSTR_IP(instr, code));
goto top;
}
DPRINTF(2, "Bail, new_func == NULL\n");
ADD_TO_TRACE(uop, oparg, operand, target);
DPRINTF(2, "Bail, new_code == NULL\n");
ADD_TO_TRACE(uop, oparg, 0, target);
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
goto done;
}
@ -1116,7 +1144,7 @@ counter_optimize(
int Py_UNUSED(curr_stackentries)
)
{
PyCodeObject *code = (PyCodeObject *)frame->f_executable;
PyCodeObject *code = _PyFrame_GetCode(frame);
int oparg = instr->op.arg;
while (instr->op.code == EXTENDED_ARG) {
instr++;

View file

@ -228,7 +228,12 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
builtins_watched <<= 1;
globals_watched <<= 1;
function_checked <<= 1;
PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand;
uint64_t operand = buffer[pc].operand;
if (operand == 0 || (operand & 1)) {
// It's either a code object or NULL, so bail
return 1;
}
PyFunctionObject *func = (PyFunctionObject *)operand;
if (func == NULL) {
return 1;
}
@ -251,7 +256,15 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
builtins_watched >>= 1;
globals_watched >>= 1;
function_checked >>= 1;
PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand;
uint64_t operand = buffer[pc].operand;
if (operand == 0 || (operand & 1)) {
// It's either a code object or NULL, so bail
return 1;
}
PyFunctionObject *func = (PyFunctionObject *)operand;
if (func == NULL) {
return 1;
}
assert(PyFunction_Check(func));
function_version = func->func_version;
globals = func->func_globals;
@ -522,7 +535,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
static void
peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size)
{
PyCodeObject *co = (PyCodeObject *)frame->f_executable;
PyCodeObject *co = _PyFrame_GetCode(frame);
for (int pc = 0; pc < buffer_size; pc++) {
int opcode = buffer[pc].opcode;
switch(opcode) {
@ -545,11 +558,16 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s
case _PUSH_FRAME:
case _POP_FRAME:
{
PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand;
if (func == NULL) {
uint64_t operand = buffer[pc].operand;
if (operand & 1) {
co = (PyCodeObject *)(operand & ~1);
assert(PyCode_Check(co));
}
else if (operand == 0) {
co = NULL;
}
else {
PyFunctionObject *func = (PyFunctionObject *)operand;
assert(PyFunction_Check(func));
co = (PyCodeObject *)func->func_code;
}
@ -587,7 +605,7 @@ _Py_uop_analyze_and_optimize(
peephole_opt(frame, buffer, buffer_size);
err = optimize_uops(
(PyCodeObject *)frame->f_executable, buffer,
_PyFrame_GetCode(frame), buffer,
buffer_size, curr_stacklen, dependencies);
if (err == 0) {

View file

@ -543,14 +543,25 @@ dummy_func(void) {
(void)callable;
PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand;
DPRINTF(3, "func: %p ", func);
if (func == NULL) {
DPRINTF(3, "\n");
DPRINTF(1, "Missing function\n");
goto done;
PyCodeObject *co = NULL;
assert((this_instr + 2)->opcode == _PUSH_FRAME);
uintptr_t push_operand = (this_instr + 2)->operand;
if (push_operand & 1) {
co = (PyCodeObject *)(push_operand & ~1);
DPRINTF(3, "code=%p ", co);
assert(PyCode_Check(co));
}
else {
PyFunctionObject *func = (PyFunctionObject *)push_operand;
DPRINTF(3, "func=%p ", func);
if (func == NULL) {
DPRINTF(3, "\n");
DPRINTF(1, "Missing function\n");
goto done;
}
co = (PyCodeObject *)func->func_code;
DPRINTF(3, "code=%p ", co);
}
PyCodeObject *co = (PyCodeObject *)func->func_code;
assert(self_or_null != NULL);
assert(args != NULL);

View file

@ -1596,14 +1596,25 @@
callable = stack_pointer[-2 - oparg];
int argcount = oparg;
(void)callable;
PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand;
DPRINTF(3, "func: %p ", func);
if (func == NULL) {
DPRINTF(3, "\n");
DPRINTF(1, "Missing function\n");
goto done;
PyCodeObject *co = NULL;
assert((this_instr + 2)->opcode == _PUSH_FRAME);
uintptr_t push_operand = (this_instr + 2)->operand;
if (push_operand & 1) {
co = (PyCodeObject *)(push_operand & ~1);
DPRINTF(3, "code=%p ", co);
assert(PyCode_Check(co));
}
else {
PyFunctionObject *func = (PyFunctionObject *)push_operand;
DPRINTF(3, "func=%p ", func);
if (func == NULL) {
DPRINTF(3, "\n");
DPRINTF(1, "Missing function\n");
goto done;
}
co = (PyCodeObject *)func->func_code;
DPRINTF(3, "code=%p ", co);
}
PyCodeObject *co = (PyCodeObject *)func->func_code;
assert(self_or_null != NULL);
assert(args != NULL);
if (sym_is_not_null(self_or_null)) {