mirror of
https://github.com/python/cpython
synced 2024-11-02 10:03:49 +00:00
22b0de2755
This PR sets up tagged pointers for CPython. The general idea is to create a separate struct _PyStackRef for everything on the evaluation stack to store the bits. This forces the C compiler to warn us if we try to cast things or pull things out of the struct directly. Only for free threading: We tag the low bit if something is deferred - that means we skip incref and decref operations on it. This behavior may change in the future if Mark's plans to defer all objects in the interpreter loop pan out. This implies a strict stack reference discipline is required. ALL incref and decref operations on stackrefs must use the stackref variants. It is unsafe to untag something then do normal incref/decref ops on it. The new incref and decref variants are called dup and close. They mimic a "handle" API operating on these stackrefs. Please read Include/internal/pycore_stackref.h for more information! --------- Co-authored-by: Mark Shannon <9448417+markshannon@users.noreply.github.com>
1716 lines
58 KiB
C
1716 lines
58 KiB
C
#ifdef _Py_TIER2
|
|
|
|
#include "Python.h"
|
|
#include "opcode.h"
|
|
#include "pycore_interp.h"
|
|
#include "pycore_backoff.h"
|
|
#include "pycore_bitutils.h" // _Py_popcount32()
|
|
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
|
|
#include "pycore_opcode_metadata.h" // _PyOpcode_OpName[]
|
|
#include "pycore_opcode_utils.h" // MAX_REAL_OPCODE
|
|
#include "pycore_optimizer.h" // _Py_uop_analyze_and_optimize()
|
|
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
|
#include "pycore_uop_ids.h"
|
|
#include "pycore_jit.h"
|
|
#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <stddef.h>
|
|
|
|
#define NEED_OPCODE_METADATA
|
|
#include "pycore_uop_metadata.h" // Uop tables
|
|
#undef NEED_OPCODE_METADATA
|
|
|
|
/* Upper bound on the number of executors attached to one code object:
 * has_space_for_executor() refuses a new slot once the array's size
 * reaches this value. */
#define MAX_EXECUTORS_SIZE 256
|
|
|
|
#ifdef Py_DEBUG
|
|
static int
|
|
base_opcode(PyCodeObject *code, int offset)
|
|
{
|
|
int opcode = _Py_GetBaseOpcode(code, offset);
|
|
if (opcode == ENTER_EXECUTOR) {
|
|
int oparg = _PyCode_CODE(code)[offset].op.arg;
|
|
_PyExecutorObject *ex = code->co_executors->executors[oparg];
|
|
return ex->vm_data.opcode;
|
|
}
|
|
return opcode;
|
|
}
|
|
#endif
|
|
|
|
static bool
|
|
has_space_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr)
|
|
{
|
|
if (instr->op.code == ENTER_EXECUTOR) {
|
|
return true;
|
|
}
|
|
if (code->co_executors == NULL) {
|
|
return true;
|
|
}
|
|
return code->co_executors->size < MAX_EXECUTORS_SIZE;
|
|
}
|
|
|
|
static int32_t
|
|
get_index_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr)
|
|
{
|
|
if (instr->op.code == ENTER_EXECUTOR) {
|
|
return instr->op.arg;
|
|
}
|
|
_PyExecutorArray *old = code->co_executors;
|
|
int size = 0;
|
|
int capacity = 0;
|
|
if (old != NULL) {
|
|
size = old->size;
|
|
capacity = old->capacity;
|
|
assert(size < MAX_EXECUTORS_SIZE);
|
|
}
|
|
assert(size <= capacity);
|
|
if (size == capacity) {
|
|
/* Array is full. Grow array */
|
|
int new_capacity = capacity ? capacity * 2 : 4;
|
|
_PyExecutorArray *new = PyMem_Realloc(
|
|
old,
|
|
offsetof(_PyExecutorArray, executors) +
|
|
new_capacity * sizeof(_PyExecutorObject *));
|
|
if (new == NULL) {
|
|
return -1;
|
|
}
|
|
new->capacity = new_capacity;
|
|
new->size = size;
|
|
code->co_executors = new;
|
|
}
|
|
assert(size < code->co_executors->capacity);
|
|
return size;
|
|
}
|
|
|
|
static void
|
|
insert_executor(PyCodeObject *code, _Py_CODEUNIT *instr, int index, _PyExecutorObject *executor)
|
|
{
|
|
Py_INCREF(executor);
|
|
if (instr->op.code == ENTER_EXECUTOR) {
|
|
assert(index == instr->op.arg);
|
|
_Py_ExecutorDetach(code->co_executors->executors[index]);
|
|
}
|
|
else {
|
|
assert(code->co_executors->size == index);
|
|
assert(code->co_executors->capacity > index);
|
|
code->co_executors->size++;
|
|
}
|
|
executor->vm_data.opcode = instr->op.code;
|
|
executor->vm_data.oparg = instr->op.arg;
|
|
executor->vm_data.code = code;
|
|
executor->vm_data.index = (int)(instr - _PyCode_CODE(code));
|
|
code->co_executors->executors[index] = executor;
|
|
assert(index < MAX_EXECUTORS_SIZE);
|
|
instr->op.code = ENTER_EXECUTOR;
|
|
instr->op.arg = index;
|
|
}
|
|
|
|
|
|
static int
|
|
never_optimize(
|
|
_PyOptimizerObject* self,
|
|
_PyInterpreterFrame *frame,
|
|
_Py_CODEUNIT *instr,
|
|
_PyExecutorObject **exec,
|
|
int Py_UNUSED(stack_entries))
|
|
{
|
|
// This may be called if the optimizer is reset
|
|
return 0;
|
|
}
|
|
|
|
PyTypeObject _PyDefaultOptimizer_Type = {
|
|
PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
|
.tp_name = "noop_optimizer",
|
|
.tp_basicsize = sizeof(_PyOptimizerObject),
|
|
.tp_itemsize = 0,
|
|
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
|
|
};
|
|
|
|
static _PyOptimizerObject _PyOptimizer_Default = {
|
|
PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type)
|
|
.optimize = never_optimize,
|
|
};
|
|
|
|
_PyOptimizerObject *
|
|
_Py_GetOptimizer(void)
|
|
{
|
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
|
if (interp->optimizer == &_PyOptimizer_Default) {
|
|
return NULL;
|
|
}
|
|
Py_INCREF(interp->optimizer);
|
|
return interp->optimizer;
|
|
}
|
|
|
|
static _PyExecutorObject *
|
|
make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies);
|
|
|
|
static int
|
|
init_cold_exit_executor(_PyExecutorObject *executor, int oparg);
|
|
|
|
/* It is impossible for the number of exits to reach 1/4 of the total length,
|
|
* as the number of exits cannot reach 1/3 of the number of non-exits, due to
|
|
* the presence of CHECK_VALIDITY checks and instructions to produce the values
|
|
* being checked in exits. */
|
|
#define COLD_EXIT_COUNT (UOP_MAX_TRACE_LENGTH/4)
|
|
|
|
static int cold_exits_initialized = 0;
|
|
static _PyExecutorObject COLD_EXITS[COLD_EXIT_COUNT] = { 0 };
|
|
|
|
static const _PyBloomFilter EMPTY_FILTER = { 0 };
|
|
|
|
_PyOptimizerObject *
|
|
_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer)
|
|
{
|
|
if (optimizer == NULL) {
|
|
optimizer = &_PyOptimizer_Default;
|
|
}
|
|
else if (cold_exits_initialized == 0) {
|
|
cold_exits_initialized = 1;
|
|
for (int i = 0; i < COLD_EXIT_COUNT; i++) {
|
|
if (init_cold_exit_executor(&COLD_EXITS[i], i)) {
|
|
return NULL;
|
|
}
|
|
}
|
|
}
|
|
_PyOptimizerObject *old = interp->optimizer;
|
|
if (old == NULL) {
|
|
old = &_PyOptimizer_Default;
|
|
}
|
|
Py_INCREF(optimizer);
|
|
interp->optimizer = optimizer;
|
|
return old;
|
|
}
|
|
|
|
int
|
|
_Py_SetTier2Optimizer(_PyOptimizerObject *optimizer)
|
|
{
|
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
|
_PyOptimizerObject *old = _Py_SetOptimizer(interp, optimizer);
|
|
Py_XDECREF(old);
|
|
return old == NULL ? -1 : 0;
|
|
}
|
|
|
|
/* Returns 1 if optimized, 0 if not optimized, and -1 for an error.
|
|
* If optimized, *executor_ptr contains a new reference to the executor
|
|
*/
|
|
int
|
|
_PyOptimizer_Optimize(
|
|
_PyInterpreterFrame *frame, _Py_CODEUNIT *start,
|
|
_PyStackRef *stack_pointer, _PyExecutorObject **executor_ptr)
|
|
{
|
|
PyCodeObject *code = _PyFrame_GetCode(frame);
|
|
assert(PyCode_Check(code));
|
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
|
if (!has_space_for_executor(code, start)) {
|
|
return 0;
|
|
}
|
|
_PyOptimizerObject *opt = interp->optimizer;
|
|
int err = opt->optimize(opt, frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)));
|
|
if (err <= 0) {
|
|
return err;
|
|
}
|
|
assert(*executor_ptr != NULL);
|
|
int index = get_index_for_executor(code, start);
|
|
if (index < 0) {
|
|
/* Out of memory. Don't raise and assume that the
|
|
* error will show up elsewhere.
|
|
*
|
|
* If an optimizer has already produced an executor,
|
|
* it might get confused by the executor disappearing,
|
|
* but there is not much we can do about that here. */
|
|
Py_DECREF(*executor_ptr);
|
|
return 0;
|
|
}
|
|
insert_executor(code, start, index, *executor_ptr);
|
|
assert((*executor_ptr)->vm_data.valid);
|
|
return 1;
|
|
}
|
|
|
|
_PyExecutorObject *
|
|
_Py_GetExecutor(PyCodeObject *code, int offset)
|
|
{
|
|
int code_len = (int)Py_SIZE(code);
|
|
for (int i = 0 ; i < code_len;) {
|
|
if (_PyCode_CODE(code)[i].op.code == ENTER_EXECUTOR && i*2 == offset) {
|
|
int oparg = _PyCode_CODE(code)[i].op.arg;
|
|
_PyExecutorObject *res = code->co_executors->executors[oparg];
|
|
Py_INCREF(res);
|
|
return res;
|
|
}
|
|
i += _PyInstruction_GetLength(code, i);
|
|
}
|
|
PyErr_SetString(PyExc_ValueError, "no executor at given byte offset");
|
|
return NULL;
|
|
}
|
|
|
|
/* Method `is_valid()`: return the executor's vm_data.valid flag as a bool. */
static PyObject *
is_valid(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    _PyExecutorObject *executor = (_PyExecutorObject *)self;
    return PyBool_FromLong(executor->vm_data.valid);
}
|
|
|
|
/* Method `get_opcode()`: return the opcode recorded in the executor's
 * vm_data as an int. */
static PyObject *
get_opcode(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    _PyExecutorObject *executor = (_PyExecutorObject *)self;
    return PyLong_FromUnsignedLong(executor->vm_data.opcode);
}
|
|
|
|
/* Method `get_oparg()`: return the oparg recorded in the executor's
 * vm_data as an int. */
static PyObject *
get_oparg(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    _PyExecutorObject *executor = (_PyExecutorObject *)self;
    return PyLong_FromUnsignedLong(executor->vm_data.oparg);
}
|
|
|
|
static PyMethodDef executor_methods[] = {
|
|
{ "is_valid", is_valid, METH_NOARGS, NULL },
|
|
{ "get_opcode", get_opcode, METH_NOARGS, NULL },
|
|
{ "get_oparg", get_oparg, METH_NOARGS, NULL },
|
|
{ NULL, NULL },
|
|
};
|
|
|
|
///////////////////// Experimental UOp Optimizer /////////////////////
|
|
|
|
/* Forward declarations: executor_clear() is the uop executor's tp_clear and
 * unlink_executor() is called from uop_dealloc(). */
static int
executor_clear(_PyExecutorObject *executor);

static void
unlink_executor(_PyExecutorObject *executor);
|
|
|
|
static void
|
|
uop_dealloc(_PyExecutorObject *self) {
|
|
_PyObject_GC_UNTRACK(self);
|
|
assert(self->vm_data.code == NULL);
|
|
unlink_executor(self);
|
|
#ifdef _Py_JIT
|
|
_PyJIT_Free(self);
|
|
#endif
|
|
PyObject_GC_Del(self);
|
|
}
|
|
|
|
const char *
|
|
_PyUOpName(int index)
|
|
{
|
|
if (index < 0 || index > MAX_UOP_ID) {
|
|
return NULL;
|
|
}
|
|
return _PyOpcode_uop_name[index];
|
|
}
|
|
|
|
#ifdef Py_DEBUG
|
|
void
|
|
_PyUOpPrint(const _PyUOpInstruction *uop)
|
|
{
|
|
const char *name = _PyUOpName(uop->opcode);
|
|
if (name == NULL) {
|
|
printf("<uop %d>", uop->opcode);
|
|
}
|
|
else {
|
|
printf("%s", name);
|
|
}
|
|
switch(uop->format) {
|
|
case UOP_FORMAT_TARGET:
|
|
printf(" (%d, target=%d, operand=%#" PRIx64,
|
|
uop->oparg,
|
|
uop->target,
|
|
(uint64_t)uop->operand);
|
|
break;
|
|
case UOP_FORMAT_JUMP:
|
|
printf(" (%d, jump_target=%d, operand=%#" PRIx64,
|
|
uop->oparg,
|
|
uop->jump_target,
|
|
(uint64_t)uop->operand);
|
|
break;
|
|
case UOP_FORMAT_EXIT:
|
|
printf(" (%d, exit_index=%d, operand=%#" PRIx64,
|
|
uop->oparg,
|
|
uop->exit_index,
|
|
(uint64_t)uop->operand);
|
|
break;
|
|
default:
|
|
printf(" (%d, Unknown format)", uop->oparg);
|
|
}
|
|
if (_PyUop_Flags[uop->opcode] & HAS_ERROR_FLAG) {
|
|
printf(", error_target=%d", uop->error_target);
|
|
}
|
|
|
|
printf(")");
|
|
}
|
|
#endif
|
|
|
|
static Py_ssize_t
|
|
uop_len(_PyExecutorObject *self)
|
|
{
|
|
return self->code_size;
|
|
}
|
|
|
|
static PyObject *
|
|
uop_item(_PyExecutorObject *self, Py_ssize_t index)
|
|
{
|
|
Py_ssize_t len = uop_len(self);
|
|
if (index < 0 || index >= len) {
|
|
PyErr_SetNone(PyExc_IndexError);
|
|
return NULL;
|
|
}
|
|
const char *name = _PyUOpName(self->trace[index].opcode);
|
|
if (name == NULL) {
|
|
name = "<nil>";
|
|
}
|
|
PyObject *oname = _PyUnicode_FromASCII(name, strlen(name));
|
|
if (oname == NULL) {
|
|
return NULL;
|
|
}
|
|
PyObject *oparg = PyLong_FromUnsignedLong(self->trace[index].oparg);
|
|
if (oparg == NULL) {
|
|
Py_DECREF(oname);
|
|
return NULL;
|
|
}
|
|
PyObject *target = PyLong_FromUnsignedLong(self->trace[index].target);
|
|
if (oparg == NULL) {
|
|
Py_DECREF(oparg);
|
|
Py_DECREF(oname);
|
|
return NULL;
|
|
}
|
|
PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand);
|
|
if (operand == NULL) {
|
|
Py_DECREF(target);
|
|
Py_DECREF(oparg);
|
|
Py_DECREF(oname);
|
|
return NULL;
|
|
}
|
|
PyObject *args[4] = { oname, oparg, target, operand };
|
|
return _PyTuple_FromArraySteal(args, 4);
|
|
}
|
|
|
|
PySequenceMethods uop_as_sequence = {
|
|
.sq_length = (lenfunc)uop_len,
|
|
.sq_item = (ssizeargfunc)uop_item,
|
|
};
|
|
|
|
static int
|
|
executor_traverse(PyObject *o, visitproc visit, void *arg)
|
|
{
|
|
_PyExecutorObject *executor = (_PyExecutorObject *)o;
|
|
for (uint32_t i = 0; i < executor->exit_count; i++) {
|
|
Py_VISIT(executor->exits[i].executor);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Method `get_jit_code()`: return the executor's JIT-compiled code as bytes,
 * or None if it has none.  Raises RuntimeError when built without the JIT. */
static PyObject *
get_jit_code(PyObject *self, PyObject *Py_UNUSED(ignored))
{
#ifdef _Py_JIT
    _PyExecutorObject *executor = (_PyExecutorObject *)self;
    if (executor->jit_code != NULL && executor->jit_size != 0) {
        return PyBytes_FromStringAndSize(executor->jit_code, executor->jit_size);
    }
    Py_RETURN_NONE;
#else
    PyErr_SetString(PyExc_RuntimeError, "JIT support not enabled.");
    return NULL;
#endif
}
|
|
|
|
static PyMethodDef uop_executor_methods[] = {
|
|
{ "is_valid", is_valid, METH_NOARGS, NULL },
|
|
{ "get_jit_code", get_jit_code, METH_NOARGS, NULL},
|
|
{ "get_opcode", get_opcode, METH_NOARGS, NULL },
|
|
{ "get_oparg", get_oparg, METH_NOARGS, NULL },
|
|
{ NULL, NULL },
|
|
};
|
|
|
|
static int
|
|
executor_is_gc(PyObject *o)
|
|
{
|
|
return !_Py_IsImmortal(o);
|
|
}
|
|
|
|
PyTypeObject _PyUOpExecutor_Type = {
|
|
PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
|
.tp_name = "uop_executor",
|
|
.tp_basicsize = offsetof(_PyExecutorObject, exits),
|
|
.tp_itemsize = 1,
|
|
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC,
|
|
.tp_dealloc = (destructor)uop_dealloc,
|
|
.tp_as_sequence = &uop_as_sequence,
|
|
.tp_methods = uop_executor_methods,
|
|
.tp_traverse = executor_traverse,
|
|
.tp_clear = (inquiry)executor_clear,
|
|
.tp_is_gc = executor_is_gc,
|
|
};
|
|
|
|
/* TO DO -- Generate these tables */
|
|
static const uint16_t
|
|
_PyUOp_Replacements[MAX_UOP_ID + 1] = {
|
|
[_ITER_JUMP_RANGE] = _GUARD_NOT_EXHAUSTED_RANGE,
|
|
[_ITER_JUMP_LIST] = _GUARD_NOT_EXHAUSTED_LIST,
|
|
[_ITER_JUMP_TUPLE] = _GUARD_NOT_EXHAUSTED_TUPLE,
|
|
[_FOR_ITER] = _FOR_ITER_TIER_TWO,
|
|
};
|
|
|
|
static const uint8_t
|
|
is_for_iter_test[MAX_UOP_ID + 1] = {
|
|
[_GUARD_NOT_EXHAUSTED_RANGE] = 1,
|
|
[_GUARD_NOT_EXHAUSTED_LIST] = 1,
|
|
[_GUARD_NOT_EXHAUSTED_TUPLE] = 1,
|
|
[_FOR_ITER_TIER_TWO] = 1,
|
|
};
|
|
|
|
static const uint16_t
|
|
BRANCH_TO_GUARD[4][2] = {
|
|
[POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_TRUE_POP,
|
|
[POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_FALSE_POP,
|
|
[POP_JUMP_IF_TRUE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_FALSE_POP,
|
|
[POP_JUMP_IF_TRUE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_TRUE_POP,
|
|
[POP_JUMP_IF_NONE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_NOT_NONE_POP,
|
|
[POP_JUMP_IF_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NONE_POP,
|
|
[POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_NONE_POP,
|
|
[POP_JUMP_IF_NOT_NONE - POP_JUMP_IF_FALSE][1] = _GUARD_IS_NOT_NONE_POP,
|
|
};
|
|
|
|
|
|
/* Trace projection starts with a confidence of CONFIDENCE_RANGE and scales
 * it down at every conditional branch; projection stops once it drops below
 * CONFIDENCE_CUTOFF. */
#define CONFIDENCE_RANGE 1000
|
|
#define CONFIDENCE_CUTOFF 333
|
|
|
|
/* DPRINTF(level, fmt, ...): in debug builds, printf() the message when the
 * local variable `lltrace` is at least `level`; in other builds, do nothing
 * (the arguments are not evaluated).
 *
 * Both variants expand to a single do/while(0) statement, so the macro is
 * safe as the body of an unbraced if/else.  Callers must supply the
 * trailing semicolon. */
#ifdef Py_DEBUG
#define DPRINTF(level, ...) \
    do { \
        if (lltrace >= (level)) { printf(__VA_ARGS__); } \
    } while (0)
#else
#define DPRINTF(level, ...) do { } while (0)
#endif
|
|
|
|
|
|
static inline int
|
|
add_to_trace(
|
|
_PyUOpInstruction *trace,
|
|
int trace_length,
|
|
uint16_t opcode,
|
|
uint16_t oparg,
|
|
uint64_t operand,
|
|
uint32_t target)
|
|
{
|
|
trace[trace_length].opcode = opcode;
|
|
trace[trace_length].format = UOP_FORMAT_TARGET;
|
|
trace[trace_length].target = target;
|
|
trace[trace_length].oparg = oparg;
|
|
trace[trace_length].operand = operand;
|
|
return trace_length + 1;
|
|
}
|
|
|
|
/* ADD_TO_TRACE(opcode, oparg, operand, target): append one uop to the local
 * `trace` buffer and advance the local `trace_length`, asserting that the
 * buffer still has room below `max_length`.  In debug builds the new uop is
 * also printed when lltrace >= 2.
 *
 * Wrapped in do/while(0) so the whole expansion behaves as one statement
 * (e.g. as the body of an unbraced `if`).  Callers must supply the trailing
 * semicolon. */
#ifdef Py_DEBUG
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
    do { \
        assert(trace_length < max_length); \
        trace_length = add_to_trace(trace, trace_length, (OPCODE), (OPARG), (OPERAND), (TARGET)); \
        if (lltrace >= 2) { \
            printf("%4d ADD_TO_TRACE: ", trace_length); \
            _PyUOpPrint(&trace[trace_length-1]); \
            printf("\n"); \
        } \
    } while (0)
#else
#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
    do { \
        assert(trace_length < max_length); \
        trace_length = add_to_trace(trace, trace_length, (OPCODE), (OPARG), (OPERAND), (TARGET)); \
    } while (0)
#endif
|
|
|
|
/* Index, in code units, of instruction pointer INSTR from the start of
 * CODE's co_code_adaptive array. */
#define INSTR_IP(INSTR, CODE) \
    ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))
|
|
|
|
// Reserve space for n uops: if fewer than n slots remain below max_length,
// record the trace_too_long stat and jump to the enclosing function's
// `done` label.  Wrapped in do/while(0) so it acts as a single statement;
// callers must supply the trailing semicolon.
#define RESERVE_RAW(n, opname) \
    do { \
        if (trace_length + (n) > max_length) { \
            DPRINTF(2, "No room for %s (need %d, got %d)\n", \
                    (opname), (n), max_length - trace_length); \
            OPT_STAT_INC(trace_too_long); \
            goto done; \
        } \
    } while (0)

// Reserve space for N uops, plus 3 for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE
#define RESERVE(needed) RESERVE_RAW((needed) + 3, _PyUOpName(opcode))
|
|
|
|
// Trace stack operations (used by _PUSH_FRAME, _RETURN_VALUE).
// Both macros work on the locals trace_stack, trace_stack_depth, func, code
// and instr of the enclosing function.  They expand to several statements,
// so use them only as a complete statement inside a braced block.

// Save (func, code, instr) on the trace stack.  On overflow the trace is
// abandoned: trace_length is reset to 0 and control jumps to `done`.
#define TRACE_STACK_PUSH() \
    if (trace_stack_depth >= TRACE_STACK_SIZE) { \
        DPRINTF(2, "Trace stack overflow\n"); \
        OPT_STAT_INC(trace_stack_overflow); \
        trace_length = 0; \
        goto done; \
    } \
    assert(func == NULL || func->func_code == (PyObject *)code); \
    trace_stack[trace_stack_depth].instr = instr; \
    trace_stack[trace_stack_depth].code = code; \
    trace_stack[trace_stack_depth].func = func; \
    trace_stack_depth++;

// Restore (func, code, instr) from the top of the trace stack; popping an
// empty stack is a fatal error.
#define TRACE_STACK_POP() \
    if (trace_stack_depth <= 0) { \
        Py_FatalError("Trace stack underflow\n"); \
    } \
    trace_stack_depth--; \
    instr = trace_stack[trace_stack_depth].instr; \
    code = trace_stack[trace_stack_depth].code; \
    func = trace_stack[trace_stack_depth].func; \
    assert(func == NULL || func->func_code == (PyObject *)code);
|
|
|
|
/* Project a linear trace of uops from the bytecode starting at `instr` in
 * the code object of `frame`, writing the uops into `trace` (which has room
 * for `buffer_size` entries).
 *
 * Conditional branches are followed in the direction suggested by the bit
 * count of their cache counter, lowering `confidence` each time; projection
 * stops once confidence drops below CONFIDENCE_CUTOFF.  Calls whose code
 * object can be found from the cached function version are traced into,
 * using a local stack of at most TRACE_STACK_SIZE saved positions.  The
 * initial code object and every code object traced into are added to
 * `dependencies`.
 *
 * Returns the length of the trace on success,
 * 0 if it failed to produce a worthwhile trace,
 * and -1 on an error (no path in this function currently returns -1).
 */
|
|
static int
|
|
translate_bytecode_to_trace(
|
|
_PyInterpreterFrame *frame,
|
|
_Py_CODEUNIT *instr,
|
|
_PyUOpInstruction *trace,
|
|
int buffer_size,
|
|
_PyBloomFilter *dependencies)
|
|
{
|
|
/* Stays true until the first instruction has been handled; a trace that never clears it is rejected at `done`. */
bool progress_needed = true;
|
|
PyCodeObject *code = _PyFrame_GetCode(frame);
|
|
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
|
|
assert(PyFunction_Check(func));
|
|
PyCodeObject *initial_code = code;
|
|
_Py_BloomFilter_Add(dependencies, initial_code);
|
|
_Py_CODEUNIT *initial_instr = instr;
|
|
int trace_length = 0;
|
|
// Leave space for possible trailing _EXIT_TRACE
|
|
int max_length = buffer_size-2;
|
|
/* Saved (func, code, instr) positions, pushed by TRACE_STACK_PUSH() and restored by TRACE_STACK_POP(). */
struct {
|
|
PyFunctionObject *func;
|
|
PyCodeObject *code;
|
|
_Py_CODEUNIT *instr;
|
|
} trace_stack[TRACE_STACK_SIZE];
|
|
int trace_stack_depth = 0;
|
|
int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions
|
|
|
|
#ifdef Py_DEBUG
|
|
/* Debug verbosity: the first character of PYTHON_LLTRACE, read as a digit. */
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
|
|
int lltrace = 0;
|
|
if (python_lltrace != NULL && *python_lltrace >= '0') {
|
|
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
|
|
}
|
|
#endif
|
|
|
|
DPRINTF(2,
|
|
"Optimizing %s (%s:%d) at byte offset %d\n",
|
|
PyUnicode_AsUTF8(code->co_qualname),
|
|
PyUnicode_AsUTF8(code->co_filename),
|
|
code->co_firstlineno,
|
|
2 * INSTR_IP(initial_instr, code));
|
|
ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code));
|
|
uint32_t target = 0;
|
|
|
|
top: // Jump here after _PUSH_FRAME or likely branches
|
|
for (;;) {
|
|
target = INSTR_IP(instr, code);
|
|
// Need space for _DEOPT
|
|
max_length--;
|
|
|
|
uint32_t opcode = instr->op.code;
|
|
uint32_t oparg = instr->op.arg;
|
|
|
|
DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg);
|
|
|
|
/* Instruction already replaced by ENTER_EXECUTOR: use the opcode/oparg saved in its executor. */
if (opcode == ENTER_EXECUTOR) {
|
|
assert(oparg < 256);
|
|
_PyExecutorObject *executor = code->co_executors->executors[oparg];
|
|
opcode = executor->vm_data.opcode;
|
|
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
|
|
oparg = executor->vm_data.oparg;
|
|
}
|
|
|
|
/* Fold a single EXTENDED_ARG prefix into oparg; a second consecutive prefix ends the trace. */
if (opcode == EXTENDED_ARG) {
|
|
instr++;
|
|
opcode = instr->op.code;
|
|
oparg = (oparg << 8) | instr->op.arg;
|
|
if (opcode == EXTENDED_ARG) {
|
|
instr--;
|
|
goto done;
|
|
}
|
|
}
|
|
assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG);
|
|
RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP");
|
|
ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target);
|
|
|
|
/* Special case the first instruction,
|
|
* so that we can guarantee forward progress */
|
|
if (progress_needed) {
|
|
progress_needed = false;
|
|
if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT) {
|
|
instr += 1 + _PyOpcode_Caches[opcode] - (int32_t)oparg;
|
|
initial_instr = instr;
|
|
if (opcode == JUMP_BACKWARD) {
|
|
ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target);
|
|
}
|
|
continue;
|
|
}
|
|
else {
|
|
if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
|
|
opcode = _PyOpcode_Deopt[opcode];
|
|
}
|
|
assert(!OPCODE_HAS_EXIT(opcode));
|
|
assert(!OPCODE_HAS_DEOPT(opcode));
|
|
}
|
|
}
|
|
|
|
if (OPCODE_HAS_EXIT(opcode)) {
|
|
// Make space for exit code
|
|
max_length--;
|
|
}
|
|
if (OPCODE_HAS_ERROR(opcode)) {
|
|
// Make space for error code
|
|
max_length--;
|
|
}
|
|
switch (opcode) {
|
|
case POP_JUMP_IF_NONE:
|
|
case POP_JUMP_IF_NOT_NONE:
|
|
case POP_JUMP_IF_FALSE:
|
|
case POP_JUMP_IF_TRUE:
|
|
{
|
|
RESERVE(1);
|
|
int counter = instr[1].cache;
|
|
int bitcount = _Py_popcount32(counter);
|
|
int jump_likely = bitcount > 8;
|
|
/* If bitcount is 8 (half the jumps were taken), adjust confidence by 50%.
|
|
If it's 16 or 0 (all or none were taken), adjust by 10%
|
|
(since the future is still somewhat uncertain).
|
|
For values in between, adjust proportionally. */
|
|
if (jump_likely) {
|
|
confidence = confidence * (bitcount + 2) / 20;
|
|
}
|
|
else {
|
|
confidence = confidence * (18 - bitcount) / 20;
|
|
}
|
|
uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely];
|
|
DPRINTF(2, "%d: %s(%d): counter=%04x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n",
|
|
target, _PyOpcode_OpName[opcode], oparg,
|
|
counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode));
|
|
if (confidence < CONFIDENCE_CUTOFF) {
|
|
DPRINTF(2, "Confidence too low (%d < %d)\n", confidence, CONFIDENCE_CUTOFF);
|
|
OPT_STAT_INC(low_confidence);
|
|
goto done;
|
|
}
|
|
_Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
|
|
_Py_CODEUNIT *target_instr = next_instr + oparg;
|
|
/* The guard's target is the path NOT followed: the fall-through when the jump is likely, the jump target otherwise. */
if (jump_likely) {
|
|
DPRINTF(2, "Jump likely (%04x = %d bits), continue at byte offset %d\n",
|
|
instr[1].cache, bitcount, 2 * INSTR_IP(target_instr, code));
|
|
instr = target_instr;
|
|
ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(next_instr, code));
|
|
goto top;
|
|
}
|
|
ADD_TO_TRACE(uopcode, 0, 0, INSTR_IP(target_instr, code));
|
|
break;
|
|
}
|
|
|
|
case JUMP_BACKWARD:
|
|
case JUMP_BACKWARD_NO_INTERRUPT:
|
|
{
|
|
/* NOTE: this local `target` shadows the uint32_t `target` of the enclosing scope. */
_Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[opcode] - (int)oparg;
|
|
if (target == initial_instr) {
|
|
/* We have looped round to the start */
|
|
RESERVE(1);
|
|
ADD_TO_TRACE(_JUMP_TO_TOP, 0, 0, 0);
|
|
}
|
|
else {
|
|
OPT_STAT_INC(inner_loop);
|
|
DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n");
|
|
}
|
|
goto done;
|
|
}
|
|
|
|
case JUMP_FORWARD:
|
|
{
|
|
RESERVE(0);
|
|
// This will emit two _SET_IP instructions; leave it to the optimizer
|
|
instr += oparg;
|
|
break;
|
|
}
|
|
|
|
case RESUME:
|
|
/* Use a special tier 2 version of RESUME_CHECK to allow traces to
|
|
* start with RESUME_CHECK */
|
|
ADD_TO_TRACE(_TIER2_RESUME_CHECK, 0, 0, target);
|
|
break;
|
|
|
|
default:
|
|
{
|
|
const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
|
|
if (expansion->nuops > 0) {
|
|
// Reserve space for nuops (+ _SET_IP + _EXIT_TRACE)
|
|
int nuops = expansion->nuops;
|
|
RESERVE(nuops + 1); /* One extra for exit */
|
|
int16_t last_op = expansion->uops[nuops-1].uop;
|
|
if (last_op == _RETURN_VALUE || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) {
|
|
// Check for trace stack underflow now:
|
|
// We can't bail e.g. in the middle of
|
|
// LOAD_CONST + _RETURN_VALUE.
|
|
if (trace_stack_depth == 0) {
|
|
DPRINTF(2, "Trace stack underflow\n");
|
|
OPT_STAT_INC(trace_stack_underflow);
|
|
goto done;
|
|
}
|
|
}
|
|
uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
|
|
for (int i = 0; i < nuops; i++) {
|
|
oparg = orig_oparg;
|
|
uint32_t uop = expansion->uops[i].uop;
|
|
uint64_t operand = 0;
|
|
// Add one to account for the actual opcode/oparg pair:
|
|
int offset = expansion->uops[i].offset + 1;
|
|
/* The expansion's `size` field selects how this uop's oparg/operand is derived. */
switch (expansion->uops[i].size) {
|
|
case OPARG_FULL:
|
|
assert(opcode != JUMP_BACKWARD_NO_INTERRUPT && opcode != JUMP_BACKWARD);
|
|
break;
|
|
case OPARG_CACHE_1:
|
|
operand = read_u16(&instr[offset].cache);
|
|
break;
|
|
case OPARG_CACHE_2:
|
|
operand = read_u32(&instr[offset].cache);
|
|
break;
|
|
case OPARG_CACHE_4:
|
|
operand = read_u64(&instr[offset].cache);
|
|
break;
|
|
case OPARG_TOP: // First half of super-instr
|
|
oparg = orig_oparg >> 4;
|
|
break;
|
|
case OPARG_BOTTOM: // Second half of super-instr
|
|
oparg = orig_oparg & 0xF;
|
|
break;
|
|
case OPARG_SAVE_RETURN_OFFSET: // op=_SAVE_RETURN_OFFSET; oparg=return_offset
|
|
oparg = offset;
|
|
assert(uop == _SAVE_RETURN_OFFSET);
|
|
break;
|
|
case OPARG_REPLACED:
|
|
uop = _PyUOp_Replacements[uop];
|
|
assert(uop != 0);
|
|
#ifdef Py_DEBUG
|
|
{
|
|
uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + (oparg > 255);
|
|
uint32_t jump_target = next_inst + oparg;
|
|
assert(base_opcode(code, jump_target) == END_FOR ||
|
|
base_opcode(code, jump_target) == INSTRUMENTED_END_FOR);
|
|
assert(base_opcode(code, jump_target+1) == POP_TOP);
|
|
}
|
|
#endif
|
|
break;
|
|
default:
|
|
fprintf(stderr,
|
|
"opcode=%d, oparg=%d; nuops=%d, i=%d; size=%d, offset=%d\n",
|
|
opcode, oparg, nuops, i,
|
|
expansion->uops[i].size,
|
|
expansion->uops[i].offset);
|
|
Py_FatalError("garbled expansion");
|
|
}
|
|
|
|
if (uop == _RETURN_VALUE || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) {
|
|
TRACE_STACK_POP();
|
|
/* Set the operand to the function or code object returned to,
|
|
* to assist optimization passes. (See _PUSH_FRAME below.)
|
|
*/
|
|
if (func != NULL) {
|
|
operand = (uintptr_t)func;
|
|
}
|
|
else if (code != NULL) {
|
|
operand = (uintptr_t)code | 1;
|
|
}
|
|
else {
|
|
operand = 0;
|
|
}
|
|
ADD_TO_TRACE(uop, oparg, operand, target);
|
|
DPRINTF(2,
|
|
"Returning to %s (%s:%d) at byte offset %d\n",
|
|
PyUnicode_AsUTF8(code->co_qualname),
|
|
PyUnicode_AsUTF8(code->co_filename),
|
|
code->co_firstlineno,
|
|
2 * INSTR_IP(instr, code));
|
|
goto top;
|
|
}
|
|
|
|
if (uop == _PUSH_FRAME) {
|
|
assert(i + 1 == nuops);
|
|
int func_version_offset =
|
|
offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT)
|
|
// Add one to account for the actual opcode/oparg pair:
|
|
+ 1;
|
|
uint32_t func_version = read_u32(&instr[func_version_offset].cache);
|
|
PyCodeObject *new_code = NULL;
|
|
PyFunctionObject *new_func =
|
|
_PyFunction_LookupByVersion(func_version, (PyObject **) &new_code);
|
|
DPRINTF(2, "Function: version=%#x; new_func=%p, new_code=%p\n",
|
|
(int)func_version, new_func, new_code);
|
|
if (new_code != NULL) {
|
|
if (new_code == code) {
|
|
// Recursive call, bail (we could be here forever).
|
|
DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
|
|
PyUnicode_AsUTF8(new_code->co_qualname),
|
|
PyUnicode_AsUTF8(new_code->co_filename),
|
|
new_code->co_firstlineno);
|
|
OPT_STAT_INC(recursive_call);
|
|
ADD_TO_TRACE(uop, oparg, 0, target);
|
|
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
|
|
goto done;
|
|
}
|
|
if (new_code->co_version != func_version) {
|
|
// func.__code__ was updated.
|
|
// Perhaps it may happen again, so don't bother tracing.
|
|
// TODO: Reason about this -- is it better to bail or not?
|
|
DPRINTF(2, "Bailing because co_version != func_version\n");
|
|
ADD_TO_TRACE(uop, oparg, 0, target);
|
|
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0);
|
|
goto done;
|
|
}
|
|
if (opcode == FOR_ITER_GEN) {
|
|
DPRINTF(2, "Bailing due to dynamic target\n");
|
|
ADD_TO_TRACE(uop, oparg, 0, target);
|
|
ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0);
|
|
goto done;
|
|
}
|
|
// Increment IP to the return address
|
|
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
|
|
TRACE_STACK_PUSH();
|
|
_Py_BloomFilter_Add(dependencies, new_code);
|
|
/* Set the operand to the callee's function or code object,
|
|
* to assist optimization passes.
|
|
* We prefer setting it to the function (for remove_globals())
|
|
* but if that's not available but the code is available,
|
|
* use the code, setting the low bit so the optimizer knows.
|
|
*/
|
|
if (new_func != NULL) {
|
|
operand = (uintptr_t)new_func;
|
|
}
|
|
else if (new_code != NULL) {
|
|
operand = (uintptr_t)new_code | 1;
|
|
}
|
|
else {
|
|
operand = 0;
|
|
}
|
|
ADD_TO_TRACE(uop, oparg, operand, target);
|
|
code = new_code;
|
|
func = new_func;
|
|
instr = _PyCode_CODE(code);
|
|
DPRINTF(2,
|
|
"Continuing in %s (%s:%d) at byte offset %d\n",
|
|
PyUnicode_AsUTF8(code->co_qualname),
|
|
PyUnicode_AsUTF8(code->co_filename),
|
|
code->co_firstlineno,
|
|
2 * INSTR_IP(instr, code));
|
|
goto top;
|
|
}
|
|
DPRINTF(2, "Bail, new_code == NULL\n");
|
|
ADD_TO_TRACE(uop, oparg, 0, target);
|
|
ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0);
|
|
goto done;
|
|
}
|
|
|
|
// All other instructions
|
|
ADD_TO_TRACE(uop, oparg, operand, target);
|
|
}
|
|
break;
|
|
}
|
|
DPRINTF(2, "Unsupported opcode %s\n", _PyOpcode_OpName[opcode]);
|
|
OPT_UNSUPPORTED_OPCODE(opcode);
|
|
goto done; // Break out of loop
|
|
} // End default
|
|
|
|
} // End switch (opcode)
|
|
|
|
instr++;
|
|
// Add cache size for opcode
|
|
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
|
|
} // End for (;;)
|
|
|
|
done:
|
|
/* Pop any positions still saved on the trace stack; afterwards `code` is expected to be the initial code object again. */
while (trace_stack_depth > 0) {
|
|
TRACE_STACK_POP();
|
|
}
|
|
assert(code == initial_code);
|
|
// Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
|
|
if (progress_needed || trace_length < 5) {
|
|
OPT_STAT_INC(trace_too_short);
|
|
DPRINTF(2,
|
|
"No trace for %s (%s:%d) at byte offset %d (%s)\n",
|
|
PyUnicode_AsUTF8(code->co_qualname),
|
|
PyUnicode_AsUTF8(code->co_filename),
|
|
code->co_firstlineno,
|
|
2 * INSTR_IP(initial_instr, code),
|
|
progress_needed ? "no progress" : "too short");
|
|
return 0;
|
|
}
|
|
/* A trace that does not loop back to its top is terminated with _EXIT_TRACE. */
if (trace[trace_length-1].opcode != _JUMP_TO_TOP) {
|
|
ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
|
|
}
|
|
DPRINTF(1,
|
|
"Created a proto-trace for %s (%s:%d) at byte offset %d -- length %d\n",
|
|
PyUnicode_AsUTF8(code->co_qualname),
|
|
PyUnicode_AsUTF8(code->co_filename),
|
|
code->co_firstlineno,
|
|
2 * INSTR_IP(initial_instr, code),
|
|
trace_length);
|
|
OPT_HIST(trace_length, trace_length_hist);
|
|
return trace_length;
|
|
}
|
|
|
|
#undef RESERVE
|
|
#undef RESERVE_RAW
|
|
#undef INSTR_IP
|
|
#undef ADD_TO_TRACE
|
|
#undef DPRINTF
|
|
|
|
/* Bit-set helpers over an array of 32-bit words: bit number `bit` lives in
 * word bit>>5 at position bit&31.
 *
 * The mask is built from an unsigned 32-bit one, so selecting position 31
 * does not shift into the sign bit of an int (undefined behaviour), and the
 * `array` argument is parenthesized so any pointer expression may be passed.
 * Assumes the array elements are 32 bits wide, as the >>5 / &31 indexing
 * implies -- confirm at the call sites. */
#define UNSET_BIT(array, bit) ((array)[(bit)>>5] &= ~((uint32_t)1<<((bit)&31)))
#define SET_BIT(array, bit) ((array)[(bit)>>5] |= ((uint32_t)1<<((bit)&31)))
#define BIT_IS_SET(array, bit) ((array)[(bit)>>5] & ((uint32_t)1<<((bit)&31)))
|
|
|
|
/* Count the number of unused uops and exits
|
|
*/
|
|
static int
|
|
count_exits(_PyUOpInstruction *buffer, int length)
|
|
{
|
|
int exit_count = 0;
|
|
for (int i = 0; i < length; i++) {
|
|
int opcode = buffer[i].opcode;
|
|
if (opcode == _EXIT_TRACE || opcode == _DYNAMIC_EXIT) {
|
|
exit_count++;
|
|
}
|
|
}
|
|
return exit_count;
|
|
}
|
|
|
|
/* Overwrite *inst with a uop of the given opcode in UOP_FORMAT_TARGET form,
 * with `target` as its target and a zero oparg and operand. */
static void
make_exit(_PyUOpInstruction *inst, int opcode, int target)
{
    inst->opcode = opcode;
    inst->format = UOP_FORMAT_TARGET;
    inst->target = target;
    inst->oparg = 0;
    inst->operand = 0;
}
|
|
|
|
/* Convert implicit exits, errors and deopts
|
|
* into explicit ones. */
|
|
static int
|
|
prepare_for_execution(_PyUOpInstruction *buffer, int length)
|
|
{
|
|
int32_t current_jump = -1;
|
|
int32_t current_jump_target = -1;
|
|
int32_t current_error = -1;
|
|
int32_t current_error_target = -1;
|
|
int32_t current_popped = -1;
|
|
int32_t current_exit_op = -1;
|
|
/* Leaving in NOPs slows down the interpreter and messes up the stats */
|
|
_PyUOpInstruction *copy_to = &buffer[0];
|
|
for (int i = 0; i < length; i++) {
|
|
_PyUOpInstruction *inst = &buffer[i];
|
|
if (inst->opcode != _NOP) {
|
|
if (copy_to != inst) {
|
|
*copy_to = *inst;
|
|
}
|
|
copy_to++;
|
|
}
|
|
}
|
|
length = (int)(copy_to - buffer);
|
|
int next_spare = length;
|
|
for (int i = 0; i < length; i++) {
|
|
_PyUOpInstruction *inst = &buffer[i];
|
|
int opcode = inst->opcode;
|
|
int32_t target = (int32_t)uop_get_target(inst);
|
|
if (_PyUop_Flags[opcode] & (HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) {
|
|
uint16_t exit_op = (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) ?
|
|
_EXIT_TRACE : _DEOPT;
|
|
int32_t jump_target = target;
|
|
if (is_for_iter_test[opcode]) {
|
|
/* Target the POP_TOP immediately after the END_FOR,
|
|
* leaving only the iterator on the stack. */
|
|
int extended_arg = inst->oparg > 255;
|
|
int32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended_arg;
|
|
jump_target = next_inst + inst->oparg + 1;
|
|
}
|
|
if (jump_target != current_jump_target || current_exit_op != exit_op) {
|
|
make_exit(&buffer[next_spare], exit_op, jump_target);
|
|
current_exit_op = exit_op;
|
|
current_jump_target = jump_target;
|
|
current_jump = next_spare;
|
|
next_spare++;
|
|
}
|
|
buffer[i].jump_target = current_jump;
|
|
buffer[i].format = UOP_FORMAT_JUMP;
|
|
}
|
|
if (_PyUop_Flags[opcode] & HAS_ERROR_FLAG) {
|
|
int popped = (_PyUop_Flags[opcode] & HAS_ERROR_NO_POP_FLAG) ?
|
|
0 : _PyUop_num_popped(opcode, inst->oparg);
|
|
if (target != current_error_target || popped != current_popped) {
|
|
current_popped = popped;
|
|
current_error = next_spare;
|
|
current_error_target = target;
|
|
make_exit(&buffer[next_spare], _ERROR_POP_N, 0);
|
|
buffer[next_spare].oparg = popped;
|
|
buffer[next_spare].operand = target;
|
|
next_spare++;
|
|
}
|
|
buffer[i].error_target = current_error;
|
|
if (buffer[i].format == UOP_FORMAT_TARGET) {
|
|
buffer[i].format = UOP_FORMAT_JUMP;
|
|
buffer[i].jump_target = 0;
|
|
}
|
|
}
|
|
if (opcode == _JUMP_TO_TOP) {
|
|
assert(buffer[0].opcode == _START_EXECUTOR);
|
|
buffer[i].format = UOP_FORMAT_JUMP;
|
|
buffer[i].jump_target = 1;
|
|
}
|
|
}
|
|
return next_spare;
|
|
}
|
|
|
|
/* Executor side exits */
|
|
|
|
static _PyExecutorObject *
|
|
allocate_executor(int exit_count, int length)
|
|
{
|
|
int size = exit_count*sizeof(_PyExitData) + length*sizeof(_PyUOpInstruction);
|
|
_PyExecutorObject *res = PyObject_GC_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, size);
|
|
if (res == NULL) {
|
|
return NULL;
|
|
}
|
|
res->trace = (_PyUOpInstruction *)(res->exits + exit_count);
|
|
res->code_size = length;
|
|
res->exit_count = exit_count;
|
|
return res;
|
|
}
|
|
|
|
#ifdef Py_DEBUG
|
|
|
|
/* Debug assertion helper: if PRED is false, print the predicate text
 * together with the index `i` (which must be in scope at the point of
 * use) and then trip assert(0).
 *
 * The body is wrapped in do/while(0) so that `CHECK(x);` is exactly one
 * statement and stays correct inside an unbraced if/else.  `i` is cast to
 * int to match the %d conversion regardless of its declared type. */
#define CHECK(PRED) \
    do { \
        if (!(PRED)) { \
            printf(#PRED " at %d\n", (int)(i)); \
            assert(0); \
        } \
    } while (0)
|
|
|
|
static int
|
|
target_unused(int opcode)
|
|
{
|
|
return (_PyUop_Flags[opcode] & (HAS_ERROR_FLAG | HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) == 0;
|
|
}
|
|
|
|
static void
|
|
sanity_check(_PyExecutorObject *executor)
|
|
{
|
|
for (uint32_t i = 0; i < executor->exit_count; i++) {
|
|
_PyExitData *exit = &executor->exits[i];
|
|
CHECK(exit->target < (1 << 25));
|
|
}
|
|
bool ended = false;
|
|
uint32_t i = 0;
|
|
CHECK(executor->trace[0].opcode == _START_EXECUTOR || executor->trace[0].opcode == _COLD_EXIT);
|
|
for (; i < executor->code_size; i++) {
|
|
const _PyUOpInstruction *inst = &executor->trace[i];
|
|
uint16_t opcode = inst->opcode;
|
|
CHECK(opcode <= MAX_UOP_ID);
|
|
CHECK(_PyOpcode_uop_name[opcode] != NULL);
|
|
switch(inst->format) {
|
|
case UOP_FORMAT_TARGET:
|
|
CHECK(target_unused(opcode));
|
|
break;
|
|
case UOP_FORMAT_EXIT:
|
|
CHECK(opcode == _EXIT_TRACE);
|
|
CHECK(inst->exit_index < executor->exit_count);
|
|
break;
|
|
case UOP_FORMAT_JUMP:
|
|
CHECK(inst->jump_target < executor->code_size);
|
|
break;
|
|
case UOP_FORMAT_UNUSED:
|
|
CHECK(0);
|
|
break;
|
|
}
|
|
if (_PyUop_Flags[opcode] & HAS_ERROR_FLAG) {
|
|
CHECK(inst->format == UOP_FORMAT_JUMP);
|
|
CHECK(inst->error_target < executor->code_size);
|
|
}
|
|
if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE || opcode == _COLD_EXIT) {
|
|
ended = true;
|
|
i++;
|
|
break;
|
|
}
|
|
}
|
|
CHECK(ended);
|
|
for (; i < executor->code_size; i++) {
|
|
const _PyUOpInstruction *inst = &executor->trace[i];
|
|
uint16_t opcode = inst->opcode;
|
|
CHECK(
|
|
opcode == _DEOPT ||
|
|
opcode == _EXIT_TRACE ||
|
|
opcode == _ERROR_POP_N);
|
|
if (opcode == _EXIT_TRACE) {
|
|
CHECK(inst->format == UOP_FORMAT_EXIT);
|
|
}
|
|
}
|
|
}
|
|
|
|
#undef CHECK
|
|
#endif
|
|
|
|
/* Makes an executor from a buffer of uops.
|
|
* Account for the buffer having gaps and NOPs by computing a "used"
|
|
* bit vector and only copying the used uops. Here "used" means reachable
|
|
* and not a NOP.
|
|
*/
|
|
static _PyExecutorObject *
|
|
make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies)
|
|
{
|
|
int exit_count = count_exits(buffer, length);
|
|
_PyExecutorObject *executor = allocate_executor(exit_count, length);
|
|
if (executor == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
/* Initialize exits */
|
|
assert(exit_count < COLD_EXIT_COUNT);
|
|
for (int i = 0; i < exit_count; i++) {
|
|
executor->exits[i].executor = &COLD_EXITS[i];
|
|
executor->exits[i].temperature = initial_temperature_backoff_counter();
|
|
}
|
|
int next_exit = exit_count-1;
|
|
_PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
|
|
assert(buffer[0].opcode == _START_EXECUTOR);
|
|
buffer[0].operand = (uint64_t)executor;
|
|
for (int i = length-1; i >= 0; i--) {
|
|
int opcode = buffer[i].opcode;
|
|
dest--;
|
|
*dest = buffer[i];
|
|
assert(opcode != _POP_JUMP_IF_FALSE && opcode != _POP_JUMP_IF_TRUE);
|
|
if (opcode == _EXIT_TRACE) {
|
|
executor->exits[next_exit].target = buffer[i].target;
|
|
dest->exit_index = next_exit;
|
|
dest->format = UOP_FORMAT_EXIT;
|
|
next_exit--;
|
|
}
|
|
if (opcode == _DYNAMIC_EXIT) {
|
|
executor->exits[next_exit].target = 0;
|
|
dest->oparg = next_exit;
|
|
next_exit--;
|
|
}
|
|
}
|
|
assert(next_exit == -1);
|
|
assert(dest == executor->trace);
|
|
assert(dest->opcode == _START_EXECUTOR);
|
|
_Py_ExecutorInit(executor, dependencies);
|
|
#ifdef Py_DEBUG
|
|
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
|
|
int lltrace = 0;
|
|
if (python_lltrace != NULL && *python_lltrace >= '0') {
|
|
lltrace = *python_lltrace - '0'; // TODO: Parse an int and all that
|
|
}
|
|
if (lltrace >= 2) {
|
|
printf("Optimized trace (length %d):\n", length);
|
|
for (int i = 0; i < length; i++) {
|
|
printf("%4d OPTIMIZED: ", i);
|
|
_PyUOpPrint(&executor->trace[i]);
|
|
printf("\n");
|
|
}
|
|
}
|
|
sanity_check(executor);
|
|
#endif
|
|
#ifdef _Py_JIT
|
|
executor->jit_code = NULL;
|
|
executor->jit_side_entry = NULL;
|
|
executor->jit_size = 0;
|
|
if (_PyJIT_Compile(executor, executor->trace, length)) {
|
|
Py_DECREF(executor);
|
|
return NULL;
|
|
}
|
|
#endif
|
|
_PyObject_GC_TRACK(executor);
|
|
return executor;
|
|
}
|
|
|
|
static int
|
|
init_cold_exit_executor(_PyExecutorObject *executor, int oparg)
|
|
{
|
|
_Py_SetImmortalUntracked((PyObject *)executor);
|
|
Py_SET_TYPE(executor, &_PyUOpExecutor_Type);
|
|
executor->trace = (_PyUOpInstruction *)executor->exits;
|
|
executor->code_size = 1;
|
|
executor->exit_count = 0;
|
|
_PyUOpInstruction *inst = (_PyUOpInstruction *)&executor->trace[0];
|
|
inst->opcode = _COLD_EXIT;
|
|
inst->oparg = oparg;
|
|
executor->vm_data.valid = true;
|
|
executor->vm_data.linked = false;
|
|
for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
|
|
assert(executor->vm_data.bloom.bits[i] == 0);
|
|
}
|
|
#ifdef Py_DEBUG
|
|
sanity_check(executor);
|
|
#endif
|
|
#ifdef _Py_JIT
|
|
executor->jit_code = NULL;
|
|
executor->jit_side_entry = NULL;
|
|
executor->jit_size = 0;
|
|
if (_PyJIT_Compile(executor, executor->trace, 1)) {
|
|
return -1;
|
|
}
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
#ifdef Py_STATS
|
|
/* Returns the effective trace length.
|
|
* Ignores NOPs and trailing exit and error handling.*/
|
|
int effective_trace_length(_PyUOpInstruction *buffer, int length)
|
|
{
|
|
int nop_count = 0;
|
|
for (int i = 0; i < length; i++) {
|
|
int opcode = buffer[i].opcode;
|
|
if (opcode == _NOP) {
|
|
nop_count++;
|
|
}
|
|
if (opcode == _EXIT_TRACE ||
|
|
opcode == _JUMP_TO_TOP ||
|
|
opcode == _COLD_EXIT) {
|
|
return i+1-nop_count;
|
|
}
|
|
}
|
|
Py_FatalError("No terminating instruction");
|
|
Py_UNREACHABLE();
|
|
}
|
|
#endif
|
|
|
|
static int
|
|
uop_optimize(
|
|
_PyOptimizerObject *self,
|
|
_PyInterpreterFrame *frame,
|
|
_Py_CODEUNIT *instr,
|
|
_PyExecutorObject **exec_ptr,
|
|
int curr_stackentries)
|
|
{
|
|
_PyBloomFilter dependencies;
|
|
_Py_BloomFilter_Init(&dependencies);
|
|
_PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH];
|
|
OPT_STAT_INC(attempts);
|
|
int length = translate_bytecode_to_trace(frame, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies);
|
|
if (length <= 0) {
|
|
// Error or nothing translated
|
|
return length;
|
|
}
|
|
assert(length < UOP_MAX_TRACE_LENGTH);
|
|
OPT_STAT_INC(traces_created);
|
|
char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
|
|
if (env_var == NULL || *env_var == '\0' || *env_var > '0') {
|
|
length = _Py_uop_analyze_and_optimize(frame, buffer,
|
|
length,
|
|
curr_stackentries, &dependencies);
|
|
if (length <= 0) {
|
|
return length;
|
|
}
|
|
}
|
|
assert(length < UOP_MAX_TRACE_LENGTH);
|
|
assert(length >= 1);
|
|
/* Fix up */
|
|
for (int pc = 0; pc < length; pc++) {
|
|
int opcode = buffer[pc].opcode;
|
|
int oparg = buffer[pc].oparg;
|
|
if (_PyUop_Flags[opcode] & HAS_OPARG_AND_1_FLAG) {
|
|
buffer[pc].opcode = opcode + 1 + (oparg & 1);
|
|
}
|
|
else if (oparg < _PyUop_Replication[opcode]) {
|
|
buffer[pc].opcode = opcode + oparg + 1;
|
|
}
|
|
else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) {
|
|
break;
|
|
}
|
|
assert(_PyOpcode_uop_name[buffer[pc].opcode]);
|
|
assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0);
|
|
}
|
|
OPT_HIST(effective_trace_length(buffer, length), optimized_trace_length_hist);
|
|
length = prepare_for_execution(buffer, length);
|
|
assert(length <= UOP_MAX_TRACE_LENGTH);
|
|
_PyExecutorObject *executor = make_executor_from_uops(buffer, length, &dependencies);
|
|
if (executor == NULL) {
|
|
return -1;
|
|
}
|
|
assert(length <= UOP_MAX_TRACE_LENGTH);
|
|
*exec_ptr = executor;
|
|
return 1;
|
|
}
|
|
|
|
static void
|
|
uop_opt_dealloc(PyObject *self) {
|
|
PyObject_Free(self);
|
|
}
|
|
|
|
PyTypeObject _PyUOpOptimizer_Type = {
|
|
PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
|
.tp_name = "uop_optimizer",
|
|
.tp_basicsize = sizeof(_PyOptimizerObject),
|
|
.tp_itemsize = 0,
|
|
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
|
|
.tp_dealloc = uop_opt_dealloc,
|
|
};
|
|
|
|
PyObject *
|
|
_PyOptimizer_NewUOpOptimizer(void)
|
|
{
|
|
_PyOptimizerObject *opt = PyObject_New(_PyOptimizerObject, &_PyUOpOptimizer_Type);
|
|
if (opt == NULL) {
|
|
return NULL;
|
|
}
|
|
opt->optimize = uop_optimize;
|
|
return (PyObject *)opt;
|
|
}
|
|
|
|
static void
|
|
counter_dealloc(_PyExecutorObject *self) {
|
|
/* The optimizer is the operand of the second uop. */
|
|
PyObject *opt = (PyObject *)self->trace[1].operand;
|
|
Py_DECREF(opt);
|
|
uop_dealloc(self);
|
|
}
|
|
|
|
PyTypeObject _PyCounterExecutor_Type = {
|
|
PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
|
.tp_name = "counting_executor",
|
|
.tp_basicsize = offsetof(_PyExecutorObject, exits),
|
|
.tp_itemsize = 1,
|
|
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC,
|
|
.tp_dealloc = (destructor)counter_dealloc,
|
|
.tp_methods = executor_methods,
|
|
.tp_traverse = executor_traverse,
|
|
.tp_clear = (inquiry)executor_clear,
|
|
};
|
|
|
|
static int
|
|
counter_optimize(
|
|
_PyOptimizerObject* self,
|
|
_PyInterpreterFrame *frame,
|
|
_Py_CODEUNIT *instr,
|
|
_PyExecutorObject **exec_ptr,
|
|
int Py_UNUSED(curr_stackentries)
|
|
)
|
|
{
|
|
PyCodeObject *code = _PyFrame_GetCode(frame);
|
|
int oparg = instr->op.arg;
|
|
while (instr->op.code == EXTENDED_ARG) {
|
|
instr++;
|
|
oparg = (oparg << 8) | instr->op.arg;
|
|
}
|
|
if (instr->op.code != JUMP_BACKWARD) {
|
|
/* Counter optimizer can only handle backward edges */
|
|
return 0;
|
|
}
|
|
_Py_CODEUNIT *target = instr + 1 + _PyOpcode_Caches[JUMP_BACKWARD] - oparg;
|
|
_PyUOpInstruction buffer[4] = {
|
|
{ .opcode = _START_EXECUTOR, .jump_target = 3, .format=UOP_FORMAT_JUMP },
|
|
{ .opcode = _LOAD_CONST_INLINE, .operand = (uintptr_t)self },
|
|
{ .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
|
|
{ .opcode = _EXIT_TRACE, .target = (uint32_t)(target - _PyCode_CODE(code)), .format=UOP_FORMAT_TARGET }
|
|
};
|
|
_PyExecutorObject *executor = make_executor_from_uops(buffer, 4, &EMPTY_FILTER);
|
|
if (executor == NULL) {
|
|
return -1;
|
|
}
|
|
Py_INCREF(self);
|
|
Py_SET_TYPE(executor, &_PyCounterExecutor_Type);
|
|
*exec_ptr = executor;
|
|
return 1;
|
|
}
|
|
|
|
/* Implementation of the optimizer's get_count() method: return the
 * current value of its `count` field as a Python int.  `args` is not
 * used. */
static PyObject *
counter_get_counter(PyObject *self, PyObject *args)
{
    _PyCounterOptimizerObject *counter = (_PyCounterOptimizerObject *)self;
    return PyLong_FromLongLong(counter->count);
}
|
|
|
|
static PyMethodDef counter_optimizer_methods[] = {
|
|
{ "get_count", counter_get_counter, METH_NOARGS, NULL },
|
|
{ NULL, NULL },
|
|
};
|
|
|
|
PyTypeObject _PyCounterOptimizer_Type = {
|
|
PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
|
.tp_name = "Counter optimizer",
|
|
.tp_basicsize = sizeof(_PyCounterOptimizerObject),
|
|
.tp_itemsize = 0,
|
|
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
|
|
.tp_methods = counter_optimizer_methods,
|
|
.tp_dealloc = (destructor)PyObject_Del,
|
|
};
|
|
|
|
PyObject *
|
|
_PyOptimizer_NewCounter(void)
|
|
{
|
|
_PyCounterOptimizerObject *opt = (_PyCounterOptimizerObject *)_PyObject_New(&_PyCounterOptimizer_Type);
|
|
if (opt == NULL) {
|
|
return NULL;
|
|
}
|
|
opt->base.optimize = counter_optimize;
|
|
opt->count = 0;
|
|
return (PyObject *)opt;
|
|
}
|
|
|
|
|
|
/*****************************************
|
|
* Executor management
|
|
****************************************/
|
|
|
|
/* We use a bloomfilter with k = 6, m = 256
|
|
* The choice of k and the following constants
|
|
* could do with a more rigorous analysis,
|
|
* but here is a simple analysis:
|
|
*
|
|
* We want to keep the false positive rate low.
|
|
* For n = 5 (a trace depends on 5 objects),
|
|
* we expect 30 bits set, giving a false positive
|
|
* rate of (30/256)**6 == 2.6e-6 which is plenty
|
|
* good enough.
|
|
*
|
|
* However with n = 10 we expect 60 bits set (worst case),
|
|
* giving a false positive rate of (60/256)**6 == 1.7e-4
|
|
*
|
|
* We choose k = 6, rather than a higher number as
|
|
* it means the false positive rate grows slower for high n.
|
|
*
|
|
* n = 5, k = 6 => fp = 2.6e-6
|
|
* n = 5, k = 8 => fp = 3.5e-7
|
|
* n = 10, k = 6 => fp = 1.6e-4
|
|
* n = 10, k = 8 => fp = 0.9e-4
|
|
* n = 15, k = 6 => fp = 0.18%
|
|
* n = 15, k = 8 => fp = 0.23%
|
|
* n = 20, k = 6 => fp = 1.1%
|
|
* n = 20, k = 8 => fp = 2.3%
|
|
*
|
|
* The above analysis assumes perfect hash functions,
|
|
* but those don't exist, so the real false positive
|
|
* rates may be worse.
|
|
*/
|
|
|
|
/* Number of filter bits set by _Py_BloomFilter_Add() for each pointer. */
#define K 6
|
|
|
|
/* Initial value of the hash accumulator in address_to_hash(). */
#define SEED 20221211
|
|
|
|
/* TO DO -- Use more modern hash functions with better distribution of bits */
|
|
static uint64_t
|
|
address_to_hash(void *ptr) {
|
|
assert(ptr != NULL);
|
|
uint64_t uhash = SEED;
|
|
uintptr_t addr = (uintptr_t)ptr;
|
|
for (int i = 0; i < SIZEOF_VOID_P; i++) {
|
|
uhash ^= addr & 255;
|
|
uhash *= (uint64_t)PyHASH_MULTIPLIER;
|
|
addr >>= 8;
|
|
}
|
|
return uhash;
|
|
}
|
|
|
|
void
|
|
_Py_BloomFilter_Init(_PyBloomFilter *bloom)
|
|
{
|
|
for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
|
|
bloom->bits[i] = 0;
|
|
}
|
|
}
|
|
|
|
/* We want K hash functions that each set 1 bit.
|
|
* A hash function that sets 1 bit in M bits can be trivially
|
|
* derived from a log2(M) bit hash function.
|
|
* So we extract 8 (log2(256)) bits at a time from
|
|
* the 64bit hash. */
|
|
void
|
|
_Py_BloomFilter_Add(_PyBloomFilter *bloom, void *ptr)
|
|
{
|
|
uint64_t hash = address_to_hash(ptr);
|
|
assert(K <= 8);
|
|
for (int i = 0; i < K; i++) {
|
|
uint8_t bits = hash & 255;
|
|
bloom->bits[bits >> 5] |= (1 << (bits&31));
|
|
hash >>= 8;
|
|
}
|
|
}
|
|
|
|
static bool
|
|
bloom_filter_may_contain(_PyBloomFilter *bloom, _PyBloomFilter *hashes)
|
|
{
|
|
for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
|
|
if ((bloom->bits[i] & hashes->bits[i]) != hashes->bits[i]) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
static void
|
|
link_executor(_PyExecutorObject *executor)
|
|
{
|
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
|
_PyExecutorLinkListNode *links = &executor->vm_data.links;
|
|
_PyExecutorObject *head = interp->executor_list_head;
|
|
if (head == NULL) {
|
|
interp->executor_list_head = executor;
|
|
links->previous = NULL;
|
|
links->next = NULL;
|
|
}
|
|
else {
|
|
assert(head->vm_data.links.previous == NULL);
|
|
links->previous = NULL;
|
|
links->next = head;
|
|
head->vm_data.links.previous = executor;
|
|
interp->executor_list_head = executor;
|
|
}
|
|
executor->vm_data.linked = true;
|
|
/* executor_list_head must be first in list */
|
|
assert(interp->executor_list_head->vm_data.links.previous == NULL);
|
|
}
|
|
|
|
static void
|
|
unlink_executor(_PyExecutorObject *executor)
|
|
{
|
|
if (!executor->vm_data.linked) {
|
|
return;
|
|
}
|
|
_PyExecutorLinkListNode *links = &executor->vm_data.links;
|
|
assert(executor->vm_data.valid);
|
|
_PyExecutorObject *next = links->next;
|
|
_PyExecutorObject *prev = links->previous;
|
|
if (next != NULL) {
|
|
next->vm_data.links.previous = prev;
|
|
}
|
|
if (prev != NULL) {
|
|
prev->vm_data.links.next = next;
|
|
}
|
|
else {
|
|
// prev == NULL implies that executor is the list head
|
|
PyInterpreterState *interp = PyInterpreterState_Get();
|
|
assert(interp->executor_list_head == executor);
|
|
interp->executor_list_head = next;
|
|
}
|
|
executor->vm_data.linked = false;
|
|
}
|
|
|
|
/* This must be called by optimizers before using the executor */
|
|
void
|
|
_Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_set)
|
|
{
|
|
executor->vm_data.valid = true;
|
|
for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
|
|
executor->vm_data.bloom.bits[i] = dependency_set->bits[i];
|
|
}
|
|
link_executor(executor);
|
|
}
|
|
|
|
/* Detaches the executor from the code object (if any) that
|
|
* holds a reference to it */
|
|
void
|
|
_Py_ExecutorDetach(_PyExecutorObject *executor)
|
|
{
|
|
PyCodeObject *code = executor->vm_data.code;
|
|
if (code == NULL) {
|
|
return;
|
|
}
|
|
_Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
|
|
assert(instruction->op.code == ENTER_EXECUTOR);
|
|
int index = instruction->op.arg;
|
|
assert(code->co_executors->executors[index] == executor);
|
|
instruction->op.code = executor->vm_data.opcode;
|
|
instruction->op.arg = executor->vm_data.oparg;
|
|
executor->vm_data.code = NULL;
|
|
code->co_executors->executors[index] = NULL;
|
|
Py_DECREF(executor);
|
|
}
|
|
|
|
static int
|
|
executor_clear(_PyExecutorObject *executor)
|
|
{
|
|
if (!executor->vm_data.valid) {
|
|
return 0;
|
|
}
|
|
assert(executor->vm_data.valid == 1);
|
|
unlink_executor(executor);
|
|
executor->vm_data.valid = 0;
|
|
/* It is possible for an executor to form a reference
|
|
* cycle with itself, so decref'ing a side exit could
|
|
* free the executor unless we hold a strong reference to it
|
|
*/
|
|
Py_INCREF(executor);
|
|
for (uint32_t i = 0; i < executor->exit_count; i++) {
|
|
const _PyExecutorObject *cold = &COLD_EXITS[i];
|
|
const _PyExecutorObject *side = executor->exits[i].executor;
|
|
executor->exits[i].temperature = initial_unreachable_backoff_counter();
|
|
if (side != cold) {
|
|
executor->exits[i].executor = cold;
|
|
Py_DECREF(side);
|
|
}
|
|
}
|
|
_Py_ExecutorDetach(executor);
|
|
Py_DECREF(executor);
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
_Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj)
|
|
{
|
|
assert(executor->vm_data.valid);
|
|
_Py_BloomFilter_Add(&executor->vm_data.bloom, obj);
|
|
}
|
|
|
|
/* Invalidate all executors that depend on `obj`
|
|
* May cause other executors to be invalidated as well
|
|
*/
|
|
void
|
|
_Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation)
|
|
{
|
|
_PyBloomFilter obj_filter;
|
|
_Py_BloomFilter_Init(&obj_filter);
|
|
_Py_BloomFilter_Add(&obj_filter, obj);
|
|
/* Walk the list of executors */
|
|
/* TO DO -- Use a tree to avoid traversing as many objects */
|
|
bool no_memory = false;
|
|
PyObject *invalidate = PyList_New(0);
|
|
if (invalidate == NULL) {
|
|
PyErr_Clear();
|
|
no_memory = true;
|
|
}
|
|
/* Clearing an executor can deallocate others, so we need to make a list of
|
|
* executors to invalidate first */
|
|
for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
|
|
assert(exec->vm_data.valid);
|
|
_PyExecutorObject *next = exec->vm_data.links.next;
|
|
if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) {
|
|
unlink_executor(exec);
|
|
if (no_memory) {
|
|
exec->vm_data.valid = 0;
|
|
} else {
|
|
if (PyList_Append(invalidate, (PyObject *)exec) < 0) {
|
|
PyErr_Clear();
|
|
no_memory = true;
|
|
exec->vm_data.valid = 0;
|
|
}
|
|
}
|
|
if (is_invalidation) {
|
|
OPT_STAT_INC(executors_invalidated);
|
|
}
|
|
}
|
|
exec = next;
|
|
}
|
|
if (invalidate != NULL) {
|
|
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
|
|
_PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i);
|
|
executor_clear(exec);
|
|
}
|
|
Py_DECREF(invalidate);
|
|
}
|
|
return;
|
|
}
|
|
|
|
/* Invalidate all executors */
|
|
void
|
|
_Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
|
|
{
|
|
while (interp->executor_list_head) {
|
|
_PyExecutorObject *executor = interp->executor_list_head;
|
|
assert(executor->vm_data.valid == 1 && executor->vm_data.linked == 1);
|
|
if (executor->vm_data.code) {
|
|
// Clear the entire code object so its co_executors array be freed:
|
|
_PyCode_Clear_Executors(executor->vm_data.code);
|
|
}
|
|
else {
|
|
executor_clear(executor);
|
|
}
|
|
if (is_invalidation) {
|
|
OPT_STAT_INC(executors_invalidated);
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif /* _Py_TIER2 */
|