From a0c9cf9456c2ee7a89d9bd859c07afac8cf5e893 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 10 Jan 2024 15:44:34 +0000 Subject: [PATCH] GH-113860: All executors are now defined in terms of micro ops. Convert counter executor to use uops. (GH-113864) --- Include/cpython/optimizer.h | 8 +- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 4 +- Include/internal/pycore_uop_metadata.h | 4 + Include/internal/pycore_uops.h | 5 - Python/bytecodes.c | 22 +-- Python/executor_cases.c.h | 18 +++ Python/generated_cases.c.h | 12 +- Python/optimizer.c | 189 +++++++++------------- 9 files changed, 125 insertions(+), 139 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index d521eac79d1..f077da7ee88 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -31,8 +31,6 @@ typedef struct { typedef struct _PyExecutorObject { PyObject_VAR_HEAD - /* WARNING: execute consumes a reference to self. This is necessary to allow executors to tail call into each other. */ - _Py_CODEUNIT *(*execute)(struct _PyExecutorObject *self, struct _PyInterpreterFrame *frame, PyObject **stack_pointer); _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ /* Data needed by the executor goes here, but is opaque to the VM */ } _PyExecutorObject; @@ -52,6 +50,12 @@ typedef struct _PyOptimizerObject { /* Data needed by the optimizer goes here, but is opaque to the VM */ } _PyOptimizerObject; +/** Test support **/ +typedef struct { + _PyOptimizerObject base; + int64_t count; +} _PyCounterOptimizerObject; + PyAPI_FUNC(int) PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *executor); PyAPI_FUNC(void) PyUnstable_SetOptimizer(_PyOptimizerObject* optimizer); diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 7d39e4bc030..a9d698da25a 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1009,7 +1009,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { [END_ASYNC_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [END_FOR] = { true, INSTR_FMT_IX, 0 }, [END_SEND] = { true, INSTR_FMT_IX, 0 }, - [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG }, [EXIT_INIT_CHECK] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [EXTENDED_ARG] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [FORMAT_SIMPLE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 4a9a00ba352..b3b36327c48 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -231,7 +231,9 @@ extern "C" { #define _SAVE_RETURN_OFFSET 378 #define _INSERT 379 #define _CHECK_VALIDITY 380 -#define MAX_UOP_ID 380 +#define _LOAD_CONST_INLINE_BORROW 381 +#define _INTERNAL_INCREMENT_OPT_COUNTER 382 +#define MAX_UOP_ID 382 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 300bd3baa7b..ab498e9cefd 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -203,6 +203,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_EXIT_TRACE] = HAS_DEOPT_FLAG, [_INSERT] = HAS_ARG_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, + [_LOAD_CONST_INLINE_BORROW] = 0, 
+ [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, }; const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { @@ -303,6 +305,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS", [_INIT_CALL_PY_EXACT_ARGS] = "_INIT_CALL_PY_EXACT_ARGS", [_INSERT] = "_INSERT", + [_INTERNAL_INCREMENT_OPT_COUNTER] = "_INTERNAL_INCREMENT_OPT_COUNTER", [_IS_NONE] = "_IS_NONE", [_IS_OP] = "_IS_OP", [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", @@ -328,6 +331,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_ATTR_WITH_HINT] = "_LOAD_ATTR_WITH_HINT", [_LOAD_BUILD_CLASS] = "_LOAD_BUILD_CLASS", [_LOAD_CONST] = "_LOAD_CONST", + [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW", [_LOAD_DEREF] = "_LOAD_DEREF", [_LOAD_FAST] = "_LOAD_FAST", [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 153884f4bd2..eb10002d34c 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -24,11 +24,6 @@ typedef struct { _PyUOpInstruction trace[1]; } _PyUOpExecutorObject; -_Py_CODEUNIT *_PyUOpExecute( - _PyExecutorObject *executor, - _PyInterpreterFrame *frame, - PyObject **stack_pointer); - #ifdef __cplusplus } #endif diff --git a/Python/bytecodes.c b/Python/bytecodes.c index e1a6a256fbd..f53ddae8df9 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2366,16 +2366,8 @@ dummy_func( _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; if (executor->vm_data.valid) { Py_INCREF(executor); - if (executor->execute == _PyUOpExecute) { - current_executor = (_PyUOpExecutorObject *)executor; - GOTO_TIER_TWO(); - } - next_instr = executor->execute(executor, frame, stack_pointer); - frame = tstate->current_frame; - if (next_instr == NULL) { - goto resume_with_error; - } - stack_pointer = _PyFrame_GetStackPointer(frame); + current_executor = (_PyUOpExecutorObject *)executor; + GOTO_TIER_TWO(); } else { code->co_executors->executors[oparg & 255] = NULL; @@ -4066,6 +4058,16 @@ dummy_func( DEOPT_IF(!current_executor->base.vm_data.valid); } + op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { + value = ptr; + } + + /* Internal -- for testing executors */ + op(_INTERNAL_INCREMENT_OPT_COUNTER, (opt --)) { + _PyCounterOptimizerObject *exe = (_PyCounterOptimizerObject *)opt; + exe->count++; + } + // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 14fb3a05a9f..ea4caa9a97a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3397,4 +3397,22 @@ break; } + case _LOAD_CONST_INLINE_BORROW: { + PyObject *value; + PyObject *ptr = (PyObject *)CURRENT_OPERAND(); + value = ptr; + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _INTERNAL_INCREMENT_OPT_COUNTER: { + PyObject *opt; + opt = stack_pointer[-1]; + _PyCounterOptimizerObject *exe = (_PyCounterOptimizerObject *)opt; + exe->count++; + stack_pointer += -1; + break; + } + #undef TIER_TWO diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8226d827cde..e693e3e2560 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2380,16 +2380,8 @@ _PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; if (executor->vm_data.valid) { Py_INCREF(executor); - if (executor->execute == _PyUOpExecute) { - current_executor = (_PyUOpExecutorObject *)executor; - GOTO_TIER_TWO(); - } - next_instr = executor->execute(executor, 
frame, stack_pointer); - frame = tstate->current_frame; - if (next_instr == NULL) { - goto resume_with_error; - } - stack_pointer = _PyFrame_GetStackPointer(frame); + current_executor = (_PyUOpExecutorObject *)executor; + GOTO_TIER_TWO(); } else { code->co_executors->executors[oparg & 255] = NULL; diff --git a/Python/optimizer.c b/Python/optimizer.c index ad5b4994318..28e12dbbf5d 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -212,27 +212,6 @@ PyUnstable_GetExecutor(PyCodeObject *code, int offset) return NULL; } -/** Test support **/ - - -typedef struct { - _PyOptimizerObject base; - int64_t count; -} _PyCounterOptimizerObject; - -typedef struct { - _PyExecutorObject executor; - _PyCounterOptimizerObject *optimizer; - _Py_CODEUNIT *next_instr; -} _PyCounterExecutorObject; - -static void -counter_dealloc(_PyCounterExecutorObject *self) { - _Py_ExecutorClear((_PyExecutorObject *)self); - Py_DECREF(self->optimizer); - PyObject_Free(self); -} - static PyObject * is_valid(PyObject *self, PyObject *Py_UNUSED(ignored)) { @@ -244,84 +223,6 @@ static PyMethodDef executor_methods[] = { { NULL, NULL }, }; -PyTypeObject _PyCounterExecutor_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - .tp_name = "counting_executor", - .tp_basicsize = sizeof(_PyCounterExecutorObject), - .tp_itemsize = 0, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, - .tp_dealloc = (destructor)counter_dealloc, - .tp_methods = executor_methods, -}; - -static _Py_CODEUNIT * -counter_execute(_PyExecutorObject *self, _PyInterpreterFrame *frame, PyObject **stack_pointer) -{ - ((_PyCounterExecutorObject *)self)->optimizer->count++; - _PyFrame_SetStackPointer(frame, stack_pointer); - Py_DECREF(self); - return ((_PyCounterExecutorObject *)self)->next_instr; -} - -static int -counter_optimize( - _PyOptimizerObject* self, - PyCodeObject *code, - _Py_CODEUNIT *instr, - _PyExecutorObject **exec_ptr, - int Py_UNUSED(curr_stackentries) -) -{ - _PyCounterExecutorObject *executor = (_PyCounterExecutorObject *)_PyObject_New(&_PyCounterExecutor_Type); - if (executor == NULL) { - return -1; - } - executor->executor.execute = counter_execute; - Py_INCREF(self); - executor->optimizer = (_PyCounterOptimizerObject *)self; - executor->next_instr = instr; - *exec_ptr = (_PyExecutorObject *)executor; - _PyBloomFilter empty; - _Py_BloomFilter_Init(&empty); - _Py_ExecutorInit((_PyExecutorObject *)executor, &empty); - return 1; -} - -static PyObject * -counter_get_counter(PyObject *self, PyObject *args) -{ - return PyLong_FromLongLong(((_PyCounterOptimizerObject *)self)->count); -} - -static PyMethodDef counter_optimizer_methods[] = { - { "get_count", counter_get_counter, METH_NOARGS, NULL }, - { NULL, NULL }, -}; - -PyTypeObject _PyCounterOptimizer_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - .tp_name = "Counter optimizer", - .tp_basicsize = sizeof(_PyCounterOptimizerObject), - .tp_itemsize = 0, - .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, - .tp_methods = counter_optimizer_methods, - .tp_dealloc = (destructor)PyObject_Del, -}; - -PyObject * -PyUnstable_Optimizer_NewCounter(void) -{ - _PyCounterOptimizerObject *opt = (_PyCounterOptimizerObject *)_PyObject_New(&_PyCounterOptimizer_Type); - if (opt == NULL) { - return NULL; - } - opt->base.optimize = counter_optimize; - opt->base.resume_threshold = INT16_MAX; - opt->base.backedge_threshold = 0; - opt->count = 0; - return (PyObject *)opt; -} - ///////////////////// Experimental UOp Optimizer ///////////////////// static void @@ -381,7 +282,7 @@ 
PySequenceMethods uop_as_sequence = { PyTypeObject _PyUOpExecutor_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) .tp_name = "uop_executor", - .tp_basicsize = sizeof(_PyUOpExecutorObject) - sizeof(_PyUOpInstruction), + .tp_basicsize = offsetof(_PyUOpExecutorObject, trace), .tp_itemsize = sizeof(_PyUOpInstruction), .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, .tp_dealloc = (destructor)uop_dealloc, @@ -843,7 +744,6 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) dest--; } assert(dest == -1); - executor->base.execute = _PyUOpExecute; _Py_ExecutorInit((_PyExecutorObject *)executor, dependencies); #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); @@ -899,15 +799,6 @@ uop_optimize( return 1; } -/* Dummy execute() function for UOp Executor. - * The actual implementation is inlined in ceval.c, - * in _PyEval_EvalFrameDefault(). */ -_Py_CODEUNIT * -_PyUOpExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject **stack_pointer) -{ - Py_FatalError("Tier 2 is now inlined into Tier 1"); -} - static void uop_opt_dealloc(PyObject *self) { PyObject_Free(self); @@ -937,6 +828,84 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) return (PyObject *)opt; } +static void +counter_dealloc(_PyUOpExecutorObject *self) { + PyObject *opt = (PyObject *)self->trace[0].operand; + Py_DECREF(opt); + uop_dealloc(self); +} + +PyTypeObject _PyCounterExecutor_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + .tp_name = "counting_executor", + .tp_basicsize = offsetof(_PyUOpExecutorObject, trace), + .tp_itemsize = sizeof(_PyUOpInstruction), + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, + .tp_dealloc = (destructor)counter_dealloc, + .tp_methods = executor_methods, +}; + +static int +counter_optimize( + _PyOptimizerObject* self, + PyCodeObject *code, + _Py_CODEUNIT *instr, + _PyExecutorObject **exec_ptr, + int Py_UNUSED(curr_stackentries) +) +{ + _PyUOpInstruction buffer[3] = { + { .opcode = _LOAD_CONST_INLINE_BORROW, .operand = (uintptr_t)self }, + { .opcode = _INTERNAL_INCREMENT_OPT_COUNTER }, + { .opcode = _EXIT_TRACE, .target = (uint32_t)(instr - _PyCode_CODE(code)) } + }; + _PyBloomFilter empty; + _Py_BloomFilter_Init(&empty); + _PyExecutorObject *executor = make_executor_from_uops(buffer, &empty); + if (executor == NULL) { + return -1; + } + Py_INCREF(self); + Py_SET_TYPE(executor, &_PyCounterExecutor_Type); + *exec_ptr = executor; + return 1; +} + +static PyObject * +counter_get_counter(PyObject *self, PyObject *args) +{ + return PyLong_FromLongLong(((_PyCounterOptimizerObject *)self)->count); +} + +static PyMethodDef counter_optimizer_methods[] = { + { "get_count", counter_get_counter, METH_NOARGS, NULL }, + { NULL, NULL }, +}; + +PyTypeObject _PyCounterOptimizer_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + .tp_name = "Counter optimizer", + .tp_basicsize = sizeof(_PyCounterOptimizerObject), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, + .tp_methods = counter_optimizer_methods, + .tp_dealloc = (destructor)PyObject_Del, +}; + +PyObject * +PyUnstable_Optimizer_NewCounter(void) +{ + _PyCounterOptimizerObject *opt = (_PyCounterOptimizerObject *)_PyObject_New(&_PyCounterOptimizer_Type); + if (opt == NULL) { + return NULL; + } + opt->base.optimize = counter_optimize; + opt->base.resume_threshold = INT16_MAX; + opt->base.backedge_threshold = 0; + opt->count = 0; + return (PyObject *)opt; +} + /***************************************** * Executor management
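
With the per-executor `execute` entry point removed, the counter executor is now an ordinary `_PyUOpExecutorObject` whose trace is assembled by the new `counter_optimize` above: `_LOAD_CONST_INLINE_BORROW` pushes the optimizer object as a borrowed reference (the executor owns the strong reference taken by `Py_INCREF(self)` and dropped in `counter_dealloc`), `_INTERNAL_INCREMENT_OPT_COUNTER` pops it and bumps its count, and `_EXIT_TRACE` resumes tier 1 at the instruction offset recorded in `target`. A minimal sketch of what one pass through that trace amounts to, assuming CPython's internal headers and using a hypothetical helper name (the real uops are dispatched by the tier-2 interpreter through the cases added to executor_cases.c.h above):

    /* Sketch only -- assumes Py_BUILD_CORE and CPython's internal include path. */
    #include "Python.h"
    #include "pycore_uops.h"          /* _PyUOpExecutorObject, _PyUOpInstruction */
    #include "cpython/optimizer.h"    /* _PyCounterOptimizerObject */

    /* Hypothetical helper, for illustration only. */
    static void
    counter_trace_by_hand(_PyUOpExecutorObject *executor)
    {
        /* _LOAD_CONST_INLINE_BORROW: push the inlined operand without INCREF;
           trace[0] holds the optimizer pointer, as counter_dealloc relies on. */
        PyObject *opt = (PyObject *)executor->trace[0].operand;

        /* _INTERNAL_INCREMENT_OPT_COUNTER: pop it and increment the count. */
        ((_PyCounterOptimizerObject *)opt)->count++;

        /* _EXIT_TRACE: hand control back to tier 1 at the offset in `target`. */
    }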