GH-113860: Get rid of _PyUOpExecutorObject (GH-113954)

This commit is contained in:
Brandt Bucher 2024-01-12 03:58:23 -08:00 committed by GitHub
parent 29e2839cd6
commit 30e6cbdba2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 35 additions and 67 deletions

View file

@ -29,10 +29,17 @@ typedef struct {
_PyExecutorLinkListNode links; _PyExecutorLinkListNode links;
} _PyVMData; } _PyVMData;
/* One uop instruction. Executors keep an array of these in their
 * `trace` member, and the optimizer passes them around as
 * (_PyUOpInstruction *trace, int trace_len). */
typedef struct {
uint16_t opcode;
uint16_t oparg;
uint32_t target;
/* A cache entry. 64 bits wide, so it can also carry a pointer: the
 * counter executor reads trace[0].operand back as a PyObject *. */
uint64_t operand; // A cache entry
} _PyUOpInstruction;
typedef struct _PyExecutorObject { typedef struct _PyExecutorObject {
PyObject_VAR_HEAD PyObject_VAR_HEAD
_PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
/* Data needed by the executor goes here, but is opaque to the VM */ _PyUOpInstruction trace[1];
} _PyExecutorObject; } _PyExecutorObject;
typedef struct _PyOptimizerObject _PyOptimizerObject; typedef struct _PyOptimizerObject _PyOptimizerObject;

View file

@ -8,8 +8,6 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define" # error "this header requires Py_BUILD_CORE define"
#endif #endif
#include "pycore_uops.h" // _PyUOpInstruction
int _Py_uop_analyze_and_optimize(PyCodeObject *code, int _Py_uop_analyze_and_optimize(PyCodeObject *code,
_PyUOpInstruction *trace, int trace_len, int curr_stackentries); _PyUOpInstruction *trace, int trace_len, int curr_stackentries);

View file

@ -1,30 +0,0 @@
#ifndef Py_INTERNAL_UOPS_H
#define Py_INTERNAL_UOPS_H
#ifdef __cplusplus
extern "C" {
#endif
#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif
#include "pycore_frame.h" // _PyInterpreterFrame
/* Capacity, in _PyUOpInstruction entries, of the buffer that uop_optimize
 * translates bytecode into; also bounds the `used` bitmap in
 * make_executor_from_uops. */
#define _Py_UOP_MAX_TRACE_LENGTH 512
/* One uop instruction; the element type of a uop executor's trace array. */
typedef struct {
uint16_t opcode;
uint16_t oparg;
uint32_t target;
/* A cache entry. 64 bits wide, so it can also carry a pointer: the
 * counter executor reads trace[0].operand back as a PyObject *. */
uint64_t operand; // A cache entry
} _PyUOpInstruction;
/* Executor that carries a uop trace: the generic executor header followed
 * by a variable-length array of instructions. */
typedef struct {
_PyExecutorObject base;
/* Declared with one element, but instances are allocated with
 * PyObject_NewVar(..., length) and the type's tp_basicsize is
 * offsetof(..., trace), so the real element count is Py_SIZE(self). */
_PyUOpInstruction trace[1];
} _PyUOpExecutorObject;
#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_UOPS_H */

View file

@ -1895,7 +1895,6 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_unionobject.h \ $(srcdir)/Include/internal/pycore_unionobject.h \
$(srcdir)/Include/internal/pycore_unicodeobject.h \ $(srcdir)/Include/internal/pycore_unicodeobject.h \
$(srcdir)/Include/internal/pycore_unicodeobject_generated.h \ $(srcdir)/Include/internal/pycore_unicodeobject_generated.h \
$(srcdir)/Include/internal/pycore_uops.h \
$(srcdir)/Include/internal/pycore_uop_metadata.h \ $(srcdir)/Include/internal/pycore_uop_metadata.h \
$(srcdir)/Include/internal/pycore_warnings.h \ $(srcdir)/Include/internal/pycore_warnings.h \
$(srcdir)/Include/internal/pycore_weakref.h \ $(srcdir)/Include/internal/pycore_weakref.h \

View file

@ -295,7 +295,6 @@
<ClInclude Include="..\Include\internal\pycore_unionobject.h" /> <ClInclude Include="..\Include\internal\pycore_unionobject.h" />
<ClInclude Include="..\Include\internal\pycore_unicodeobject.h" /> <ClInclude Include="..\Include\internal\pycore_unicodeobject.h" />
<ClInclude Include="..\Include\internal\pycore_unicodeobject_generated.h" /> <ClInclude Include="..\Include\internal\pycore_unicodeobject_generated.h" />
<ClInclude Include="..\Include\internal\pycore_uops.h" />
<ClInclude Include="..\Include\internal\pycore_warnings.h" /> <ClInclude Include="..\Include\internal\pycore_warnings.h" />
<ClInclude Include="..\Include\internal\pycore_weakref.h" /> <ClInclude Include="..\Include\internal\pycore_weakref.h" />
<ClInclude Include="..\Include\interpreteridobject.h" /> <ClInclude Include="..\Include\interpreteridobject.h" />

View file

@ -804,9 +804,6 @@
<ClInclude Include="..\Include\internal\pycore_unionobject.h"> <ClInclude Include="..\Include\internal\pycore_unionobject.h">
<Filter>Include\internal</Filter> <Filter>Include\internal</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\Include\internal\pycore_uops.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\mimalloc\mimalloc.h"> <ClInclude Include="..\Include\internal\mimalloc\mimalloc.h">
<Filter>Include\internal\mimalloc</Filter> <Filter>Include\internal\mimalloc</Filter>
</ClInclude> </ClInclude>

View file

@ -68,7 +68,7 @@ static size_t jump;
static uint16_t invert, counter, index, hint; static uint16_t invert, counter, index, hint;
#define unused 0 // Used in a macro def, can't be static #define unused 0 // Used in a macro def, can't be static
static uint32_t type_version; static uint32_t type_version;
static _PyUOpExecutorObject *current_executor; static _PyExecutorObject *current_executor;
static PyObject * static PyObject *
dummy_func( dummy_func(
@ -2369,10 +2369,10 @@ dummy_func(
CHECK_EVAL_BREAKER(); CHECK_EVAL_BREAKER();
PyCodeObject *code = _PyFrame_GetCode(frame); PyCodeObject *code = _PyFrame_GetCode(frame);
_PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; _PyExecutorObject *executor = code->co_executors->executors[oparg & 255];
if (executor->vm_data.valid) { if (executor->vm_data.valid) {
Py_INCREF(executor); Py_INCREF(executor);
current_executor = (_PyUOpExecutorObject *)executor; current_executor = executor;
GOTO_TIER_TWO(); GOTO_TIER_TWO();
} }
else { else {
@ -4063,7 +4063,7 @@ dummy_func(
op(_CHECK_VALIDITY, (--)) { op(_CHECK_VALIDITY, (--)) {
TIER_TWO_ONLY TIER_TWO_ONLY
DEOPT_IF(!current_executor->base.vm_data.valid); DEOPT_IF(!current_executor->vm_data.valid);
} }
op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {

View file

@ -25,7 +25,6 @@
#include "pycore_tuple.h" // _PyTuple_ITEMS() #include "pycore_tuple.h" // _PyTuple_ITEMS()
#include "pycore_typeobject.h" // _PySuper_Lookup() #include "pycore_typeobject.h" // _PySuper_Lookup()
#include "pycore_uop_ids.h" // Uops #include "pycore_uop_ids.h" // Uops
#include "pycore_uops.h" // _PyUOpExecutorObject
#include "pycore_pyerrors.h" #include "pycore_pyerrors.h"
#include "pycore_dict.h" #include "pycore_dict.h"
@ -739,7 +738,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
} }
/* State shared between Tier 1 and Tier 2 interpreter */ /* State shared between Tier 1 and Tier 2 interpreter */
_PyUOpExecutorObject *current_executor = NULL; _PyExecutorObject *current_executor = NULL;
/* Local "register" variables. /* Local "register" variables.
* These are cached values from the frame and code object. */ * These are cached values from the frame and code object. */

View file

@ -3393,7 +3393,7 @@
case _CHECK_VALIDITY: { case _CHECK_VALIDITY: {
TIER_TWO_ONLY TIER_TWO_ONLY
if (!current_executor->base.vm_data.valid) goto deoptimize; if (!current_executor->vm_data.valid) goto deoptimize;
break; break;
} }

View file

@ -2377,10 +2377,10 @@
TIER_ONE_ONLY TIER_ONE_ONLY
CHECK_EVAL_BREAKER(); CHECK_EVAL_BREAKER();
PyCodeObject *code = _PyFrame_GetCode(frame); PyCodeObject *code = _PyFrame_GetCode(frame);
_PyExecutorObject *executor = (_PyExecutorObject *)code->co_executors->executors[oparg&255]; _PyExecutorObject *executor = code->co_executors->executors[oparg & 255];
if (executor->vm_data.valid) { if (executor->vm_data.valid) {
Py_INCREF(executor); Py_INCREF(executor);
current_executor = (_PyUOpExecutorObject *)executor; current_executor = executor;
GOTO_TIER_TWO(); GOTO_TIER_TWO();
} }
else { else {

View file

@ -7,7 +7,6 @@
#include "pycore_optimizer.h" // _Py_uop_analyze_and_optimize() #include "pycore_optimizer.h" // _Py_uop_analyze_and_optimize()
#include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_uop_ids.h" #include "pycore_uop_ids.h"
#include "pycore_uops.h"
#include "cpython/optimizer.h" #include "cpython/optimizer.h"
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
@ -17,6 +16,8 @@
#include "pycore_uop_metadata.h" // Uop tables #include "pycore_uop_metadata.h" // Uop tables
#undef NEED_OPCODE_METADATA #undef NEED_OPCODE_METADATA
/* Capacity, in _PyUOpInstruction entries, of the buffer that uop_optimize
 * translates bytecode into; also bounds the `used` bitmap in
 * make_executor_from_uops. */
#define UOP_MAX_TRACE_LENGTH 512
#define MAX_EXECUTORS_SIZE 256 #define MAX_EXECUTORS_SIZE 256
@ -224,8 +225,8 @@ static PyMethodDef executor_methods[] = {
///////////////////// Experimental UOp Optimizer ///////////////////// ///////////////////// Experimental UOp Optimizer /////////////////////
static void static void
uop_dealloc(_PyUOpExecutorObject *self) { uop_dealloc(_PyExecutorObject *self) {
_Py_ExecutorClear((_PyExecutorObject *)self); _Py_ExecutorClear(self);
PyObject_Free(self); PyObject_Free(self);
} }
@ -236,13 +237,13 @@ _PyUOpName(int index)
} }
static Py_ssize_t static Py_ssize_t
uop_len(_PyUOpExecutorObject *self) uop_len(_PyExecutorObject *self)
{ {
return Py_SIZE(self); return Py_SIZE(self);
} }
static PyObject * static PyObject *
uop_item(_PyUOpExecutorObject *self, Py_ssize_t index) uop_item(_PyExecutorObject *self, Py_ssize_t index)
{ {
Py_ssize_t len = uop_len(self); Py_ssize_t len = uop_len(self);
if (index < 0 || index >= len) { if (index < 0 || index >= len) {
@ -280,7 +281,7 @@ PySequenceMethods uop_as_sequence = {
PyTypeObject _PyUOpExecutor_Type = { PyTypeObject _PyUOpExecutor_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0) PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "uop_executor", .tp_name = "uop_executor",
.tp_basicsize = offsetof(_PyUOpExecutorObject, trace), .tp_basicsize = offsetof(_PyExecutorObject, trace),
.tp_itemsize = sizeof(_PyUOpInstruction), .tp_itemsize = sizeof(_PyUOpInstruction),
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
.tp_dealloc = (destructor)uop_dealloc, .tp_dealloc = (destructor)uop_dealloc,
@ -423,8 +424,7 @@ translate_bytecode_to_trace(
if (opcode == ENTER_EXECUTOR) { if (opcode == ENTER_EXECUTOR) {
assert(oparg < 256); assert(oparg < 256);
_PyExecutorObject *executor = _PyExecutorObject *executor = code->co_executors->executors[oparg];
(_PyExecutorObject *)code->co_executors->executors[oparg];
opcode = executor->vm_data.opcode; opcode = executor->vm_data.opcode;
DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]); DPRINTF(2, " * ENTER_EXECUTOR -> %s\n", _PyOpcode_OpName[opcode]);
oparg = executor->vm_data.oparg; oparg = executor->vm_data.oparg;
@ -704,7 +704,7 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used)
{ {
int count = 0; int count = 0;
SET_BIT(used, 0); SET_BIT(used, 0);
for (int i = 0; i < _Py_UOP_MAX_TRACE_LENGTH; i++) { for (int i = 0; i < UOP_MAX_TRACE_LENGTH; i++) {
if (!BIT_IS_SET(used, i)) { if (!BIT_IS_SET(used, i)) {
continue; continue;
} }
@ -736,15 +736,15 @@ compute_used(_PyUOpInstruction *buffer, uint32_t *used)
static _PyExecutorObject * static _PyExecutorObject *
make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
{ {
uint32_t used[(_Py_UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 }; uint32_t used[(UOP_MAX_TRACE_LENGTH + 31)/32] = { 0 };
int length = compute_used(buffer, used); int length = compute_used(buffer, used);
_PyUOpExecutorObject *executor = PyObject_NewVar(_PyUOpExecutorObject, &_PyUOpExecutor_Type, length); _PyExecutorObject *executor = PyObject_NewVar(_PyExecutorObject, &_PyUOpExecutor_Type, length);
if (executor == NULL) { if (executor == NULL) {
return NULL; return NULL;
} }
int dest = length - 1; int dest = length - 1;
/* Scan backwards, so that we see the destinations of jumps before the jumps themselves. */ /* Scan backwards, so that we see the destinations of jumps before the jumps themselves. */
for (int i = _Py_UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) { for (int i = UOP_MAX_TRACE_LENGTH-1; i >= 0; i--) {
if (!BIT_IS_SET(used, i)) { if (!BIT_IS_SET(used, i)) {
continue; continue;
} }
@ -763,7 +763,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
dest--; dest--;
} }
assert(dest == -1); assert(dest == -1);
_Py_ExecutorInit((_PyExecutorObject *)executor, dependencies); _Py_ExecutorInit(executor, dependencies);
#ifdef Py_DEBUG #ifdef Py_DEBUG
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
int lltrace = 0; int lltrace = 0;
@ -782,7 +782,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies)
} }
} }
#endif #endif
return (_PyExecutorObject *)executor; return executor;
} }
static int static int
@ -795,8 +795,8 @@ uop_optimize(
{ {
_PyBloomFilter dependencies; _PyBloomFilter dependencies;
_Py_BloomFilter_Init(&dependencies); _Py_BloomFilter_Init(&dependencies);
_PyUOpInstruction buffer[_Py_UOP_MAX_TRACE_LENGTH]; _PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH];
int err = translate_bytecode_to_trace(code, instr, buffer, _Py_UOP_MAX_TRACE_LENGTH, &dependencies); int err = translate_bytecode_to_trace(code, instr, buffer, UOP_MAX_TRACE_LENGTH, &dependencies);
if (err <= 0) { if (err <= 0) {
// Error or nothing translated // Error or nothing translated
return err; return err;
@ -804,7 +804,7 @@ uop_optimize(
OPT_STAT_INC(traces_created); OPT_STAT_INC(traces_created);
char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE");
if (uop_optimize == NULL || *uop_optimize > '0') { if (uop_optimize == NULL || *uop_optimize > '0') {
err = _Py_uop_analyze_and_optimize(code, buffer, _Py_UOP_MAX_TRACE_LENGTH, curr_stackentries); err = _Py_uop_analyze_and_optimize(code, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries);
if (err < 0) { if (err < 0) {
return -1; return -1;
} }
@ -848,7 +848,7 @@ PyUnstable_Optimizer_NewUOpOptimizer(void)
} }
static void static void
counter_dealloc(_PyUOpExecutorObject *self) { counter_dealloc(_PyExecutorObject *self) {
PyObject *opt = (PyObject *)self->trace[0].operand; PyObject *opt = (PyObject *)self->trace[0].operand;
Py_DECREF(opt); Py_DECREF(opt);
uop_dealloc(self); uop_dealloc(self);
@ -857,7 +857,7 @@ counter_dealloc(_PyUOpExecutorObject *self) {
PyTypeObject _PyCounterExecutor_Type = { PyTypeObject _PyCounterExecutor_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0) PyVarObject_HEAD_INIT(&PyType_Type, 0)
.tp_name = "counting_executor", .tp_name = "counting_executor",
.tp_basicsize = offsetof(_PyUOpExecutorObject, trace), .tp_basicsize = offsetof(_PyExecutorObject, trace),
.tp_itemsize = sizeof(_PyUOpInstruction), .tp_itemsize = sizeof(_PyUOpInstruction),
.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
.tp_dealloc = (destructor)counter_dealloc, .tp_dealloc = (destructor)counter_dealloc,

View file

@ -5,7 +5,6 @@
#include "pycore_opcode_utils.h" #include "pycore_opcode_utils.h"
#include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "pycore_uop_metadata.h" #include "pycore_uop_metadata.h"
#include "pycore_uops.h"
#include "pycore_long.h" #include "pycore_long.h"
#include "cpython/optimizer.h" #include "cpython/optimizer.h"
#include <stdbool.h> #include <stdbool.h>