GH-111485: Break up instructions with unused cache entries into component micro-ops (GH-113169)

This commit is contained in:
Mark Shannon 2023-12-18 13:16:45 +00:00 committed by GitHub
parent 771903596b
commit 70d378cdaa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 102 additions and 8 deletions

View file

@ -457,6 +457,7 @@ def test_macro_instruction(self):
PyObject *left;
PyObject *arg2;
PyObject *res;
/* Skip 5 cache entries */
right = stack_pointer[-1];
left = stack_pointer[-2];
arg2 = stack_pointer[-3];
@ -467,6 +468,7 @@ def test_macro_instruction(self):
}
"""
self.run_cases_test(input, output)
def test_unused_caches(self):
input = """
inst(OP, (unused/1, unused/2 --)) {
@ -478,6 +480,8 @@ def test_unused_caches(self):
frame->instr_ptr = next_instr;
next_instr += 4;
INSTRUCTION_STATS(OP);
/* Skip 1 cache entry */
/* Skip 2 cache entries */
body();
DISPATCH();
}

View file

@ -452,6 +452,7 @@
PyObject *sub;
PyObject *dict;
PyObject *res;
/* Skip 1 cache entry */
sub = stack_pointer[-1];
dict = stack_pointer[-2];
DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR);
@ -476,6 +477,7 @@
static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size");
PyObject *sub;
PyObject *container;
/* Skip 1 cache entry */
sub = stack_pointer[-1];
container = stack_pointer[-2];
DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR);
@ -509,6 +511,7 @@
PyObject *sub;
PyObject *list;
PyObject *res;
/* Skip 1 cache entry */
sub = stack_pointer[-1];
list = stack_pointer[-2];
DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
@ -536,6 +539,7 @@
PyObject *sub;
PyObject *str;
PyObject *res;
/* Skip 1 cache entry */
sub = stack_pointer[-1];
str = stack_pointer[-2];
DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
@ -563,6 +567,7 @@
PyObject *sub;
PyObject *tuple;
PyObject *res;
/* Skip 1 cache entry */
sub = stack_pointer[-1];
tuple = stack_pointer[-2];
DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR);
@ -840,6 +845,8 @@
PyObject **args;
PyObject *null;
PyObject *callable;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1001,6 +1008,8 @@
PyObject *self_or_null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1035,6 +1044,8 @@
PyObject *self_or_null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1080,6 +1091,8 @@
PyObject *self_or_null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1119,6 +1132,8 @@
PyObject *self_or_null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1279,6 +1294,8 @@
PyObject *self_or_null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1408,6 +1425,8 @@
PyObject *self_or_null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1444,6 +1463,8 @@
PyObject **args;
PyObject *self;
PyObject *callable;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
self = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1475,6 +1496,8 @@
PyObject *self_or_null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1517,6 +1540,8 @@
PyObject *self_or_null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1559,6 +1584,8 @@
PyObject *self_or_null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1603,6 +1630,8 @@
PyObject *self_or_null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1728,6 +1757,7 @@
PyObject **args;
PyObject *self_or_null;
PyObject *callable;
/* Skip 1 cache entry */
args = &stack_pointer[-oparg];
self_or_null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1774,6 +1804,8 @@
PyObject *null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1801,6 +1833,8 @@
PyObject *null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1828,6 +1862,8 @@
PyObject *null;
PyObject *callable;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
args = &stack_pointer[-oparg];
null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
@ -1982,6 +2018,7 @@
PyObject *right;
PyObject *left;
PyObject *res;
/* Skip 1 cache entry */
right = stack_pointer[-1];
left = stack_pointer[-2];
DEOPT_IF(!PyFloat_CheckExact(left), COMPARE_OP);
@ -2008,6 +2045,7 @@
PyObject *right;
PyObject *left;
PyObject *res;
/* Skip 1 cache entry */
right = stack_pointer[-1];
left = stack_pointer[-2];
DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP);
@ -2038,6 +2076,7 @@
PyObject *right;
PyObject *left;
PyObject *res;
/* Skip 1 cache entry */
right = stack_pointer[-1];
left = stack_pointer[-2];
DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP);
@ -2469,6 +2508,7 @@
INSTRUCTION_STATS(FOR_ITER_GEN);
static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size");
PyObject *iter;
/* Skip 1 cache entry */
iter = stack_pointer[-1];
DEOPT_IF(tstate->interp->eval_frame, FOR_ITER);
PyGenObject *gen = (PyGenObject *)iter;
@ -2843,6 +2883,7 @@
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 4;
INSTRUCTION_STATS(INSTRUMENTED_CALL);
/* Skip 3 cache entries */
int is_meth = PEEK(oparg + 1) != NULL;
int total_args = oparg + is_meth;
PyObject *function = PEEK(oparg + 2);
@ -2929,6 +2970,7 @@
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(INSTRUMENTED_FOR_ITER);
/* Skip 1 cache entry */
_Py_CODEUNIT *target;
PyObject *iter = TOP();
PyObject *next = (*Py_TYPE(iter)->tp_iternext)(iter);
@ -2976,6 +3018,7 @@
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(INSTRUMENTED_JUMP_BACKWARD);
/* Skip 1 cache entry */
CHECK_EVAL_BREAKER();
INSTRUMENTED_JUMP(this_instr, next_instr - oparg, PY_MONITORING_EVENT_JUMP);
DISPATCH();
@ -2993,6 +3036,7 @@
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(INSTRUMENTED_LOAD_SUPER_ATTR);
/* Skip 1 cache entry */
// cancel out the decrement that will happen in LOAD_SUPER_ATTR; we
// don't want to specialize instrumented instructions
INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache);
@ -3003,6 +3047,7 @@
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(INSTRUMENTED_POP_JUMP_IF_FALSE);
/* Skip 1 cache entry */
PyObject *cond = POP();
assert(PyBool_Check(cond));
int flag = Py_IsFalse(cond);
@ -3018,6 +3063,7 @@
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(INSTRUMENTED_POP_JUMP_IF_NONE);
/* Skip 1 cache entry */
PyObject *value = POP();
int flag = Py_IsNone(value);
int offset;
@ -3039,6 +3085,7 @@
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(INSTRUMENTED_POP_JUMP_IF_NOT_NONE);
/* Skip 1 cache entry */
PyObject *value = POP();
int offset;
int nflag = Py_IsNone(value);
@ -3060,6 +3107,7 @@
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(INSTRUMENTED_POP_JUMP_IF_TRUE);
/* Skip 1 cache entry */
PyObject *cond = POP();
assert(PyBool_Check(cond));
int flag = Py_IsTrue(cond);
@ -3216,6 +3264,7 @@
_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;
next_instr += 2;
INSTRUCTION_STATS(JUMP_BACKWARD);
/* Skip 1 cache entry */
CHECK_EVAL_BREAKER();
assert(oparg <= INSTR_OFFSET());
JUMPBY(-oparg);
@ -3429,6 +3478,7 @@
INSTRUCTION_STATS(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN);
static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size");
PyObject *owner;
/* Skip 1 cache entry */
owner = stack_pointer[-1];
uint32_t type_version = read_u32(&this_instr[2].cache);
uint32_t func_version = read_u32(&this_instr[4].cache);
@ -3743,6 +3793,7 @@
INSTRUCTION_STATS(LOAD_ATTR_PROPERTY);
static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size");
PyObject *owner;
/* Skip 1 cache entry */
owner = stack_pointer[-1];
uint32_t type_version = read_u32(&this_instr[2].cache);
uint32_t func_version = read_u32(&this_instr[4].cache);
@ -4300,6 +4351,7 @@
PyObject *class;
PyObject *global_super;
PyObject *attr;
/* Skip 1 cache entry */
self = stack_pointer[-1];
class = stack_pointer[-2];
global_super = stack_pointer[-3];
@ -4328,6 +4380,7 @@
PyObject *global_super;
PyObject *attr;
PyObject *self_or_null;
/* Skip 1 cache entry */
self = stack_pointer[-1];
class = stack_pointer[-2];
global_super = stack_pointer[-3];
@ -4927,6 +4980,7 @@
static_assert(INLINE_CACHE_ENTRIES_SEND == 1, "incorrect cache size");
PyObject *v;
PyObject *receiver;
/* Skip 1 cache entry */
v = stack_pointer[-1];
receiver = stack_pointer[-2];
DEOPT_IF(tstate->interp->eval_frame, SEND);
@ -5157,6 +5211,7 @@
static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size");
PyObject *owner;
PyObject *value;
/* Skip 1 cache entry */
owner = stack_pointer[-1];
value = stack_pointer[-2];
uint32_t type_version = read_u32(&this_instr[2].cache);
@ -5374,6 +5429,7 @@
PyObject *sub;
PyObject *dict;
PyObject *value;
/* Skip 1 cache entry */
sub = stack_pointer[-1];
dict = stack_pointer[-2];
value = stack_pointer[-3];
@ -5394,6 +5450,7 @@
PyObject *sub;
PyObject *list;
PyObject *value;
/* Skip 1 cache entry */
sub = stack_pointer[-1];
list = stack_pointer[-2];
value = stack_pointer[-3];
@ -5470,6 +5527,7 @@
static_assert(INLINE_CACHE_ENTRIES_TO_BOOL == 3, "incorrect cache size");
PyObject *value;
PyObject *res;
/* Skip 1 cache entry */
value = stack_pointer[-1];
uint32_t version = read_u32(&this_instr[2].cache);
// This one is a bit weird, because we expect *some* failures:
@ -5488,6 +5546,8 @@
INSTRUCTION_STATS(TO_BOOL_BOOL);
static_assert(INLINE_CACHE_ENTRIES_TO_BOOL == 3, "incorrect cache size");
PyObject *value;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
value = stack_pointer[-1];
DEOPT_IF(!PyBool_Check(value), TO_BOOL);
STAT_INC(TO_BOOL, hit);
@ -5501,6 +5561,8 @@
static_assert(INLINE_CACHE_ENTRIES_TO_BOOL == 3, "incorrect cache size");
PyObject *value;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
value = stack_pointer[-1];
DEOPT_IF(!PyLong_CheckExact(value), TO_BOOL);
STAT_INC(TO_BOOL, hit);
@ -5523,6 +5585,8 @@
static_assert(INLINE_CACHE_ENTRIES_TO_BOOL == 3, "incorrect cache size");
PyObject *value;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
value = stack_pointer[-1];
DEOPT_IF(!PyList_CheckExact(value), TO_BOOL);
STAT_INC(TO_BOOL, hit);
@ -5539,6 +5603,8 @@
static_assert(INLINE_CACHE_ENTRIES_TO_BOOL == 3, "incorrect cache size");
PyObject *value;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
value = stack_pointer[-1];
// This one is a bit weird, because we expect *some* failures:
DEOPT_IF(!Py_IsNone(value), TO_BOOL);
@ -5555,6 +5621,8 @@
static_assert(INLINE_CACHE_ENTRIES_TO_BOOL == 3, "incorrect cache size");
PyObject *value;
PyObject *res;
/* Skip 1 cache entry */
/* Skip 2 cache entries */
value = stack_pointer[-1];
DEOPT_IF(!PyUnicode_CheckExact(value), TO_BOOL);
STAT_INC(TO_BOOL, hit);
@ -5669,6 +5737,7 @@
static_assert(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE == 1, "incorrect cache size");
PyObject *seq;
PyObject **values;
/* Skip 1 cache entry */
seq = stack_pointer[-1];
values = &stack_pointer[-1];
DEOPT_IF(!PyList_CheckExact(seq), UNPACK_SEQUENCE);
@ -5690,6 +5759,7 @@
static_assert(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE == 1, "incorrect cache size");
PyObject *seq;
PyObject **values;
/* Skip 1 cache entry */
seq = stack_pointer[-1];
values = &stack_pointer[-1];
DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE);
@ -5711,6 +5781,7 @@
static_assert(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE == 1, "incorrect cache size");
PyObject *seq;
PyObject **values;
/* Skip 1 cache entry */
seq = stack_pointer[-1];
values = &stack_pointer[-1];
DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE);

View file

@ -234,9 +234,9 @@ def analyze_stack(op: parser.InstDef) -> StackEffect:
return StackEffect(inputs, outputs)
def analyze_caches(op: parser.InstDef) -> list[CacheEntry]:
def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]:
caches: list[parser.CacheEffect] = [
i for i in op.inputs if isinstance(i, parser.CacheEffect)
i for i in inputs if isinstance(i, parser.CacheEffect)
]
return [CacheEntry(i.name, int(i.size)) for i in caches]
@ -314,13 +314,13 @@ def compute_properties(op: parser.InstDef) -> Properties:
)
def make_uop(name: str, op: parser.InstDef) -> Uop:
def make_uop(name: str, op: parser.InstDef, inputs: list[parser.InputEffect]) -> Uop:
return Uop(
name=name,
context=op.context,
annotations=op.annotations,
stack=analyze_stack(op),
caches=analyze_caches(op),
caches=analyze_caches(inputs),
body=op.block.tokens,
properties=compute_properties(op),
)
@ -333,7 +333,7 @@ def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None:
raise override_error(
op.name, op.context, uops[op.name].context, op.tokens[0]
)
uops[op.name] = make_uop(op.name, op)
uops[op.name] = make_uop(op.name, op, op.inputs)
def add_instruction(
@ -347,10 +347,27 @@ def desugar_inst(
) -> None:
assert inst.kind == "inst"
name = inst.name
uop = make_uop("_" + inst.name, inst)
op_inputs: list[parser.InputEffect] = []
parts: list[Part] = []
uop_index = -1
# Move unused cache entries to the Instruction, removing them from the Uop.
for input in inst.inputs:
if isinstance(input, parser.CacheEffect) and input.name == "unused":
parts.append(Skip(input.size))
else:
op_inputs.append(input)
if uop_index < 0:
uop_index = len(parts)
# Place holder for the uop.
parts.append(Skip(0))
uop = make_uop("_" + inst.name, inst, op_inputs)
uop.implicitly_created = True
uops[inst.name] = uop
add_instruction(name, [uop], instructions)
if uop_index < 0:
parts.append(uop)
else:
parts[uop_index] = uop
add_instruction(name, parts, instructions)
def add_macro(

View file

@ -7,6 +7,7 @@
Context,
CacheEffect,
StackEffect,
InputEffect,
OpName,
AstNode,
)

View file

@ -151,7 +151,8 @@ def generate_tier1(
stack = Stack()
for part in inst.parts:
# Only emit braces if more than one uop
offset = write_uop(part, out, offset, stack, inst, len(inst.parts) > 1)
insert_braces = len([p for p in inst.parts if isinstance(p, Uop)]) > 1
offset = write_uop(part, out, offset, stack, inst, insert_braces)
out.start_line()
if not inst.parts[-1].properties.always_exits:
stack.flush(out)