GH-98831: Update generate_cases.py: register inst, opcode_metadata.h (#100735)

(These aren't used yet, but may be coming soon,
and it's easier to keep this tool the same between branches.)

Added a sanity check for all this to compile.c.

Co-authored-by: Irit Katriel <iritkatriel@yahoo.com>
This commit is contained in:
Guido van Rossum 2023-01-05 13:01:07 -08:00 committed by GitHub
parent 28187141cc
commit 14b7f00fdf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 456 additions and 37 deletions

View file

@ -1456,6 +1456,15 @@ regen-cases:
-i $(srcdir)/Python/bytecodes.c \
-o $(srcdir)/Python/generated_cases.c.h.new
$(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new
# Regenerate Python/opcode_metadata.h from Python/bytecodes.c
# using Tools/cases_generator/generate_cases.py --metadata
PYTHONPATH=$(srcdir)/Tools/cases_generator \
$(PYTHON_FOR_REGEN) \
$(srcdir)/Tools/cases_generator/generate_cases.py \
--metadata \
-i $(srcdir)/Python/bytecodes.c \
-o $(srcdir)/Python/opcode_metadata.h.new
$(UPDATE_FILE) $(srcdir)/Python/opcode_metadata.h $(srcdir)/Python/opcode_metadata.h.new
Python/ceval.o: $(srcdir)/Python/opcode_targets.h $(srcdir)/Python/condvar.h $(srcdir)/Python/generated_cases.c.h

View file

@ -764,7 +764,7 @@ dummy_func(
ERROR_IF(w == NULL, error);
}
inst(YIELD_VALUE, (retval --)) {
inst(YIELD_VALUE, (retval -- unused)) {
// NOTE: It's important that YIELD_VALUE never raises an exception!
// The compiler treats any exception raised here as a failed close()
// or throw() call.

View file

@ -36,6 +36,8 @@
#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
#include "pycore_symtable.h" // PySTEntryObject
#include "opcode_metadata.h" // _PyOpcode_opcode_metadata
#define DEFAULT_BLOCK_SIZE 16
#define DEFAULT_CODE_SIZE 128
@ -8664,6 +8666,31 @@ no_redundant_jumps(cfg_builder *g) {
return true;
}
static bool
opcode_metadata_is_sane(cfg_builder *g) {
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
for (int i = 0; i < b->b_iused; i++) {
struct instr *instr = &b->b_instr[i];
int opcode = instr->i_opcode;
assert(opcode <= MAX_REAL_OPCODE);
int pushed = _PyOpcode_opcode_metadata[opcode].n_pushed;
int popped = _PyOpcode_opcode_metadata[opcode].n_popped;
assert((pushed < 0) == (popped < 0));
if (pushed >= 0) {
assert(_PyOpcode_opcode_metadata[opcode].valid_entry);
int effect = stack_effect(opcode, instr->i_oparg, -1);
if (effect != pushed - popped) {
fprintf(stderr,
"op=%d: stack_effect (%d) != pushed (%d) - popped (%d)\n",
opcode, effect, pushed, popped);
return false;
}
}
}
}
return true;
}
static bool
no_empty_basic_blocks(cfg_builder *g) {
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
@ -8847,6 +8874,7 @@ assemble(struct compiler *c, int addNone)
}
assert(no_redundant_jumps(g));
assert(opcode_metadata_is_sane(g));
/* Can't modify the bytecode after computing jump offsets. */
assemble_jump_offsets(g->g_entryblock);

187
Python/opcode_metadata.h Normal file
View file

@ -0,0 +1,187 @@
// This file is generated by Tools/cases_generator/generate_cases.py --metadata
// from Python/bytecodes.c
// Do not edit!
enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };
static const struct {
short n_popped;
short n_pushed;
enum Direction dir_op1;
enum Direction dir_op2;
enum Direction dir_op3;
bool valid_entry;
char instr_format[10];
} _PyOpcode_opcode_metadata[256] = {
[NOP] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[RESUME] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_CLOSURE] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_FAST_CHECK] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_FAST] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_CONST] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[STORE_FAST] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_FAST__LOAD_FAST] = { 0, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" },
[LOAD_FAST__LOAD_CONST] = { 0, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" },
[STORE_FAST__LOAD_FAST] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" },
[STORE_FAST__STORE_FAST] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" },
[LOAD_CONST__LOAD_FAST] = { 0, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBIB" },
[POP_TOP] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[PUSH_NULL] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[END_FOR] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[UNARY_POSITIVE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[UNARY_NEGATIVE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[UNARY_NOT] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[UNARY_INVERT] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BINARY_OP_MULTIPLY_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
[BINARY_OP_MULTIPLY_FLOAT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
[BINARY_OP_SUBTRACT_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
[BINARY_OP_SUBTRACT_FLOAT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
[BINARY_OP_ADD_UNICODE] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
[BINARY_OP_INPLACE_ADD_UNICODE] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BINARY_OP_ADD_FLOAT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
[BINARY_OP_ADD_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
[BINARY_SUBSCR] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
[BINARY_SLICE] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[STORE_SLICE] = { 4, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BINARY_SUBSCR_LIST_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
[BINARY_SUBSCR_TUPLE_INT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
[BINARY_SUBSCR_DICT] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
[BINARY_SUBSCR_GETITEM] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
[LIST_APPEND] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[SET_ADD] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[STORE_SUBSCR] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
[STORE_SUBSCR_LIST_INT] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
[STORE_SUBSCR_DICT] = { 3, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
[DELETE_SUBSCR] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[PRINT_EXPR] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[RAISE_VARARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[INTERPRETER_EXIT] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[RETURN_VALUE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[GET_AITER] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[GET_ANEXT] = { 1, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[GET_AWAITABLE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[SEND] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[ASYNC_GEN_WRAP] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[YIELD_VALUE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[POP_EXCEPT] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[RERAISE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[PREP_RERAISE_STAR] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[END_ASYNC_FOR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CLEANUP_THROW] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[STOPITERATION_ERROR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ASSERTION_ERROR] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_BUILD_CLASS] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[STORE_NAME] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[DELETE_NAME] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[UNPACK_SEQUENCE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[UNPACK_SEQUENCE_TWO_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[UNPACK_SEQUENCE_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[UNPACK_SEQUENCE_LIST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[UNPACK_EX] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[STORE_ATTR] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
[DELETE_ATTR] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[STORE_GLOBAL] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[DELETE_GLOBAL] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_NAME] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_GLOBAL] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_GLOBAL_MODULE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_GLOBAL_BUILTIN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[DELETE_FAST] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[MAKE_CELL] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[DELETE_DEREF] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_CLASSDEREF] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_DEREF] = { 0, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[STORE_DEREF] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[COPY_FREE_VARS] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BUILD_STRING] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BUILD_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BUILD_LIST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LIST_TO_TUPLE] = { 1, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LIST_EXTEND] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[SET_UPDATE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BUILD_SET] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BUILD_MAP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[SETUP_ANNOTATIONS] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BUILD_CONST_KEY_MAP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[DICT_UPDATE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[DICT_MERGE] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[MAP_ADD] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR_INSTANCE_VALUE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR_MODULE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR_WITH_HINT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR_SLOT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR_CLASS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR_PROPERTY] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[STORE_ATTR_INSTANCE_VALUE] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
[STORE_ATTR_WITH_HINT] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
[STORE_ATTR_SLOT] = { 2, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC000" },
[COMPARE_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0" },
[COMPARE_OP_FLOAT_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0IB" },
[COMPARE_OP_INT_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0IB" },
[COMPARE_OP_STR_JUMP] = { 3, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC0IB" },
[IS_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CONTAINS_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CHECK_EG_MATCH] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CHECK_EXC_MATCH] = { 2, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[IMPORT_NAME] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[IMPORT_STAR] = { 1, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[IMPORT_FROM] = { 1, 2, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[JUMP_FORWARD] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[JUMP_BACKWARD] = { 0, 0, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[POP_JUMP_IF_FALSE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[POP_JUMP_IF_TRUE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[POP_JUMP_IF_NOT_NONE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[POP_JUMP_IF_NONE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[JUMP_IF_FALSE_OR_POP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[JUMP_IF_TRUE_OR_POP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[JUMP_BACKWARD_NO_INTERRUPT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[GET_LEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[MATCH_CLASS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[MATCH_MAPPING] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[MATCH_SEQUENCE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[MATCH_KEYS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[GET_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[GET_YIELD_FROM_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[FOR_ITER] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[FOR_ITER_LIST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[FOR_ITER_TUPLE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[FOR_ITER_RANGE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[FOR_ITER_GEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BEFORE_ASYNC_WITH] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BEFORE_WITH] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[WITH_EXCEPT_START] = { 4, 5, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[PUSH_EXC_INFO] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR_METHOD_WITH_VALUES] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR_METHOD_WITH_DICT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR_METHOD_NO_DICT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[LOAD_ATTR_METHOD_LAZY_DICT] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_BOUND_METHOD_EXACT_ARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[KW_NAMES] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_PY_EXACT_ARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_PY_WITH_DEFAULTS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_NO_KW_TYPE_1] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_NO_KW_STR_1] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_NO_KW_TUPLE_1] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_BUILTIN_CLASS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_NO_KW_BUILTIN_O] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_NO_KW_BUILTIN_FAST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_BUILTIN_FAST_WITH_KEYWORDS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_NO_KW_LEN] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_NO_KW_ISINSTANCE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_NO_KW_LIST_APPEND] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_NO_KW_METHOD_DESCRIPTOR_O] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_NO_KW_METHOD_DESCRIPTOR_FAST] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CALL_FUNCTION_EX] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[MAKE_FUNCTION] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[RETURN_GENERATOR] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BUILD_SLICE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[FORMAT_VALUE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[COPY] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[BINARY_OP] = { 2, 1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IBC" },
[SWAP] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[EXTENDED_ARG] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
[CACHE] = { -1, -1, DIR_NONE, DIR_NONE, DIR_NONE, true, "IB" },
};

View file

@ -21,6 +21,9 @@
DEFAULT_OUTPUT = os.path.relpath(
os.path.join(os.path.dirname(__file__), "../../Python/generated_cases.c.h")
)
DEFAULT_METADATA_OUTPUT = os.path.relpath(
os.path.join(os.path.dirname(__file__), "../../Python/opcode_metadata.h")
)
BEGIN_MARKER = "// BEGIN BYTECODES //"
END_MARKER = "// END BYTECODES //"
RE_PREDICTED = r"^\s*(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*$"
@ -37,6 +40,12 @@
arg_parser.add_argument(
"-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)
arg_parser.add_argument(
"-m",
"--metadata",
action="store_true",
help=f"Generate metadata instead, changes output default to {DEFAULT_METADATA_OUTPUT}",
)
class Formatter:
@ -96,6 +105,8 @@ def assign(self, dst: StackEffect, src: StackEffect):
cast = self.cast(dst, src)
if m := re.match(r"^PEEK\((\d+)\)$", dst.name):
self.emit(f"POKE({m.group(1)}, {cast}{src.name});")
elif m := re.match(r"^REG\(oparg(\d+)\)$", dst.name):
self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
else:
self.emit(f"{dst.name} = {cast}{src.name};")
@ -109,7 +120,8 @@ class Instruction:
# Parts of the underlying instruction definition
inst: parser.InstDef
kind: typing.Literal["inst", "op"]
register: bool
kind: typing.Literal["inst", "op", "legacy"] # Legacy means no (input -- output)
name: str
block: parser.Block
block_text: list[str] # Block.text, less curlies, less PREDICT() calls
@ -121,6 +133,9 @@ class Instruction:
cache_effects: list[parser.CacheEffect]
input_effects: list[StackEffect]
output_effects: list[StackEffect]
# Parallel to input_effects
input_registers: list[str] = dataclasses.field(repr=False)
output_registers: list[str] = dataclasses.field(repr=False)
# Set later
family: parser.Family | None = None
@ -129,6 +144,7 @@ class Instruction:
def __init__(self, inst: parser.InstDef):
self.inst = inst
self.register = inst.register
self.kind = inst.kind
self.name = inst.name
self.block = inst.block
@ -150,9 +166,24 @@ def __init__(self, inst: parser.InstDef):
break
self.unmoved_names = frozenset(unmoved_names)
def analyze_registers(self, a: "Analyzer") -> None:
regs = iter(("REG(oparg1)", "REG(oparg2)", "REG(oparg3)"))
try:
self.input_registers = [
next(regs) for ieff in self.input_effects if ieff.name != UNUSED
]
self.output_registers = [
next(regs) for oeff in self.output_effects if oeff.name != UNUSED
]
except StopIteration: # Running out of registers
a.error(
f"Instruction {self.name} has too many register effects", node=self.inst
)
def write(self, out: Formatter) -> None:
"""Write one instruction, sans prologue and epilogue."""
# Write a static assertion that a family's cache size is correct
if family := self.family:
if self.name == family.members[0]:
if cache_size := family.size:
@ -161,10 +192,16 @@ def write(self, out: Formatter) -> None:
f'{self.cache_offset}, "incorrect cache size");'
)
# Write input stack effect variable declarations and initializations
for i, ieffect in enumerate(reversed(self.input_effects), 1):
src = StackEffect(f"PEEK({i})", "")
out.declare(ieffect, src)
if not self.register:
# Write input stack effect variable declarations and initializations
for i, ieffect in enumerate(reversed(self.input_effects), 1):
src = StackEffect(f"PEEK({i})", "")
out.declare(ieffect, src)
else:
# Write input register variable declarations and initializations
for ieffect, reg in zip(self.input_effects, self.input_registers):
src = StackEffect(reg, "")
out.declare(ieffect, src)
# Write output stack effect variable declarations
input_names = {ieffect.name for ieffect in self.input_effects}
@ -172,20 +209,28 @@ def write(self, out: Formatter) -> None:
if oeffect.name not in input_names:
out.declare(oeffect, None)
# out.emit(f"JUMPBY(OPSIZE({self.inst.name}) - 1);")
self.write_body(out, 0)
# Skip the rest if the block always exits
if self.always_exits:
return
# Write net stack growth/shrinkage
diff = len(self.output_effects) - len(self.input_effects)
out.stack_adjust(diff)
if not self.register:
# Write net stack growth/shrinkage
diff = len(self.output_effects) - len(self.input_effects)
out.stack_adjust(diff)
# Write output stack effect assignments
for i, oeffect in enumerate(reversed(self.output_effects), 1):
if oeffect.name not in self.unmoved_names:
dst = StackEffect(f"PEEK({i})", "")
# Write output stack effect assignments
for i, oeffect in enumerate(reversed(self.output_effects), 1):
if oeffect.name not in self.unmoved_names:
dst = StackEffect(f"PEEK({i})", "")
out.assign(dst, oeffect)
else:
# Write output register assignments
for oeffect, reg in zip(self.output_effects, self.output_registers):
dst = StackEffect(reg, "")
out.assign(dst, oeffect)
# Write cache effect
@ -209,7 +254,9 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None
else:
typ = f"uint{bits}_t "
func = f"read_u{bits}"
out.emit(f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);")
out.emit(
f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);"
)
cache_offset += ceffect.size
assert cache_offset == self.cache_offset + cache_adjust
@ -222,14 +269,17 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None
# ERROR_IF() must pop the inputs from the stack.
# The code block is responsible for DECREF()ing them.
# NOTE: If the label doesn't exist, just add it to ceval.c.
ninputs = len(self.input_effects)
# Don't pop common input/output effects at the bottom!
# These aren't DECREF'ed so they can stay.
for ieff, oeff in zip(self.input_effects, self.output_effects):
if ieff.name == oeff.name:
ninputs -= 1
else:
break
if not self.register:
ninputs = len(self.input_effects)
# Don't pop common input/output effects at the bottom!
# These aren't DECREF'ed so they can stay.
for ieff, oeff in zip(self.input_effects, self.output_effects):
if ieff.name == oeff.name:
ninputs -= 1
else:
break
else:
ninputs = 0
if ninputs:
out.write_raw(
f"{extra}{space}if ({cond}) goto pop_{ninputs}_{label};\n"
@ -237,10 +287,11 @@ def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None
else:
out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n")
elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*$", line):
space = m.group(1)
for ieff in self.input_effects:
if ieff.name not in self.unmoved_names:
out.write_raw(f"{extra}{space}Py_DECREF({ieff.name});\n")
if not self.register:
space = m.group(1)
for ieff in self.input_effects:
if ieff.name not in self.unmoved_names:
out.write_raw(f"{extra}{space}Py_DECREF({ieff.name});\n")
else:
out.write_raw(extra + line)
@ -392,6 +443,7 @@ def analyze(self) -> None:
self.find_predictions()
self.map_families()
self.check_families()
self.analyze_register_instrs()
self.analyze_supers_and_macros()
def find_predictions(self) -> None:
@ -458,6 +510,11 @@ def check_families(self) -> None:
family,
)
def analyze_register_instrs(self) -> None:
for instr in self.instrs.values():
if instr.register:
instr.analyze_registers(self)
def analyze_supers_and_macros(self) -> None:
"""Analyze each super- and macro instruction."""
self.super_instrs = {}
@ -563,6 +620,129 @@ def stack_analysis(
]
return stack, -lowest
def write_metadata(self) -> None:
"""Write instruction metadata to output file."""
with open(self.output_filename, "w") as f:
# Write provenance header
f.write(
f"// This file is generated by {os.path.relpath(__file__)} --metadata\n"
)
f.write(f"// from {os.path.relpath(self.filename)}\n")
f.write(f"// Do not edit!\n")
# Create formatter; the rest of the code uses this
self.out = Formatter(f, 0)
# Write variable definition
self.out.emit("enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };")
self.out.emit("static const struct {")
with self.out.indent():
self.out.emit("short n_popped;")
self.out.emit("short n_pushed;")
self.out.emit("enum Direction dir_op1;")
self.out.emit("enum Direction dir_op2;")
self.out.emit("enum Direction dir_op3;")
self.out.emit("bool valid_entry;")
self.out.emit("char instr_format[10];")
self.out.emit("} _PyOpcode_opcode_metadata[256] = {")
# Write metadata for each instruction
for thing in self.everything:
match thing:
case parser.InstDef():
if thing.kind != "op":
self.write_metadata_for_inst(self.instrs[thing.name])
case parser.Super():
self.write_metadata_for_super(self.super_instrs[thing.name])
case parser.Macro():
self.write_metadata_for_macro(self.macro_instrs[thing.name])
case _:
typing.assert_never(thing)
# Write end of array
self.out.emit("};")
def get_format(self, thing: Instruction | SuperInstruction | MacroInstruction) -> str:
"""Get the format string for a single instruction."""
def instr_format(instr: Instruction) -> str:
if instr.register:
fmt = "IBBB"
else:
fmt = "IB"
cache = "C"
for ce in instr.cache_effects:
for _ in range(ce.size):
fmt += cache
cache = "0"
return fmt
match thing:
case Instruction():
format = instr_format(thing)
case SuperInstruction():
format = ""
for part in thing.parts:
format += instr_format(part.instr)
case MacroInstruction():
# Macros don't support register instructions yet
format = "IB"
cache = "C"
for part in thing.parts:
if isinstance(part, parser.CacheEffect):
for _ in range(part.size):
format += cache
cache = "0"
else:
assert isinstance(part, Component)
for ce in part.instr.cache_effects:
for _ in range(ce.size):
format += cache
cache = "0"
case _:
typing.assert_never(thing)
assert len(format) < 10 # Else update the size of instr_format above
return format
def write_metadata_for_inst(self, instr: Instruction) -> None:
"""Write metadata for a single instruction."""
dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
if instr.kind == "legacy":
n_popped = n_pushed = -1
assert not instr.register
else:
n_popped = len(instr.input_effects)
n_pushed = len(instr.output_effects)
if instr.register:
directions: list[str] = []
directions.extend("DIR_READ" for _ in instr.input_effects)
directions.extend("DIR_WRITE" for _ in instr.output_effects)
directions.extend("DIR_NONE" for _ in range(3))
dir_op1, dir_op2, dir_op3 = directions[:3]
format = self.get_format(instr)
self.out.emit(
f' [{instr.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
)
def write_metadata_for_super(self, sup: SuperInstruction) -> None:
"""Write metadata for a super-instruction."""
n_popped = sum(len(comp.instr.input_effects) for comp in sup.parts)
n_pushed = sum(len(comp.instr.output_effects) for comp in sup.parts)
dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
format = self.get_format(sup)
self.out.emit(
f' [{sup.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
)
def write_metadata_for_macro(self, mac: MacroInstruction) -> None:
"""Write metadata for a macro-instruction."""
parts = [comp for comp in mac.parts if isinstance(comp, Component)]
n_popped = sum(len(comp.instr.input_effects) for comp in parts)
n_pushed = sum(len(comp.instr.output_effects) for comp in parts)
dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
format = self.get_format(mac)
self.out.emit(
f' [{mac.name}] = {{ {n_popped}, {n_pushed}, {dir_op1}, {dir_op2}, {dir_op3}, true, "{format}" }},'
)
def write_instructions(self) -> None:
"""Write instructions to output file."""
with open(self.output_filename, "w") as f:
@ -571,7 +751,7 @@ def write_instructions(self) -> None:
f.write(f"// from {os.path.relpath(self.filename)}\n")
f.write(f"// Do not edit!\n")
# Create formatter; the rest of the code uses this.
# Create formatter; the rest of the code uses this
self.out = Formatter(f, 8)
# Write and count instructions of all kinds
@ -581,7 +761,7 @@ def write_instructions(self) -> None:
for thing in self.everything:
match thing:
case parser.InstDef():
if thing.kind == "inst":
if thing.kind != "op":
n_instrs += 1
self.write_instr(self.instrs[thing.name])
case parser.Super():
@ -616,9 +796,13 @@ def write_super(self, sup: SuperInstruction) -> None:
with self.wrap_super_or_macro(sup):
first = True
for comp in sup.parts:
if not first:
if first:
pass
# self.out.emit("JUMPBY(OPSIZE(opcode) - 1);")
else:
self.out.emit("NEXTOPARG();")
self.out.emit("JUMPBY(1);")
# self.out.emit("JUMPBY(OPSIZE(opcode));")
first = False
comp.write_body(self.out, 0)
if comp.instr.cache_offset:
@ -711,12 +895,18 @@ def always_exits(lines: list[str]) -> bool:
def main():
"""Parse command line, parse input, analyze, write output."""
args = arg_parser.parse_args() # Prints message and sys.exit(2) on error
if args.metadata:
if args.output == DEFAULT_OUTPUT:
args.output = DEFAULT_METADATA_OUTPUT
a = Analyzer(args.input, args.output) # Raises OSError if input unreadable
a.parse() # Raises SyntaxError on failure
a.analyze() # Prints messages and sets a.errors on failure
if a.errors:
sys.exit(f"Found {a.errors} errors")
a.write_instructions() # Raises OSError if output can't be written
if args.metadata:
a.write_metadata()
else:
a.write_instructions() # Raises OSError if output can't be written
if __name__ == "__main__":

View file

@ -84,7 +84,8 @@ class OpName(Node):
@dataclass
class InstHeader(Node):
kind: Literal["inst", "op"]
register: bool
kind: Literal["inst", "op", "legacy"] # Legacy means no (inputs -- outputs)
name: str
inputs: list[InputEffect]
outputs: list[OutputEffect]
@ -92,7 +93,8 @@ class InstHeader(Node):
@dataclass
class InstDef(Node):
kind: Literal["inst", "op"]
register: bool
kind: Literal["inst", "op", "legacy"]
name: str
inputs: list[InputEffect]
outputs: list[OutputEffect]
@ -134,16 +136,19 @@ def definition(self) -> InstDef | Super | Macro | Family | None:
def inst_def(self) -> InstDef | None:
if hdr := self.inst_header():
if block := self.block():
return InstDef(hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block)
return InstDef(
hdr.register, hdr.kind, hdr.name, hdr.inputs, hdr.outputs, block
)
raise self.make_syntax_error("Expected block")
return None
@contextual
def inst_header(self) -> InstHeader | None:
# inst(NAME)
# | inst(NAME, (inputs -- outputs))
# | op(NAME, (inputs -- outputs))
# | [register] inst(NAME, (inputs -- outputs))
# | [register] op(NAME, (inputs -- outputs))
# TODO: Make INST a keyword in the lexer.
register = bool(self.expect(lx.REGISTER))
if (tkn := self.expect(lx.IDENTIFIER)) and (kind := tkn.text) in ("inst", "op"):
if self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER)):
name = tkn.text
@ -151,10 +156,10 @@ def inst_header(self) -> InstHeader | None:
inp, outp = self.io_effect()
if self.expect(lx.RPAREN):
if (tkn := self.peek()) and tkn.kind == lx.LBRACE:
return InstHeader(kind, name, inp, outp)
return InstHeader(register, kind, name, inp, outp)
elif self.expect(lx.RPAREN) and kind == "inst":
# No legacy stack effect if kind is "op".
return InstHeader(kind, name, [], [])
return InstHeader(register, "legacy", name, [], [])
return None
def io_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]: