GH-111485: Separate out parsing, analysis and code-gen phases of tier 1 code generator (GH-112299)

This commit is contained in:
Mark Shannon 2023-12-07 12:49:40 +00:00 committed by GitHub
parent 3d712a9f4c
commit b449415b2f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 1675 additions and 526 deletions

View file

@ -1285,11 +1285,11 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
case _INIT_CALL_PY_EXACT_ARGS:
return 1;
case _PUSH_FRAME:
return 1;
return 0;
case CALL_BOUND_METHOD_EXACT_ARGS:
return 1;
return 0;
case CALL_PY_EXACT_ARGS:
return 1;
return 0;
case CALL_PY_WITH_DEFAULTS:
return 1;
case CALL_TYPE_1:

View file

@ -1587,7 +1587,6 @@ regen-cases:
$(PYTHON_FOR_REGEN) \
$(srcdir)/Tools/cases_generator/generate_cases.py \
$(CASESFLAG) \
-o $(srcdir)/Python/generated_cases.c.h.new \
-n $(srcdir)/Include/opcode_ids.h.new \
-t $(srcdir)/Python/opcode_targets.h.new \
-m $(srcdir)/Include/internal/pycore_opcode_metadata.h.new \
@ -1595,6 +1594,8 @@ regen-cases:
-p $(srcdir)/Lib/_opcode_metadata.py.new \
-a $(srcdir)/Python/abstract_interp_cases.c.h.new \
$(srcdir)/Python/bytecodes.c
$(PYTHON_FOR_REGEN) \
$(srcdir)/Tools/cases_generator/tier1_generator.py -o $(srcdir)/Python/generated_cases.c.h.new $(srcdir)/Python/bytecodes.c
$(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new
$(UPDATE_FILE) $(srcdir)/Include/opcode_ids.h $(srcdir)/Include/opcode_ids.h.new
$(UPDATE_FILE) $(srcdir)/Python/opcode_targets.h $(srcdir)/Python/opcode_targets.h.new

View file

@ -774,7 +774,8 @@
}
case _PUSH_FRAME: {
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
STACK_SHRINK(1);
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(0)), true);
break;
}

View file

@ -800,11 +800,11 @@ dummy_func(
// We also push it onto the stack on exit, but that's a
// different frame, and it's accounted for by _PUSH_FRAME.
op(_POP_FRAME, (retval --)) {
assert(EMPTY());
#if TIER_ONE
assert(frame != &entry_frame);
#endif
STORE_SP();
assert(EMPTY());
_Py_LeaveRecursiveCallPy(tstate);
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
@ -1165,7 +1165,6 @@ dummy_func(
}
}
inst(STORE_NAME, (v -- )) {
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
PyObject *ns = LOCALS();
@ -3130,7 +3129,7 @@ dummy_func(
// The 'unused' output effect represents the return value
// (which will be pushed when the frame returns).
// It is needed so CALL_PY_EXACT_ARGS matches its family.
op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused)) {
op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused if (0))) {
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);

View file

@ -661,11 +661,11 @@
PyObject *retval;
retval = stack_pointer[-1];
STACK_SHRINK(1);
assert(EMPTY());
#if TIER_ONE
assert(frame != &entry_frame);
#endif
STORE_SP();
assert(EMPTY());
_Py_LeaveRecursiveCallPy(tstate);
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,456 @@
from dataclasses import dataclass
import lexer
import parser
from typing import Optional
@dataclass
class Properties:
    """Boolean flags describing a uop, a cache skip, or a whole instruction.

    `from_list` merges the flags of several parts into a single record.
    """

    escapes: bool
    infallible: bool
    deopts: bool
    oparg: bool
    jumps: bool
    ends_with_eval_breaker: bool
    needs_this: bool
    always_exits: bool
    stores_sp: bool

    def dump(self, indent: str) -> None:
        """Print every flag as ``name: value`` on one line, prefixed by *indent*.

        The indent is written exactly once; the previous implementation
        printed it twice, doubling the requested indentation.
        """
        text = ", ".join(f"{key}: {value}" for key, value in self.__dict__.items())
        print(indent, text, sep="")

    @staticmethod
    def from_list(properties: list["Properties"]) -> "Properties":
        """Combine the properties of several parts.

        `infallible` holds only if every part is infallible; every other
        flag holds if at least one part sets it.
        """
        return Properties(
            escapes=any(p.escapes for p in properties),
            infallible=all(p.infallible for p in properties),
            deopts=any(p.deopts for p in properties),
            oparg=any(p.oparg for p in properties),
            jumps=any(p.jumps for p in properties),
            ends_with_eval_breaker=any(p.ends_with_eval_breaker for p in properties),
            needs_this=any(p.needs_this for p in properties),
            always_exits=any(p.always_exits for p in properties),
            stores_sp=any(p.stores_sp for p in properties),
        )
# Shared Properties record returned by Skip.properties: infallible is True
# and every other flag is False.
SKIP_PROPERTIES = Properties(
escapes=False,
infallible=True,
deopts=False,
oparg=False,
jumps=False,
ends_with_eval_breaker=False,
needs_this=False,
always_exits=False,
stores_sp=False,
)
@dataclass
class Skip:
    """Unused cache entry"""

    # Number of cache entries covered by this skip.
    size: int

    @property
    def name(self) -> str:
        """Display name of the form ``unused/<size>``."""
        return "unused/" + format(self.size)

    @property
    def properties(self) -> Properties:
        """Every skip shares the same module-level SKIP_PROPERTIES record."""
        return SKIP_PROPERTIES
@dataclass
class StackItem:
name: str
type: str | None
condition: str | None
size: str
peek: bool = False
def __str__(self) -> str:
cond = f" if ({self.condition})" if self.condition else ""
size = f"[{self.size}]" if self.size != "1" else ""
type = "" if self.type is None else f"{self.type} "
return f"{type}{self.name}{size}{cond} {self.peek}"
def is_array(self) -> bool:
return self.type == "PyObject **"
@dataclass
class StackEffect:
    """The input and output stack items of a uop."""

    inputs: list[StackItem]
    outputs: list[StackItem]

    def __str__(self) -> str:
        """Render as ``(inputs -- outputs)`` with comma-separated items."""
        popped = ", ".join(str(item) for item in self.inputs)
        pushed = ", ".join(str(item) for item in self.outputs)
        return f"({popped} -- {pushed})"
@dataclass
class CacheEntry:
    """A named inline-cache entry and its size."""

    name: str
    size: int

    def __str__(self) -> str:
        """Render as ``name/size``."""
        return "/".join((self.name, format(self.size)))
@dataclass
class Uop:
    """A micro-op: its stack effect, cache entries, body tokens and properties."""

    name: str
    context: parser.Context | None
    annotations: list[str]
    stack: StackEffect
    caches: list[CacheEntry]
    body: list[lexer.Token]
    properties: Properties
    # Lazily computed total cache size; a negative value means "not computed yet".
    _size: int = -1

    def dump(self, indent: str) -> None:
        """Print the name and annotations, the stack/caches, then the properties."""
        annotation_text = ", ".join(self.annotations) if self.annotations else ""
        print(indent, self.name, annotation_text)
        cache_text = ", ".join(str(cache) for cache in self.caches)
        print(indent, self.stack, cache_text)
        self.properties.dump(" " + indent)

    @property
    def size(self) -> int:
        """Sum of the sizes of all cache entries, computed once and memoised."""
        if self._size < 0:
            total = 0
            for cache in self.caches:
                total += cache.size
            self._size = total
        return self._size
# A component of an Instruction: either a real Uop or a Skip of unused cache.
Part = Uop | Skip
@dataclass
class Instruction:
    """A tier 1 instruction assembled from one or more parts."""

    name: str
    parts: list[Part]
    # Cached result of combining the parts' properties; None until first use.
    _properties: Properties | None
    is_target: bool = False
    family: Optional["Family"] = None

    @property
    def properties(self) -> Properties:
        """Combined properties of all parts, computed on first access."""
        cached = self._properties
        if cached is None:
            cached = self._properties = self._compute_properties()
        return cached

    def _compute_properties(self) -> Properties:
        """Merge the properties of every part with Properties.from_list."""
        per_part = [part.properties for part in self.parts]
        return Properties.from_list(per_part)

    def dump(self, indent: str) -> None:
        """Print ``name = part, part, ...`` followed by the combined properties."""
        part_names = ", ".join(part.name for part in self.parts)
        print(indent, self.name, "=", part_names)
        self.properties.dump(" " + indent)

    @property
    def size(self) -> int:
        """One unit for the instruction itself plus the size of every part."""
        total = 1
        for part in self.parts:
            total += part.size
        return total
@dataclass
class PseudoInstruction:
    """A pseudo instruction together with the instructions it can stand for."""

    name: str
    targets: list[Instruction]
    flags: list[str]

    def dump(self, indent: str) -> None:
        """Print ``name -> target or target ...``."""
        alternatives = " or ".join(target.name for target in self.targets)
        print(indent, self.name, "->", alternatives)
@dataclass
class Family:
    """A named family of instructions sharing a cache size."""

    name: str
    size: str
    members: list[Instruction]

    def dump(self, indent: str) -> None:
        """Print ``name =  member, member, ...``."""
        member_names = ", ".join(member.name for member in self.members)
        print(indent, self.name, "= ", member_names)
@dataclass
class Analysis:
    """Result of analyze_forest: every table is keyed by the entry's name."""

    instructions: dict[str, Instruction]
    uops: dict[str, Uop]
    families: dict[str, Family]
    pseudos: dict[str, PseudoInstruction]
def analysis_error(message: str, tkn: lexer.Token) -> SyntaxError:
    """Build (but do not raise) a SyntaxError for *message* located at *tkn*.

    The filename and source-line arguments are passed as empty strings.
    """
    # To do -- support file and line output
    line, column = tkn.line, tkn.column
    return lexer.make_syntax_error(message, "", line, column, "")
def override_error(
    name: str,
    context: parser.Context | None,
    prev_context: parser.Context | None,
    token: lexer.Token,
) -> SyntaxError:
    """Build the error reported when *name* is defined twice without 'override'."""
    message = (
        f"Duplicate definition of '{name}' @ {context} "
        f"previous definition @ {prev_context}"
    )
    return analysis_error(message, token)
def convert_stack_item(item: parser.StackEffect) -> StackItem:
    """Turn a parsed stack effect into a StackItem; a missing size becomes "1"."""
    size = item.size or "1"
    return StackItem(name=item.name, type=item.type, condition=item.cond, size=size)
def analyze_stack(op: parser.InstDef) -> StackEffect:
    """Compute the stack effect of *op*.

    Cache effects among the inputs are ignored. An input and output at the
    same position with the same name are both marked as peeks.
    """
    popped: list[StackItem] = []
    for effect in op.inputs:
        if isinstance(effect, parser.StackEffect):
            popped.append(convert_stack_item(effect))
    pushed: list[StackItem] = [convert_stack_item(effect) for effect in op.outputs]
    for before, after in zip(popped, pushed):
        if before.name == after.name:
            before.peek = True
            after.peek = True
    return StackEffect(popped, pushed)
def analyze_caches(op: parser.InstDef) -> list[CacheEntry]:
    """Return a CacheEntry for every cache effect among the inputs of *op*."""
    return [
        CacheEntry(effect.name, int(effect.size))
        for effect in op.inputs
        if isinstance(effect, parser.CacheEffect)
    ]
def variable_used(node: parser.InstDef, name: str) -> bool:
    """Determine whether a variable with a given name is used in a node."""
    for token in node.tokens:
        if token.kind == "IDENTIFIER" and token.text == name:
            return True
    return False
def is_infallible(op: parser.InstDef) -> bool:
    """Return True when *op* mentions none of the error-path identifiers."""
    error_markers = (
        "ERROR_IF",
        "error",
        "pop_1_error",
        "exception_unwind",
        "resume_with_error",
    )
    for marker in error_markers:
        if variable_used(op, marker):
            return False
    return True
from flags import makes_escaping_api_call
# Token texts that always_exits treats as leaving the instruction when they
# appear as a KEYWORD or IDENTIFIER token.
EXITS = {
"DISPATCH",
"GO_TO_INSTRUCTION",
"Py_UNREACHABLE",
"DISPATCH_INLINED",
"DISPATCH_GOTO",
}
def eval_breaker_at_end(op: parser.InstDef) -> bool:
    """Return True when the fifth token from the end is CHECK_EVAL_BREAKER."""
    # NOTE(review): presumably matches a trailing `CHECK_EVAL_BREAKER ( ) ; }`
    # -- confirm against the lexer's tokenisation.
    candidate = op.tokens[-5]
    return candidate.text == "CHECK_EVAL_BREAKER"
def always_exits(op: parser.InstDef) -> bool:
    """Return True if *op* contains an exit outside any nested brace block.

    Tokens nested more than one brace deep are ignored. An exit is a GOTO or
    RETURN token, a KEYWORD/IDENTIFIER listed in EXITS, or a DEOPT_IF/ERROR_IF
    whose first argument token is the literal ``true``.
    """
    brace_depth = 0
    tokens = iter(op.tokens)
    for token in tokens:
        kind = token.kind
        if kind == "LBRACE":
            brace_depth += 1
            continue
        if kind == "RBRACE":
            brace_depth -= 1
            continue
        if brace_depth > 1:
            continue
        if kind in ("GOTO", "RETURN"):
            return True
        if kind == "KEYWORD":
            if token.text in EXITS:
                return True
            continue
        if kind != "IDENTIFIER":
            continue
        if token.text in EXITS:
            return True
        if token.text in ("DEOPT_IF", "ERROR_IF"):
            next(tokens)  # '('
            first_argument = next(tokens)
            if first_argument.text == "true":
                return True
    return False
def compute_properties(op: parser.InstDef) -> Properties:
    """Derive the Properties of *op* from the tokens of its definition."""
    escapes = makes_escaping_api_call(op)
    infallible = is_infallible(op)
    deopts = variable_used(op, "DEOPT_IF")
    uses_oparg = variable_used(op, "oparg")
    jumps = variable_used(op, "JUMPBY")
    ends_with_eval_breaker = eval_breaker_at_end(op)
    needs_this = variable_used(op, "this_instr")
    exits = always_exits(op)
    stores_sp = variable_used(op, "STORE_SP")
    return Properties(
        escapes,
        infallible,
        deopts,
        uses_oparg,
        jumps,
        ends_with_eval_breaker,
        needs_this,
        exits,
        stores_sp,
    )
def make_uop(name: str, op: parser.InstDef) -> Uop:
    """Build a Uop called *name* from the parsed definition *op*."""
    stack = analyze_stack(op)
    caches = analyze_caches(op)
    body = op.block.tokens
    properties = compute_properties(op)
    return Uop(name, op.context, op.annotations, stack, caches, body, properties)
def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None:
    """Register the ``op`` definition *op* in *uops*.

    Redefining an existing name is only allowed when the new definition
    carries the "override" annotation; otherwise an error is raised.
    """
    assert op.kind == "op"
    is_duplicate = op.name in uops
    if is_duplicate and "override" not in op.annotations:
        previous = uops[op.name]
        raise override_error(op.name, op.context, previous.context, op.tokens[0])
    uops[op.name] = make_uop(op.name, op)
def add_instruction(
    name: str, parts: list[Part], instructions: dict[str, Instruction]
) -> None:
    """Create an Instruction from *parts* and store it under *name*."""
    instruction = Instruction(name, parts, None)
    instructions[name] = instruction
def desugar_inst(
    inst: parser.InstDef, instructions: dict[str, Instruction], uops: dict[str, Uop]
) -> None:
    """Expand an ``inst`` definition into a uop plus a one-part instruction.

    The uop is named with a leading underscore but is stored in *uops* under
    the instruction's own name.
    """
    assert inst.kind == "inst"
    implicit_uop = make_uop("_" + inst.name, inst)
    uops[inst.name] = implicit_uop
    add_instruction(inst.name, [implicit_uop], instructions)
def add_macro(
macro: parser.Macro, instructions: dict[str, Instruction], uops: dict[str, Uop]
) -> None:
parts: list[Uop | Skip] = []
for part in macro.uops:
match part:
case parser.OpName():
if part.name not in uops:
analysis_error(f"No Uop named {part.name}", macro.tokens[0])
parts.append(uops[part.name])
case parser.CacheEffect():
parts.append(Skip(part.size))
case _:
assert False
assert parts
add_instruction(macro.name, parts, instructions)
def add_family(
    pfamily: parser.Family,
    instructions: dict[str, Instruction],
    families: dict[str, Family],
) -> None:
    """Create a Family from *pfamily*, link its members to it and register it."""
    members = [instructions[member_name] for member_name in pfamily.members]
    family = Family(pfamily.name, pfamily.size, members)
    for instruction in members:
        instruction.family = family
    # The head of the family is an implicit jump target for DEOPTs
    head = instructions[family.name]
    head.is_target = True
    families[family.name] = family
def add_pseudo(
    pseudo: parser.Pseudo,
    instructions: dict[str, Instruction],
    pseudos: dict[str, PseudoInstruction],
) -> None:
    """Register *pseudo*, resolving its target names to Instruction objects."""
    targets = [instructions[target] for target in pseudo.targets]
    pseudos[pseudo.name] = PseudoInstruction(pseudo.name, targets, pseudo.flags)
def analyze_forest(forest: list[parser.AstNode]) -> Analysis:
    """Turn the parsed definitions in *forest* into an Analysis.

    Several passes run in this order, because later ones look up names
    registered by earlier ones:
      1. ``inst`` and ``op`` definitions,
      2. macros,
      3. families and pseudo instructions,
      4. marking every instruction named by GO_TO_INSTRUCTION(...) as a target.
    """
    instructions: dict[str, Instruction] = {}
    uops: dict[str, Uop] = {}
    families: dict[str, Family] = {}
    pseudos: dict[str, PseudoInstruction] = {}

    # Pass 1: instruction and op definitions. Other known node kinds are left
    # for the later passes; anything else is a programming error.
    deferred_kinds = (parser.Macro, parser.Family, parser.Pseudo)
    for node in forest:
        if isinstance(node, parser.InstDef):
            if node.kind == "inst":
                desugar_inst(node, instructions, uops)
            else:
                assert node.kind == "op"
                add_op(node, uops)
        elif not isinstance(node, deferred_kinds):
            assert False

    # Pass 2: macros.
    for node in forest:
        if isinstance(node, parser.Macro):
            add_macro(node, instructions, uops)

    # Pass 3: families and pseudo instructions, in source order.
    for node in forest:
        if isinstance(node, parser.Family):
            add_family(node, instructions, families)
        elif isinstance(node, parser.Pseudo):
            add_pseudo(node, instructions, pseudos)

    # Pass 4: find GO_TO_INSTRUCTION ( NAME and mark NAME as a jump target.
    for uop in uops.values():
        tokens = iter(uop.body)
        for token in tokens:
            if token.kind != "IDENTIFIER" or token.text != "GO_TO_INSTRUCTION":
                continue
            if next(tokens).kind != "LPAREN":
                continue
            target = next(tokens)
            if target.kind == "IDENTIFIER" and target.text in instructions:
                instructions[target.text].is_target = True

    # Hack
    instructions["BINARY_OP_INPLACE_ADD_UNICODE"].family = families["BINARY_OP"]
    return Analysis(instructions, uops, families, pseudos)
def analyze_files(filenames: list[str]) -> Analysis:
    """Parse the given definition files and analyze the resulting forest."""
    forest = parser.parse_files(filenames)
    return analyze_forest(forest)
def dump_analysis(analysis: Analysis) -> None:
    """Print every table of *analysis* under its own heading."""
    sections = (
        ("Uops:", analysis.uops),
        ("Instructions:", analysis.instructions),
        ("Families:", analysis.families),
        ("Pseudos:", analysis.pseudos),
    )
    for heading, table in sections:
        print(heading)
        for entry in table.values():
            entry.dump(" ")
if __name__ == "__main__":
    import sys

    # Every command-line argument is an input file; with none given, say so.
    if len(sys.argv) >= 2:
        filenames = sys.argv[1:]
        dump_analysis(analyze_files(filenames))
    else:
        print("No input")

View file

@ -0,0 +1,111 @@
from lexer import Token
from typing import TextIO
class CWriter:
    """A writer that understands tokens and how to format C code"""

    last_token: Token | None

    def __init__(self, out: TextIO, indent: int, line_directives: bool):
        """Write to *out*, starting *indent* levels (4 columns each) deep.

        When *line_directives* is true, a ``#line`` directive is written each
        time output moves on to a token from a later source line.
        """
        self.out = out
        self.base_column = indent * 4
        # Stack of indentation columns; the last entry is the current one.
        self.indents = [level * 4 for level in range(indent + 1)]
        self.line_directives = line_directives
        self.last_token = None
        self.newline = True

    def set_position(self, tkn: Token) -> None:
        """Write the whitespace that should precede *tkn*.

        Relative to the previously emitted token this is either a line break
        plus indentation, or the same horizontal gap as in the source.
        """
        previous = self.last_token
        if previous is None:
            if self.newline:
                self.out.write(" " * self.indents[-1])
        elif previous.line < tkn.line:
            self.out.write("\n")
            if self.line_directives:
                self.out.write(f'#line {tkn.line} "{tkn.filename}"\n')
            self.out.write(" " * self.indents[-1])
        else:
            self.out.write(" " * (tkn.column - previous.end_column))
        self.last_token = tkn
        self.newline = False

    def emit_at(self, txt: str, where: Token) -> None:
        """Write *txt* at the position *where* would have occupied."""
        self.set_position(where)
        self.out.write(txt)

    def maybe_dedent(self, txt: str) -> None:
        """Pop one indentation level if *txt* closes a paren, brace or is a label."""
        closes_paren = txt.count("(") < txt.count(")")
        if closes_paren or "}" in txt or is_label(txt):
            self.indents.pop()

    def maybe_indent(self, txt: str) -> None:
        """Push an indentation level if *txt* opens a paren, brace or is a label."""
        opens_paren = txt.count("(") > txt.count(")")
        if opens_paren and self.last_token:
            # Align continuation lines with the open paren unless that column
            # is not deeper than the current indent or is beyond column 40.
            column = self.last_token.end_column - 1
            if column <= self.indents[-1] or column > 40:
                column = self.indents[-1] + 4
            self.indents.append(column)
        elif "{" in txt or is_label(txt):
            self.indents.append(self.indents[-1] + 4)

    def emit_text(self, txt: str) -> None:
        """Write *txt* verbatim, with no position or indent handling."""
        self.out.write(txt)

    def emit_multiline_comment(self, tkn: Token) -> None:
        """Write a comment spanning several lines, re-indenting its later lines."""
        self.set_position(tkn)
        for index, line in enumerate(tkn.text.splitlines(True)):
            stripped = line.lstrip()
            if index == 0:
                padding = 0
            else:
                # Lines starting with '*' get one extra column, others three.
                padding = self.indents[-1] + (1 if stripped.startswith("*") else 3)
            self.out.write(" " * padding)
            self.out.write(stripped)

    def emit_token(self, tkn: Token) -> None:
        """Write one source token, adjusting the indentation stack around it."""
        if tkn.kind == "COMMENT" and "\n" in tkn.text:
            self.emit_multiline_comment(tkn)
            return
        self.maybe_dedent(tkn.text)
        self.set_position(tkn)
        self.emit_text(tkn.text)
        self.maybe_indent(tkn.text)

    def emit_str(self, txt: str) -> None:
        """Write generated text, indenting it when it starts a new line."""
        self.maybe_dedent(txt)
        if self.newline and txt:
            if txt[0] != "\n":
                self.out.write(" " * self.indents[-1])
            self.newline = False
        self.emit_text(txt)
        if txt.endswith("\n"):
            self.newline = True
        self.maybe_indent(txt)
        self.last_token = None

    def emit(self, txt: str | Token) -> None:
        """Dispatch to emit_token or emit_str depending on the argument type."""
        if isinstance(txt, Token):
            self.emit_token(txt)
            return
        if isinstance(txt, str):
            self.emit_str(txt)
            return
        assert False

    def start_line(self) -> None:
        """Make sure the next output begins on a fresh line."""
        if not self.newline:
            self.out.write("\n")
        self.newline = True
        self.last_token = None
def is_label(txt: str) -> bool:
    """Return True if *txt* ends with ':' and does not start with '//'."""
    if txt.startswith("//"):
        return False
    return txt.endswith(":")

View file

@ -883,7 +883,6 @@ def main() -> None:
return
# These raise OSError if output can't be written
a.write_instructions(args.output, args.emit_line_directives)
a.assign_opcode_ids()
a.write_opcode_ids(args.opcode_ids_h, args.opcode_targets_h)

View file

@ -112,7 +112,7 @@ def choice(*opts: str) -> str:
char = r"\'.\'" # TODO: escape sequence
CHARACTER = "CHARACTER"
comment_re = r"//.*|/\*([^*]|\*[^/])*\*/"
comment_re = r"(//.*)|/\*([^*]|\*[^/])*\*/"
COMMENT = "COMMENT"
newline = r"\n"
@ -234,6 +234,7 @@ def make_syntax_error(
@dataclass(slots=True)
class Token:
filename: str
kind: str
text: str
begin: tuple[int, int]
@ -261,7 +262,7 @@ def width(self) -> int:
def replaceText(self, txt: str) -> "Token":
assert isinstance(txt, str)
return Token(self.kind, txt, self.begin, self.end)
return Token(self.filename, self.kind, txt, self.begin, self.end)
def __repr__(self) -> str:
b0, b1 = self.begin
@ -272,7 +273,7 @@ def __repr__(self) -> str:
return f"{self.kind}({self.text!r}, {b0}:{b1}, {e0}:{e1})"
def tokenize(src: str, line: int = 1, filename: str | None = None) -> Iterator[Token]:
def tokenize(src: str, line: int = 1, filename: str = "") -> Iterator[Token]:
linestart = -1
for m in matcher.finditer(src):
start, end = m.span()
@ -323,7 +324,7 @@ def tokenize(src: str, line: int = 1, filename: str | None = None) -> Iterator[T
else:
begin = line, start - linestart
if kind != "\n":
yield Token(kind, text, begin, (line, start - linestart + len(text)))
yield Token(filename, kind, text, begin, (line, start - linestart + len(text)))
def to_text(tkns: list[Token], dedent: int = 0) -> str:

View file

@ -11,3 +11,5 @@ strict = True
strict_concatenate = True
enable_error_code = ignore-without-code,redundant-expr,truthy-bool,possibly-undefined
warn_unreachable = True
allow_redefinition = True
implicit_reexport = True

View file

@ -0,0 +1,55 @@
from parsing import (
InstDef,
Macro,
Pseudo,
Family,
Parser,
Context,
CacheEffect,
StackEffect,
OpName,
AstNode,
)
from formatting import prettify_filename
# Token texts that parse_files searches for to delimit the region of an
# input file that holds the instruction definitions.
BEGIN_MARKER = "// BEGIN BYTECODES //"
END_MARKER = "// END BYTECODES //"
def parse_files(filenames: list[str]) -> list[AstNode]:
    """Parse the definitions between the markers of each file, in order.

    Raises a syntax error (built by the parser) when a file has no begin
    marker or contains tokens that do not form a definition.
    """
    nodes: list[AstNode] = []
    for filename in filenames:
        with open(filename) as source_file:
            source = source_file.read()
        psr = Parser(source, filename=prettify_filename(filename))

        # Skip until begin marker
        while tkn := psr.next(raw=True):
            if tkn.text == BEGIN_MARKER:
                break
        else:
            raise psr.make_syntax_error(
                f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}"
            )
        definitions_start = psr.getpos()

        # Find end marker, then delete everything after it
        while tkn := psr.next(raw=True):
            if tkn.text == END_MARKER:
                break
        del psr.tokens[psr.getpos() - 1 :]

        # Parse from start
        psr.setpos(definitions_start)
        # The peeked token is not used here; the call is kept so the parser
        # is driven exactly as before.
        psr.peek()
        while node := psr.definition():
            nodes.append(node)  # type: ignore[arg-type]
        if not psr.eof():
            psr.backup()
            raise psr.make_syntax_error(
                f"Extra stuff at the end of {filename}", psr.next(True)
            )
    return nodes

View file

@ -141,10 +141,11 @@ class Pseudo(Node):
flags: list[str] # instr flags to set on the pseudo instruction
targets: list[str] # opcodes this can be replaced by
AstNode = InstDef | Macro | Pseudo | Family
class Parser(PLexer):
@contextual
def definition(self) -> InstDef | Macro | Pseudo | Family | None:
def definition(self) -> AstNode | None:
if macro := self.macro_def():
return macro
if family := self.family_def():

View file

@ -0,0 +1,81 @@
import sys
from analyzer import StackItem
from dataclasses import dataclass
from formatting import maybe_parenthesize
def var_size(var: StackItem) -> str:
    """Return a C expression for the number of stack slots *var* occupies.

    A conditional item contributes its size only when its condition holds.
    """
    if not var.condition:
        return var.size
    # Special case simplification
    if var.condition == "oparg & 1" and var.size == "1":
        return f"({var.condition})"
    return f"(({var.condition}) ? {var.size} : 0)"
class StackOffset:
    """The stack offset of the virtual base of the stack from the physical stack pointer"""

    def __init__(self) -> None:
        """Start with nothing popped and nothing pushed."""
        self.popped: list[str] = []
        self.pushed: list[str] = []

    def pop(self, item: StackItem) -> None:
        """Record the size expression of a popped item."""
        self.popped.append(var_size(item))

    def push(self, item: StackItem) -> None:
        """Record the size expression of a pushed item."""
        self.pushed.append(var_size(item))

    def simplify(self) -> None:
        """Remove matching values from both the popped and pushed list"""
        if not self.popped or not self.pushed:
            return
        # Sort the list so the lexically largest element is last.
        pending_pops = sorted(self.popped)
        pending_pushes = sorted(self.pushed)
        kept_pops: list[str] = []
        kept_pushes: list[str] = []
        while pending_pops and pending_pushes:
            largest_pop = pending_pops.pop()
            largest_push = pending_pushes.pop()
            if largest_pop == largest_push:
                # Identical expressions cancel out.
                continue
            if largest_pop > largest_push:
                # if pop > push, there can be no element in pushed matching pop.
                kept_pops.append(largest_pop)
                pending_pushes.append(largest_push)
            else:
                kept_pushes.append(largest_push)
                pending_pops.append(largest_pop)
        self.popped = kept_pops + pending_pops
        self.pushed = kept_pushes + pending_pushes

    def to_c(self) -> str:
        """Return the net offset as a C expression.

        Integer sizes are folded into one constant; any other size
        expression is appended as a ``+``/``-`` term.
        """
        self.simplify()
        int_offset = 0
        symbol_offset = ""
        for sign, operator, sizes in ((-1, "-", self.popped), (1, "+", self.pushed)):
            for size in sizes:
                try:
                    int_offset += sign * int(size)
                except ValueError:
                    symbol_offset += f" {operator} {maybe_parenthesize(size)}"
        if symbol_offset and not int_offset:
            res = symbol_offset
        else:
            res = f"{int_offset}{symbol_offset}"
        # Tidy up a leading operator left over when there is no integer part.
        if res.startswith(" + "):
            res = res[3:]
        if res.startswith(" - "):
            res = "-" + res[3:]
        return res

    def clear(self) -> None:
        """Forget all recorded pops and pushes."""
        self.popped = []
        self.pushed = []

View file

@ -0,0 +1,417 @@
"""Generate the main interpreter switch.
Reads the instruction definitions from bytecodes.c.
Writes the cases to generated_cases.c.h, which is #included in ceval.c.
"""
import argparse
import os.path
import sys
from analyzer import (
Analysis,
Instruction,
Uop,
Part,
analyze_files,
Skip,
StackItem,
analysis_error,
)
from cwriter import CWriter
from typing import TextIO, Iterator
from lexer import Token
from stack import StackOffset
# Directory containing this script.
HERE = os.path.dirname(__file__)
# Two directory levels above this script.
ROOT = os.path.join(HERE, "../..")
# Path of this script relative to ROOT, using forward slashes; embedded in
# the generated file's header by write_header.
THIS = os.path.relpath(__file__, ROOT).replace(os.path.sep, "/")
# Defaults used by the command line when no input/output is given.
DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c"))
DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h"))
def write_header(filename: str, outfile: TextIO) -> None:
    """Write the generated-file banner and the TIER_ONE preamble to *outfile*."""
    banner = f"""// This file is generated by {THIS}
// from:
// {filename}
// Do not edit!
#ifdef TIER_TWO
#error "This file is for Tier 1 only"
#endif
#define TIER_ONE 1
"""
    outfile.write(banner)
# Written by generate_tier1 after all cases; undoes the TIER_ONE define
# emitted by write_header.
FOOTER = "#undef TIER_ONE\n"
class SizeMismatch(Exception):
    """Raised by Stack.pop when a popped item's size differs from the expected one."""
class Stack:
    """Tracks the stack items of the instruction currently being generated.

    Three offsets are maintained: `top_offset` follows every pop and push,
    `base_offset` follows pops that read directly from the physical stack
    and the pushes written by `flush`, and `peek_offset` follows pops of
    items that are not peeks.
    """

    def __init__(self) -> None:
        """Start with no pending variables and all offsets empty."""
        self.top_offset = StackOffset()
        self.base_offset = StackOffset()
        self.peek_offset = StackOffset()
        self.variables: list[StackItem] = []
        self.defined: set[str] = set()

    def pop(self, var: StackItem) -> str:
        """Pop *var* and return the C statement (possibly empty) that loads it.

        Raises SizeMismatch when the item left by an earlier uop has a
        different size than *var*.
        """
        self.top_offset.pop(var)
        if not var.peek:
            self.peek_offset.pop(var)
        indirect = "&" if var.is_array() else ""
        if self.variables:
            # The value was produced by an earlier part of this instruction.
            popped = self.variables.pop()
            if popped.size != var.size:
                raise SizeMismatch(
                    f"Size mismatch when popping '{popped.name}' from stack to assign to {var.name}. "
                    f"Expected {var.size} got {popped.size}"
                )
            if popped.name == var.name:
                return ""
            if popped.name == "unused":
                self.defined.add(var.name)
                return (
                    f"{var.name} = {indirect}stack_pointer[{self.top_offset.to_c()}];\n"
                )
            if var.name == "unused":
                return ""
            self.defined.add(var.name)
            return f"{var.name} = {popped.name};\n"
        # Nothing pending: the value comes from the physical stack.
        self.base_offset.pop(var)
        if var.name == "unused":
            return ""
        self.defined.add(var.name)
        assign = f"{var.name} = {indirect}stack_pointer[{self.base_offset.to_c()}];"
        if var.condition:
            return f"if ({var.condition}) {{ {assign} }}\n"
        return f"{assign}\n"

    def push(self, var: StackItem) -> str:
        """Push *var*; return C code pointing an undefined array at its slot."""
        self.variables.append(var)
        needs_pointer = (
            var.is_array() and var.name not in self.defined and var.name != "unused"
        )
        if not needs_pointer:
            self.top_offset.push(var)
            return ""
        # The array starts at the current top, so read the offset before pushing.
        c_offset = self.top_offset.to_c()
        self.top_offset.push(var)
        self.defined.add(var.name)
        return f"{var.name} = &stack_pointer[{c_offset}];\n"

    def flush(self, out: CWriter) -> None:
        """Write pending variables to the stack, adjust stack_pointer and reset."""
        for var in self.variables:
            writes_back = (
                not var.peek and var.name != "unused" and not var.is_array()
            )
            if writes_back:
                if var.condition:
                    out.emit(f" if ({var.condition}) ")
                out.emit(
                    f"stack_pointer[{self.base_offset.to_c()}] = {var.name};\n"
                )
            self.base_offset.push(var)
        # After all pushes the two offsets must agree.
        if self.base_offset.to_c() != self.top_offset.to_c():
            print("base", self.base_offset.to_c(), "top", self.top_offset.to_c())
            assert False
        number = self.base_offset.to_c()
        if number != "0":
            out.emit(f"stack_pointer += {number};\n")
        self.variables = []
        self.base_offset.clear()
        self.top_offset.clear()
        self.peek_offset.clear()

    def as_comment(self) -> str:
        """Return a C comment describing the pending variables and offsets."""
        names = [v.name for v in self.variables]
        return f"/* Variables: {names}. Base offset: {self.base_offset.to_c()}. Top offset: {self.top_offset.to_c()} */"
def declare_variables(inst: Instruction, out: CWriter) -> None:
    """Emit one C declaration per distinct stack variable used by *inst*.

    For each uop the inputs are visited in reverse order, then the outputs.
    Variables named "unused" are never declared, untyped variables default
    to ``PyObject *``, and conditional variables are initialised to NULL.
    """
    declared = {"unused"}
    for part in inst.parts:
        if not isinstance(part, Uop):
            continue
        ordered_items = [*reversed(part.stack.inputs), *part.stack.outputs]
        for var in ordered_items:
            if var.name in declared:
                continue
            declared.add(var.name)
            c_type = var.type if var.type else "PyObject *"
            if var.condition:
                out.emit(f"{c_type}{var.name} = NULL;\n")
            else:
                out.emit(f"{c_type}{var.name};\n")
def emit_to(out: CWriter, tkn_iter: Iterator[Token], end: str) -> None:
    """Emit tokens up to, but not including, the first *end* outside parentheses.

    The terminating token is consumed from *tkn_iter* but not emitted.
    """
    depth = 0
    for tkn in tkn_iter:
        if depth == 0 and tkn.kind == end:
            return
        if tkn.kind == "LPAREN":
            depth += 1
        elif tkn.kind == "RPAREN":
            depth -= 1
        out.emit(tkn)
def replace_deopt(
    out: CWriter,
    tkn: Token,
    tkn_iter: Iterator[Token],
    uop: Uop,
    unused: Stack,
    inst: Instruction,
) -> None:
    """Rewrite ``DEOPT_IF(cond);`` as ``DEOPT_IF(cond, FAMILY);``.

    FAMILY is the name of the family *inst* belongs to.
    """
    out.emit_at("DEOPT_IF", tkn)
    opening_paren = next(tkn_iter)
    out.emit(opening_paren)
    emit_to(out, tkn_iter, "RPAREN")
    next(tkn_iter)  # Semi colon
    out.emit(", ")
    family = inst.family
    assert family is not None
    out.emit(family.name)
    out.emit(");\n")
def replace_error(
    out: CWriter,
    tkn: Token,
    tkn_iter: Iterator[Token],
    uop: Uop,
    stack: Stack,
    inst: Instruction,
) -> None:
    """Rewrite ``ERROR_IF(cond, label);`` as an ``if (cond) goto ...;``.

    When the stack's peek offset is a non-zero integer the jump goes to
    ``pop_<n>_<label>``; when it is symbolic, stack_pointer is adjusted
    inline before jumping to the plain label.
    """
    out.emit_at("if ", tkn)
    opening_paren = next(tkn_iter)
    out.emit(opening_paren)
    emit_to(out, tkn_iter, "COMMA")
    label = next(tkn_iter).text
    next(tkn_iter)  # RPAREN
    next(tkn_iter)  # Semi colon
    out.emit(") ")
    c_offset = stack.peek_offset.to_c()
    try:
        pop_count = -int(c_offset)
        close = ";\n"
    except ValueError:
        # Symbolic offset: no pop_N_ label exists, so adjust the pointer here.
        pop_count = None
        out.emit(f"{{ stack_pointer += {c_offset}; ")
        close = "; }\n"
    out.emit("goto ")
    if pop_count:
        out.emit(f"pop_{pop_count}_")
    out.emit(label)
    out.emit(close)
def replace_decrefs(
    out: CWriter,
    tkn: Token,
    tkn_iter: Iterator[Token],
    uop: Uop,
    stack: Stack,
    inst: Instruction,
) -> None:
    """Expand ``DECREF_INPUTS();`` into a decref for each input of *uop*.

    Inputs named "unused" or "null" and peeked inputs are skipped. Inputs
    whose size is not "1" are decref'ed in a loop, conditional inputs use
    Py_XDECREF, all others Py_DECREF.
    """
    # Discard the three tokens that follow the macro name.
    for _ in range(3):
        next(tkn_iter)
    out.emit_at("", tkn)
    for var in uop.stack.inputs:
        if var.peek or var.name in ("unused", "null"):
            continue
        if var.size != "1":
            out.emit(f"for (int _i = {var.size}; --_i >= 0;) {{\n")
            out.emit(f"Py_DECREF({var.name}[_i]);\n")
            out.emit("}\n")
            continue
        macro = "Py_XDECREF" if var.condition else "Py_DECREF"
        out.emit(f"{macro}({var.name});\n")
def replace_store_sp(
    out: CWriter,
    tkn: Token,
    tkn_iter: Iterator[Token],
    uop: Uop,
    stack: Stack,
    inst: Instruction,
) -> None:
    """Replace ``STORE_SP();`` with a stack flush and a frame stack-pointer store."""
    # Discard the three tokens that follow the macro name.
    for _ in range(3):
        next(tkn_iter)
    out.emit_at("", tkn)
    stack.flush(out)
    out.emit("_PyFrame_SetStackPointer(frame, stack_pointer);\n")
def replace_check_eval_breaker(
    out: CWriter,
    tkn: Token,
    tkn_iter: Iterator[Token],
    uop: Uop,
    stack: Stack,
    inst: Instruction,
) -> None:
    """Emit ``CHECK_EVAL_BREAKER();`` unless *uop* is flagged as ending with it.

    The three tokens after the macro name are always consumed.
    """
    for _ in range(3):
        next(tkn_iter)
    if uop.properties.ends_with_eval_breaker:
        return
    out.emit_at("CHECK_EVAL_BREAKER();", tkn)
# Maps identifiers found in uop bodies to the function that emits their
# replacement; consulted by emit_tokens for every IDENTIFIER token.
REPLACEMENT_FUNCTIONS = {
"DEOPT_IF": replace_deopt,
"ERROR_IF": replace_error,
"DECREF_INPUTS": replace_decrefs,
"CHECK_EVAL_BREAKER": replace_check_eval_breaker,
"STORE_SP": replace_store_sp,
}
# Move this to formatter
def emit_tokens(out: CWriter, uop: Uop, stack: Stack, inst: Instruction) -> None:
    """Emit the body of *uop* without its first and last token.

    Identifiers listed in REPLACEMENT_FUNCTIONS are handed to their
    replacement function, which may consume further tokens from the iterator.
    """
    inner_tokens = uop.body[1:-1]
    if not inner_tokens:
        return
    tkn_iter = iter(inner_tokens)
    out.start_line()
    for tkn in tkn_iter:
        handler = None
        if tkn.kind == "IDENTIFIER":
            handler = REPLACEMENT_FUNCTIONS.get(tkn.text)
        if handler is None:
            out.emit(tkn)
        else:
            handler(out, tkn, tkn_iter, uop, stack, inst)
def write_uop(
    uop: Part, out: CWriter, offset: int, stack: Stack, inst: Instruction, braces: bool
) -> int:
    """Emit the C code for one part of *inst* and return the new cache offset.

    A Skip only produces a comment. For a Uop the inputs are popped, the
    outputs are pushed (after the body if the uop stores the stack pointer
    itself, before it otherwise), the cache entries are read and the body
    is emitted. A SizeMismatch from the stack is re-raised as an analysis
    error located at the first token of the body.
    """
    # out.emit(stack.as_comment() + "\n")
    if isinstance(uop, Skip):
        noun = "entries" if uop.size > 1 else "entry"
        out.emit(f"/* Skip {uop.size} cache {noun} */\n")
        return offset + uop.size
    try:
        out.start_line()
        if braces:
            out.emit(f"// {uop.name}\n")
        for var in reversed(uop.stack.inputs):
            out.emit(stack.pop(var))
        if braces:
            out.emit("{\n")
        push_after_body = uop.properties.stores_sp
        if not push_after_body:
            for var in uop.stack.outputs:
                out.emit(stack.push(var))
        for cache in uop.caches:
            if cache.name != "unused":
                if cache.size == 4:
                    c_type = "PyObject *"
                    reader = "read_obj"
                else:
                    bits = cache.size * 16
                    c_type = f"uint{bits}_t "
                    reader = f"read_u{bits}"
                out.emit(
                    f"{c_type}{cache.name} = {reader}(&this_instr[{offset}].cache);\n"
                )
            # Unused entries still occupy space in the instruction stream.
            offset += cache.size
        emit_tokens(out, uop, stack, inst)
        if push_after_body:
            for var in uop.stack.outputs:
                out.emit(stack.push(var))
        if braces:
            out.start_line()
            out.emit("}\n")
        # out.emit(stack.as_comment() + "\n")
        return offset
    except SizeMismatch as ex:
        raise analysis_error(ex.args[0], uop.body[0])
def uses_this(inst: Instruction) -> bool:
    """Return True if *inst* needs ``this_instr``.

    That is the case when its properties say so, or when any of its uops
    has a cache entry with a name other than "unused".
    """
    if inst.properties.needs_this:
        return True
    return any(
        cache.name != "unused"
        for part in inst.parts
        if not isinstance(part, Skip)
        for cache in part.caches
    )
def generate_tier1(
    filenames: str, analysis: Analysis, outfile: TextIO, lines: bool
) -> None:
    """Write the header, one TARGET case per instruction, and the footer.

    Instructions are emitted in sorted name order. *lines* is passed to the
    CWriter as its line-directives switch.
    """
    # NOTE(review): the script entry point passes a list of file names here
    # although the annotation says str; the value is interpolated into the
    # header as-is -- confirm which is intended.
    write_header(filenames, outfile)
    out = CWriter(outfile, 2, lines)
    out.emit("\n")
    for name, inst in sorted(analysis.instructions.items()):
        needs_this = uses_this(inst)
        is_target = inst.is_target
        out.emit("\n")
        out.emit(f"TARGET({name}) {{\n")
        if needs_this and not is_target:
            out.emit("_Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr;\n")
        else:
            out.emit("frame->instr_ptr = next_instr;\n")
        out.emit(f"next_instr += {inst.size};\n")
        out.emit(f"INSTRUCTION_STATS({name});\n")
        if is_target:
            out.emit(f"PREDICTED({name});\n")
            if needs_this:
                # next_instr has already been advanced, so step back by the size.
                out.emit(f"_Py_CODEUNIT *this_instr = next_instr - {inst.size};\n")
        if inst.family is not None:
            out.emit(
                f"static_assert({inst.family.size} == {inst.size-1}"
                ', "incorrect cache size");\n'
            )
        declare_variables(inst, out)
        offset = 1  # The instruction itself
        stack = Stack()
        # Only emit braces if more than one uop
        braces = len(inst.parts) > 1
        for part in inst.parts:
            offset = write_uop(part, out, offset, stack, inst, braces)
        out.start_line()
        last_properties = inst.parts[-1].properties
        if not last_properties.always_exits:
            stack.flush(out)
            if last_properties.ends_with_eval_breaker:
                out.emit("CHECK_EVAL_BREAKER();\n")
            out.emit("DISPATCH();\n")
        out.start_line()
        out.emit("}")
        out.emit("\n")
    outfile.write(FOOTER)
# Command-line interface of the generator; parsed in the __main__ block.
arg_parser = argparse.ArgumentParser(
description="Generate the code for the interpreter switch.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
# Output path of the generated C file.
arg_parser.add_argument(
"-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)
# Switch forwarded to generate_tier1 as its `lines` argument.
arg_parser.add_argument(
"-l", "--emit-line-directives", help="Emit #line directives", action="store_true"
)
# All remaining arguments are collected as input files.
arg_parser.add_argument(
"input", nargs=argparse.REMAINDER, help="Instruction definition file(s)"
)
if __name__ == "__main__":
    args = arg_parser.parse_args()
    # Fall back to the default definitions file when no input was given.
    if not args.input:
        args.input.append(DEFAULT_INPUT)
    data = analyze_files(args.input)
    with open(args.output, "w") as outfile:
        generate_tier1(args.input, data, outfile, args.emit_line_directives)