gh-91404: Use computed gotos and reduce indirection in re (#91495)

This commit is contained in:
Brandt Bucher 2022-04-15 09:26:44 -07:00 committed by GitHub
parent d104f4d21f
commit 1b34b5687b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 483 additions and 348 deletions

View file

@ -520,6 +520,12 @@ Optimizations
becomes 272 bytes from 352 bytes on 64bit platform.
(Contributed by Inada Naoki in :issue:`46845`.)
* :mod:`re`'s regular expression matching engine has been partially refactored,
and now uses computed gotos (or "threaded code") on supported platforms. As a
result, Python 3.11 executes the `pyperformance regular expression benchmarks
<https://pyperformance.readthedocs.io/benchmarks.html#regex-dna>`_ up to 10%
faster than Python 3.10.
Faster CPython
==============

View file

@ -1351,11 +1351,12 @@ regen-stdlib-module-names: build_all Programs/_testembed
$(UPDATE_FILE) $(srcdir)/Python/stdlib_module_names.h $(srcdir)/Python/stdlib_module_names.h.new
regen-sre:
# Regenerate Modules/_sre/sre_constants.h from Lib/re/_constants.py
# using Tools/scripts/generate_sre_constants.py
# Regenerate Modules/_sre/sre_constants.h and Modules/_sre/sre_targets.h
# from Lib/re/_constants.py using Tools/scripts/generate_sre_constants.py
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_sre_constants.py \
$(srcdir)/Lib/re/_constants.py \
$(srcdir)/Modules/_sre/sre_constants.h
$(srcdir)/Modules/_sre/sre_constants.h \
$(srcdir)/Modules/_sre/sre_targets.h
Python/compile.o Python/symtable.o Python/ast_unparse.o Python/ast.o Python/future.o: $(srcdir)/Include/internal/pycore_ast.h

View file

@ -0,0 +1,3 @@
Improve the performance of :mod:`re` matching by using computed gotos (or
"threaded code") on supported platforms and removing expensive pointer
indirections.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,59 @@
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* Auto-generated by Tools/scripts/generate_sre_constants.py from
* Lib/re/_constants.py.
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* See the sre.c file for information on usage and redistribution.
*/
/*
 * Dispatch table for computed gotos: each entry is the address of a
 * TARGET_SRE_OP_* label, taken with the GNU C "labels as values"
 * extension (&&label). The generator emits entries in ascending opcode
 * order and asserts that each opcode equals its position, so the table
 * is indexed directly by opcode value.
 *
 * The labels themselves are not defined in this file; they must be
 * visible at the point where this table is included.
 *
 * Do not edit by hand: regenerate with
 * Tools/scripts/generate_sre_constants.py (see the header above).
 */
static void *sre_targets[44] = {
&&TARGET_SRE_OP_FAILURE,
&&TARGET_SRE_OP_SUCCESS,
&&TARGET_SRE_OP_ANY,
&&TARGET_SRE_OP_ANY_ALL,
&&TARGET_SRE_OP_ASSERT,
&&TARGET_SRE_OP_ASSERT_NOT,
&&TARGET_SRE_OP_AT,
&&TARGET_SRE_OP_BRANCH,
&&TARGET_SRE_OP_CALL,
&&TARGET_SRE_OP_CATEGORY,
&&TARGET_SRE_OP_CHARSET,
&&TARGET_SRE_OP_BIGCHARSET,
&&TARGET_SRE_OP_GROUPREF,
&&TARGET_SRE_OP_GROUPREF_EXISTS,
&&TARGET_SRE_OP_IN,
&&TARGET_SRE_OP_INFO,
&&TARGET_SRE_OP_JUMP,
&&TARGET_SRE_OP_LITERAL,
&&TARGET_SRE_OP_MARK,
&&TARGET_SRE_OP_MAX_UNTIL,
&&TARGET_SRE_OP_MIN_UNTIL,
&&TARGET_SRE_OP_NOT_LITERAL,
&&TARGET_SRE_OP_NEGATE,
&&TARGET_SRE_OP_RANGE,
&&TARGET_SRE_OP_REPEAT,
&&TARGET_SRE_OP_REPEAT_ONE,
&&TARGET_SRE_OP_SUBPATTERN,
&&TARGET_SRE_OP_MIN_REPEAT_ONE,
&&TARGET_SRE_OP_ATOMIC_GROUP,
&&TARGET_SRE_OP_POSSESSIVE_REPEAT,
&&TARGET_SRE_OP_POSSESSIVE_REPEAT_ONE,
&&TARGET_SRE_OP_GROUPREF_IGNORE,
&&TARGET_SRE_OP_IN_IGNORE,
&&TARGET_SRE_OP_LITERAL_IGNORE,
&&TARGET_SRE_OP_NOT_LITERAL_IGNORE,
&&TARGET_SRE_OP_GROUPREF_LOC_IGNORE,
&&TARGET_SRE_OP_IN_LOC_IGNORE,
&&TARGET_SRE_OP_LITERAL_LOC_IGNORE,
&&TARGET_SRE_OP_NOT_LITERAL_LOC_IGNORE,
&&TARGET_SRE_OP_GROUPREF_UNI_IGNORE,
&&TARGET_SRE_OP_IN_UNI_IGNORE,
&&TARGET_SRE_OP_LITERAL_UNI_IGNORE,
&&TARGET_SRE_OP_NOT_LITERAL_UNI_IGNORE,
&&TARGET_SRE_OP_RANGE_UNI_IGNORE,
};

View file

@ -29,7 +29,11 @@ def update_file(file, content):
"""
def main(infile='Lib/re/_constants.py', outfile='Modules/_sre/sre_constants.h'):
def main(
infile="Lib/re/_constants.py",
outfile_constants="Modules/_sre/sre_constants.h",
outfile_targets="Modules/_sre/sre_targets.h",
):
ns = {}
with open(infile) as fp:
code = fp.read()
@ -46,6 +50,11 @@ def dump2(d, prefix):
for value, name in sorted(items):
yield "#define %s %d\n" % (name, value)
# Generator: yields one C initializer line ("&&<prefix>_<item>,") for each
# element of `d`, visited in sorted order.
# NOTE(review): presumably each item is an int-like opcode constant whose
# string form is the opcode name -- confirm against Lib/re/_constants.py.
def dump_gotos(d, prefix):
for i, item in enumerate(sorted(d)):
# Each item must equal its position in the sorted sequence, i.e. the
# values run 0..len(d)-1 with no gaps, so the emitted table can be
# indexed directly by opcode value.
assert i == item
yield f" &&{prefix}_{item},\n"
content = [sre_constants_header]
content.append("#define SRE_MAGIC %d\n" % ns["MAGIC"])
content.extend(dump(ns["OPCODES"], "SRE_OP"))
@ -54,7 +63,14 @@ def dump2(d, prefix):
content.extend(dump2(ns, "SRE_FLAG_"))
content.extend(dump2(ns, "SRE_INFO_"))
update_file(outfile, ''.join(content))
update_file(outfile_constants, ''.join(content))
content = [sre_constants_header]
content.append(f"static void *sre_targets[{len(ns['OPCODES'])}] = {{\n")
content.extend(dump_gotos(ns["OPCODES"], "TARGET_SRE_OP"))
content.append("};\n")
update_file(outfile_targets, ''.join(content))
if __name__ == '__main__':