bpo-36143: Regenerate Lib/keyword.py from the Grammar and Tokens file using pgen (GH-12456)

Now that the parser generator is written in Python (Parser/pgen), we can use it to regenerate Lib/keyword.py, the module that lists the language keywords, instead of scraping the auto-generated C grammar files. This also makes it possible for the CI to check that the auto-generated files are up to date.
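In practice, such a CI freshness check can be as simple as regenerating and comparing; a minimal sketch, run from the root of a checkout (the scratch file name keyword_fresh.py is made up for illustration):

import filecmp, subprocess, sys

# Regenerate the keyword table from the grammar into a scratch file...
subprocess.run(
    [sys.executable, "-m", "Parser.pgen.keywordgen",
     "Grammar/Grammar", "Grammar/Tokens", "keyword_fresh.py"],
    check=True,
)
# ...and fail loudly if it no longer matches the checked-in module.
if not filecmp.cmp("Lib/keyword.py", "keyword_fresh.py"):
    sys.exit("Lib/keyword.py is stale; run `make regen-keyword`")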
Pablo Galindo 2019-03-25 22:01:12 +00:00 committed by GitHub
parent 027b09c5a1
commit 91759d9801
5 changed files with 124 additions and 197 deletions

Lib/keyword.py Executable file → Normal file

@@ -1,98 +1,55 @@
#! /usr/bin/env python3
"""Keywords (from "graminit.c")
"""Keywords (from "Grammar/Grammar")
This file is automatically generated; please don't muck it up!
To update the symbols in this file, 'cd' to the top directory of
the python source tree after building the interpreter and run:
the python source tree and run:
    ./python Lib/keyword.py
    python3 -m Parser.pgen.keywordgen Grammar/Grammar \
                                      Grammar/Tokens \
                                      Lib/keyword.py
Alternatively, you can run 'make regen-keyword'.
"""
__all__ = ["iskeyword", "kwlist"]
kwlist = [
#--start keywords--
        'False',
        'None',
        'True',
        'and',
        'as',
        'assert',
        'break',
        'class',
        'continue',
        'def',
        'del',
        'elif',
        'else',
        'except',
        'finally',
        'for',
        'from',
        'global',
        'if',
        'import',
        'in',
        'is',
        'lambda',
        'nonlocal',
        'not',
        'or',
        'pass',
        'raise',
        'return',
        'try',
        'while',
        'with',
        'yield',
#--end keywords--
        ]
kwlist.append('async')
kwlist.append('await')
kwlist.sort()
    'False',
    'None',
    'True',
    'and',
    'as',
    'assert',
    'async',
    'await',
    'break',
    'class',
    'continue',
    'def',
    'del',
    'elif',
    'else',
    'except',
    'finally',
    'for',
    'from',
    'global',
    'if',
    'import',
    'in',
    'is',
    'lambda',
    'nonlocal',
    'not',
    'or',
    'pass',
    'raise',
    'return',
    'try',
    'while',
    'with',
    'yield'
]
iskeyword = frozenset(kwlist).__contains__
def main():
    import sys, re
    args = sys.argv[1:]
    iptfile = args and args[0] or "Python/graminit.c"
    if len(args) > 1: optfile = args[1]
    else: optfile = "Lib/keyword.py"
    # load the output skeleton from the target, taking care to preserve its
    # newline convention.
    with open(optfile, newline='') as fp:
        format = fp.readlines()
    nl = format[0][len(format[0].strip()):] if format else '\n'
    # scan the source file for keywords
    with open(iptfile) as fp:
        strprog = re.compile('"([^"]+)"')
        lines = []
        for line in fp:
            if '{1, "' in line:
                match = strprog.search(line)
                if match:
                    lines.append("        '" + match.group(1) + "'," + nl)
    lines.sort()
    # insert the lines of keywords into the skeleton
    try:
        start = format.index("#--start keywords--" + nl) + 1
        end = format.index("#--end keywords--" + nl)
        format[start:end] = lines
    except ValueError:
        sys.stderr.write("target does not contain format markers\n")
        sys.exit(1)
    # write the output file
    with open(optfile, 'w', newline='') as fp:
        fp.writelines(format)
if __name__ == "__main__":
    main()
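For orientation, here is how the regenerated module behaves at the interpreter (assuming a build of this branch, where async and await are compiled-in keywords):

>>> import keyword
>>> keyword.iskeyword("await")    # baked into kwlist, no longer appended later
True
>>> keyword.iskeyword("print")    # a builtin, not a keyword
False
>>> keyword.kwlist == sorted(keyword.kwlist)
True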

Lib/test/test_keyword.py

@@ -1,20 +1,5 @@
import keyword
import unittest
from test import support
import filecmp
import os
import sys
import subprocess
import shutil
import textwrap
KEYWORD_FILE = support.findfile('keyword.py')
GRAMMAR_FILE = os.path.join(os.path.split(__file__)[0],
                            '..', '..', 'Python', 'graminit.c')
TEST_PY_FILE = 'keyword_test.py'
GRAMMAR_TEST_FILE = 'graminit_test.c'
PY_FILE_WITHOUT_KEYWORDS = 'minimal_keyword.py'
NONEXISTENT_FILE = 'not_here.txt'
class Test_iskeyword(unittest.TestCase):
@@ -35,103 +20,17 @@ def test_changing_the_kwlist_does_not_affect_iskeyword(self):
        keyword.kwlist = ['its', 'all', 'eggs', 'beans', 'and', 'a', 'slice']
        self.assertFalse(keyword.iskeyword('eggs'))
    def test_all_keywords_fail_to_be_used_as_names(self):
        for key in keyword.kwlist:
            with self.assertRaises(SyntaxError):
                exec(f"{key} = 42")
class TestKeywordGeneration(unittest.TestCase):
    def test_async_and_await_are_keywords(self):
        self.assertIn("async", keyword.kwlist)
        self.assertIn("await", keyword.kwlist)
    def _copy_file_without_generated_keywords(self, source_file, dest_file):
        with open(source_file, 'rb') as fp:
            lines = fp.readlines()
        nl = lines[0][len(lines[0].strip()):]
        with open(dest_file, 'wb') as fp:
            fp.writelines(lines[:lines.index(b"#--start keywords--" + nl) + 1])
            fp.writelines(lines[lines.index(b"#--end keywords--" + nl):])
    def _generate_keywords(self, grammar_file, target_keyword_py_file):
        proc = subprocess.Popen([sys.executable,
                                 KEYWORD_FILE,
                                 grammar_file,
                                 target_keyword_py_file], stderr=subprocess.PIPE)
        stderr = proc.communicate()[1]
        return proc.returncode, stderr
    @unittest.skipIf(not os.path.exists(GRAMMAR_FILE),
                     'test only works from source build directory')
    def test_real_grammar_and_keyword_file(self):
        self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
        self.addCleanup(support.unlink, TEST_PY_FILE)
        self.assertFalse(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))
        self.assertEqual((0, b''), self._generate_keywords(GRAMMAR_FILE,
                                                           TEST_PY_FILE))
        self.assertTrue(filecmp.cmp(KEYWORD_FILE, TEST_PY_FILE))
    def test_grammar(self):
        self._copy_file_without_generated_keywords(KEYWORD_FILE, TEST_PY_FILE)
        self.addCleanup(support.unlink, TEST_PY_FILE)
        with open(GRAMMAR_TEST_FILE, 'w') as fp:
            # Some of these are probably implementation accidents.
            fp.writelines(textwrap.dedent("""\
            {2, 1},
            {11, "encoding_decl", 0, 2, states_79,
             "\000\000\040\000\000\000\000\000\000\000\000\000"
             "\000\000\000\000\000\000\000\000\000"},
            {1, "jello"},
            {326, 0},
            {1, "turnip"},
            \t{1, "This one is tab indented"
            {278, 0},
            {1, "crazy but legal"
            "also legal" {1, "
            {1, "continue"},
            {1, "lemon"},
            {1, "tomato"},
            {1, "wigii"},
            {1, 'no good'}
            {283, 0},
            {1, "too many spaces"}"""))
        self.addCleanup(support.unlink, GRAMMAR_TEST_FILE)
        self._generate_keywords(GRAMMAR_TEST_FILE, TEST_PY_FILE)
        expected = [
            "        'This one is tab indented',",
            "        'also legal',",
            "        'continue',",
            "        'crazy but legal',",
            "        'jello',",
            "        'lemon',",
            "        'tomato',",
            "        'turnip',",
            "        'wigii',",
        ]
        with open(TEST_PY_FILE) as fp:
            lines = fp.read().splitlines()
        start = lines.index("#--start keywords--") + 1
        end = lines.index("#--end keywords--")
        actual = lines[start:end]
        self.assertEqual(actual, expected)
    def test_empty_grammar_results_in_no_keywords(self):
        self._copy_file_without_generated_keywords(KEYWORD_FILE,
                                                   PY_FILE_WITHOUT_KEYWORDS)
        self.addCleanup(support.unlink, PY_FILE_WITHOUT_KEYWORDS)
        shutil.copyfile(KEYWORD_FILE, TEST_PY_FILE)
        self.addCleanup(support.unlink, TEST_PY_FILE)
        self.assertEqual((0, b''), self._generate_keywords(os.devnull,
                                                           TEST_PY_FILE))
        self.assertTrue(filecmp.cmp(TEST_PY_FILE, PY_FILE_WITHOUT_KEYWORDS))
    def test_keywords_py_without_markers_produces_error(self):
        rc, stderr = self._generate_keywords(os.devnull, os.devnull)
        self.assertNotEqual(rc, 0)
        self.assertRegex(stderr, b'does not contain format markers')
    def test_missing_grammar_file_produces_error(self):
        rc, stderr = self._generate_keywords(NONEXISTENT_FILE, KEYWORD_FILE)
        self.assertNotEqual(rc, 0)
        self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
    def test_missing_keywords_py_file_produces_error(self):
        rc, stderr = self._generate_keywords(os.devnull, NONEXISTENT_FILE)
        self.assertNotEqual(rc, 0)
        self.assertRegex(stderr, b'(?ms)' + NONEXISTENT_FILE.encode())
    def test_keywords_are_sorted(self):
        self.assertListEqual(sorted(keyword.kwlist), keyword.kwlist)
if __name__ == "__main__":

Makefile.pre.in

@@ -724,7 +724,7 @@ regen-importlib: Programs/_freeze_importlib
# Regenerate all generated files
regen-all: regen-opcode regen-opcode-targets regen-typeslots regen-grammar \
		regen-token regen-symbol regen-ast regen-importlib clinic
		regen-token regen-keyword regen-symbol regen-ast regen-importlib clinic
############################################################################
# Special rules for object files
@@ -843,6 +843,15 @@ regen-token:
		$(srcdir)/Grammar/Tokens \
		$(srcdir)/Lib/token.py
.PHONY: regen-keyword
regen-keyword:
	# Regenerate Lib/keyword.py from Grammar/Grammar and Grammar/Tokens
	# using Parser/pgen
	$(PYTHON_FOR_REGEN) -m Parser.pgen.keywordgen $(srcdir)/Grammar/Grammar \
		$(srcdir)/Grammar/Tokens \
		$(srcdir)/Lib/keyword.py.new
	$(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new
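Why the two-step recipe: the generator writes to keyword.py.new and $(UPDATE_FILE) only installs it over the checked-in file when the contents actually differ, so a no-op regeneration leaves mtimes alone and triggers no rebuilds, matching the other regen- targets. Roughly, and only as a sketch of what that helper does:

import os, sys

def update_file(target, source):
    # Install `source` over `target` only when the bytes differ, so that
    # an already up-to-date tree is not rewritten.
    with open(source, "rb") as f:
        new_contents = f.read()
    if os.path.exists(target):
        with open(target, "rb") as f:
            if f.read() == new_contents:
                os.remove(source)   # already current; discard the .new file
                return
    os.replace(source, target)

if __name__ == "__main__":
    update_file(sys.argv[1], sys.argv[2])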
.PHONY: regen-symbol
regen-symbol: $(srcdir)/Include/graminit.h
	# Regenerate Lib/symbol.py from Include/graminit.h

Misc/NEWS.d (new blurb file)

@@ -0,0 +1,2 @@
Regenerate :mod:`keyword` from the Grammar and Tokens files using pgen. Patch
by Pablo Galindo.

Parser/pgen/keywordgen.py Normal file

@@ -0,0 +1,60 @@
"""Generate Lib/keyword.py from the Grammar and Tokens files using pgen"""

import argparse

from .pgen import ParserGenerator

TEMPLATE = r'''
"""Keywords (from "Grammar/Grammar")

This file is automatically generated; please don't muck it up!

To update the symbols in this file, 'cd' to the top directory of
the python source tree and run:

    python3 -m Parser.pgen.keywordgen Grammar/Grammar \
                                      Grammar/Tokens \
                                      Lib/keyword.py

Alternatively, you can run 'make regen-keyword'.
"""

__all__ = ["iskeyword", "kwlist"]

kwlist = [
    {keywords}
]

iskeyword = frozenset(kwlist).__contains__
'''.lstrip()

EXTRA_KEYWORDS = ["async", "await"]


def main():
    parser = argparse.ArgumentParser(description="Generate the Lib/keywords.py "
                                                 "file from the grammar.")
    parser.add_argument(
        "grammar", type=str, help="The file with the grammar definition in EBNF format"
    )
    parser.add_argument(
        "tokens", type=str, help="The file with the token definitions"
    )
    parser.add_argument(
        "keyword_file",
        type=argparse.FileType('w'),
        help="The path to write the keyword definitions",
    )
    args = parser.parse_args()
    p = ParserGenerator(args.grammar, args.tokens)
    grammar = p.make_grammar()

    with args.keyword_file as thefile:
        all_keywords = sorted(list(grammar.keywords) + EXTRA_KEYWORDS)

        keywords = ",\n    ".join(map(repr, all_keywords))
        thefile.write(TEMPLATE.format(keywords=keywords))


if __name__ == "__main__":
    main()
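The only formatting subtlety is the join string: repr() quotes each keyword, and ",\n    " supplies the comma plus the four-space indent for every entry after the first, whose indent comes from the template itself. A standalone illustration (keyword sample abbreviated; the real input is grammar.keywords):

# Reproduces the layout TEMPLATE.format() produces for the kwlist block.
all_keywords = sorted(["if", "lambda", "yield"] + ["async", "await"])
print("kwlist = [\n    " + ",\n    ".join(map(repr, all_keywords)) + "\n]")
# kwlist = [
#     'async',
#     'await',
#     'if',
#     'lambda',
#     'yield'
# ]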