bpo-24960: use pkgutil.get_data in lib2to3 to read pickled grammar files (#4977)

This is more complicated than it should be because we need to preserve the
useful mtime-based regeneration feature that lib2to3.pgen2.driver.load_grammar
has. pkgutil.get_data is used only to look for the pickled grammar file, and
only when the grammar source file does not exist.
This commit is contained in:
Benjamin Peterson 2017-12-22 12:18:33 -08:00 committed by GitHub
parent 62ed6be8da
commit 8a5877165e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 45 additions and 2 deletions

View file

@ -20,6 +20,7 @@
import io
import os
import logging
import pkgutil
import sys
# Pgen imports
@ -140,6 +141,26 @@ def _newer(a, b):
return os.path.getmtime(a) >= os.path.getmtime(b)
def load_packaged_grammar(package, grammar_source):
    """Load a grammar from its source file or, failing that, from package data.

    Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when
    needed but preserves load_grammar's automatic regeneration behavior when
    possible.

    Raises FileNotFoundError if pkgutil.get_data() reports that no data is
    available (it returns None when the package cannot be located or its
    loader does not support get_data).
    """
    # Prefer the on-disk source so load_grammar can do its own handling.
    if os.path.isfile(grammar_source):
        return load_grammar(grammar_source)
    # Only the base name is used: the pickle is looked up relative to
    # *package*, not relative to the directory of *grammar_source*.
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source))
    data = pkgutil.get_data(package, pickled_name)
    if data is None:
        # Without this check the None would reach pickle.loads() and surface
        # as an unhelpful TypeError about a bytes-like object.
        raise FileNotFoundError(
            "no pickled grammar %r available from package %r"
            % (pickled_name, package))
    g = grammar.Grammar()
    g.loads(data)
    return g
def main(*args):
"""Main program, when run as a script: produce grammar pickle files.

View file

@ -108,6 +108,10 @@ def load(self, filename):
d = pickle.load(f)
self.__dict__.update(d)
def loads(self, pkl):
    """Restore the grammar tables from *pkl*, a bytes object holding a pickle.

    The unpickled mapping is merged into this instance's attribute
    dictionary, so each key becomes (or overwrites) an attribute.
    """
    tables = pickle.loads(pkl)
    self.__dict__.update(tables)
def copy(self):
"""
Copy the grammar.

View file

@ -29,12 +29,12 @@ def __init__(self, grammar):
setattr(self, name, symbol)
# NOTE(review): this view appears to show both sides of a diff hunk. The
# load_grammar() assignment looks like the line being replaced by the
# load_packaged_grammar() assignment right after it -- confirm against the
# real file. As written, the later assignment is the one that takes effect.
python_grammar = driver.load_grammar(_GRAMMAR_FILE)
python_grammar = driver.load_packaged_grammar("lib2to3", _GRAMMAR_FILE)
# Wrap the loaded grammar in a Symbols object.
python_symbols = Symbols(python_grammar)
# A copy of the Python grammar with the "print" keyword removed.
python_grammar_no_print_statement = python_grammar.copy()
del python_grammar_no_print_statement.keywords["print"]
# NOTE(review): same old/new pair as above, for the pattern grammar -- confirm.
pattern_grammar = driver.load_grammar(_PATTERN_GRAMMAR_FILE)
pattern_grammar = driver.load_packaged_grammar("lib2to3", _PATTERN_GRAMMAR_FILE)
pattern_symbols = Symbols(pattern_grammar)

View file

@ -12,7 +12,10 @@
from test.support import verbose
# Python imports
import importlib
import operator
import os
import pickle
import shutil
import subprocess
import sys
@ -99,6 +102,18 @@ def test_load_grammar_from_subprocess(self):
finally:
shutil.rmtree(tmpdir)
def test_load_packaged_grammar(self):
    """load_packaged_grammar() reads the pickle through the package loader.

    A fake module is registered in sys.modules with a spec whose loader
    returns a pickled dict from get_data(). The grammar returned by
    load_packaged_grammar() must expose that dict's keys as attributes.
    """
    # A bare ``import importlib`` does not guarantee that the ``util``
    # submodule has been loaded; import it explicitly rather than relying on
    # some other module having done so.
    import importlib.util

    modname = __name__ + '.load_test'

    class MyLoader:
        # Whatever resource is requested, hand back the same pickle.
        def get_data(self, where):
            return pickle.dumps({'elephant': 19})

    class MyModule:
        __file__ = 'parsertestmodule'
        __spec__ = importlib.util.spec_from_loader(modname, MyLoader())

    # Register the fake module and make sure it is removed again afterwards.
    sys.modules[modname] = MyModule()
    self.addCleanup(operator.delitem, sys.modules, modname)
    # Presumably 'Grammar.txt' is not a file in the current directory, so the
    # packaged (pkgutil) path is exercised rather than load_grammar().
    g = pgen2_driver.load_packaged_grammar(modname, 'Grammar.txt')
    self.assertEqual(g.elephant, 19)
class GrammarTest(support.TestCase):

View file

@ -0,0 +1,3 @@
2to3 and lib2to3 can now read pickled grammar files using pkgutil.get_data()
rather than probing the filesystem. This lets 2to3 and lib2to3 work when run
from a zipfile.