Allow pgen to produce a DOT format dump of the grammar (GH-18005)

Originally suggested by Anthony Shaw.
This commit is contained in:
Pablo Galindo 2020-01-14 22:32:55 +00:00 committed by GitHub
parent 65a5ce247f
commit 45cf5db587
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 46 additions and 2 deletions

View file

@ -21,9 +21,19 @@ def main():
)
parser.add_argument("--verbose", "-v", action="count")
parser.add_argument(
"--graph",
type=argparse.FileType("w"),
action="store",
metavar="GRAPH_OUTPUT_FILE",
help="Dumps a DOT representation of the generated automata in a file",
)
args = parser.parse_args()
p = ParserGenerator(args.grammar, args.tokens, verbose=args.verbose)
p = ParserGenerator(
args.grammar, args.tokens, verbose=args.verbose, graph_file=args.graph
)
grammar = p.make_grammar()
grammar.produce_graminit_h(args.graminit_h.write)
grammar.produce_graminit_c(args.graminit_c.write)

View file

@ -48,6 +48,26 @@ def dump(self, writer=print):
else:
writer(" %s -> %d" % (label, j))
def dump_graph(self, writer):
    """Dump a DOT representation of the NFA.

    Emits a ``digraph`` named ``<self.name>_nfa``.  States are numbered in
    the order they are first reached from ``self.start``; the state that
    is ``self.end`` is tagged "(final)" in its node label.  Arcs whose
    label is ``None`` are drawn dotted and labelled with an epsilon; every
    other arc carries its label with single quotes turned into double
    quotes.

    ``writer`` is called once per output fragment with a string that
    already ends in a newline (e.g. the ``write`` method of a text file).
    """
    writer('digraph %s_nfa {\n' % self.name)
    # ``todo`` is both the work list and the numbering table: a state's
    # position in the list is its node id.  It is intentionally extended
    # while being iterated so newly discovered states get visited too.
    todo = [self.start]
    for i, state in enumerate(todo):
        final_marker = "(final)" if state is self.end else ""
        writer(' %d [label="State %d %s"];\n' % (i, i, final_marker))
        for arc in state.arcs:
            label = arc.label
            target = arc.target
            try:
                # Single scan: reuse the id of an already-seen state.
                j = todo.index(target)
            except ValueError:
                # First time we reach this state: give it the next id.
                j = len(todo)
                todo.append(target)
            if label is None:
                writer(" %d -> %d [style=dotted label=ε];\n" % (i, j))
            else:
                writer(" %d -> %d [label=%s];\n" % (i, j, label.replace("'", '"')))
    writer('}\n')
class NFAArc:
"""An arc representing a transition between two NFA states.
@ -301,6 +321,15 @@ def dump(self, writer=print):
for label, next in sorted(state.arcs.items()):
writer(" %s -> %d" % (label, self.states.index(next)))
def dump_graph(self, writer):
    """Dump a DOT representation of the DFA.

    Emits a ``digraph`` named ``<self.name>_dfa``.  Each entry of
    ``self.states`` becomes a node whose id is its position in that list;
    states with a truthy ``is_final`` are tagged "(final)" in the node
    label.  Outgoing arcs are written in sorted order, each carrying its
    label with single quotes turned into double quotes.

    ``writer`` is called once per output fragment with a string that
    already ends in a newline (e.g. the ``write`` method of a text file).
    """
    writer('digraph %s_dfa {\n' % self.name)
    for i, state in enumerate(self.states):
        final_marker = "(final)" if state.is_final else ""
        writer(' %d [label="State %d %s"];\n' % (i, i, final_marker))
        for label, target in sorted(state.arcs.items()):
            # Look the target up with list.index so the states' own
            # equality rules (whatever they are) keep deciding the id.
            j = self.states.index(target)
            writer(" %d -> %d [label=%s];\n" % (i, j, label.replace("'", '"')))
    writer('}\n')
class DFAState(object):
"""A state of a DFA

View file

@ -130,7 +130,7 @@ def __repr__(self):
class ParserGenerator(object):
def __init__(self, grammar_file, token_file, verbose=False):
def __init__(self, grammar_file, token_file, verbose=False, graph_file=None):
with open(grammar_file) as f:
self.grammar = f.read()
with open(token_file) as tok_file:
@ -141,6 +141,7 @@ def __init__(self, grammar_file, token_file, verbose=False):
self.opmap["<>"] = "NOTEQUAL"
self.verbose = verbose
self.filename = grammar_file
self.graph_file = graph_file
self.dfas, self.startsymbol = self.create_dfas()
self.first = {} # map from symbol name to set of tokens
self.calculate_first_sets()
@ -152,11 +153,15 @@ def create_dfas(self):
if self.verbose:
print("Dump of NFA for", nfa.name)
nfa.dump()
if self.graph_file is not None:
nfa.dump_graph(self.graph_file.write)
dfa = DFA.from_nfa(nfa)
if self.verbose:
print("Dump of DFA for", dfa.name)
dfa.dump()
dfa.simplify()
if self.graph_file is not None:
dfa.dump_graph(self.graph_file.write)
rule_to_dfas[dfa.name] = dfa
if start_nonterminal is None: