diff --git a/Parser/pgen/__main__.py b/Parser/pgen/__main__.py index eea52618422..bb96e75beea 100644 --- a/Parser/pgen/__main__.py +++ b/Parser/pgen/__main__.py @@ -8,17 +8,15 @@ def main(): parser.add_argument( "grammar", type=str, help="The file with the grammar definition in EBNF format" ) - parser.add_argument( - "tokens", type=str, help="The file with the token definitions" - ) + parser.add_argument("tokens", type=str, help="The file with the token definitions") parser.add_argument( "graminit_h", - type=argparse.FileType('w'), + type=argparse.FileType("w"), help="The path to write the grammar's non-terminals as #defines", ) parser.add_argument( "graminit_c", - type=argparse.FileType('w'), + type=argparse.FileType("w"), help="The path to write the grammar as initialized data", ) diff --git a/Parser/pgen/automata.py b/Parser/pgen/automata.py new file mode 100644 index 00000000000..3147d8636ff --- /dev/null +++ b/Parser/pgen/automata.py @@ -0,0 +1,371 @@ +"""Classes representing state-machine concepts""" + +class NFA: + """A non-deterministic finite automaton (NFA) + + A non-deterministic automaton is a form of finite state + machine. An NFA's rules are less restrictive than a DFA's. + The NFA rules are: + + * A transition can be non-deterministic and can lead to + zero, one, or several target states. + + * An epsilon transition consuming empty input is valid. + Transitions consuming labeled symbols are also permitted. + + This class assumes that there is only one starting state and one + accepting (ending) state. + + Attributes: + name (str): The name of the rule the NFA is representing. + start (NFAState): The starting state. + end (NFAState): The ending state. + """ + + def __init__(self, start, end): + self.name = start.rule_name + self.start = start + self.end = end + + def __repr__(self): + return "NFA(start={}, end={})".format(self.start, self.end) + + def dump(self, writer=print): + """Dump a graphical representation of the NFA""" + todo = [self.start] + for i, state in enumerate(todo): + writer(" State", i, state is self.end and "(final)" or "") + for arc in state.arcs: + label = arc.label + next = arc.target + if next in todo: + j = todo.index(next) + else: + j = len(todo) + todo.append(next) + if label is None: + writer(" -> %d" % j) + else: + writer(" %s -> %d" % (label, j)) + + +class NFAArc: + """An arc representing a transition between two NFA states. + + NFA states can be connected in two ways: + + * A label transition: An input equal to the label must + be consumed to perform the transition. + * An epsilon transition: The transition can be taken without + consuming any input symbol. + + Attributes: + target (NFAState): The ending state of the transition arc. + label (Optional[str]): The label that must be consumed to make + the transition. An epsilon transition is represented + using `None`. + """ + + def __init__(self, target, label): + self.target = target + self.label = label + + def __repr__(self): + return "<%s: %s>" % (self.__class__.__name__, self.label) + + +class NFAState: + """A state of an NFA (non-deterministic finite automaton). + + Attributes: + rule_name (str): The name of the rule used to represent the NFA's + state (shared by every state of the same NFA). + arcs (List[NFAArc]): The list of arcs representing transitions + between the current NFA state and other NFA states via following + a label.
+ """ + + def __init__(self, rule_name): + self.rule_name = rule_name + self.arcs = [] + + def add_arc(self, target, label=None): + """Add a new arc to connect the state to a target state within the NFA + + The method adds a new arc to the list of arcs available as transitions + from the present state. An optional label indicates a named transition + that consumes an input while the absence of a label represents an epsilon + transition. + + Attributes: + target (NFAState): The end of the transition that the arc represents. + label (Optional[str]): The label that must be consumed for making + the transition. If the label is not provided the transition is assumed + to be an epsilon-transition. + """ + assert label is None or isinstance(label, str) + assert isinstance(target, NFAState) + self.arcs.append(NFAArc(target, label)) + + def __repr__(self): + return "<%s: from %s>" % (self.__class__.__name__, self.rule_name) + + +class DFA: + """A deterministic finite automata + + A deterministic finite automata is a form of a finite state machine + that obeys the following rules: + + * Each of the transitions is uniquely determined by + the source state and input symbol + * Reading an input symbol is required for each state + transition (no epsilon transitions). + + The finite-state machine will accept or reject a string of symbols + and only produces a unique computation of the automaton for each input + string. The DFA must have a unique starting state (represented as the first + element in the list of states) but can have multiple final states. + + Attributes: + name (str): The name of the rule the DFA is representing. + states (List[DFAState]): A collection of DFA states. + """ + + def __init__(self, name, states): + self.name = name + self.states = states + + @classmethod + def from_nfa(cls, nfa): + """Constructs a DFA from a NFA using the Rabin–Scott construction algorithm. + + To simulate the operation of a DFA on a given input string, it's + necessary to keep track of a single state at any time, or more precisely, + the state that the automaton will reach after seeing a prefix of the + input. In contrast, to simulate an NFA, it's necessary to keep track of + a set of states: all of the states that the automaton could reach after + seeing the same prefix of the input, according to the nondeterministic + choices made by the automaton. There are two possible sources of + non-determinism: + + 1) Multiple (one or more) transitions with the same label + + 'A' +-------+ + +----------->+ State +----------->+ + | | 2 | + +-------+ +-------+ + | State | + | 1 | +-------+ + +-------+ | State | + +----------->+ 3 +----------->+ + 'A' +-------+ + + 2) Epsilon transitions (transitions that can be taken without consuming any input) + + +-------+ +-------+ + | State | ε | State | + | 1 +----------->+ 2 +----------->+ + +-------+ +-------+ + + Looking at the first case above, we can't determine which transition should be + followed when given an input A. We could choose whether or not to follow the + transition while in the second case the problem is that we can choose both to + follow the transition or not doing it. To solve this problem we can imagine that + we follow all possibilities at the same time and we construct new states from the + set of all possible reachable states. 
For every case in the previous example: + + + 1) For multiple transitions with the same label we collapse all of the + final states into the same one + + +-------+ +-------+ + | State | 'A' | State | + | 1 +----------->+ 2-3 +----------->+ + +-------+ +-------+ + + 2) For epsilon transitions we collapse all epsilon-reachable states + into the same one + + +-------+ + | State | + | 1-2 +-----------> + +-------+ + + Because the DFA states consist of sets of NFA states, an n-state NFA + may be converted to a DFA with at most 2**n states. Notice that the + constructed DFA is not minimal and can be simplified or reduced + afterwards. + + Parameters: + nfa (NFA): The NFA to transform into a DFA. + """ + assert isinstance(nfa, NFA) + + def add_closure(nfa_state, base_nfa_set): + """Calculate the epsilon-closure of a given state + + Add to the *base_nfa_set* all the states that are + reachable from *nfa_state* via epsilon-transitions. + """ + assert isinstance(nfa_state, NFAState) + if nfa_state in base_nfa_set: + return + base_nfa_set.add(nfa_state) + for nfa_arc in nfa_state.arcs: + if nfa_arc.label is None: + add_closure(nfa_arc.target, base_nfa_set) + + # Calculate the epsilon-closure of the starting state + base_nfa_set = set() + add_closure(nfa.start, base_nfa_set) + + # Start by visiting the NFA starting state (there is only one). + states = [DFAState(nfa.name, base_nfa_set, nfa.end)] + + for state in states: # NB states grow while we're iterating + + # Find transitions from the current state to other reachable states + # and store them in a mapping that correlates the label to all the + # possible reachable states that can be obtained by consuming a + # token equal to the label. Each set of all the states that can + # be reached after following a label will be a DFA state. + arcs = {} + for nfa_state in state.nfa_set: + for nfa_arc in nfa_state.arcs: + if nfa_arc.label is not None: + nfa_set = arcs.setdefault(nfa_arc.label, set()) + # All states that can be reached by epsilon-transitions + # are also included in the set of reachable states. + add_closure(nfa_arc.target, nfa_set) + + # Now create new DFA states by visiting all possible transitions between + # the current DFA state and the new power-set states (each nfa_set) + # via the different labels. As the nodes are appended to *states* this + # is performing a depth-first search traversal over the power-set of + # the states of the original NFA. + for label, nfa_set in sorted(arcs.items()): + for existing_state in states: + if existing_state.nfa_set == nfa_set: + # The DFA state already exists for this rule. + next_state = existing_state + break + else: + next_state = DFAState(nfa.name, nfa_set, nfa.end) + states.append(next_state) + + # Add a transition between the current DFA state and the new + # DFA state (the power-set state) via the current label. + state.add_arc(next_state, label) + + return cls(nfa.name, states) + + def __iter__(self): + return iter(self.states) + + def simplify(self): + """Attempt to reduce the number of states of the DFA + + Transform the DFA into an equivalent DFA that has fewer states. Two + classes of states can be removed or merged from the original DFA without + affecting the language it accepts, in order to minimize it: + + * Unreachable states are those that cannot be reached from the initial + state of the DFA for any input string. + * Nondistinguishable states are those that cannot be distinguished + from one another for any input string.
+ + This algorithm does not achieve the optimal fully-reduced solution, but it + works well enough for the particularities of the Python grammar. The + algorithm repeatedly looks for two states that have the same set of + arcs (same labels pointing to the same nodes) and unifies them, until + things stop changing. + """ + changes = True + while changes: + changes = False + for i, state_i in enumerate(self.states): + for j in range(i + 1, len(self.states)): + state_j = self.states[j] + if state_i == state_j: + del self.states[j] + for state in self.states: + state.unifystate(state_j, state_i) + changes = True + break + + def dump(self, writer=print): + """Dump a graphical representation of the DFA""" + for i, state in enumerate(self.states): + writer(" State", i, state.is_final and "(final)" or "") + for label, next in sorted(state.arcs.items()): + writer(" %s -> %d" % (label, self.states.index(next))) + + +class DFAState(object): + """A state of a DFA + + Attributes: + rule_name (str): The name of the DFA rule containing the represented state. + nfa_set (Set[NFAState]): The set of NFA states used to create this state. + is_final (bool): True if the state represents an accepting state of the DFA + containing this state. + arcs (Dict[str, DFAState]): A mapping representing transitions between + the current DFA state and another DFA state via following a label. + """ + + def __init__(self, rule_name, nfa_set, final): + assert isinstance(nfa_set, set) + assert isinstance(next(iter(nfa_set)), NFAState) + assert isinstance(final, NFAState) + self.rule_name = rule_name + self.nfa_set = nfa_set + self.arcs = {} # map from terminals/nonterminals to DFAState + self.is_final = final in nfa_set + + def add_arc(self, target, label): + """Add a new arc to the current state. + + Parameters: + target (DFAState): The DFA state at the end of the arc. + label (str): The label representing the token that must be consumed + to perform this transition. + """ + assert isinstance(label, str) + assert label not in self.arcs + assert isinstance(target, DFAState) + self.arcs[label] = target + + def unifystate(self, old, new): + """Replace all arcs from the current state pointing to *old* with arcs to *new*. + + Parameters: + old (DFAState): The DFA state to be replaced. + new (DFAState): The DFA state to use as the replacement. + """ + for label, next_ in self.arcs.items(): + if next_ is old: + self.arcs[label] = new + + def __eq__(self, other): + # The nfa_set does not matter for equality + assert isinstance(other, DFAState) + if self.is_final != other.is_final: + return False + # We cannot just return self.arcs == other.arcs because that + # would invoke this method recursively if there are any cycles. + if len(self.arcs) != len(other.arcs): + return False + for label, next_ in self.arcs.items(): + if next_ is not other.arcs.get(label): + return False + return True + + __hash__ = None # For Py3 compatibility.
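As a minimal illustration of the `DFA.from_nfa` subset construction documented above (a hypothetical sketch, not part of the patch, assuming `Parser/pgen/automata.py` is importable as `automata`), the following hand-builds the NFA for a made-up rule `maybe_b: 'a' ['b']` and converts it:

    # Hypothetical usage sketch; the bare import path is an assumption.
    from automata import NFA, NFAState, DFA

    # NFA for a rule roughly equivalent to:  maybe_b: 'a' ['b']
    begin = NFAState("maybe_b")
    middle = NFAState("maybe_b")
    end = NFAState("maybe_b")
    begin.add_arc(middle, "'a'")   # consume 'a'
    middle.add_arc(end, "'b'")     # consume 'b' ...
    middle.add_arc(end)            # ... or skip it via an epsilon arc

    nfa = NFA(begin, end)
    nfa.dump()                     # textual dump of the NFA

    dfa = DFA.from_nfa(nfa)        # Rabin-Scott subset construction
    dfa.simplify()                 # merge states that share the same arcs
    dfa.dump()                     # epsilon arcs are gone; two states are final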
+ + def __repr__(self): + return "<%s: %s is_final=%s>" % ( + self.__class__.__name__, + self.rule_name, + self.is_final, + ) diff --git a/Parser/pgen/grammar.py b/Parser/pgen/grammar.py index 5cd652426b4..56188db775a 100644 --- a/Parser/pgen/grammar.py +++ b/Parser/pgen/grammar.py @@ -76,12 +76,14 @@ def produce_graminit_c(self, writer): def print_labels(self, writer): writer( - "static const label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels)) + "static const label labels[{n_labels}] = {{\n".format( + n_labels=len(self.labels) + ) ) for label, name in self.labels: label_name = '"{}"'.format(name) if name is not None else 0 writer( - ' {{{label}, {label_name}}},\n'.format( + " {{{label}, {label_name}}},\n".format( label=label, label_name=label_name ) ) diff --git a/Parser/pgen/keywordgen.py b/Parser/pgen/keywordgen.py index eeb3ef739fa..f0234a81b62 100644 --- a/Parser/pgen/keywordgen.py +++ b/Parser/pgen/keywordgen.py @@ -32,17 +32,16 @@ def main(): - parser = argparse.ArgumentParser(description="Generate the Lib/keywords.py " - "file from the grammar.") + parser = argparse.ArgumentParser( + description="Generate the Lib/keywords.py " "file from the grammar." + ) parser.add_argument( "grammar", type=str, help="The file with the grammar definition in EBNF format" ) - parser.add_argument( - "tokens", type=str, help="The file with the token definitions" - ) + parser.add_argument("tokens", type=str, help="The file with the token definitions") parser.add_argument( "keyword_file", - type=argparse.FileType('w'), + type=argparse.FileType("w"), help="The path to write the keyword definitions", ) args = parser.parse_args() diff --git a/Parser/pgen/metaparser.py b/Parser/pgen/metaparser.py new file mode 100644 index 00000000000..074a083fb74 --- /dev/null +++ b/Parser/pgen/metaparser.py @@ -0,0 +1,152 @@ +"""Parser for the Python metagrammar""" + +import io +import tokenize # from stdlib + +from .automata import NFA, NFAState + + +class GrammarParser: + """Parser for Python grammar files.""" + + _translation_table = { + tokenize.NAME: "NAME", + tokenize.STRING: "STRING", + tokenize.NEWLINE: "NEWLINE", + tokenize.NL: "NL", + tokenize.OP: "OP", + tokenize.ENDMARKER: "ENDMARKER", + tokenize.COMMENT: "COMMENT", + } + + def __init__(self, grammar): + self.grammar = grammar + grammar_adaptor = io.StringIO(grammar) + self.generator = tokenize.generate_tokens(grammar_adaptor.readline) + self._gettoken() # Initialize lookahead + self._current_rule_name = None + + def parse(self): + """Turn the grammar into a collection of NFAs""" + # grammar: (NEWLINE | rule)* ENDMARKER + while self.type != tokenize.ENDMARKER: + while self.type == tokenize.NEWLINE: + self._gettoken() + # rule: NAME ':' rhs NEWLINE + self._current_rule_name = self._expect(tokenize.NAME) + self._expect(tokenize.OP, ":") + a, z = self._parse_rhs() + self._expect(tokenize.NEWLINE) + + yield NFA(a, z) + + def _parse_rhs(self): + # rhs: items ('|' items)* + a, z = self._parse_items() + if self.value != "|": + return a, z + else: + aa = NFAState(self._current_rule_name) + zz = NFAState(self._current_rule_name) + while True: + # Allow to transit directly to the previous state and connect the end of the + # previous state to the end of the current one, effectively allowing to skip + # the current state. 
+ aa.add_arc(a) + z.add_arc(zz) + if self.value != "|": + break + + self._gettoken() + a, z = self._parse_items() + return aa, zz + + def _parse_items(self): + # items: item+ + a, b = self._parse_item() + while self.type in (tokenize.NAME, tokenize.STRING) or self.value in ("(", "["): + c, d = self._parse_item() + # Allow a transition between the end of the previous state + # and the beginning of the new one, connecting all the items + # together. In this way we can only reach the end if we visit + # all the items. + b.add_arc(c) + b = d + return a, b + + def _parse_item(self): + # item: '[' rhs ']' | atom ['+' | '*'] + if self.value == "[": + self._gettoken() + a, z = self._parse_rhs() + self._expect(tokenize.OP, "]") + # Make a transition from the beginning to the end so it is possible to + # advance for free to the next state of this item without consuming + # anything from the rhs. + a.add_arc(z) + return a, z + else: + a, z = self._parse_atom() + value = self.value + if value not in ("+", "*"): + return a, z + self._gettoken() + z.add_arc(a) + if value == "+": + # Create a cycle to the beginning so we go back to the old state in this + # item and repeat. + return a, z + else: + # The end state is the same as the beginning, so we can cycle arbitrarily + # and end in the beginning if necessary. + return a, a + + def _parse_atom(self): + # atom: '(' rhs ')' | NAME | STRING + if self.value == "(": + self._gettoken() + a, z = self._parse_rhs() + self._expect(tokenize.OP, ")") + return a, z + elif self.type in (tokenize.NAME, tokenize.STRING): + a = NFAState(self._current_rule_name) + z = NFAState(self._current_rule_name) + # We can transition to the next state only if we consume the value. + a.add_arc(z, self.value) + self._gettoken() + return a, z + else: + self._raise_error( + "expected (...) or NAME or STRING, got {} ({})", + self._translation_table.get(self.type, self.type), + self.value, + ) + + def _expect(self, type_, value=None): + if self.type != type_: + self._raise_error( + "expected {}, got {} ({})", + self._translation_table.get(type_, type_), + self._translation_table.get(self.type, self.type), + self.value, + ) + if value is not None and self.value != value: + self._raise_error("expected {}, got {}", value, self.value) + value = self.value + self._gettoken() + return value + + def _gettoken(self): + tup = next(self.generator) + while tup[0] in (tokenize.COMMENT, tokenize.NL): + tup = next(self.generator) + self.type, self.value, self.begin, self.end, self.line = tup + + def _raise_error(self, msg, *args): + if args: + try: + msg = msg.format(*args) + except Exception: + msg = " ".join([msg] + list(map(str, args))) + line = self.grammar.splitlines()[self.begin[0] - 1] + raise SyntaxError(msg, ("", self.begin[0], self.begin[1], line)) diff --git a/Parser/pgen/pgen.py b/Parser/pgen/pgen.py index d52d58f64e4..d7dcb769334 100644 --- a/Parser/pgen/pgen.py +++ b/Parser/pgen/pgen.py @@ -1,42 +1,180 @@ +"""Python parser generator + + +This parser generator transforms a Python grammar file into parsing tables +that can be consumed by Python's LL(1) parser written in C. + +Concepts +-------- + +* An LL(1) parser (Left-to-right, Leftmost derivation, 1 token-lookahead) is a + top-down parser for a subset of context-free languages. It parses the input + from Left to right, performing Leftmost derivation of the sentence, and can + only use 1 token of lookahead when parsing a sentence.
+ +* A parsing table is a collection of data that a generic implementation of the + LL(1) parser consumes to know how to parse a given context-free grammar. In + this case the collection of data involves Deterministic Finite Automata, + calculated first sets, keywords and transition labels. + +* A grammar is defined by production rules (or just 'productions') that specify + which symbols may replace which other symbols; these rules may be used to + generate strings, or to parse them. Each such rule has a head, or left-hand + side, which consists of the string that may be replaced, and a body, or + right-hand side, which consists of a string that may replace it. In the + Python grammar, rules are written in the form + + rule_name: rule_description; + + meaning the rule 'a: b' specifies that a can be replaced by b. A context-free + grammar is a grammar in which the left-hand side of each production rule + consists of only a single nonterminal symbol. Context-free grammars can + always be recognized by a non-deterministic automaton. + +* Terminal symbols are literal symbols which may appear in the outputs of the + production rules of the grammar and which cannot be changed using the rules + of the grammar. Applying the rules recursively to a source string of symbols + will usually terminate in a final output string consisting only of terminal + symbols. + +* Nonterminal symbols are those symbols which can be replaced. The grammar + includes a start symbol, a designated member of the set of nonterminals from + which all the strings in the language may be derived by successive + applications of the production rules. + +* The language defined by the grammar is defined as the set of terminal strings + that can be derived using the production rules. + +* The first sets of a rule (FIRST(rule)) are defined to be the set of terminals + that can appear in the first position of any string derived from the rule. + This is useful for LL(1) parsers as the parser is only allowed to look at the + next token in the input to know which rule it needs to parse. For example given + this grammar: + + start: '(' A | B ')' + A: 'a' '<' + B: 'b' '<' + + and the input '(b<)' the parser can only look at 'b' to know if it needs + to parse A or B. Because FIRST(A) = {'a'} and FIRST(B) = {'b'} it knows + that it needs to continue parsing rule B because only that rule can start + with 'b'. + +Description +----------- + +The input for the parser generator is a grammar in extended BNF form (using * +for repetition, + for at-least-once repetition, [] for optional parts, | for +alternatives and () for grouping). + +Each rule in the grammar file is considered as a regular expression in its +own right. It is turned into a Non-deterministic Finite Automaton (NFA), +which is then turned into a Deterministic Finite Automaton (DFA), which is +then optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3, +or similar compiler books (this technique is more often used for lexical +analyzers). + +The DFAs are used by the parser as parsing tables in a special way that's +probably unique. Before they are usable, the FIRST sets of all non-terminals +are computed so the LL(1) parser consuming the parsing tables can distinguish +between different transitions. +Reference +--------- + +[Aho&Ullman 77] + Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 + (first edition) +""" + +from ast import literal_eval import collections -import tokenize # from stdlib from .
import grammar, token +from .automata import DFA +from .metaparser import GrammarParser + +import enum + + +class LabelType(enum.Enum): + NONTERMINAL = 0 + NAMED_TOKEN = 1 + KEYWORD = 2 + OPERATOR = 3 + NONE = 4 + + +class Label(str): + def __init__(self, value): + self.type = self._get_type() + + def _get_type(self): + if self[0].isalpha(): + if self.upper() == self: + # NAMED tokens (ASYNC, NAME...) are all uppercase by convention + return LabelType.NAMED_TOKEN + else: + # If is not uppercase it must be a non terminal. + return LabelType.NONTERMINAL + else: + # Keywords and operators are wrapped in quotes + assert self[0] == self[-1] in ('"', "'"), self + value = literal_eval(self) + if value[0].isalpha(): + return LabelType.KEYWORD + else: + return LabelType.OPERATOR + + def __repr__(self): + return "{}({})".format(self.type, super().__repr__()) class ParserGenerator(object): - - def __init__(self, grammar_file, token_file, stream=None, verbose=False): - close_stream = None - if stream is None: - stream = open(grammar_file) - close_stream = stream.close + def __init__(self, grammar_file, token_file, verbose=False): + with open(grammar_file) as f: + self.grammar = f.read() with open(token_file) as tok_file: token_lines = tok_file.readlines() self.tokens = dict(token.generate_tokens(token_lines)) self.opmap = dict(token.generate_opmap(token_lines)) # Manually add <> so it does not collide with != - self.opmap['<>'] = "NOTEQUAL" + self.opmap["<>"] = "NOTEQUAL" self.verbose = verbose self.filename = grammar_file - self.stream = stream - self.generator = tokenize.generate_tokens(stream.readline) - self.gettoken() # Initialize lookahead - self.dfas, self.startsymbol = self.parse() - if close_stream is not None: - close_stream() - self.first = {} # map from symbol name to set of tokens - self.addfirstsets() + self.dfas, self.startsymbol = self.create_dfas() + self.first = {} # map from symbol name to set of tokens + self.calculate_first_sets() + + def create_dfas(self): + rule_to_dfas = collections.OrderedDict() + start_nonterminal = None + for nfa in GrammarParser(self.grammar).parse(): + if self.verbose: + print("Dump of NFA for", nfa.name) + nfa.dump() + dfa = DFA.from_nfa(nfa) + if self.verbose: + print("Dump of DFA for", dfa.name) + dfa.dump() + dfa.simplify() + rule_to_dfas[dfa.name] = dfa + + if start_nonterminal is None: + start_nonterminal = dfa.name + + return rule_to_dfas, start_nonterminal def make_grammar(self): c = grammar.Grammar() + c.all_labels = set() names = list(self.dfas.keys()) names.remove(self.startsymbol) names.insert(0, self.startsymbol) for name in names: i = 256 + len(c.symbol2number) - c.symbol2number[name] = i - c.number2symbol[i] = name + c.symbol2number[Label(name)] = i + c.number2symbol[i] = Label(name) + c.all_labels.add(name) for name in names: self.make_label(c, name) dfa = self.dfas[name] @@ -44,12 +182,13 @@ def make_grammar(self): for state in dfa: arcs = [] for label, next in sorted(state.arcs.items()): - arcs.append((self.make_label(c, label), dfa.index(next))) - if state.isfinal: - arcs.append((0, dfa.index(state))) + c.all_labels.add(label) + arcs.append((self.make_label(c, label), dfa.states.index(next))) + if state.is_final: + arcs.append((0, dfa.states.index(state))) states.append(arcs) c.states.append(states) - c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name)) + c.dfas[c.symbol2number[name]] = (states, self.make_first_sets(c, name)) c.start = c.symbol2number[self.startsymbol] if self.verbose: @@ -68,7 +207,7 @@ def 
make_grammar(self): ) return c - def make_first(self, c, name): + def make_first_sets(self, c, name): rawfirst = self.first[name] first = set() for label in sorted(rawfirst): @@ -78,67 +217,65 @@ def make_first(self, c, name): return first def make_label(self, c, label): - # XXX Maybe this should be a method on a subclass of converter? + label = Label(label) ilabel = len(c.labels) - if label[0].isalpha(): - # Either a symbol name or a named token - if label in c.symbol2number: - # A symbol name (a non-terminal) - if label in c.symbol2label: - return c.symbol2label[label] - else: - c.labels.append((c.symbol2number[label], None)) - c.symbol2label[label] = ilabel - return ilabel - else: - # A named token (NAME, NUMBER, STRING) - itoken = self.tokens.get(label, None) - assert isinstance(itoken, int), label - assert itoken in self.tokens.values(), label - if itoken in c.tokens: - return c.tokens[itoken] - else: - c.labels.append((itoken, None)) - c.tokens[itoken] = ilabel - return ilabel - else: - # Either a keyword or an operator - assert label[0] in ('"', "'"), label - value = eval(label) - if value[0].isalpha(): - # A keyword - if value in c.keywords: - return c.keywords[value] - else: - c.labels.append((self.tokens["NAME"], value)) - c.keywords[value] = ilabel - return ilabel - else: - # An operator (any non-numeric token) - tok_name = self.opmap[value] # Fails if unknown token - itoken = self.tokens[tok_name] - if itoken in c.tokens: - return c.tokens[itoken] - else: - c.labels.append((itoken, None)) - c.tokens[itoken] = ilabel - return ilabel - def addfirstsets(self): + if label.type == LabelType.NONTERMINAL: + if label in c.symbol2label: + return c.symbol2label[label] + else: + c.labels.append((c.symbol2number[label], None)) + c.symbol2label[label] = ilabel + return ilabel + elif label.type == LabelType.NAMED_TOKEN: + # A named token (NAME, NUMBER, STRING) + itoken = self.tokens.get(label, None) + assert isinstance(itoken, int), label + assert itoken in self.tokens.values(), label + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + elif label.type == LabelType.KEYWORD: + # A keyword + value = literal_eval(label) + if value in c.keywords: + return c.keywords[value] + else: + c.labels.append((self.tokens["NAME"], value)) + c.keywords[value] = ilabel + return ilabel + elif label.type == LabelType.OPERATOR: + # An operator (any non-numeric token) + value = literal_eval(label) + tok_name = self.opmap[value] # Fails if unknown token + itoken = self.tokens[tok_name] + if itoken in c.tokens: + return c.tokens[itoken] + else: + c.labels.append((itoken, None)) + c.tokens[itoken] = ilabel + return ilabel + else: + raise ValueError("Cannot categorize label {}".format(label)) + + def calculate_first_sets(self): names = list(self.dfas.keys()) for name in names: if name not in self.first: - self.calcfirst(name) + self.calculate_first_sets_for_rule(name) if self.verbose: print("First set for {dfa_name}".format(dfa_name=name)) for item in self.first[name]: print(" - {terminal}".format(terminal=item)) - def calcfirst(self, name): + def calculate_first_sets_for_rule(self, name): dfa = self.dfas[name] - self.first[name] = None # dummy to detect left recursion - state = dfa[0] + self.first[name] = None # dummy to detect left recursion + state = dfa.states[0] totalset = set() overlapcheck = {} for label, next in state.arcs.items(): @@ -148,7 +285,7 @@ def calcfirst(self, name): if fset is None: raise ValueError("recursion 
for rule %r" % name) else: - self.calcfirst(label) + self.calculate_first_sets_for_rule(label) fset = self.first[label] totalset.update(fset) overlapcheck[label] = fset @@ -159,248 +296,10 @@ def calcfirst(self, name): for label, itsfirst in overlapcheck.items(): for symbol in itsfirst: if symbol in inverse: - raise ValueError("rule %s is ambiguous; %s is in the" - " first sets of %s as well as %s" % - (name, symbol, label, inverse[symbol])) + raise ValueError( + "rule %s is ambiguous; %s is in the" + " first sets of %s as well as %s" + % (name, symbol, label, inverse[symbol]) + ) inverse[symbol] = label self.first[name] = totalset - - def parse(self): - dfas = collections.OrderedDict() - startsymbol = None - # MSTART: (NEWLINE | RULE)* ENDMARKER - while self.type != tokenize.ENDMARKER: - while self.type == tokenize.NEWLINE: - self.gettoken() - # RULE: NAME ':' RHS NEWLINE - name = self.expect(tokenize.NAME) - if self.verbose: - print("Processing rule {dfa_name}".format(dfa_name=name)) - self.expect(tokenize.OP, ":") - a, z = self.parse_rhs() - self.expect(tokenize.NEWLINE) - if self.verbose: - self.dump_nfa(name, a, z) - dfa = self.make_dfa(a, z) - if self.verbose: - self.dump_dfa(name, dfa) - self.simplify_dfa(dfa) - dfas[name] = dfa - if startsymbol is None: - startsymbol = name - return dfas, startsymbol - - def make_dfa(self, start, finish): - # To turn an NFA into a DFA, we define the states of the DFA - # to correspond to *sets* of states of the NFA. Then do some - # state reduction. Let's represent sets as dicts with 1 for - # values. - assert isinstance(start, NFAState) - assert isinstance(finish, NFAState) - def closure(state): - base = set() - addclosure(state, base) - return base - def addclosure(state, base): - assert isinstance(state, NFAState) - if state in base: - return - base.add(state) - for label, next in state.arcs: - if label is None: - addclosure(next, base) - states = [DFAState(closure(start), finish)] - for state in states: # NB states grows while we're iterating - arcs = {} - for nfastate in state.nfaset: - for label, next in nfastate.arcs: - if label is not None: - addclosure(next, arcs.setdefault(label, set())) - for label, nfaset in sorted(arcs.items()): - for st in states: - if st.nfaset == nfaset: - break - else: - st = DFAState(nfaset, finish) - states.append(st) - state.addarc(st, label) - return states # List of DFAState instances; first one is start - - def dump_nfa(self, name, start, finish): - print("Dump of NFA for", name) - todo = [start] - for i, state in enumerate(todo): - print(" State", i, state is finish and "(final)" or "") - for label, next in state.arcs: - if next in todo: - j = todo.index(next) - else: - j = len(todo) - todo.append(next) - if label is None: - print(" -> %d" % j) - else: - print(" %s -> %d" % (label, j)) - - def dump_dfa(self, name, dfa): - print("Dump of DFA for", name) - for i, state in enumerate(dfa): - print(" State", i, state.isfinal and "(final)" or "") - for label, next in sorted(state.arcs.items()): - print(" %s -> %d" % (label, dfa.index(next))) - - def simplify_dfa(self, dfa): - # This is not theoretically optimal, but works well enough. - # Algorithm: repeatedly look for two states that have the same - # set of arcs (same labels pointing to the same nodes) and - # unify them, until things stop changing. 
- - # dfa is a list of DFAState instances - changes = True - while changes: - changes = False - for i, state_i in enumerate(dfa): - for j in range(i+1, len(dfa)): - state_j = dfa[j] - if state_i == state_j: - #print " unify", i, j - del dfa[j] - for state in dfa: - state.unifystate(state_j, state_i) - changes = True - break - - def parse_rhs(self): - # RHS: ALT ('|' ALT)* - a, z = self.parse_alt() - if self.value != "|": - return a, z - else: - aa = NFAState() - zz = NFAState() - aa.addarc(a) - z.addarc(zz) - while self.value == "|": - self.gettoken() - a, z = self.parse_alt() - aa.addarc(a) - z.addarc(zz) - return aa, zz - - def parse_alt(self): - # ALT: ITEM+ - a, b = self.parse_item() - while (self.value in ("(", "[") or - self.type in (tokenize.NAME, tokenize.STRING)): - c, d = self.parse_item() - b.addarc(c) - b = d - return a, b - - def parse_item(self): - # ITEM: '[' RHS ']' | ATOM ['+' | '*'] - if self.value == "[": - self.gettoken() - a, z = self.parse_rhs() - self.expect(tokenize.OP, "]") - a.addarc(z) - return a, z - else: - a, z = self.parse_atom() - value = self.value - if value not in ("+", "*"): - return a, z - self.gettoken() - z.addarc(a) - if value == "+": - return a, z - else: - return a, a - - def parse_atom(self): - # ATOM: '(' RHS ')' | NAME | STRING - if self.value == "(": - self.gettoken() - a, z = self.parse_rhs() - self.expect(tokenize.OP, ")") - return a, z - elif self.type in (tokenize.NAME, tokenize.STRING): - a = NFAState() - z = NFAState() - a.addarc(z, self.value) - self.gettoken() - return a, z - else: - self.raise_error("expected (...) or NAME or STRING, got %s/%s", - self.type, self.value) - - def expect(self, type, value=None): - if self.type != type or (value is not None and self.value != value): - self.raise_error("expected %s/%s, got %s/%s", - type, value, self.type, self.value) - value = self.value - self.gettoken() - return value - - def gettoken(self): - tup = next(self.generator) - while tup[0] in (tokenize.COMMENT, tokenize.NL): - tup = next(self.generator) - self.type, self.value, self.begin, self.end, self.line = tup - # print(getattr(tokenize, 'tok_name')[self.type], repr(self.value)) - - def raise_error(self, msg, *args): - if args: - try: - msg = msg % args - except Exception: - msg = " ".join([msg] + list(map(str, args))) - raise SyntaxError(msg, (self.filename, self.end[0], - self.end[1], self.line)) - -class NFAState(object): - - def __init__(self): - self.arcs = [] # list of (label, NFAState) pairs - - def addarc(self, next, label=None): - assert label is None or isinstance(label, str) - assert isinstance(next, NFAState) - self.arcs.append((label, next)) - -class DFAState(object): - - def __init__(self, nfaset, final): - assert isinstance(nfaset, set) - assert isinstance(next(iter(nfaset)), NFAState) - assert isinstance(final, NFAState) - self.nfaset = nfaset - self.isfinal = final in nfaset - self.arcs = {} # map from label to DFAState - - def addarc(self, next, label): - assert isinstance(label, str) - assert label not in self.arcs - assert isinstance(next, DFAState) - self.arcs[label] = next - - def unifystate(self, old, new): - for label, next in self.arcs.items(): - if next is old: - self.arcs[label] = new - - def __eq__(self, other): - # Equality test -- ignore the nfaset instance variable - assert isinstance(other, DFAState) - if self.isfinal != other.isfinal: - return False - # Can't just return self.arcs == other.arcs, because that - # would invoke this method recursively, with cycles... 
- if len(self.arcs) != len(other.arcs): - return False - for label, next in self.arcs.items(): - if next is not other.arcs.get(label): - return False - return True - - __hash__ = None # For Py3 compatibility. diff --git a/Parser/pgen/token.py b/Parser/pgen/token.py index e7e8f3f1b66..2cff62ce3b2 100644 --- a/Parser/pgen/token.py +++ b/Parser/pgen/token.py @@ -6,21 +6,21 @@ def generate_tokens(tokens): for line in tokens: line = line.strip() - if not line or line.startswith('#'): + if not line or line.startswith("#"): continue name = line.split()[0] yield (name, next(numbers)) - yield ('N_TOKENS', next(numbers)) - yield ('NT_OFFSET', 256) + yield ("N_TOKENS", next(numbers)) + yield ("NT_OFFSET", 256) def generate_opmap(tokens): for line in tokens: line = line.strip() - if not line or line.startswith('#'): + if not line or line.startswith("#"): continue pieces = line.split() @@ -35,4 +35,4 @@ def generate_opmap(tokens): # with the token generation in "generate_tokens" because if this # symbol is included in Grammar/Tokens, it will collide with != # as it has the same name (NOTEQUAL). - yield ('<>', 'NOTEQUAL') + yield ("<>", "NOTEQUAL")
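For reference, a hypothetical end-to-end sketch (not part of the patch) that mirrors what ParserGenerator.create_dfas() now does: a tiny EBNF rule is fed through the new metaparser and automata modules. The example grammar string and the bare import paths are assumptions.

    # Hypothetical sketch; assumes Parser/pgen is on sys.path so the new
    # modules can be imported directly.
    from metaparser import GrammarParser
    from automata import DFA

    grammar = "assign: NAME ('=' NAME)* NEWLINE\n"

    for nfa in GrammarParser(grammar).parse():
        print("Rule:", nfa.name)
        nfa.dump()                  # NFA built by the metaparser
        dfa = DFA.from_nfa(nfa)     # Rabin-Scott subset construction
        dfa.simplify()              # merge states with identical arcs
        dfa.dump()                  # the states that become parsing tables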