bpo-40661: Fix segfault when parsing invalid input (GH-20165)

Fix segfaults when parsing very complex invalid input, like `import äˆ ð£„¯ð¢·žð±‹á”€ð””ð‘©±å®ä±¬ð©¾\n𗶽`.

Co-authored-by: Guido van Rossum <guido@python.org>
Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
This commit is contained in:
Lysandros Nikolaou 2020-05-18 20:32:03 +03:00 committed by GitHub
parent 08b47c367a
commit 7b7a21bc4f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 1603 additions and 8 deletions

View file

@ -591,6 +591,7 @@ def f(*a, b):
("f-string_single_closing_brace", "f'}'"),
("from_import_invalid", "from import import a"),
("from_import_trailing_comma", "from a import b,"),
("import_non_ascii_syntax_error", "import ä £"),
# This test case checks error paths involving tokens with uninitialized
# values of col_offset and end_col_offset.
("invalid indentation",

View file

@ -659,6 +659,9 @@
Traceback (most recent call last):
SyntaxError: cannot assign to __debug__
>>> import ä £
Traceback (most recent call last):
SyntaxError: invalid character '£' (U+00A3)
"""
import re

File diff suppressed because it is too large Load diff

View file

@ -433,6 +433,12 @@ def _set_up_token_end_metadata_extraction(self) -> None:
self.print("int _end_col_offset = _token->end_col_offset;")
self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro")
def _check_for_errors(self) -> None:
self.print("if (p->error_indicator) {")
with self.indent():
self.print("return NULL;")
self.print("}")
def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
self.print("{")
with self.indent():
@ -468,10 +474,7 @@ def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> N
memoize = self._should_memoize(node)
with self.indent():
self.print("if (p->error_indicator) {")
with self.indent():
self.print("return NULL;")
self.print("}")
self._check_for_errors()
self.print(f"{result_type} _res = NULL;")
if memoize:
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))")
@ -500,10 +503,7 @@ def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
is_repeat1 = node.name.startswith("_loop1")
with self.indent():
self.print("if (p->error_indicator) {")
with self.indent():
self.print("return NULL;")
self.print("}")
self._check_for_errors()
self.print("void *_res = NULL;")
if memoize:
self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res))")
@ -687,6 +687,7 @@ def visit_Alt(
) -> None:
self.print(f"{{ // {node}")
with self.indent():
self._check_for_errors()
# Prepare variable declarations for the alternative
vars = self.collect_vars(node)
for v, var_type in sorted(item for item in vars.items() if item[0] is not None):