From d334c73b56756e90c33ce06e3a6ec23271aa099d Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Tue, 8 Jun 2021 12:25:22 +0100 Subject: [PATCH] bpo-44335: Fix a regression when identifying invalid characters in syntax errors (GH-26589) --- Lib/test/test_exceptions.py | 1 + .../2021-06-08-01-13-47.bpo-44335.GQTTkl.rst | 2 ++ Parser/pegen.c | 4 +++- 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-06-08-01-13-47.bpo-44335.GQTTkl.rst diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 5fb651f4c22..df5778d7e5f 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -211,6 +211,7 @@ def testSyntaxErrorOffset(self): check('lambda x: x = 2', 1, 1) check('f{a + b + c}', 1, 2) check('[file for str(file) in []\n])', 2, 2) + check('a = « hello » « world »', 1, 5) check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5) check('[file for\n str(file) in []]', 2, 2) check("ages = {'Alice'=22, 'Bob'=23}", 1, 16) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-08-01-13-47.bpo-44335.GQTTkl.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-01-13-47.bpo-44335.GQTTkl.rst new file mode 100644 index 00000000000..b57904e5da6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-01-13-47.bpo-44335.GQTTkl.rst @@ -0,0 +1,2 @@ +Fix a regression when identifying invalid characters in syntax errors. +Patch by Pablo Galindo. diff --git a/Parser/pegen.c b/Parser/pegen.c index aac7e368a79..c69a042f8de 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -1288,7 +1288,9 @@ _PyPegen_run_parser(Parser *p) reset_parser_state(p); _PyPegen_parse(p); if (PyErr_Occurred()) { - if (PyErr_ExceptionMatches(PyExc_SyntaxError)) { + // Prioritize tokenizer errors over custom syntax errors raised + // on the second phase only if the errors come from the parser. + if (p->tok->done != E_ERROR && PyErr_ExceptionMatches(PyExc_SyntaxError)) { _PyPegen_check_tokenizer_errors(p); } return NULL;