bpo-44335: Fix a regression when identifying invalid characters in syntax errors (GH-26589)

2024-07-20 06:25:42 +00:00 · 2021-06-08 12:25:22 +01:00 · 2021-06-08 12:25:22 +01:00 · d334c73b56
parent 6d518bb3a1
commit d334c73b56
3 changed files with 6 additions and 1 deletion
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -211,6 +211,7 @@ def testSyntaxErrorOffset(self):
        check('lambda x: x = 2', 1, 1)
        check('f{a + b + c}', 1, 2)
        check('[file for str(file) in []\n])', 2, 2)
+        check('a = « hello » « world »', 1, 5)
        check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5)
        check('[file for\n str(file) in []]', 2, 2)
        check("ages = {'Alice'=22, 'Bob'=23}", 1, 16)
--- a/Builtins/2021-06-08-01-13-47.bpo-44335.GQTTkl.rst
+++ b/Builtins/2021-06-08-01-13-47.bpo-44335.GQTTkl.rst
@@ -0,0 +1,2 @@
+Fix a regression when identifying incorrect characters in syntax errors.
+Patch by Pablo Galindo.
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -1288,7 +1288,9 @@ _PyPegen_run_parser(Parser *p)
        reset_parser_state(p);
        _PyPegen_parse(p);
        if (PyErr_Occurred()) {
-            if (PyErr_ExceptionMatches(PyExc_SyntaxError)) {
+            // Prioritize tokenizer errors over custom syntax errors raised
+            // in the second phase, but only if the errors come from the parser.
+            if (p->tok->done != E_ERROR && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
                _PyPegen_check_tokenizer_errors(p);
            }
            return NULL;