From d334c73b56756e90c33ce06e3a6ec23271aa099d Mon Sep 17 00:00:00 2001
From: Pablo Galindo <Pablogsal@gmail.com>
Date: Tue, 8 Jun 2021 12:25:22 +0100
Subject: [PATCH] bpo-44335: Fix a regression when identifying invalid
 characters in syntax errors (GH-26589)

---
 Lib/test/test_exceptions.py                                   | 1 +
 .../2021-06-08-01-13-47.bpo-44335.GQTTkl.rst                  | 2 ++
 Parser/pegen.c                                                | 4 +++-
 3 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-06-08-01-13-47.bpo-44335.GQTTkl.rst

diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 5fb651f4c22..df5778d7e5f 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -211,6 +211,7 @@ def testSyntaxErrorOffset(self):
         check('lambda x: x = 2', 1, 1)
         check('f{a + b + c}', 1, 2)
         check('[file for str(file) in []\n])', 2, 2)
+        check('a = « hello » « world »', 1, 5)
         check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5)
         check('[file for\n str(file) in []]', 2, 2)
         check("ages = {'Alice'=22, 'Bob'=23}", 1, 16)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-06-08-01-13-47.bpo-44335.GQTTkl.rst b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-01-13-47.bpo-44335.GQTTkl.rst
new file mode 100644
index 00000000000..b57904e5da6
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-06-08-01-13-47.bpo-44335.GQTTkl.rst	
@@ -0,0 +1,2 @@
+Fix a regression when identifying incorrect characters in syntax errors.
+Patch by Pablo Galindo.
diff --git a/Parser/pegen.c b/Parser/pegen.c
index aac7e368a79..c69a042f8de 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -1288,7 +1288,9 @@ _PyPegen_run_parser(Parser *p)
         reset_parser_state(p);
         _PyPegen_parse(p);
         if (PyErr_Occurred()) {
-            if (PyErr_ExceptionMatches(PyExc_SyntaxError)) {
+            // Prioritize tokenizer errors over custom syntax errors raised
+            // in the second phase only if the errors come from the parser.
+            if (p->tok->done != E_ERROR && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
                 _PyPegen_check_tokenizer_errors(p);
             }
             return NULL;