From 015b97d19a24a169cc3c0939119e1228791e4253 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Mon, 26 Feb 2024 13:57:09 +0100 Subject: [PATCH] gh-115823: Calculate correctly error locations when dealing with implicit encodings (#115824) --- Lib/test/test_exceptions.py | 1 + ...-02-22-16-17-53.gh-issue-115823.c1TreJ.rst | 3 +++ Parser/pegen_errors.c | 20 +++++++++---------- 3 files changed, 13 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index c7e76414ff07..c5eff8ad8ccc 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -301,6 +301,7 @@ class ExceptionTests(unittest.TestCase): { 6 0="""''', 5, 13) + check('b"fooжжж"'.encode(), 1, 1, 1, 10) # Errors thrown by symtable.c check('x = [(yield i) for i in range(3)]', 1, 7) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst new file mode 100644 index 000000000000..8cda4c9343d4 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst @@ -0,0 +1,3 @@ +Properly calculate error ranges in the parser when raising +:exc:`SyntaxError` exceptions caused by invalid byte sequences. Patch by +Pablo Galindo diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index e15673d02dd3..e8f11a67e50f 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -369,20 +369,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, Py_ssize_t col_number = col_offset; Py_ssize_t end_col_number = end_col_offset; - if (p->tok->encoding != NULL) { - col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); - if (col_number < 0) { + col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); + if (col_number < 0) { + goto error; + } + + if (end_col_offset > 0) { + end_col_number = _PyPegen_byte_offset_to_character_offset(error_line, end_col_offset); + if (end_col_number < 0) { goto error; } - if (end_col_number > 0) { - Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number); - if (end_col_offset < 0) { - goto error; - } else { - end_col_number = end_col_offset; - } - } } + tmp = Py_BuildValue("(OnnNnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number); if (!tmp) { goto error;