diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index c7e76414ff0..c5eff8ad8cc 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -301,6 +301,7 @@ def baz(): { 6 0="""''', 5, 13) + check('b"fooжжж"'.encode(), 1, 1, 1, 10) # Errors thrown by symtable.c check('x = [(yield i) for i in range(3)]', 1, 7) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst new file mode 100644 index 00000000000..8cda4c9343d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-22-16-17-53.gh-issue-115823.c1TreJ.rst @@ -0,0 +1,3 @@ +Properly calculate error ranges in the parser when raising +:exc:`SyntaxError` exceptions caused by invalid byte sequences. Patch by +Pablo Galindo diff --git a/Parser/pegen_errors.c b/Parser/pegen_errors.c index e15673d02dd..e8f11a67e50 100644 --- a/Parser/pegen_errors.c +++ b/Parser/pegen_errors.c @@ -369,20 +369,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, Py_ssize_t col_number = col_offset; Py_ssize_t end_col_number = end_col_offset; - if (p->tok->encoding != NULL) { - col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); - if (col_number < 0) { + col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); + if (col_number < 0) { + goto error; + } + + if (end_col_offset > 0) { + end_col_number = _PyPegen_byte_offset_to_character_offset(error_line, end_col_offset); + if (end_col_number < 0) { goto error; } - if (end_col_number > 0) { - Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number); - if (end_col_offset < 0) { - goto error; - } else { - end_col_number = end_col_offset; - } - } } + tmp = Py_BuildValue("(OnnNnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number); if (!tmp) { goto error;