gh-100445: Improve error message for unterminated strings with escapes (#100446)

This commit is contained in:
Shantanu 2023-10-18 05:58:51 -07:00 committed by GitHub
parent baefbb21d9
commit 3156d193b8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 25 additions and 4 deletions

View file

@ -2298,8 +2298,14 @@ def test_error_parenthesis(self):
def test_error_string_literal(self):
self._check_error("'blech", "unterminated string literal")
self._check_error('"blech', "unterminated string literal")
self._check_error("'blech", r"unterminated string literal \(.*\)$")
self._check_error('"blech', r"unterminated string literal \(.*\)$")
self._check_error(
r'"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote"
)
self._check_error(
r'r"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote"
)
self._check_error("'''blech", "unterminated triple-quoted string literal")
self._check_error('"""blech', "unterminated triple-quoted string literal")

View file

@ -0,0 +1 @@
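Improve the error message for unterminated string literals in which a backslash escapes a quote character: the message now adds the hint "perhaps you escaped the end quote?".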

View file

@ -972,6 +972,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
int quote = c;
int quote_size = 1; /* 1 or 3 */
int end_quote_size = 0;
int has_escaped_quote = 0;
/* Nodes of type STRING, especially multi line strings
must be handled differently in order to get both
@ -1037,8 +1038,18 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
return MAKE_TOKEN(ERRORTOKEN);
}
else {
_PyTokenizer_syntaxerror(tok, "unterminated string literal (detected at"
" line %d)", start);
if (has_escaped_quote) {
_PyTokenizer_syntaxerror(
tok,
"unterminated string literal (detected at line %d); "
"perhaps you escaped the end quote?",
start
);
} else {
_PyTokenizer_syntaxerror(
tok, "unterminated string literal (detected at line %d)", start
);
}
if (c != '\n') {
tok->done = E_EOLS;
}
@ -1052,6 +1063,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
end_quote_size = 0;
if (c == '\\') {
c = tok_nextc(tok); /* skip escaped char */
if (c == quote) { /* but record whether the escaped char was a quote */
has_escaped_quote = 1;
}
if (c == '\r') {
c = tok_nextc(tok);
}