From cf7303ed2aa19fb48687d7140dbc86fc23c9fca4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 9 Jul 2018 15:09:35 +0300 Subject: [PATCH] bpo-33305: Improve SyntaxError for invalid numerical literals. (GH-6517) --- Lib/test/test_grammar.py | 24 +++++++ .../2018-04-18-14-17-44.bpo-33305.9z3dDH.rst | 1 + Parser/tokenizer.c | 65 +++++++++++++++---- 3 files changed, 77 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-04-18-14-17-44.bpo-33305.9z3dDH.rst diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index ee4136286ba..78918ae250c 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -100,6 +100,8 @@ class TokenTests(unittest.TestCase): + check_syntax_error = check_syntax_error + def test_backslash(self): # Backslash means line continuation: x = 1 \ @@ -184,6 +186,28 @@ def test_underscore_literals(self): # Sanity check: no literal begins with an underscore self.assertRaises(NameError, eval, "_0") + def test_bad_numerical_literals(self): + check = self.check_syntax_error + check("0b12", "invalid digit '2' in binary literal") + check("0b1_2", "invalid digit '2' in binary literal") + check("0b2", "invalid digit '2' in binary literal") + check("0b1_", "invalid binary literal") + check("0b", "invalid binary literal") + check("0o18", "invalid digit '8' in octal literal") + check("0o1_8", "invalid digit '8' in octal literal") + check("0o8", "invalid digit '8' in octal literal") + check("0o1_", "invalid octal literal") + check("0o", "invalid octal literal") + check("0x1_", "invalid hexadecimal literal") + check("0x", "invalid hexadecimal literal") + check("1_", "invalid decimal literal") + check("012", + "leading zeros in decimal integer literals are not permitted; " + "use an 0o prefix for octal integers") + check("1.2_", "invalid decimal literal") + check("1e2_", "invalid decimal literal") + check("1e+", "invalid decimal literal") + def test_string_literals(self): x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y) x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39) diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-04-18-14-17-44.bpo-33305.9z3dDH.rst b/Misc/NEWS.d/next/Core and Builtins/2018-04-18-14-17-44.bpo-33305.9z3dDH.rst new file mode 100644 index 00000000000..cae2f7f8595 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-04-18-14-17-44.bpo-33305.9z3dDH.rst @@ -0,0 +1 @@ +Improved syntax error messages for invalid numerical literals. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index fbc98880c9a..f8b83c9f3d7 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1280,6 +1280,28 @@ PyToken_ThreeChars(int c1, int c2, int c3) return OP; } +static int +syntaxerror(struct tok_state *tok, const char *format, ...) +{ +#ifndef PGEN + va_list vargs; +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif + PyErr_FormatV(PyExc_SyntaxError, format, vargs); + va_end(vargs); + PyErr_SyntaxLocationObject(tok->filename, + tok->lineno, + tok->cur - tok->line_start); + tok->done = E_ERROR; +#else + tok->done = E_TOKEN; +#endif + return ERRORTOKEN; +} + static int indenterror(struct tok_state *tok) { @@ -1333,8 +1355,8 @@ tok_decimal_tail(struct tok_state *tok) } c = tok_nextc(tok); if (!isdigit(c)) { - tok->done = E_TOKEN; tok_backup(tok, c); + syntaxerror(tok, "invalid decimal literal"); return 0; } } @@ -1562,9 +1584,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) c = tok_nextc(tok); } if (!isxdigit(c)) { - tok->done = E_TOKEN; tok_backup(tok, c); - return ERRORTOKEN; + return syntaxerror(tok, "invalid hexadecimal literal"); } do { c = tok_nextc(tok); @@ -1579,14 +1600,23 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) c = tok_nextc(tok); } if (c < '0' || c >= '8') { - tok->done = E_TOKEN; tok_backup(tok, c); - return ERRORTOKEN; + if (isdigit(c)) { + return syntaxerror(tok, + "invalid digit '%c' in octal literal", c); + } + else { + return syntaxerror(tok, "invalid octal literal"); + } } do { c = tok_nextc(tok); } while ('0' <= c && c < '8'); } while (c == '_'); + if (isdigit(c)) { + return syntaxerror(tok, + "invalid digit '%c' in octal literal", c); + } } else if (c == 'b' || c == 'B') { /* Binary */ @@ -1596,14 +1626,23 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) c = tok_nextc(tok); } if (c != '0' && c != '1') { - tok->done = E_TOKEN; tok_backup(tok, c); - return ERRORTOKEN; + if (isdigit(c)) { + return syntaxerror(tok, + "invalid digit '%c' in binary literal", c); + } + else { + return syntaxerror(tok, "invalid binary literal"); + } } do { c = tok_nextc(tok); } while (c == '0' || c == '1'); } while (c == '_'); + if (isdigit(c)) { + return syntaxerror(tok, + "invalid digit '%c' in binary literal", c); + } } else { int nonzero = 0; @@ -1613,9 +1652,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) if (c == '_') { c = tok_nextc(tok); if (!isdigit(c)) { - tok->done = E_TOKEN; tok_backup(tok, c); - return ERRORTOKEN; + return syntaxerror(tok, "invalid decimal literal"); } } if (c != '0') { @@ -1642,9 +1680,11 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) } else if (nonzero) { /* Old-style octal: now disallowed. */ - tok->done = E_TOKEN; tok_backup(tok, c); - return ERRORTOKEN; + return syntaxerror(tok, + "leading zeros in decimal integer " + "literals are not permitted; " + "use an 0o prefix for octal integers"); } } } @@ -1676,9 +1716,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) if (c == '+' || c == '-') { c = tok_nextc(tok); if (!isdigit(c)) { - tok->done = E_TOKEN; tok_backup(tok, c); - return ERRORTOKEN; + return syntaxerror(tok, "invalid decimal literal"); } } else if (!isdigit(c)) { tok_backup(tok, c);