From 2ea6d890281c415e0a2f00e63526e592da8ce3d9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 9 Jun 2021 02:31:10 +0300 Subject: [PATCH] bpo-43833: Emit warnings for numeric literals followed by keyword (GH-25466) Emit a deprecation warning if the numeric literal is immediately followed by one of keywords: and, else, for, if, in, is, or. Raise a syntax error with more informative message if it is immediately followed by other keyword or identifier. Automerge-Triggered-By: GH:pablogsal --- Doc/whatsnew/3.10.rst | 23 ++++ Lib/test/test_compile.py | 2 +- Lib/test/test_grammar.py | 92 ++++++++++++- .../2021-04-18-18-07-33.bpo-43833.oChkCi.rst | 4 + Parser/tokenizer.c | 128 ++++++++++++++++++ 5 files changed, 246 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-04-18-18-07-33.bpo-43833.oChkCi.rst diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 74e6b0384ad..df9806a2118 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -1444,6 +1444,17 @@ Optimizations Deprecated ========== +* Currently Python accepts numeric literals immediately followed by keywords, + for example ``0in x``, ``1or x``, ``0if 1else 2``. It allows confusing + and ambiguous expressions like ``[0x1for x in y]`` (which can be + interpreted as ``[0x1 for x in y]`` or ``[0x1f or x in y]``). Starting in + this release, a deprecation warning is raised if the numeric literal is + immediately followed by one of keywords :keyword:`and`, :keyword:`else`, + :keyword:`for`, :keyword:`if`, :keyword:`in`, :keyword:`is` and :keyword:`or`. + In future releases it will be changed to syntax warning, and finally to + syntax error. + (Contributed by Serhiy Storchaka in :issue:`43833`). + * Starting in this release, there will be a concerted effort to begin cleaning up old import semantics that were kept for Python 2.7 compatibility. 
Specifically, @@ -1670,6 +1681,18 @@ This section lists previously described changes and other bugfixes that may require changes to your code. +Changes in the Python syntax +---------------------------- + +* Deprecation warning is now emitted when compiling previously valid syntax + if the numeric literal is immediately followed by a keyword (like in ``0in x``). + In future releases it will be changed to syntax warning, and finally to a + syntax error. To get rid of the warning and make the code compatible with + future releases just add a space between the numeric literal and the + following keyword. + (Contributed by Serhiy Storchaka in :issue:`43833`). + + Changes in the Python API ------------------------- diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index d40347c3c6b..ea8ae22fce4 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -162,7 +162,7 @@ def test_literals_with_leading_zeroes(self): for arg in ["077787", "0xj", "0x.", "0e", "090000000000000", "080000000000000", "000000000000009", "000000000000008", "0b42", "0BADCAFE", "0o123456789", "0b1.1", "0o4.2", - "0b101j2", "0o153j2", "0b100e1", "0o777e1", "0777", + "0b101j", "0o153j", "0b100e1", "0o777e1", "0777", "000777", "000000000000007"]: self.assertRaises(SyntaxError, eval, arg) diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index ebc9dde97e7..c0820fd6368 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -177,8 +177,10 @@ def test_floats(self): def test_float_exponent_tokenization(self): # See issue 21642. 
- self.assertEqual(1 if 1else 0, 1) - self.assertEqual(1 if 0else 0, 0) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + self.assertEqual(eval("1 if 1else 0"), 1) + self.assertEqual(eval("1 if 0else 0"), 0) self.assertRaises(SyntaxError, eval, "0 if 1Else 0") def test_underscore_literals(self): @@ -211,6 +213,92 @@ def test_bad_numerical_literals(self): check("1e2_", "invalid decimal literal") check("1e+", "invalid decimal literal") + def test_end_of_numerical_literals(self): + def check(test): + with self.assertWarns(DeprecationWarning): + compile(test, "", "eval") + + def check_error(test): + with warnings.catch_warnings(record=True) as w: + with self.assertRaises(SyntaxError): + compile(test, "", "eval") + self.assertEqual(w, []) + + check_error("0xfand x") + check("0o7and x") + check("0b1and x") + check("9and x") + check("0and x") + check("1.and x") + check("1e3and x") + check("1jand x") + + check("0xfor x") + check("0o7or x") + check("0b1or x") + check("9or x") + check_error("0or x") + check("1.or x") + check("1e3or x") + check("1jor x") + + check("0xfin x") + check("0o7in x") + check("0b1in x") + check("9in x") + check("0in x") + check("1.in x") + check("1e3in x") + check("1jin x") + + with warnings.catch_warnings(): + warnings.simplefilter('ignore', SyntaxWarning) + check("0xfis x") + check("0o7is x") + check("0b1is x") + check("9is x") + check("0is x") + check("1.is x") + check("1e3is x") + check("1jis x") + + check("0xfif x else y") + check("0o7if x else y") + check("0b1if x else y") + check("9if x else y") + check("0if x else y") + check("1.if x else y") + check("1e3if x else y") + check("1jif x else y") + + check_error("x if 0xfelse y") + check("x if 0o7else y") + check("x if 0b1else y") + check("x if 9else y") + check("x if 0else y") + check("x if 1.else y") + check("x if 1e3else y") + check("x if 1jelse y") + + check("[0x1ffor x in ()]") + check("[0x1for x in ()]") + check("[0xfor x in ()]") + check("[0o7for x 
in ()]") + check("[0b1for x in ()]") + check("[9for x in ()]") + check("[1.for x in ()]") + check("[1e3for x in ()]") + check("[1jfor x in ()]") + + check_error("0xfspam") + check_error("0o7spam") + check_error("0b1spam") + check_error("9spam") + check_error("0spam") + check_error("1.spam") + check_error("1e3spam") + check_error("1jspam") + def test_string_literals(self): x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y) x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-04-18-18-07-33.bpo-43833.oChkCi.rst b/Misc/NEWS.d/next/Core and Builtins/2021-04-18-18-07-33.bpo-43833.oChkCi.rst new file mode 100644 index 00000000000..2adbdba651b --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-04-18-18-07-33.bpo-43833.oChkCi.rst @@ -0,0 +1,4 @@ +Emit a deprecation warning if the numeric literal is immediately followed by +one of keywords: and, else, for, if, in, is, or. Raise a syntax error with +more informative message if it is immediately followed by other keyword or +identifier. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index a86af9bc062..6002f3e05a8 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1121,6 +1121,113 @@ indenterror(struct tok_state *tok) return ERRORTOKEN; } +static int +parser_warn(struct tok_state *tok, const char *format, ...) 
+{ + PyObject *errmsg; + va_list vargs; +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif + errmsg = PyUnicode_FromFormatV(format, vargs); + va_end(vargs); + if (!errmsg) { + goto error; + } + + if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, errmsg, tok->filename, + tok->lineno, NULL, NULL) < 0) { + if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) { + /* Replace the DeprecationWarning exception with a SyntaxError + to get a more accurate error report */ + PyErr_Clear(); + syntaxerror(tok, "%U", errmsg); + } + goto error; + } + Py_DECREF(errmsg); + return 0; + +error: + Py_XDECREF(errmsg); + tok->done = E_ERROR; + return -1; +} + +static int +lookahead(struct tok_state *tok, const char *test) +{ + const char *s = test; + int res = 0; + while (1) { + int c = tok_nextc(tok); + if (*s == 0) { + res = !is_potential_identifier_char(c); + } + else if (c == *s) { + s++; + continue; + } + + tok_backup(tok, c); + while (s != test) { + tok_backup(tok, *--s); + } + return res; + } +} + +static int +verify_end_of_number(struct tok_state *tok, int c, const char *kind) +{ + /* Emit a deprecation warning only if the numeric literal is immediately + * followed by one of keywords which can occur after a numeric literal + * in valid code: "and", "else", "for", "if", "in", "is" and "or". + * It allows to gradually deprecate existing valid code without adding + * warning before error in most cases of invalid numeric literal (which + * would be confusing and break existing tests). + * Raise a syntax error with slightly better message than plain + * "invalid syntax" if the numeric literal is immediately followed by + * other keyword or identifier. 
+ */ + int r = 0; + if (c == 'a') { + r = lookahead(tok, "nd"); + } + else if (c == 'e') { + r = lookahead(tok, "lse"); + } + else if (c == 'f') { + r = lookahead(tok, "or"); + } + else if (c == 'i') { + int c2 = tok_nextc(tok); + if (c2 == 'f' || c2 == 'n' || c2 == 's') { + r = 1; + } + tok_backup(tok, c2); + } + else if (c == 'o') { + r = lookahead(tok, "r"); + } + if (r) { + tok_backup(tok, c); + if (parser_warn(tok, "invalid %s literal", kind)) { + return 0; + } + tok_nextc(tok); + } + else /* In future releases, only error will remain. */ + if (is_potential_identifier_char(c)) { + tok_backup(tok, c); + syntaxerror(tok, "invalid %s literal", kind); + return 0; + } + return 1; +} + /* Verify that the identifier follows PEP 3131. All identifier strings are guaranteed to be "ready" unicode objects. */ @@ -1569,6 +1676,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) c = tok_nextc(tok); } while (isxdigit(c)); } while (c == '_'); + if (!verify_end_of_number(tok, c, "hexadecimal")) { + return ERRORTOKEN; + } } else if (c == 'o' || c == 'O') { /* Octal */ @@ -1595,6 +1705,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) return syntaxerror(tok, "invalid digit '%c' in octal literal", c); } + if (!verify_end_of_number(tok, c, "octal")) { + return ERRORTOKEN; + } } else if (c == 'b' || c == 'B') { /* Binary */ @@ -1621,6 +1734,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) return syntaxerror(tok, "invalid digit '%c' in binary literal", c); } + if (!verify_end_of_number(tok, c, "binary")) { + return ERRORTOKEN; + } } else { int nonzero = 0; @@ -1664,6 +1780,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) "literals are not permitted; " "use an 0o prefix for octal integers"); } + if (!verify_end_of_number(tok, c, "decimal")) { + return ERRORTOKEN; + } } } else { @@ -1699,6 +1818,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) } 
} else if (!isdigit(c)) { tok_backup(tok, c); + if (!verify_end_of_number(tok, e, "decimal")) { + return ERRORTOKEN; + } tok_backup(tok, e); *p_start = tok->start; *p_end = tok->cur; @@ -1713,6 +1835,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) /* Imaginary part */ imaginary: c = tok_nextc(tok); + if (!verify_end_of_number(tok, c, "imaginary")) { + return ERRORTOKEN; + } + } + else if (!verify_end_of_number(tok, c, "decimal")) { + return ERRORTOKEN; } } }