mirror of
https://github.com/python/cpython
synced 2024-09-16 00:48:28 +00:00
bpo-43833: Emit warnings for numeric literals followed by keyword (GH-25466)
Emit a deprecation warning if the numeric literal is immediately followed by one of the keywords: and, else, for, if, in, is, or. Raise a syntax error with a more informative message if it is immediately followed by another keyword or an identifier. Automerge-Triggered-By: GH:pablogsal
This commit is contained in:
parent
3e1c7167d8
commit
2ea6d89028
|
@ -1444,6 +1444,17 @@ Optimizations
|
||||||
Deprecated
|
Deprecated
|
||||||
==========
|
==========
|
||||||
|
|
||||||
|
* Currently Python accepts numeric literals immediately followed by keywords,
|
||||||
|
for example ``0in x``, ``1or x``, ``0if 1else 2``. It allows confusing
|
||||||
|
and ambiguous expressions like ``[0x1for x in y]`` (which can be
|
||||||
|
interpreted as ``[0x1 for x in y]`` or ``[0x1f or x in y]``). Starting in
|
||||||
|
this release, a deprecation warning is raised if the numeric literal is
|
||||||
|
immediately followed by one of the keywords :keyword:`and`, :keyword:`else`,
|
||||||
|
:keyword:`for`, :keyword:`if`, :keyword:`in`, :keyword:`is` and :keyword:`or`.
|
||||||
|
In future releases it will be changed to a syntax warning, and finally to a
|
||||||
|
syntax error.
|
||||||
|
(Contributed by Serhiy Storchaka in :issue:`43833`).
|
||||||
|
|
||||||
* Starting in this release, there will be a concerted effort to begin
|
* Starting in this release, there will be a concerted effort to begin
|
||||||
cleaning up old import semantics that were kept for Python 2.7
|
cleaning up old import semantics that were kept for Python 2.7
|
||||||
compatibility. Specifically,
|
compatibility. Specifically,
|
||||||
|
@ -1670,6 +1681,18 @@ This section lists previously described changes and other bugfixes
|
||||||
that may require changes to your code.
|
that may require changes to your code.
|
||||||
|
|
||||||
|
|
||||||
|
Changes in the Python syntax
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
* A deprecation warning is now emitted when compiling previously valid syntax
|
||||||
|
if the numeric literal is immediately followed by a keyword (like in ``0in x``).
|
||||||
|
In future releases it will be changed to a syntax warning, and finally to a
|
||||||
|
syntax error. To get rid of the warning and make the code compatible with
|
||||||
|
future releases just add a space between the numeric literal and the
|
||||||
|
following keyword.
|
||||||
|
(Contributed by Serhiy Storchaka in :issue:`43833`).
|
||||||
|
|
||||||
|
|
||||||
Changes in the Python API
|
Changes in the Python API
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
|
|
|
@ -162,7 +162,7 @@ def test_literals_with_leading_zeroes(self):
|
||||||
for arg in ["077787", "0xj", "0x.", "0e", "090000000000000",
|
for arg in ["077787", "0xj", "0x.", "0e", "090000000000000",
|
||||||
"080000000000000", "000000000000009", "000000000000008",
|
"080000000000000", "000000000000009", "000000000000008",
|
||||||
"0b42", "0BADCAFE", "0o123456789", "0b1.1", "0o4.2",
|
"0b42", "0BADCAFE", "0o123456789", "0b1.1", "0o4.2",
|
||||||
"0b101j2", "0o153j2", "0b100e1", "0o777e1", "0777",
|
"0b101j", "0o153j", "0b100e1", "0o777e1", "0777",
|
||||||
"000777", "000000000000007"]:
|
"000777", "000000000000007"]:
|
||||||
self.assertRaises(SyntaxError, eval, arg)
|
self.assertRaises(SyntaxError, eval, arg)
|
||||||
|
|
||||||
|
|
|
@ -177,8 +177,10 @@ def test_floats(self):
|
||||||
|
|
||||||
def test_float_exponent_tokenization(self):
|
def test_float_exponent_tokenization(self):
|
||||||
# See issue 21642.
|
# See issue 21642.
|
||||||
self.assertEqual(1 if 1else 0, 1)
|
with warnings.catch_warnings():
|
||||||
self.assertEqual(1 if 0else 0, 0)
|
warnings.simplefilter('ignore', DeprecationWarning)
|
||||||
|
self.assertEqual(eval("1 if 1else 0"), 1)
|
||||||
|
self.assertEqual(eval("1 if 0else 0"), 0)
|
||||||
self.assertRaises(SyntaxError, eval, "0 if 1Else 0")
|
self.assertRaises(SyntaxError, eval, "0 if 1Else 0")
|
||||||
|
|
||||||
def test_underscore_literals(self):
|
def test_underscore_literals(self):
|
||||||
|
@ -211,6 +213,92 @@ def test_bad_numerical_literals(self):
|
||||||
check("1e2_", "invalid decimal literal")
|
check("1e2_", "invalid decimal literal")
|
||||||
check("1e+", "invalid decimal literal")
|
check("1e+", "invalid decimal literal")
|
||||||
|
|
||||||
|
def test_end_of_numerical_literals(self):
|
||||||
|
def check(test):
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
compile(test, "<testcase>", "eval")
|
||||||
|
|
||||||
|
def check_error(test):
|
||||||
|
with warnings.catch_warnings(record=True) as w:
|
||||||
|
with self.assertRaises(SyntaxError):
|
||||||
|
compile(test, "<testcase>", "eval")
|
||||||
|
self.assertEqual(w, [])
|
||||||
|
|
||||||
|
check_error("0xfand x")
|
||||||
|
check("0o7and x")
|
||||||
|
check("0b1and x")
|
||||||
|
check("9and x")
|
||||||
|
check("0and x")
|
||||||
|
check("1.and x")
|
||||||
|
check("1e3and x")
|
||||||
|
check("1jand x")
|
||||||
|
|
||||||
|
check("0xfor x")
|
||||||
|
check("0o7or x")
|
||||||
|
check("0b1or x")
|
||||||
|
check("9or x")
|
||||||
|
check_error("0or x")
|
||||||
|
check("1.or x")
|
||||||
|
check("1e3or x")
|
||||||
|
check("1jor x")
|
||||||
|
|
||||||
|
check("0xfin x")
|
||||||
|
check("0o7in x")
|
||||||
|
check("0b1in x")
|
||||||
|
check("9in x")
|
||||||
|
check("0in x")
|
||||||
|
check("1.in x")
|
||||||
|
check("1e3in x")
|
||||||
|
check("1jin x")
|
||||||
|
|
||||||
|
with warnings.catch_warnings():
|
||||||
|
warnings.simplefilter('ignore', SyntaxWarning)
|
||||||
|
check("0xfis x")
|
||||||
|
check("0o7is x")
|
||||||
|
check("0b1is x")
|
||||||
|
check("9is x")
|
||||||
|
check("0is x")
|
||||||
|
check("1.is x")
|
||||||
|
check("1e3is x")
|
||||||
|
check("1jis x")
|
||||||
|
|
||||||
|
check("0xfif x else y")
|
||||||
|
check("0o7if x else y")
|
||||||
|
check("0b1if x else y")
|
||||||
|
check("9if x else y")
|
||||||
|
check("0if x else y")
|
||||||
|
check("1.if x else y")
|
||||||
|
check("1e3if x else y")
|
||||||
|
check("1jif x else y")
|
||||||
|
|
||||||
|
check_error("x if 0xfelse y")
|
||||||
|
check("x if 0o7else y")
|
||||||
|
check("x if 0b1else y")
|
||||||
|
check("x if 9else y")
|
||||||
|
check("x if 0else y")
|
||||||
|
check("x if 1.else y")
|
||||||
|
check("x if 1e3else y")
|
||||||
|
check("x if 1jelse y")
|
||||||
|
|
||||||
|
check("[0x1ffor x in ()]")
|
||||||
|
check("[0x1for x in ()]")
|
||||||
|
check("[0xfor x in ()]")
|
||||||
|
check("[0o7for x in ()]")
|
||||||
|
check("[0b1for x in ()]")
|
||||||
|
check("[9for x in ()]")
|
||||||
|
check("[1.for x in ()]")
|
||||||
|
check("[1e3for x in ()]")
|
||||||
|
check("[1jfor x in ()]")
|
||||||
|
|
||||||
|
check_error("0xfspam")
|
||||||
|
check_error("0o7spam")
|
||||||
|
check_error("0b1spam")
|
||||||
|
check_error("9spam")
|
||||||
|
check_error("0spam")
|
||||||
|
check_error("1.spam")
|
||||||
|
check_error("1e3spam")
|
||||||
|
check_error("1jspam")
|
||||||
|
|
||||||
def test_string_literals(self):
|
def test_string_literals(self):
|
||||||
x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y)
|
x = ''; y = ""; self.assertTrue(len(x) == 0 and x == y)
|
||||||
x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39)
|
x = '\''; y = "'"; self.assertTrue(len(x) == 1 and x == y and ord(x) == 39)
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
Emit a deprecation warning if the numeric literal is immediately followed by
|
||||||
|
one of the keywords: and, else, for, if, in, is, or. Raise a syntax error with
|
||||||
|
a more informative message if it is immediately followed by another keyword or
|
||||||
|
identifier.
|
|
@ -1121,6 +1121,113 @@ indenterror(struct tok_state *tok)
|
||||||
return ERRORTOKEN;
|
return ERRORTOKEN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
parser_warn(struct tok_state *tok, const char *format, ...)
|
||||||
|
{
|
||||||
|
PyObject *errmsg;
|
||||||
|
va_list vargs;
|
||||||
|
#ifdef HAVE_STDARG_PROTOTYPES
|
||||||
|
va_start(vargs, format);
|
||||||
|
#else
|
||||||
|
va_start(vargs);
|
||||||
|
#endif
|
||||||
|
errmsg = PyUnicode_FromFormatV(format, vargs);
|
||||||
|
va_end(vargs);
|
||||||
|
if (!errmsg) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, errmsg, tok->filename,
|
||||||
|
tok->lineno, NULL, NULL) < 0) {
|
||||||
|
if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
|
||||||
|
/* Replace the DeprecationWarning exception with a SyntaxError
|
||||||
|
to get a more accurate error report */
|
||||||
|
PyErr_Clear();
|
||||||
|
syntaxerror(tok, "%U", errmsg);
|
||||||
|
}
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
Py_DECREF(errmsg);
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
error:
|
||||||
|
Py_XDECREF(errmsg);
|
||||||
|
tok->done = E_ERROR;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
lookahead(struct tok_state *tok, const char *test)
|
||||||
|
{
|
||||||
|
const char *s = test;
|
||||||
|
int res = 0;
|
||||||
|
while (1) {
|
||||||
|
int c = tok_nextc(tok);
|
||||||
|
if (*s == 0) {
|
||||||
|
res = !is_potential_identifier_char(c);
|
||||||
|
}
|
||||||
|
else if (c == *s) {
|
||||||
|
s++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
tok_backup(tok, c);
|
||||||
|
while (s != test) {
|
||||||
|
tok_backup(tok, *--s);
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
verify_end_of_number(struct tok_state *tok, int c, const char *kind)
|
||||||
|
{
|
||||||
|
/* Emit a deprecation warning only if the numeric literal is immediately
|
||||||
|
* followed by one of the keywords which can occur after a numeric literal
|
||||||
|
* in valid code: "and", "else", "for", "if", "in", "is" and "or".
|
||||||
|
* It allows gradually deprecating existing valid code without adding
|
||||||
|
* warning before error in most cases of invalid numeric literal (which
|
||||||
|
* would be confusing and break existing tests).
|
||||||
|
* Raise a syntax error with a slightly better message than plain
|
||||||
|
* "invalid syntax" if the numeric literal is immediately followed by
|
||||||
|
* other keyword or identifier.
|
||||||
|
*/
|
||||||
|
int r = 0;
|
||||||
|
if (c == 'a') {
|
||||||
|
r = lookahead(tok, "nd");
|
||||||
|
}
|
||||||
|
else if (c == 'e') {
|
||||||
|
r = lookahead(tok, "lse");
|
||||||
|
}
|
||||||
|
else if (c == 'f') {
|
||||||
|
r = lookahead(tok, "or");
|
||||||
|
}
|
||||||
|
else if (c == 'i') {
|
||||||
|
int c2 = tok_nextc(tok);
|
||||||
|
if (c2 == 'f' || c2 == 'n' || c2 == 's') {
|
||||||
|
r = 1;
|
||||||
|
}
|
||||||
|
tok_backup(tok, c2);
|
||||||
|
}
|
||||||
|
else if (c == 'o') {
|
||||||
|
r = lookahead(tok, "r");
|
||||||
|
}
|
||||||
|
if (r) {
|
||||||
|
tok_backup(tok, c);
|
||||||
|
if (parser_warn(tok, "invalid %s literal", kind)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
tok_nextc(tok);
|
||||||
|
}
|
||||||
|
else /* In future releases, only error will remain. */
|
||||||
|
if (is_potential_identifier_char(c)) {
|
||||||
|
tok_backup(tok, c);
|
||||||
|
syntaxerror(tok, "invalid %s literal", kind);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
/* Verify that the identifier follows PEP 3131.
|
/* Verify that the identifier follows PEP 3131.
|
||||||
All identifier strings are guaranteed to be "ready" unicode objects.
|
All identifier strings are guaranteed to be "ready" unicode objects.
|
||||||
*/
|
*/
|
||||||
|
@ -1569,6 +1676,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
||||||
c = tok_nextc(tok);
|
c = tok_nextc(tok);
|
||||||
} while (isxdigit(c));
|
} while (isxdigit(c));
|
||||||
} while (c == '_');
|
} while (c == '_');
|
||||||
|
if (!verify_end_of_number(tok, c, "hexadecimal")) {
|
||||||
|
return ERRORTOKEN;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (c == 'o' || c == 'O') {
|
else if (c == 'o' || c == 'O') {
|
||||||
/* Octal */
|
/* Octal */
|
||||||
|
@ -1595,6 +1705,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
||||||
return syntaxerror(tok,
|
return syntaxerror(tok,
|
||||||
"invalid digit '%c' in octal literal", c);
|
"invalid digit '%c' in octal literal", c);
|
||||||
}
|
}
|
||||||
|
if (!verify_end_of_number(tok, c, "octal")) {
|
||||||
|
return ERRORTOKEN;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (c == 'b' || c == 'B') {
|
else if (c == 'b' || c == 'B') {
|
||||||
/* Binary */
|
/* Binary */
|
||||||
|
@ -1621,6 +1734,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
||||||
return syntaxerror(tok,
|
return syntaxerror(tok,
|
||||||
"invalid digit '%c' in binary literal", c);
|
"invalid digit '%c' in binary literal", c);
|
||||||
}
|
}
|
||||||
|
if (!verify_end_of_number(tok, c, "binary")) {
|
||||||
|
return ERRORTOKEN;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
int nonzero = 0;
|
int nonzero = 0;
|
||||||
|
@ -1664,6 +1780,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
||||||
"literals are not permitted; "
|
"literals are not permitted; "
|
||||||
"use an 0o prefix for octal integers");
|
"use an 0o prefix for octal integers");
|
||||||
}
|
}
|
||||||
|
if (!verify_end_of_number(tok, c, "decimal")) {
|
||||||
|
return ERRORTOKEN;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -1699,6 +1818,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
||||||
}
|
}
|
||||||
} else if (!isdigit(c)) {
|
} else if (!isdigit(c)) {
|
||||||
tok_backup(tok, c);
|
tok_backup(tok, c);
|
||||||
|
if (!verify_end_of_number(tok, e, "decimal")) {
|
||||||
|
return ERRORTOKEN;
|
||||||
|
}
|
||||||
tok_backup(tok, e);
|
tok_backup(tok, e);
|
||||||
*p_start = tok->start;
|
*p_start = tok->start;
|
||||||
*p_end = tok->cur;
|
*p_end = tok->cur;
|
||||||
|
@ -1713,6 +1835,12 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
||||||
/* Imaginary part */
|
/* Imaginary part */
|
||||||
imaginary:
|
imaginary:
|
||||||
c = tok_nextc(tok);
|
c = tok_nextc(tok);
|
||||||
|
if (!verify_end_of_number(tok, c, "imaginary")) {
|
||||||
|
return ERRORTOKEN;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (!verify_end_of_number(tok, c, "decimal")) {
|
||||||
|
return ERRORTOKEN;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue