bpo-43822: Improve syntax errors for missing commas (GH-25377)

commit b280248be8
parent e692f55979
Pablo Galindo, 2021-04-15 21:38:45 +01:00 (committed by GitHub)
13 changed files with 1235 additions and 1034 deletions

Doc/library/token-list.inc generated

@@ -211,6 +211,8 @@
 .. data:: TYPE_COMMENT
 
+.. data:: SOFT_KEYWORD
+
 .. data:: ERRORTOKEN
 
 .. data:: N_TOKENS

Grammar/Tokens

@@ -59,6 +59,7 @@ AWAIT
 ASYNC
 TYPE_IGNORE
 TYPE_COMMENT
+SOFT_KEYWORD
 ERRORTOKEN
 
 # These aren't used by the C tokenizer but are needed for tokenize.py

Grammar/python.gram

@@ -7,6 +7,7 @@ _PyPegen_parse(Parser *p)
     // Initialize keywords
     p->keywords = reserved_keywords;
     p->n_keyword_lists = n_keyword_lists;
+    p->soft_keywords = soft_keywords;
 
     // Run parser
     void *result = NULL;

@@ -459,6 +460,7 @@ expressions[expr_ty]:
     | a=expression ',' { _PyAST_Tuple(CHECK(asdl_expr_seq*, _PyPegen_singleton_seq(p, a)), Load, EXTRA) }
     | expression
 
 expression[expr_ty] (memo):
+    | invalid_expression
     | a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) }
     | disjunction
     | lambdef

@@ -778,6 +780,13 @@ invalid_kwarg:
     | expression a='=' {
         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
             a, "expression cannot contain assignment, perhaps you meant \"==\"?") }
+
+invalid_expression:
+    # !(NAME STRING) is not matched so we don't show this error with some invalid string prefixes like: kf"dsfsdf"
+    # Soft keywords need to also be ignored because they can be parsed as NAME NAME
+    | !(NAME STRING | SOFT_KEYWORD) a=disjunction expression {
+        RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, "invalid syntax. Perhaps you forgot a comma?") }
+
 invalid_named_expression:
     | a=expression ':=' expression {
         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(

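The effect of the new invalid_expression rule: when two expressions sit next to each other with no separating comma, the parser now anchors a targeted hint at the end of the first one instead of reporting a bare "invalid syntax". An illustrative session on a build with this patch (the message matches the test_syntax.py doctests below; the "<example>" filename is arbitrary):

    >>> compile("[1, 2 3]", "<example>", "eval")
    Traceback (most recent call last):
      ...
    SyntaxError: invalid syntax. Perhaps you forgot a comma?
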
Include/token.h generated

@@ -69,8 +69,9 @@ extern "C" {
 #define ASYNC 56
 #define TYPE_IGNORE 57
 #define TYPE_COMMENT 58
-#define ERRORTOKEN 59
-#define N_TOKENS 63
+#define SOFT_KEYWORD 59
+#define ERRORTOKEN 60
+#define N_TOKENS 64
 #define NT_OFFSET 256
 
 /* Special definitions for cooperation with parser */

Lib/test/test_genexps.py

@@ -103,7 +103,7 @@
 >>> dict(a = i for i in range(10))
 Traceback (most recent call last):
    ...
-SyntaxError: invalid syntax
+SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='?
 
 Verify that parentheses are required when used as a keyword argument value

Lib/test/test_syntax.py

@@ -248,22 +248,36 @@
 # Missing commas in literals collections should not
 # produce special error messages regarding missing
-# parentheses
+# parentheses, but about missing commas instead
 
 >>> [1, 2 3]
 Traceback (most recent call last):
-SyntaxError: invalid syntax
+SyntaxError: invalid syntax. Perhaps you forgot a comma?
 
 >>> {1, 2 3}
 Traceback (most recent call last):
-SyntaxError: invalid syntax
+SyntaxError: invalid syntax. Perhaps you forgot a comma?
 
 >>> {1:2, 2:5 3:12}
 Traceback (most recent call last):
-SyntaxError: invalid syntax
+SyntaxError: invalid syntax. Perhaps you forgot a comma?
 
+>>> (1, 2 3)
+Traceback (most recent call last):
+SyntaxError: invalid syntax. Perhaps you forgot a comma?
+
+# Make sure soft keyword constructs don't raise specialized
+# errors regarding missing commas
+
+>>> match x:
+...    y = 3
+Traceback (most recent call last):
+SyntaxError: invalid syntax
+
+>>> match x:
+...    case y:
+...        3 $ 3
+Traceback (most recent call last):
+SyntaxError: invalid syntax
 
 From compiler_complex_args():

@@ -864,7 +878,7 @@
 SyntaxError: cannot assign to attribute here. Maybe you meant '==' instead of '='?
 
 Ensure that early = are not matched by the parser as invalid comparisons
->>> f(2, 4, x=34); {1,2 a}
+>>> f(2, 4, x=34); 1 $ 2
 Traceback (most recent call last):
 SyntaxError: invalid syntax

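The SOFT_KEYWORD exclusion in the rule's lookahead is what keeps the last two cases generic: match and case are soft keywords, so `match x:` is tokenized as two adjacent NAMEs and would otherwise fit the `a=disjunction expression` pattern and draw the misleading comma hint. A quick sketch with the stock tokenize module, which knows nothing about soft keywords:

    >>> import io, tokenize
    >>> tokens = tokenize.generate_tokens(io.StringIO("match x").readline)
    >>> [tokenize.tok_name[t.type] for t in tokens][:2]
    ['NAME', 'NAME']
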
Lib/token.py generated

@@ -62,12 +62,13 @@ AWAIT = 55
 ASYNC = 56
 TYPE_IGNORE = 57
 TYPE_COMMENT = 58
+SOFT_KEYWORD = 59
 # These aren't used by the C tokenizer but are needed for tokenize.py
-ERRORTOKEN = 59
-COMMENT = 60
-NL = 61
-ENCODING = 62
-N_TOKENS = 63
+ERRORTOKEN = 60
+COMMENT = 61
+NL = 62
+ENCODING = 63
+N_TOKENS = 64
 # Special definitions for cooperation with parser
 NT_OFFSET = 256

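Since SOFT_KEYWORD takes the value 59, ERRORTOKEN and every token after it shift up by one, in C and in Lib/token.py alike; only code that hard-coded the numbers rather than using the token module constants can notice. A quick check on a patched build:

    >>> import token
    >>> token.SOFT_KEYWORD, token.ERRORTOKEN, token.N_TOKENS
    (59, 60, 64)
    >>> token.tok_name[59]
    'SOFT_KEYWORD'
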
Misc/NEWS.d entry (new file)

@@ -0,0 +1,2 @@
+Improve syntax errors in the parser for missing commas between expressions.
+Patch by Pablo Galindo.

Parser/parser.c generated (diff suppressed because it is too large)

Parser/pegen.c

@@ -943,6 +943,23 @@ _PyPegen_string_token(Parser *p)
     return _PyPegen_expect_token(p, STRING);
 }
 
+expr_ty _PyPegen_soft_keyword_token(Parser *p) {
+    Token *t = _PyPegen_expect_token(p, NAME);
+    if (t == NULL) {
+        return NULL;
+    }
+    char *the_token;
+    Py_ssize_t size;
+    PyBytes_AsStringAndSize(t->bytes, &the_token, &size);
+    for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) {
+        if (strncmp(*keyword, the_token, size) == 0) {
+            return _PyPegen_name_token(p);
+        }
+    }
+    return NULL;
+}
+
 static PyObject *
 parsenumber_raw(const char *s)
 {

@@ -1151,6 +1168,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
     p->tok = tok;
     p->keywords = NULL;
    p->n_keyword_lists = -1;
+    p->soft_keywords = NULL;
     p->tokens = PyMem_Malloc(sizeof(Token *));
     if (!p->tokens) {
         PyMem_Free(p);

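_PyPegen_soft_keyword_token consumes a NAME and re-parses it as a name node only if its text appears in the generated soft_keywords table. Note that strncmp() is passed the length of the incoming NAME, so a NAME that is a strict prefix of a soft keyword (say `mat`) also compares equal. A rough Python equivalent of the loop, with hypothetical names that are not part of the patch:

    def is_soft_keyword_token(name: str, soft_keywords: tuple) -> bool:
        # Mirrors strncmp(keyword, name, len(name)): only the first
        # len(name) bytes are compared, so keyword prefixes match too.
        return any(keyword.startswith(name) for keyword in soft_keywords)

    >>> is_soft_keyword_token("match", ("_", "case", "match"))
    True
    >>> is_soft_keyword_token("mat", ("_", "case", "match"))
    True
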
Parser/pegen.h

@@ -59,6 +59,7 @@ typedef struct {
     int fill, size;
     PyArena *arena;
     KeywordToken **keywords;
+    char **soft_keywords;
     int n_keyword_lists;
     int start_rule;
     int *errcode;

@@ -125,6 +126,7 @@ int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
 Token *_PyPegen_expect_token(Parser *p, int type);
 Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
 expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
+expr_ty _PyPegen_soft_keyword_token(Parser *p);
 Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
 int _PyPegen_fill_token(Parser *p);
 expr_ty _PyPegen_name_token(Parser *p);

Parser/token.c generated

@@ -65,6 +65,7 @@ const char * const _PyParser_TokenNames[] = {
     "ASYNC",
     "TYPE_IGNORE",
     "TYPE_COMMENT",
+    "SOFT_KEYWORD",
     "<ERRORTOKEN>",
     "<COMMENT>",
     "<NL>",

Tools/peg_generator/pegen/c_generator.py

@@ -46,6 +46,7 @@
     // Initialize keywords
     p->keywords = reserved_keywords;
     p->n_keyword_lists = n_keyword_lists;
+    p->soft_keywords = soft_keywords;
 
     return start_rule(p);
 }

@@ -66,6 +67,7 @@ class NodeTypes(Enum):
     "NAME": NodeTypes.NAME_TOKEN,
     "NUMBER": NodeTypes.NUMBER_TOKEN,
     "STRING": NodeTypes.STRING_TOKEN,
+    "SOFT_KEYWORD": NodeTypes.SOFT_KEYWORD,
 }

@@ -411,6 +413,7 @@ def generate(self, filename: str) -> None:
         if subheader:
             self.print(subheader)
         self._setup_keywords()
+        self._setup_soft_keywords()
         for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
             comment = " // Left-recursive" if rule.left_recursive else ""
             self.print(f"#define {rulename}_type {i}{comment}")

@@ -474,6 +477,15 @@ def _setup_keywords(self) -> None:
             self.print("},")
         self.print("};")
 
+    def _setup_soft_keywords(self) -> None:
+        soft_keywords = sorted(self.callmakervisitor.soft_keywords)
+        self.print("static char *soft_keywords[] = {")
+        with self.indent():
+            for keyword in soft_keywords:
+                self.print(f'"{keyword}",')
+            self.print("NULL,")
+        self.print("};")
+
     def _set_up_token_start_metadata_extraction(self) -> None:
         self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
         with self.indent():
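
For reference, assuming the grammar's soft keywords at the time were `_`, `case`, and `match` (per PEP 634; the set itself is not visible in this diff), the new method would emit the following table into the generated parser, with the trailing NULL acting as the sentinel that _PyPegen_soft_keyword_token's loop stops on:

    static char *soft_keywords[] = {
        "_",
        "case",
        "match",
        NULL,
    };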