cpython/Parser/pegen.c
Gregory P. Smith 511ca94520
gh-95778: CVE-2020-10735: Prevent DoS by very large int() (#96499)
Integer to and from text conversions via CPython's bignum `int` type are not safe against denial of service attacks due to malicious input. Very large input strings with hundreds of thousands of digits can consume several CPU seconds.

This PR comes fresh from a pile of work done in our private PSRT security response team repo.

Signed-off-by: Christian Heimes [Red Hat] <christian@python.org>
Tons-of-polishing-up-by: Gregory P. Smith [Google] <greg@krypto.org>
Reviews via the private PSRT repo via many others (see the NEWS entry in the PR).

<!-- gh-issue-number: gh-95778 -->
* Issue: gh-95778
<!-- /gh-issue-number -->

I wrote up [a one pager for the release managers](https://docs.google.com/document/d/1KjuF_aXlzPUxTK4BMgezGJ2Pn7uevfX7g0_mvgHlL7Y/edit#). Much of that text wound up in the Issue. Backport PRs already exist. See the issue for links.
2022-09-02 09:35:08 -07:00

956 lines
25 KiB
C

#include <Python.h>
#include "pycore_ast.h" // _PyAST_Validate(),
#include "pycore_pystate.h" // _PyThreadState_GET()
#include <errcode.h>
#include "tokenizer.h"
#include "pegen.h"
// Internal parser functions
/* Store E_EOF through p->errcode (when the parser was given an error-code
 * slot) and return NULL. */
asdl_stmt_seq*
_PyPegen_interactive_exit(Parser *p)
{
    int *errcode_slot = p->errcode;
    if (errcode_slot != NULL) {
        *errcode_slot = E_EOF;
    }
    return NULL;
}
/* Translate a byte offset within the UTF-8 form of `line` into a character
 * count.
 *
 * The offset is clamped to at most strlen(utf8) + 1 bytes. That many bytes
 * are decoded with the "replace" error handler, and the length of the decoded
 * string is returned. Returns -1 if the UTF-8 buffer cannot be obtained or
 * if decoding fails.
 */
Py_ssize_t
_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
{
    const char *utf8 = PyUnicode_AsUTF8(line);
    if (utf8 == NULL) {
        return -1;
    }

    const Py_ssize_t max_offset = (Py_ssize_t)strlen(utf8) + 1;
    const Py_ssize_t nbytes = (col_offset > max_offset) ? max_offset : col_offset;
    assert(nbytes >= 0);

    PyObject *prefix = PyUnicode_DecodeUTF8(utf8, nbytes, "replace");
    if (prefix == NULL) {
        return -1;
    }
    const Py_ssize_t nchars = PyUnicode_GET_LENGTH(prefix);
    Py_DECREF(prefix);
    return nchars;
}
// Record a memo entry of the given type on the token at index `mark`.
// `mark` is the start of the node, while p->mark (stored in the entry) is
// the end. If node==NULL, they should be the same.
// Returns 0 on success and -1 if the arena allocation fails.
int
_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
{
    Memo *entry = _PyArena_Malloc(p->arena, sizeof(*entry));
    if (entry == NULL) {
        return -1;
    }
    Token *start_token = p->tokens[mark];
    entry->node = node;
    entry->type = type;
    entry->mark = p->mark;
    // Push the new entry onto the head of the token's memo list.
    entry->next = start_token->memo;
    start_token->memo = entry;
    return 0;
}
// Like _PyPegen_insert_memo(), but when the token at `mark` already has an
// entry of this type, that entry's node and end mark are overwritten instead
// of a new entry being added.
int
_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
{
    Memo *entry = p->tokens[mark]->memo;
    while (entry != NULL && entry->type != type) {
        entry = entry->next;
    }
    if (entry == NULL) {
        // No entry of this type yet: insert a new one.
        return _PyPegen_insert_memo(p, mark, type, node);
    }
    entry->node = node;
    entry->mark = p->mark;
    return 0;
}
// Make sure p->normalize holds the "normalize" attribute of the
// "unicodedata" module, looking it up only if it is not already set.
// Returns 1 when p->normalize is available, 0 when the lookup failed.
static int
init_normalization(Parser *p)
{
    if (p->normalize == NULL) {
        p->normalize = _PyImport_GetModuleAttrString("unicodedata", "normalize");
    }
    return p->normalize != NULL;
}
// Set up `arr` as an empty array with room for `initial_size` items.
// Returns 1 on success, 0 if the backing storage could not be allocated.
static int
growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
    assert(initial_size > 0);
    arr->num_items = 0;
    arr->size = initial_size;
    arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items));
    if (arr->items == NULL) {
        return 0;
    }
    return 1;
}
// Append (lineno, comment) to `arr`, doubling the capacity first when full.
// Returns 1 on success. Returns 0 if the reallocation fails, in which case
// nothing is stored and `comment` is left untouched.
static int
growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
    const int is_full = arr->num_items >= arr->size;
    if (is_full) {
        size_t doubled = arr->size * 2;
        void *grown = PyMem_Realloc(arr->items, doubled * sizeof(*arr->items));
        if (grown == NULL) {
            return 0;
        }
        arr->size = doubled;
        arr->items = grown;
    }

    arr->items[arr->num_items].comment = comment;  // Take ownership
    arr->items[arr->num_items].lineno = lineno;
    arr->num_items += 1;
    return 1;
}
// Free every stored comment string, then the item storage itself.
static void
growable_comment_array_deallocate(growable_comment_array *arr) {
    unsigned idx = 0;
    while (idx < arr->num_items) {
        PyMem_Free(arr->items[idx].comment);
        idx++;
    }
    PyMem_Free(arr->items);
}
// Classify a NAME-like token: return the type of the keyword it matches,
// or NAME when no keyword matches.
// Keywords are looked up in p->keywords[name_len]; each list ends at an
// entry whose type is -1.
static int
_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
{
    assert(name_len > 0);
    if (name_len >= p->n_keyword_lists) {
        return NAME;
    }
    KeywordToken *candidate = p->keywords[name_len];
    if (candidate == NULL) {
        return NAME;
    }
    while (candidate->type != -1) {
        if (strncmp(candidate->str, name, name_len) == 0) {
            return candidate->type;
        }
        candidate++;
    }
    return NAME;
}
// Populate `token` from the source span [start, end) and the tokenizer type.
//
// Sets the token type, a bytes copy of the text (registered with the arena),
// the nesting level and the line/column positions, then increments p->fill.
// Returns -1 if the bytes object cannot be created or added to the arena.
// For ERRORTOKEN it returns the result of the decode/tokenizer error helper.
// Otherwise it returns 0.
static int
initialize_token(Parser *p, Token *token, const char *start, const char *end, int token_type) {
assert(token != NULL);
// NAME tokens are reclassified through the keyword tables.
token->type = (token_type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : token_type;
token->bytes = PyBytes_FromStringAndSize(start, end - start);
if (token->bytes == NULL) {
return -1;
}
if (_PyArena_AddPyObject(p->arena, token->bytes) < 0) {
Py_DECREF(token->bytes);
return -1;
}
token->level = p->tok->level;
// STRING tokens take their start line and line-start pointer from the
// tokenizer's multi-line bookkeeping; all other tokens use the current line.
const char *line_start = token_type == STRING ? p->tok->multi_line_start : p->tok->line_start;
int lineno = token_type == STRING ? p->tok->first_lineno : p->tok->lineno;
int end_lineno = p->tok->lineno;
// An offset is -1 when the pointer is NULL or precedes the line start.
int col_offset = (start != NULL && start >= line_start) ? (int)(start - line_start) : -1;
int end_col_offset = (end != NULL && end >= p->tok->line_start) ? (int)(end - p->tok->line_start) : -1;
token->lineno = lineno;
// On the parser's starting line, columns are shifted by p->starting_col_offset.
token->col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + col_offset : col_offset;
token->end_lineno = end_lineno;
token->end_col_offset = p->tok->lineno == p->starting_lineno ? p->starting_col_offset + end_col_offset : end_col_offset;
// The slot counts as filled even when an error is reported below.
p->fill += 1;
if (token_type == ERRORTOKEN && p->tok->done == E_DECODE) {
return _Pypegen_raise_decode_error(p);
}
return (token_type == ERRORTOKEN ? _Pypegen_tokenizer_error(p) : 0);
}
// Double the capacity of p->tokens and allocate a zeroed Token for each new
// slot. Returns 0 on success. On allocation failure it sets MemoryError and
// returns -1; if a Token allocation fails, p->size is set to the index of the
// failed slot.
static int
_resize_tokens_array(Parser *p) {
    const int old_size = p->size;
    const int new_size = old_size * 2;

    Token **grown = PyMem_Realloc(p->tokens, new_size * sizeof(Token *));
    if (grown == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    p->tokens = grown;

    int slot = old_size;
    while (slot < new_size) {
        Token *fresh = PyMem_Calloc(1, sizeof(Token));
        p->tokens[slot] = fresh;
        if (fresh == NULL) {
            // Needed, in order to cleanup correctly after parser fails
            p->size = slot;
            PyErr_NoMemory();
            return -1;
        }
        slot++;
    }
    p->size = new_size;
    return 0;
}
// Fetch the next token from the tokenizer into p->tokens[p->fill].
//
// TYPE_IGNORE tokens are not stored as tokens: their text is copied into
// p->type_ignore_comments and the next token is fetched instead. In
// single-input mode an ENDMARKER seen after parsing has started is turned
// into a NEWLINE. The token array is grown when it is full.
//
// Returns -1 with an exception set on allocation failure; otherwise returns
// the result of initialize_token().
int
_PyPegen_fill_token(Parser *p)
{
    const char *start;
    const char *end;
    int type = _PyTokenizer_Get(p->tok, &start, &end);

    // Record and skip '# type: ignore' comments
    while (type == TYPE_IGNORE) {
        Py_ssize_t len = end - start;
        char *tag = PyMem_Malloc(len + 1);
        if (tag == NULL) {
            PyErr_NoMemory();
            return -1;
        }
        strncpy(tag, start, len);
        tag[len] = '\0';
        // Ownership of tag passes to the growable array only on success;
        // on failure the array stored nothing, so free tag here to avoid a leak.
        if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
            PyMem_Free(tag);
            PyErr_NoMemory();
            return -1;
        }
        type = _PyTokenizer_Get(p->tok, &start, &end);
    }

    // If we have reached the end and we are in single input mode we need to
    // insert a newline and reset the parsing
    if (p->start_rule == Py_single_input && type == ENDMARKER && p->parsing_started) {
        type = NEWLINE; /* Add an extra newline */
        p->parsing_started = 0;

        if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
            p->tok->pendin = -p->tok->indent;
            p->tok->indent = 0;
        }
    }
    else {
        p->parsing_started = 1;
    }

    // Check if we are at the limit of the token array capacity and resize if needed
    if ((p->fill == p->size) && (_resize_tokens_array(p) != 0)) {
        return -1;
    }

    Token *t = p->tokens[p->fill];
    return initialize_token(p, t, start, end, type);
}
#if defined(Py_DEBUG)
// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.
#define NSTATISTICS 2000
static long memo_statistics[NSTATISTICS];
// Reset every memoization counter to zero.
void
_PyPegen_clear_memo_statistics()
{
for (int i = 0; i < NSTATISTICS; i++) {
memo_statistics[i] = 0;
}
}
// Return a new list of NSTATISTICS ints mirroring memo_statistics,
// or NULL if building the list fails.
PyObject *
_PyPegen_get_memo_statistics()
{
PyObject *ret = PyList_New(NSTATISTICS);
if (ret == NULL) {
return NULL;
}
for (int i = 0; i < NSTATISTICS; i++) {
PyObject *value = PyLong_FromLong(memo_statistics[i]);
if (value == NULL) {
Py_DECREF(ret);
return NULL;
}
// PyList_SetItem steals the reference to value, so value is not
// released separately here.
if (PyList_SetItem(ret, i, value) < 0) {
Py_DECREF(ret);
return NULL;
}
}
return ret;
}
#endif
// Look for a memo entry of the given type on the token at p->mark.
//
// A token is fetched first when p->mark has caught up with p->fill.
// Returns:
//    1  if an entry was found; p->mark is moved to the entry's end mark and
//       the entry's node is stored through `pres` (treated as a void **).
//    0  if no entry of that type exists.
//   -1  if fetching a token failed (p->error_indicator is set).
int // bool
_PyPegen_is_memoized(Parser *p, int type, void *pres)
{
    if (p->mark == p->fill) {
        if (_PyPegen_fill_token(p) < 0) {
            p->error_indicator = 1;
            return -1;
        }
    }

    Token *t = p->tokens[p->mark];

    for (Memo *m = t->memo; m != NULL; m = m->next) {
        if (m->type == type) {
            // The guard must be spelled Py_DEBUG, the macro that also guards
            // the definition of memo_statistics; the former "PY_DEBUG"
            // spelling meant this instrumentation was never compiled in.
#if defined(Py_DEBUG)
            if (0 <= type && type < NSTATISTICS) {
                long count = m->mark - p->mark;
                // A memoized negative result counts for one.
                if (count <= 0) {
                    count = 1;
                }
                memo_statistics[type] += count;
            }
#endif
            p->mark = m->mark;
            *(void **)(pres) = m->node;
            return 1;
        }
    }
    return 0;
}
// Run `func` without consuming input: p->mark is restored afterwards.
// Returns nonzero when "func matched" agrees with `positive`.
int
_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
{
    const int saved_mark = p->mark;
    const int matched = (func(p) != NULL);
    p->mark = saved_mark;
    return matched == positive;
}
// Run `func(p, arg)` without consuming input: p->mark is restored afterwards.
// Returns nonzero when "func matched" agrees with `positive`.
int
_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
{
    const int saved_mark = p->mark;
    const int matched = (func(p, arg) != NULL);
    p->mark = saved_mark;
    return matched == positive;
}
// Run `func(p, arg)` without consuming input: p->mark is restored afterwards.
// Returns nonzero when "func matched" agrees with `positive`.
int
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
{
    const int saved_mark = p->mark;
    const int matched = (func(p, arg) != NULL);
    p->mark = saved_mark;
    return matched == positive;
}
// Run `func` without consuming input: p->mark is restored afterwards.
// Returns nonzero when "func matched" agrees with `positive`.
int
_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
{
    const int saved_mark = p->mark;
    const int matched = ((void*)func(p) != NULL);
    p->mark = saved_mark;
    return matched == positive;
}
// Consume and return the token at p->mark if it has the requested type.
// Returns NULL without consuming anything when the type differs, and NULL
// with p->error_indicator set when a new token could not be fetched.
Token *
_PyPegen_expect_token(Parser *p, int type)
{
    if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
        p->error_indicator = 1;
        return NULL;
    }

    Token *current = p->tokens[p->mark];
    if (current->type == type) {
        p->mark += 1;
        return current;
    }
    return NULL;
}
// Pass `result` through when it is non-NULL. A NULL result raises a
// "expected (...)" syntax error, unless p->error_indicator is already set,
// in which case NULL is returned without raising anything new.
void*
_PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) {
    if (p->error_indicator == 1) {
        return NULL;
    }
    if (result != NULL) {
        return result;
    }
    RAISE_SYNTAX_ERROR("expected (%s)", expected);
    return NULL;
}
// Like _PyPegen_expect_token(), but a token of the wrong type raises a
// syntax error located at that token. Returns NULL immediately if
// p->error_indicator is already set.
Token *
_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {
    if (p->error_indicator == 1) {
        return NULL;
    }
    if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
        p->error_indicator = 1;
        return NULL;
    }

    Token *current = p->tokens[p->mark];
    if (current->type == type) {
        p->mark += 1;
        return current;
    }
    RAISE_SYNTAX_ERROR_KNOWN_LOCATION(current, "expected '%s'", expected);
    return NULL;
}
// If the token at p->mark is a NAME whose text equals `keyword`, consume it
// via _PyPegen_name_token() and return the result; otherwise return NULL.
// p->error_indicator is set when fetching a token or reading its bytes fails.
expr_ty
_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
{
    if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {
        p->error_indicator = 1;
        return NULL;
    }

    Token *current = p->tokens[p->mark];
    if (current->type != NAME) {
        return NULL;
    }

    const char *text = PyBytes_AsString(current->bytes);
    if (text == NULL) {
        p->error_indicator = 1;
        return NULL;
    }
    return (strcmp(text, keyword) == 0) ? _PyPegen_name_token(p) : NULL;
}
// Walk backwards from the token before p->mark and return the first one whose
// type is neither ENDMARKER nor in the NEWLINE..DEDENT range.
// If every earlier token is of those types, the token at index 0 is returned;
// if p->mark is 0, NULL is returned.
Token *
_PyPegen_get_last_nonnwhitespace_token(Parser *p)
{
    assert(p->mark >= 0);
    Token *found = NULL;
    int idx = p->mark;
    while (--idx >= 0) {
        found = p->tokens[idx];
        const int skippable = found->type == ENDMARKER ||
                              (found->type >= NEWLINE && found->type <= DEDENT);
        if (!skippable) {
            break;
        }
    }
    return found;
}
// Build an interned str object for the identifier text `n` (decoded as UTF-8)
// and register it with the parser's arena.
// Non-ASCII identifiers are first passed through unicodedata.normalize("NFKC", id).
// Returns the identifier, or NULL with p->error_indicator set on any failure.
PyObject *
_PyPegen_new_identifier(Parser *p, const char *n)
{
PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
if (!id) {
goto error;
}
/* PyUnicode_DecodeUTF8 should always return a ready string. */
assert(PyUnicode_IS_READY(id));
/* Check whether there are non-ASCII characters in the
identifier; if so, normalize to NFKC. */
if (!PyUnicode_IS_ASCII(id))
{
PyObject *id2;
if (!init_normalization(p))
{
Py_DECREF(id);
goto error;
}
PyObject *form = PyUnicode_InternFromString("NFKC");
if (form == NULL)
{
Py_DECREF(id);
goto error;
}
PyObject *args[2] = {form, id};
id2 = _PyObject_FastCall(p->normalize, args, 2);
// The call arguments are released whether or not the call succeeded.
Py_DECREF(id);
Py_DECREF(form);
if (!id2) {
goto error;
}
if (!PyUnicode_Check(id2))
{
PyErr_Format(PyExc_TypeError,
"unicodedata.normalize() must return a string, not "
"%.200s",
_PyType_Name(Py_TYPE(id2)));
Py_DECREF(id2);
goto error;
}
// From here on `id` refers to the normalized string.
id = id2;
}
PyUnicode_InternInPlace(&id);
if (_PyArena_AddPyObject(p->arena, id) < 0)
{
Py_DECREF(id);
goto error;
}
return id;
error:
p->error_indicator = 1;
return NULL;
}
// Build a Name node (Load context) from token `t`, using the token's text as
// the identifier and the token's positions as the node's location.
// Returns NULL when `t` is NULL. Also returns NULL, with p->error_indicator
// set, when the token text or the identifier cannot be obtained.
static expr_ty
_PyPegen_name_from_token(Parser *p, Token* t)
{
    if (t == NULL) {
        return NULL;
    }

    const char *text = PyBytes_AsString(t->bytes);
    PyObject *id = (text != NULL) ? _PyPegen_new_identifier(p, text) : NULL;
    if (id == NULL) {
        p->error_indicator = 1;
        return NULL;
    }
    return _PyAST_Name(id, Load, t->lineno, t->col_offset, t->end_lineno,
                       t->end_col_offset, p->arena);
}
// Consume a NAME token, if one is next, and turn it into a Name node.
expr_ty
_PyPegen_name_token(Parser *p)
{
    return _PyPegen_name_from_token(p, _PyPegen_expect_token(p, NAME));
}
// Consume and return the next token if it is a STRING, else return NULL.
void *
_PyPegen_string_token(Parser *p)
{
    Token *string_tok = _PyPegen_expect_token(p, STRING);
    return string_tok;
}
// Consume a NAME token and, if its text is exactly one of the entries in
// p->soft_keywords, return it as a Name node.
//
// Returns NULL when the next token is not a NAME or is not a soft keyword.
// If the token's bytes cannot be read, NULL is returned with
// p->error_indicator set.
expr_ty _PyPegen_soft_keyword_token(Parser *p) {
    Token *t = _PyPegen_expect_token(p, NAME);
    if (t == NULL) {
        return NULL;
    }
    char *the_token;
    Py_ssize_t size;
    // Without this check, a failure would leave the_token/size uninitialized.
    if (PyBytes_AsStringAndSize(t->bytes, &the_token, &size) < 0) {
        p->error_indicator = 1;
        return NULL;
    }
    for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) {
        // Compare lengths first: strncmp() limited to the token length would
        // otherwise accept any prefix of a keyword (e.g. "ma" for "match").
        if (strlen(*keyword) == (size_t)size &&
            strncmp(*keyword, the_token, (size_t)size) == 0) {
            return _PyPegen_name_from_token(p, t);
        }
    }
    return NULL;
}
// Convert the text of a numeric literal (without underscores) into a Python
// int, float or complex object. Returns NULL if a conversion helper fails.
// NOTE(review): `s + strlen(s) - 1` assumes `s` is non-empty — confirm callers.
static PyObject *
parsenumber_raw(const char *s)
{
const char *end;
long x;
double dx;
Py_complex compl;
int imflag;
assert(s != NULL);
errno = 0;
// Point at the last character to detect an imaginary suffix.
end = s + strlen(s) - 1;
imflag = *end == 'j' || *end == 'J';
if (s[0] == '0') {
// Literals starting with '0' are parsed as unsigned with base detection.
x = (long)PyOS_strtoul(s, (char **)&end, 0);
// A negative value after the cast, with no errno, means the result does
// not fit in a long: reparse it as an arbitrary-size int.
if (x < 0 && errno == 0) {
return PyLong_FromString(s, (char **)0, 0);
}
}
else {
x = PyOS_strtol(s, (char **)&end, 0);
}
// The whole string was consumed, so it is an integer literal.
if (*end == '\0') {
if (errno != 0) {
return PyLong_FromString(s, (char **)0, 0);
}
return PyLong_FromLong(x);
}
/* XXX Huge floats may silently fail */
if (imflag) {
compl.real = 0.;
compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
if (compl.imag == -1.0 && PyErr_Occurred()) {
return NULL;
}
return PyComplex_FromCComplex(compl);
}
dx = PyOS_string_to_double(s, NULL, NULL);
if (dx == -1.0 && PyErr_Occurred()) {
return NULL;
}
return PyFloat_FromDouble(dx);
}
// Convert a numeric literal that may contain '_' separators.
// Literals without underscores go straight to parsenumber_raw(); otherwise an
// underscore-free copy is made, converted and freed.
// Returns NULL with MemoryError set if the copy cannot be allocated.
static PyObject *
parsenumber(const char *s)
{
    assert(s != NULL);

    if (strchr(s, '_') == NULL) {
        return parsenumber_raw(s);
    }

    /* Create a duplicate without underscores. */
    char *stripped = PyMem_Malloc(strlen(s) + 1);
    if (stripped == NULL) {
        return PyErr_NoMemory();
    }
    char *out = stripped;
    for (const char *in = s; *in != '\0'; in++) {
        if (*in == '_') {
            continue;
        }
        *out++ = *in;
    }
    *out = '\0';

    PyObject *number = parsenumber_raw(stripped);
    PyMem_Free(stripped);
    return number;
}
// Consume a NUMBER token and return it as a Constant node whose value is
// registered with the parser's arena.
// Returns NULL without error when the next token is not a NUMBER. All other
// failures return NULL with p->error_indicator set.
expr_ty
_PyPegen_number_token(Parser *p)
{
Token *t = _PyPegen_expect_token(p, NUMBER);
if (t == NULL) {
return NULL;
}
const char *num_raw = PyBytes_AsString(t->bytes);
if (num_raw == NULL) {
p->error_indicator = 1;
return NULL;
}
// Underscore separators are rejected when the requested feature version is below 6.
if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
p->error_indicator = 1;
return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported "
"in Python 3.6 and greater");
}
PyObject *c = parsenumber(num_raw);
if (c == NULL) {
p->error_indicator = 1;
PyThreadState *tstate = _PyThreadState_GET();
// The only way a ValueError should happen in _this_ code is via
// PyLong_FromString hitting a length limit.
if (tstate->curexc_type == PyExc_ValueError &&
tstate->curexc_value != NULL) {
PyObject *type, *value, *tb;
// This acts as PyErr_Clear() as we're replacing curexc.
PyErr_Fetch(&type, &value, &tb);
Py_XDECREF(tb);
Py_DECREF(type);
/* Intentionally omitting columns to avoid a wall of 1000s of '^'s
* on the error message. Nobody is going to overlook their huge
* numeric literal once given the line. */
RAISE_ERROR_KNOWN_LOCATION(
p, PyExc_SyntaxError,
t->lineno, -1 /* col_offset */,
t->end_lineno, -1 /* end_col_offset */,
"%S - Consider hexadecimal for huge integer literals "
"to avoid decimal conversion limits.",
value);
// `value` is only needed to format the message above.
Py_DECREF(value);
}
return NULL;
}
if (_PyArena_AddPyObject(p->arena, c) < 0) {
Py_DECREF(c);
p->error_indicator = 1;
return NULL;
}
return _PyAST_Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno,
t->end_col_offset, p->arena);
}
/* Check that the source for a single input statement really is a single
statement by looking at what is left in the buffer after parsing.
Trailing whitespace and comments are OK.
Returns 1 if anything else remains after p->tok->cur, 0 otherwise. */
static int // bool
bad_single_statement(Parser *p)
{
char *cur = p->tok->cur;
char c = *cur;
for (;;) {
// Skip spaces, tabs, newlines and form feeds ('\014').
while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
c = *++cur;
}
// Reached the NUL terminator: nothing but whitespace/comments remained.
if (!c) {
return 0;
}
// Anything other than the start of a comment is extra input.
if (c != '#') {
return 1;
}
/* Suck up comment. */
while (c && c != '\n') {
c = *++cur;
}
}
}
// Translate compiler flags (PyCF_* / CO_FUTURE_*) into the parser's
// PyPARSE_* bit set. A NULL `flags` yields 0.
static int
compute_parser_flags(PyCompilerFlags *flags)
{
    if (flags == NULL) {
        return 0;
    }

    const int cf = flags->cf_flags;
    int result = 0;

    result |= (cf & PyCF_DONT_IMPLY_DEDENT) ? PyPARSE_DONT_IMPLY_DEDENT : 0;
    result |= (cf & PyCF_IGNORE_COOKIE) ? PyPARSE_IGNORE_COOKIE : 0;
    result |= (cf & CO_FUTURE_BARRY_AS_BDFL) ? PyPARSE_BARRY_AS_BDFL : 0;
    result |= (cf & PyCF_TYPE_COMMENTS) ? PyPARSE_TYPE_COMMENTS : 0;
    // The async hacks are only enabled for AST-only compiles with a
    // feature version below 7.
    if ((cf & PyCF_ONLY_AST) && flags->cf_feature_version < 7) {
        result |= PyPARSE_ASYNC_HACKS;
    }
    result |= (cf & PyCF_ALLOW_INCOMPLETE_INPUT) ? PyPARSE_ALLOW_INCOMPLETE_INPUT : 0;

    return result;
}
// Parser API
// Allocate and initialize a Parser bound to tokenizer `tok`.
// The token array starts with a single zeroed Token slot and the
// type-ignore comment array with room for 10 entries.
// Returns NULL with MemoryError set if any allocation fails; everything
// allocated so far is released first.
Parser *
_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
int feature_version, int *errcode, PyArena *arena)
{
Parser *p = PyMem_Malloc(sizeof(Parser));
if (p == NULL) {
return (Parser *) PyErr_NoMemory();
}
assert(tok != NULL);
// Propagate the relevant parser flags to the tokenizer.
tok->type_comments = (flags & PyPARSE_TYPE_COMMENTS) > 0;
tok->async_hacks = (flags & PyPARSE_ASYNC_HACKS) > 0;
p->tok = tok;
// The keyword tables start out empty here.
p->keywords = NULL;
p->n_keyword_lists = -1;
p->soft_keywords = NULL;
p->tokens = PyMem_Malloc(sizeof(Token *));
if (!p->tokens) {
PyMem_Free(p);
return (Parser *) PyErr_NoMemory();
}
p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
if (!p->tokens[0]) {
PyMem_Free(p->tokens);
PyMem_Free(p);
return (Parser *) PyErr_NoMemory();
}
if (!growable_comment_array_init(&p->type_ignore_comments, 10)) {
PyMem_Free(p->tokens[0]);
PyMem_Free(p->tokens);
PyMem_Free(p);
return (Parser *) PyErr_NoMemory();
}
p->mark = 0;
p->fill = 0;
// One token slot was allocated above.
p->size = 1;
p->errcode = errcode;
p->arena = arena;
p->start_rule = start_rule;
p->parsing_started = 0;
p->normalize = NULL;
p->error_indicator = 0;
p->starting_lineno = 0;
p->starting_col_offset = 0;
p->flags = flags;
p->feature_version = feature_version;
p->known_err_token = NULL;
p->level = 0;
p->call_invalid_rules = 0;
#ifdef Py_DEBUG
p->debug = _Py_GetConfig()->parser_debug;
#endif
return p;
}
// Release everything the parser allocated itself: the cached normalize
// callable, every Token slot, the token array, the type-ignore comments and
// the Parser struct. The tokenizer and arena are not touched here.
void
_PyPegen_Parser_Free(Parser *p)
{
    Py_XDECREF(p->normalize);

    int slot = 0;
    while (slot < p->size) {
        PyMem_Free(p->tokens[slot]);
        slot++;
    }
    PyMem_Free(p->tokens);

    growable_comment_array_deallocate(&p->type_ignore_comments);
    PyMem_Free(p);
}
// Prepare the parser for a second pass: drop the memo lists of all filled
// tokens, rewind to the first token and enable the invalid_* rules.
static void
reset_parser_state_for_error_pass(Parser *p)
{
    int idx = 0;
    while (idx < p->fill) {
        p->tokens[idx]->memo = NULL;
        idx++;
    }
    // Don't try to get extra tokens in interactive mode when trying to
    // raise specialized errors in the second pass.
    p->tok->interactive_underflow = IUNDERFLOW_STOP;
    p->call_invalid_rules = 1;
    p->mark = 0;
}
// Nonzero when the tokenizer's `done` status is E_EOF, E_EOFS or E_EOLS.
static inline int
_is_end_of_source(Parser *p) {
    switch (p->tok->done) {
    case E_EOF:
    case E_EOFS:
    case E_EOLS:
        return 1;
    default:
        return 0;
    }
}
// Run the generated parser and return its result, or NULL with an exception
// set on failure.
// On failure a second pass with the invalid_* rules enabled is made, unless
// incomplete input is reported or a non-SyntaxError exception is already set.
void *
_PyPegen_run_parser(Parser *p)
{
void *res = _PyPegen_parse(p);
assert(p->level == 0);
if (res == NULL) {
// With PyPARSE_ALLOW_INCOMPLETE_INPUT, running out of source is reported
// as "incomplete input" instead of a regular error.
if ((p->flags & PyPARSE_ALLOW_INCOMPLETE_INPUT) && _is_end_of_source(p)) {
PyErr_Clear();
return RAISE_SYNTAX_ERROR("incomplete input");
}
// Exceptions other than SyntaxError are propagated untouched.
if (PyErr_Occurred() && !PyErr_ExceptionMatches(PyExc_SyntaxError)) {
return NULL;
}
// Make a second parser pass. In this pass we activate heavier and slower checks
// to produce better error messages and more complete diagnostics. Extra "invalid_*"
// rules will be active during parsing.
// NOTE(review): indexing with p->fill - 1 assumes at least one token was
// filled during the first pass — confirm.
Token *last_token = p->tokens[p->fill - 1];
reset_parser_state_for_error_pass(p);
_PyPegen_parse(p);
// Set SyntaxErrors accordingly depending on the parser/tokenizer status at the failure
// point.
_Pypegen_set_syntax_error(p, last_token);
return NULL;
}
if (p->start_rule == Py_single_input && bad_single_statement(p)) {
p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
}
// test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate()
#if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN)
if (p->start_rule == Py_single_input ||
p->start_rule == Py_file_input ||
p->start_rule == Py_eval_input)
{
if (!_PyAST_Validate(res)) {
return NULL;
}
}
#endif
return res;
}
// Parse source read from `fp` and return the resulting module node, or NULL
// on failure.
// A tokenizer is created for the file, a Parser is run on top of it, and both
// are freed before returning.
mod_ty
_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
                             const char *enc, const char *ps1, const char *ps2,
                             PyCompilerFlags *flags, int *errcode, PyArena *arena)
{
    struct tok_state *tok = _PyTokenizer_FromFile(fp, enc, ps1, ps2);
    if (tok == NULL) {
        if (PyErr_Occurred()) {
            _PyPegen_raise_tokenizer_init_error(filename_ob);
        }
        return NULL;
    }

    // Mark the tokenizer as interactive when it has no file, when prompts
    // were supplied, or when the filename is "<stdin>".
    int interactive = (!tok->fp || ps1 != NULL || ps2 != NULL);
    if (!interactive) {
        interactive = (PyUnicode_CompareWithASCIIString(filename_ob, "<stdin>") == 0);
    }
    if (interactive) {
        tok->fp_interactive = 1;
    }

    // The tokenizer holds its own reference to the filename.
    Py_INCREF(filename_ob);
    tok->filename = filename_ob;

    // From here on the tokenizer must be freed on every path.
    mod_ty result = NULL;
    Parser *p = _PyPegen_Parser_New(tok, start_rule, compute_parser_flags(flags),
                                    PY_MINOR_VERSION, errcode, arena);
    if (p != NULL) {
        result = _PyPegen_run_parser(p);
        _PyPegen_Parser_Free(p);
    }

    _PyTokenizer_Free(tok);
    return result;
}
// Parse the source text `str` and return the resulting module node, or NULL
// on failure.
// The tokenizer is created with _PyTokenizer_FromUTF8() when
// PyCF_IGNORE_COOKIE is set, and with _PyTokenizer_FromString() otherwise.
mod_ty
_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
                       PyCompilerFlags *flags, PyArena *arena)
{
    const int exec_input = (start_rule == Py_file_input);
    const int ignore_cookie = (flags != NULL && flags->cf_flags & PyCF_IGNORE_COOKIE);

    struct tok_state *tok = ignore_cookie
        ? _PyTokenizer_FromUTF8(str, exec_input)
        : _PyTokenizer_FromString(str, exec_input);
    if (tok == NULL) {
        if (PyErr_Occurred()) {
            _PyPegen_raise_tokenizer_init_error(filename_ob);
        }
        return NULL;
    }

    // The tokenizer holds its own reference to the filename.
    Py_INCREF(filename_ob);
    tok->filename = filename_ob;

    // From here on the tokenizer must be freed on every path.
    mod_ty result = NULL;

    // An explicit feature version is only honoured for AST-only compiles.
    int feature_version = PY_MINOR_VERSION;
    if (flags && (flags->cf_flags & PyCF_ONLY_AST)) {
        feature_version = flags->cf_feature_version;
    }

    Parser *p = _PyPegen_Parser_New(tok, start_rule, compute_parser_flags(flags),
                                    feature_version, NULL, arena);
    if (p != NULL) {
        result = _PyPegen_run_parser(p);
        _PyPegen_Parser_Free(p);
    }

    _PyTokenizer_Free(tok);
    return result;
}