cpython/Python/Python-tokenize.c
Pablo Galindo Salgado 1ef61cf71a
gh-102856: Initial implementation of PEP 701 (#102855)
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
Co-authored-by: Marta Gómez Macías <mgmacias@google.com>
Co-authored-by: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
2023-04-19 11:18:16 -05:00

194 lines
5 KiB
C

#include "Python.h"
#include "../Parser/tokenizer.h"
/* Forward declaration: the _tokenize_get_state_by_type() macro below takes
   the address of this definition before its initializer appears later in
   the file. */
static struct PyModuleDef _tokenizemodule;
/* Per-module state for _tokenize. */
typedef struct {
    /* The iterator type object; assigned in tokenizemodule_exec(). */
    PyTypeObject *TokenizerIter;
} tokenize_state;
/* Return the state block that PyModule_GetState() reports for `module`,
   viewed as a tokenize_state. */
static tokenize_state *
get_tokenize_state(PyObject *module)
{
    void *raw_state = PyModule_GetState(module);
    return (tokenize_state *)raw_state;
}
/* Reach the module state starting from a type object: resolve the module
   through PyType_GetModuleByDef() using this file's module definition, then
   fetch that module's state. */
#define _tokenize_get_state_by_type(type) \
    get_tokenize_state(PyType_GetModuleByDef((type), &_tokenizemodule))
#include "pycore_runtime.h"
#include "clinic/Python-tokenize.c.h"
/*[clinic input]
module _tokenizer
class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/
/* Instance layout of the iterator type: the standard object header followed
   by the tokenizer state that tokens are pulled from. */
typedef struct {
    PyObject_HEAD
    struct tok_state *tok;
} tokenizeriterobject;
/*[clinic input]
@classmethod
_tokenizer.tokenizeriter.__new__ as tokenizeriter_new
source: str
[clinic start generated code]*/
static PyObject *
tokenizeriter_new_impl(PyTypeObject *type, const char *source)
/*[clinic end generated code: output=7fd9f46cf9263cbb input=4384b368407375c6]*/
{
    /* Build a new iterator object of `type` that tokenizes `source`.
     *
     * Returns a new reference on success and NULL on failure.
     *
     * The object itself is allocated last.  Previously it was allocated
     * first and then leaked whenever creating the filename string or the
     * tokenizer failed.  Acquiring the two dependencies up front means each
     * failure path only has to release what it already holds, and the
     * deallocator never sees an instance whose `tok` is NULL.
     */
    PyObject *filename = PyUnicode_FromString("<string>");
    if (filename == NULL) {
        return NULL;
    }

    struct tok_state *tok = _PyTokenizer_FromUTF8(source, 1);
    if (tok == NULL) {
        Py_DECREF(filename);
        return NULL;
    }

    tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0);
    if (self == NULL) {
        /* `filename` has not been handed to the tokenizer yet, so both
           resources are released separately here. */
        _PyTokenizer_Free(tok);
        Py_DECREF(filename);
        return NULL;
    }

    /* From here on the tokenizer holds the filename reference and the
       iterator holds the tokenizer. */
    tok->filename = filename;
    self->tok = tok;
    return (PyObject *)self;
}
/* tp_iternext implementation.
 *
 * Fetches one token from the underlying tokenizer and returns it as the
 * 7-tuple (string, type, lineno, end_lineno, col_offset, end_col_offset,
 * line).  Returns NULL with StopIteration set at ENDMARKER, or at an
 * ERRORTOKEN for which no exception is pending; returns NULL with the
 * pending exception left in place otherwise.
 */
static PyObject *
tokenizeriter_next(tokenizeriterobject *it)
{
    struct tok_state *tok = it->tok;
    struct token token;
    int type = _PyTokenizer_Get(tok, &token);

    if (type == ERRORTOKEN || type == ENDMARKER) {
        /* An error token with an exception already set propagates as-is;
           every other terminating token ends the iteration. */
        if (type == ERRORTOKEN && PyErr_Occurred()) {
            return NULL;
        }
        PyErr_SetString(PyExc_StopIteration, "EOF");
        return NULL;
    }

    /* Token text: empty when the tokenizer gave no start/end pointers. */
    PyObject *text;
    if (token.start != NULL && token.end != NULL) {
        text = PyUnicode_FromStringAndSize(token.start,
                                           token.end - token.start);
    }
    else {
        text = PyUnicode_FromString("");
    }
    if (text == NULL) {
        return NULL;
    }

    /* The "line" element is the tokenizer buffer from `buf` up to `inp`,
       decoded as UTF-8 with the "replace" error handler. */
    Py_ssize_t buffered = tok->inp - tok->buf;
    PyObject *line = PyUnicode_DecodeUTF8(tok->buf, buffered, "replace");
    if (line == NULL) {
        Py_DECREF(text);
        return NULL;
    }

    /* String literals take their starting position from the multi-line
       bookkeeping fields; all other tokens use the current line. */
    const char *line_start;
    int lineno;
    if (ISSTRINGLIT(type)) {
        line_start = tok->multi_line_start;
        lineno = tok->first_lineno;
    }
    else {
        line_start = tok->line_start;
        lineno = tok->lineno;
    }
    int end_lineno = tok->lineno;

    /* Offsets stay at -1 when the pointer is missing or lies before the
       reference line start. */
    int col_offset = -1;
    int end_col_offset = -1;
    if (token.start != NULL && token.start >= line_start) {
        col_offset = (int)(token.start - line_start);
    }
    if (token.end != NULL && token.end >= tok->line_start) {
        end_col_offset = (int)(token.end - tok->line_start);
    }

    /* "N" hands the references to `text` and `line` over to the tuple. */
    return Py_BuildValue("(NiiiiiN)", text, type, lineno, end_lineno,
                         col_offset, end_col_offset, line);
}
/* tp_dealloc implementation: free the tokenizer, free the instance memory,
   then drop the reference to the instance's type. */
static void
tokenizeriter_dealloc(tokenizeriterobject *it)
{
    /* Fetch the type before the instance memory is released. */
    PyTypeObject *heap_type = Py_TYPE(it);

    _PyTokenizer_Free(it->tok);
    heap_type->tp_free(it);
    Py_DECREF(heap_type);
}
static PyType_Slot tokenizeriter_slots[] = {
{Py_tp_new, tokenizeriter_new},
{Py_tp_dealloc, tokenizeriter_dealloc},
{Py_tp_getattro, PyObject_GenericGetAttr},
{Py_tp_iter, PyObject_SelfIter},
{Py_tp_iternext, tokenizeriter_next},
{0, NULL},
};
/* Type specification passed to PyType_FromModuleAndSpec() in
   tokenizemodule_exec(). */
static PyType_Spec tokenizeriter_spec = {
    .name = "_tokenize.TokenizerIter",
    .basicsize = sizeof(tokenizeriterobject),
    .slots = tokenizeriter_slots,
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE,
};
/* Py_mod_exec slot: create the iterator type for module `m`, store it in the
   module state, and add it to the module.  Returns 0 on success, -1 on
   failure. */
static int
tokenizemodule_exec(PyObject *m)
{
    tokenize_state *state = get_tokenize_state(m);
    if (state == NULL) {
        return -1;
    }

    PyObject *iter_type = PyType_FromModuleAndSpec(m, &tokenizeriter_spec, NULL);
    /* Stored unconditionally, so the field is NULL when creation failed. */
    state->TokenizerIter = (PyTypeObject *)iter_type;
    if (iter_type == NULL) {
        return -1;
    }

    return PyModule_AddType(m, state->TokenizerIter) < 0 ? -1 : 0;
}
static PyMethodDef tokenize_methods[] = {
{NULL, NULL, 0, NULL} /* Sentinel */
};
static PyModuleDef_Slot tokenizemodule_slots[] = {
{Py_mod_exec, tokenizemodule_exec},
{0, NULL}
};
/* m_traverse: visit the type object held in the module state. */
static int
tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg)
{
    tokenize_state *mod_state = get_tokenize_state(m);

    /* Py_VISIT uses the `visit` and `arg` parameters by name. */
    Py_VISIT(mod_state->TokenizerIter);
    return 0;
}
/* m_clear: release the type object held in the module state. */
static int
tokenizemodule_clear(PyObject *m)
{
    tokenize_state *mod_state = get_tokenize_state(m);

    Py_CLEAR(mod_state->TokenizerIter);
    return 0;
}
static void
tokenizemodule_free(void *m)
{
tokenizemodule_clear((PyObject *)m);
}
static struct PyModuleDef _tokenizemodule = {
PyModuleDef_HEAD_INIT,
.m_name = "_tokenize",
.m_size = sizeof(tokenize_state),
.m_slots = tokenizemodule_slots,
.m_methods = tokenize_methods,
.m_traverse = tokenizemodule_traverse,
.m_clear = tokenizemodule_clear,
.m_free = tokenizemodule_free,
};
PyMODINIT_FUNC
PyInit__tokenize(void)
{
return PyModuleDef_Init(&_tokenizemodule);
}