cpython/Objects/iterobject.c
Eric Snow 81c72044a1
bpo-46541: Replace core use of _Py_IDENTIFIER() with statically initialized global objects. (gh-30928)
We're no longer using _Py_IDENTIFIER() (or _Py_static_string()) in any core CPython code.  It is still used in a number of non-builtin stdlib modules.

The replacement is: PyUnicodeObject (not pointer) fields under _PyRuntimeState, statically initialized as part of _PyRuntime.  A new _Py_GET_GLOBAL_IDENTIFIER() macro facilitates lookup of the fields (along with _Py_GET_GLOBAL_STRING() for non-identifier strings).

https://bugs.python.org/issue46541#msg411799 explains the rationale for this change.

The core of the change is in:

* (new) Include/internal/pycore_global_strings.h - the declarations for the global strings, along with the macros
* Include/internal/pycore_runtime_init.h - added the static initializers for the global strings
* Include/internal/pycore_global_objects.h - where the struct in pycore_global_strings.h is hooked into _PyRuntimeState
* Tools/scripts/generate_global_objects.py - added generation of the global string declarations and static initializers

I've also added a --check flag to generate_global_objects.py (along with make check-global-objects) to check for unused global strings.  That check is added to the PR CI config.

The remainder of this change updates the core code to use _Py_GET_GLOBAL_IDENTIFIER() instead of _Py_IDENTIFIER() and the related _Py*Id functions (likewise for _Py_GET_GLOBAL_STRING() instead of _Py_static_string()).  This includes adding a few functions where there wasn't already an alternative to _Py*Id(), replacing the _Py_Identifier * parameter with PyObject *.

The following are not changed (yet):

* stop using _Py_IDENTIFIER() in the stdlib modules
* (maybe) get rid of _Py_IDENTIFIER(), etc. entirely -- this may not be doable as at least one package on PyPI is using this (private) API
* (maybe) intern the strings during runtime init

https://bugs.python.org/issue46541
2022-02-08 13:39:07 -07:00

500 lines
16 KiB
C

/* Iterator objects */
#include "Python.h"
#include "pycore_call.h" // _PyObject_CallNoArgs()
#include "pycore_object.h" // _PyObject_GC_TRACK()
/* Instance layout of the sequence iterator created by PySeqIter_New().
 * Items are fetched one index at a time with PySequence_GetItem(). */
typedef struct {
PyObject_HEAD
Py_ssize_t it_index; /* Index of the next item to fetch from it_seq */
PyObject *it_seq; /* Set to NULL when iterator is exhausted */
} seqiterobject;
/* Create a sequence iterator over `seq`, starting at index 0.
 *
 * `seq` must satisfy PySequence_Check(); otherwise a bad-internal-call error
 * is raised.  Returns a new reference, or NULL with an exception set.  The
 * iterator holds its own strong reference to `seq`.
 */
PyObject *
PySeqIter_New(PyObject *seq)
{
    if (!PySequence_Check(seq)) {
        PyErr_BadInternalCall();
        return NULL;
    }

    seqiterobject *iterator = PyObject_GC_New(seqiterobject, &PySeqIter_Type);
    if (iterator == NULL) {
        return NULL;
    }

    Py_INCREF(seq);
    iterator->it_seq = seq;
    iterator->it_index = 0;

    /* Only start GC tracking once every field is initialized. */
    _PyObject_GC_TRACK(iterator);
    return (PyObject *)iterator;
}
/* tp_dealloc for sequence iterators: stop GC tracking, release the sequence
 * reference (it_seq is NULL once the iterator is exhausted, hence XDECREF),
 * then free the object. */
static void
iter_dealloc(seqiterobject *it)
{
_PyObject_GC_UNTRACK(it);
Py_XDECREF(it->it_seq);
PyObject_GC_Del(it);
}
/* tp_traverse for sequence iterators: the only object reference held is
 * it_seq, which is reported to the GC visitor via Py_VISIT. */
static int
iter_traverse(seqiterobject *it, visitproc visit, void *arg)
{
Py_VISIT(it->it_seq);
return 0;
}
/* tp_iternext for sequence iterators.
 *
 * Returns a new reference to seq[it_index] and advances the index.  Returns
 * NULL without an exception once the iterator is exhausted, and NULL with an
 * exception set on failure.  An IndexError or StopIteration raised by the
 * item lookup marks the end of the sequence: the error is cleared and the
 * iterator drops its reference to the sequence.
 */
static PyObject *
iter_iternext(PyObject *iterator)
{
    seqiterobject *it;
    PyObject *seq;
    PyObject *result;

    assert(PySeqIter_Check(iterator));
    it = (seqiterobject *)iterator;
    seq = it->it_seq;
    if (seq == NULL)
        return NULL;
    if (it->it_index == PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_OverflowError,
                        "iter index too large");
        return NULL;
    }

    result = PySequence_GetItem(seq, it->it_index);
    if (result != NULL) {
        it->it_index++;
        return result;
    }
    if (PyErr_ExceptionMatches(PyExc_IndexError) ||
        PyErr_ExceptionMatches(PyExc_StopIteration))
    {
        PyErr_Clear();
        /* The item lookup can run arbitrary code, including a nested next()
         * on this same iterator that has already exhausted it and released
         * it_seq.  Py_CLEAR only drops the reference if it is still held, so
         * the sequence is never DECREF'd twice through the cached pointer. */
        Py_CLEAR(it->it_seq);
    }
    return NULL;
}
/* __length_hint__ implementation for sequence iterators.
 *
 * Returns the number of items still expected (sequence size minus the
 * current index, never negative), 0 for an exhausted iterator,
 * NotImplemented when the sequence has no length, or NULL with an exception
 * set if taking the size fails.
 */
static PyObject *
iter_len(seqiterobject *it, PyObject *Py_UNUSED(ignored))
{
    PyObject *seq = it->it_seq;

    if (seq == NULL) {
        return PyLong_FromLong(0);
    }
    if (!_PyObject_HasLen(seq)) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    Py_ssize_t size = PySequence_Size(seq);
    if (size == -1) {
        return NULL;
    }

    /* The sequence may have shrunk below the current index. */
    Py_ssize_t remaining = size - it->it_index;
    if (remaining < 0) {
        return PyLong_FromLong(0);
    }
    return PyLong_FromSsize_t(remaining);
}
/* Docstring for the __length_hint__ entry in seqiter_methods. */
PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
/* __reduce__ implementation for sequence iterators (pickling support).
 *
 * Returns (iter, (seq,), index) for a live iterator, or (iter, ((),)) for an
 * exhausted one, where `iter` is the builtin of that name.  Returns NULL with
 * an exception set on failure.
 */
static PyObject *
iter_reduce(seqiterobject *it, PyObject *Py_UNUSED(ignored))
{
    /* Look up the builtin before reading any iterator field.  The lookup can
     * run arbitrary code that exhausts this iterator, and C does not specify
     * the order in which function arguments are evaluated, so doing the
     * lookup inside the Py_BuildValue() argument list could pass a NULL
     * it_seq for the "O" unit. */
    PyObject *iter_func = _PyEval_GetBuiltin(&_Py_ID(iter));
    if (iter_func == NULL) {
        return NULL;
    }

    /* "N" hands the iter_func reference over to Py_BuildValue(). */
    if (it->it_seq != NULL) {
        return Py_BuildValue("N(O)n", iter_func, it->it_seq, it->it_index);
    }
    return Py_BuildValue("N(())", iter_func);
}
/* Docstring shared by the __reduce__ entries of seqiter_methods and
 * calliter_methods. */
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
/* __setstate__ implementation for sequence iterators (unpickling support).
 *
 * `state` is converted to a Py_ssize_t and becomes the new index; negative
 * values are clamped to 0.  An exhausted iterator is left untouched.  Returns
 * None, or NULL with an exception set if `state` cannot be converted.
 */
static PyObject *
iter_setstate(seqiterobject *it, PyObject *state)
{
    Py_ssize_t position = PyLong_AsSsize_t(state);
    if (position == -1 && PyErr_Occurred()) {
        return NULL;
    }

    if (it->it_seq != NULL) {
        it->it_index = (position < 0) ? 0 : position;
    }
    Py_RETURN_NONE;
}
/* Docstring for the __setstate__ entry in seqiter_methods. */
PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
/* Method table for sequence iterators: the length hint plus the
 * __reduce__/__setstate__ pair used for pickling. */
static PyMethodDef seqiter_methods[] = {
{"__length_hint__", (PyCFunction)iter_len, METH_NOARGS, length_hint_doc},
{"__reduce__", (PyCFunction)iter_reduce, METH_NOARGS, reduce_doc},
{"__setstate__", (PyCFunction)iter_setstate, METH_O, setstate_doc},
{NULL, NULL} /* sentinel */
};
/* Type object for sequence iterators (tp_name "iterator").  Instances take
 * part in cyclic GC (Py_TPFLAGS_HAVE_GC + iter_traverse), use
 * PyObject_SelfIter for tp_iter, and produce items through iter_iternext(). */
PyTypeObject PySeqIter_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"iterator", /* tp_name */
sizeof(seqiterobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)iter_dealloc, /* tp_dealloc */
0, /* tp_vectorcall_offset */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_as_async */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
0, /* tp_doc */
(traverseproc)iter_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter */
iter_iternext, /* tp_iternext */
seqiter_methods, /* tp_methods */
0, /* tp_members */
};
/* -------------------------------------- */
/* Instance layout of the callable iterator created by PyCallIter_New().
 * The callable is called with no arguments for each item; iteration stops
 * when a result compares equal to the sentinel.  Both fields are cleared
 * together when the iterator is exhausted. */
typedef struct {
PyObject_HEAD
PyObject *it_callable; /* Set to NULL when iterator is exhausted */
PyObject *it_sentinel; /* Set to NULL when iterator is exhausted */
} calliterobject;
/* Create a callable iterator that calls `callable` until a result compares
 * equal to `sentinel`.
 *
 * Returns a new reference, or NULL with an exception set if allocation
 * fails.  The iterator holds its own strong references to both arguments.
 */
PyObject *
PyCallIter_New(PyObject *callable, PyObject *sentinel)
{
    calliterobject *iterator = PyObject_GC_New(calliterobject,
                                               &PyCallIter_Type);
    if (iterator == NULL) {
        return NULL;
    }

    Py_INCREF(callable);
    iterator->it_callable = callable;

    Py_INCREF(sentinel);
    iterator->it_sentinel = sentinel;

    /* Only start GC tracking once every field is initialized. */
    _PyObject_GC_TRACK(iterator);
    return (PyObject *)iterator;
}
/* tp_dealloc for callable iterators: stop GC tracking, release both
 * references (either may already be NULL after exhaustion, hence XDECREF),
 * then free the object. */
static void
calliter_dealloc(calliterobject *it)
{
_PyObject_GC_UNTRACK(it);
Py_XDECREF(it->it_callable);
Py_XDECREF(it->it_sentinel);
PyObject_GC_Del(it);
}
/* tp_traverse for callable iterators: report the callable and the sentinel
 * to the GC visitor via Py_VISIT. */
static int
calliter_traverse(calliterobject *it, visitproc visit, void *arg)
{
Py_VISIT(it->it_callable);
Py_VISIT(it->it_sentinel);
return 0;
}
/* tp_iternext for callable iterators.
 *
 * Calls the stored callable with no arguments and returns its result (a new
 * reference) unless it compares equal to the sentinel.  A sentinel match or
 * a StopIteration raised by the callable exhausts the iterator: both stored
 * references are dropped and NULL is returned without an exception.  Any
 * other error is propagated (NULL with the exception set).
 */
static PyObject *
calliter_iternext(calliterobject *it)
{
    PyObject *callable = it->it_callable;
    if (callable == NULL) {
        return NULL;
    }

    /* The callable can run arbitrary code, including a nested next() on this
     * iterator that exhausts it and drops the stored references.  Hold a
     * strong reference so the callable stays alive for the whole call. */
    Py_INCREF(callable);
    PyObject *result = _PyObject_CallNoArgs(callable);
    Py_DECREF(callable);

    if (result == NULL) {
        if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
            PyErr_Clear();
            Py_CLEAR(it->it_callable);
            Py_CLEAR(it->it_sentinel);
        }
        return NULL;
    }

    PyObject *sentinel = it->it_sentinel;
    if (sentinel == NULL) {
        /* The iterator was exhausted re-entrantly during the call above.
         * There is no sentinel left to compare with, so report exhaustion
         * instead of passing NULL to PyObject_RichCompareBool(). */
        Py_DECREF(result);
        return NULL;
    }

    /* The comparison can run arbitrary code too; keep the sentinel alive. */
    Py_INCREF(sentinel);
    int ok = PyObject_RichCompareBool(sentinel, result, Py_EQ);
    Py_DECREF(sentinel);

    if (ok == 0) {
        return result; /* Common case, fast path */
    }

    Py_DECREF(result);
    if (ok > 0) {
        Py_CLEAR(it->it_callable);
        Py_CLEAR(it->it_sentinel);
    }
    /* ok < 0: the comparison failed and its exception is left set. */
    return NULL;
}
/* __reduce__ implementation for callable iterators (pickling support).
 *
 * Returns (iter, (callable, sentinel)) for a live iterator, or (iter, ((),))
 * for an exhausted one, where `iter` is the builtin of that name.  Returns
 * NULL with an exception set on failure.
 */
static PyObject *
calliter_reduce(calliterobject *it, PyObject *Py_UNUSED(ignored))
{
    /* Look up the builtin before reading any iterator field.  The lookup can
     * run arbitrary code that exhausts this iterator, and C does not specify
     * the order in which function arguments are evaluated, so doing the
     * lookup inside the Py_BuildValue() argument list could pass NULL for an
     * "O" unit. */
    PyObject *iter_func = _PyEval_GetBuiltin(&_Py_ID(iter));
    if (iter_func == NULL) {
        return NULL;
    }

    /* "N" hands the iter_func reference over to Py_BuildValue(). */
    if (it->it_callable != NULL && it->it_sentinel != NULL) {
        return Py_BuildValue("N(OO)", iter_func,
                             it->it_callable, it->it_sentinel);
    }
    return Py_BuildValue("N(())", iter_func);
}
/* Method table for callable iterators: only pickling support (__reduce__),
 * reusing the reduce_doc docstring. */
static PyMethodDef calliter_methods[] = {
{"__reduce__", (PyCFunction)calliter_reduce, METH_NOARGS, reduce_doc},
{NULL, NULL} /* sentinel */
};
/* Type object for callable iterators (tp_name "callable_iterator").
 * Instances take part in cyclic GC (Py_TPFLAGS_HAVE_GC + calliter_traverse),
 * use PyObject_SelfIter for tp_iter, and produce items through
 * calliter_iternext().  Slots after tp_methods are left zero-initialized. */
PyTypeObject PyCallIter_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"callable_iterator", /* tp_name */
sizeof(calliterobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)calliter_dealloc, /* tp_dealloc */
0, /* tp_vectorcall_offset */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_as_async */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
0, /* tp_doc */
(traverseproc)calliter_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter */
(iternextfunc)calliter_iternext, /* tp_iternext */
calliter_methods, /* tp_methods */
};
/* -------------------------------------- */
/* Instance layout of the awaitable created by PyAnextAwaitable_New(). */
typedef struct {
PyObject_HEAD
PyObject *wrapped; /* Awaitable whose iterator is driven on each step */
PyObject *default_value; /* StopIteration value used when the wrapped
                            awaitable raises StopAsyncIteration */
} anextawaitableobject;
/* tp_dealloc for anext_awaitable objects: stop GC tracking, release both
 * owned references, then free the object. */
static void
anextawaitable_dealloc(anextawaitableobject *obj)
{
_PyObject_GC_UNTRACK(obj);
Py_XDECREF(obj->wrapped);
Py_XDECREF(obj->default_value);
PyObject_GC_Del(obj);
}
/* tp_traverse for anext_awaitable objects: report the wrapped awaitable and
 * the default value to the GC visitor via Py_VISIT. */
static int
anextawaitable_traverse(anextawaitableobject *obj, visitproc visit, void *arg)
{
Py_VISIT(obj->wrapped);
Py_VISIT(obj->default_value);
return 0;
}
/* Resolve obj->wrapped to an object that can be stepped with tp_iternext.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *
anextawaitable_getiter(anextawaitableobject *obj)
{
    assert(obj->wrapped != NULL);

    PyObject *iter = _PyCoro_GetAwaitableIter(obj->wrapped);
    if (iter == NULL) {
        return NULL;
    }
    if (Py_TYPE(iter)->tp_iternext != NULL) {
        return iter;
    }

    /* _PyCoro_GetAwaitableIter returns a Coroutine, a Generator, or an
     * iterator.  Only coroutines lack tp_iternext, so go through their
     * am_await slot to obtain something that can be iterated. */
    assert(PyCoro_CheckExact(iter));
    unaryfunc am_await = Py_TYPE(iter)->tp_as_async->am_await;
    PyObject *inner = am_await(iter);
    /* The coroutine itself is no longer needed, whether or not the call
     * above succeeded. */
    Py_DECREF(iter);
    if (inner == NULL) {
        return NULL;
    }

    if (!PyIter_Check(inner)) {
        PyErr_SetString(PyExc_TypeError,
                        "__await__ returned a non-iterable");
        Py_DECREF(inner);
        return NULL;
    }
    return inner;
}
/* tp_iternext for anext_awaitable objects.
 *
 * Steps the iterator obtained from the wrapped awaitable once.  If that step
 * raises StopAsyncIteration, the error is replaced by a StopIteration that
 * carries the default value.
 *
 * Two shapes of wrapped object are handled by anextawaitable_getiter():
 *
 *     class A:
 *         async def __anext__(self):
 *             ...
 *     a = A()
 *
 * Here `await anext(a)` should call
 *     a.__anext__().__await__().__next__()
 *
 * whereas for an async generator
 *
 *     async def agen():
 *         yield 1
 *         yield 2
 *     gen = agen()
 *
 * `await anext(gen)` can just call
 *     gen.__anext__().__next__()
 */
static PyObject *
anextawaitable_iternext(anextawaitableobject *obj)
{
    PyObject *iter = anextawaitable_getiter(obj);
    if (iter == NULL) {
        return NULL;
    }

    iternextfunc step = Py_TYPE(iter)->tp_iternext;
    PyObject *item = step(iter);
    Py_DECREF(iter);

    if (item == NULL && PyErr_ExceptionMatches(PyExc_StopAsyncIteration)) {
        _PyGen_SetStopIterationValue(obj->default_value);
    }
    return item;
}
/* Forward a method call to the iterator of the wrapped awaitable.
 *
 * `meth` is the method name and `arg` is passed using the "O" format of
 * PyObject_CallMethod().  With that format a tuple `arg` is unpacked into
 * the positional arguments of the call, which is how the METH_VARARGS
 * wrappers forward their argument tuple.  Any other object is passed as the
 * single argument.
 *
 * Returns the method's result (a new reference), or NULL with an exception
 * set.  A StopAsyncIteration raised by the method is replaced by a
 * StopIteration carrying the default value.
 */
static PyObject *
anextawaitable_proxy(anextawaitableobject *obj, const char *meth, PyObject *arg)
{
    PyObject *awaitable = anextawaitable_getiter(obj);
    if (awaitable == NULL) {
        return NULL;
    }
    PyObject *ret = PyObject_CallMethod(awaitable, meth, "O", arg);
    Py_DECREF(awaitable);
    if (ret != NULL) {
        return ret;
    }
    if (PyErr_ExceptionMatches(PyExc_StopAsyncIteration)) {
        /* `anextawaitableobject` is only used by `anext()` when
         * a default value is provided. So when we have a StopAsyncIteration
         * exception we replace it with a `StopIteration(default)`, as if
         * it was the return value of `__anext__()` coroutine.
         */
        _PyGen_SetStopIterationValue(obj->default_value);
    }
    return NULL;
}
/* send(arg): forward `arg` to the send() method of the wrapped iterator.
 *
 * Registered as METH_O, so `arg` is the single value being sent.
 * anextawaitable_proxy() passes its argument with the "O" format of
 * PyObject_CallMethod(), which unpacks a tuple into positional arguments;
 * sending a tuple value would therefore turn send((a, b)) into send(a, b).
 * Wrapping the value in a 1-tuple makes the call send(arg) for every kind
 * of value.
 */
static PyObject *
anextawaitable_send(anextawaitableobject *obj, PyObject *arg)
{
    PyObject *args = PyTuple_Pack(1, arg);
    if (args == NULL) {
        return NULL;
    }
    PyObject *ret = anextawaitable_proxy(obj, "send", args);
    Py_DECREF(args);
    return ret;
}
/* throw(...): forward to the throw() method of the wrapped iterator.
 * Registered as METH_VARARGS, so `arg` is the tuple of positional arguments,
 * which is handed to anextawaitable_proxy() unchanged. */
static PyObject *
anextawaitable_throw(anextawaitableobject *obj, PyObject *arg)
{
    return anextawaitable_proxy(obj, "throw", arg);
}
/* close(): forward to the close() method of the wrapped iterator.
 * Registered as METH_VARARGS, so `arg` is the tuple of positional arguments,
 * which is handed to anextawaitable_proxy() unchanged. */
static PyObject *
anextawaitable_close(anextawaitableobject *obj, PyObject *arg)
{
    return anextawaitable_proxy(obj, "close", arg);
}
/* Docstring for the "send" entry in anextawaitable_methods. */
PyDoc_STRVAR(send_doc,
"send(arg) -> send 'arg' into the wrapped iterator,\n\
return next yielded value or raise StopIteration.");
/* Docstring for the "throw" entry in anextawaitable_methods. */
PyDoc_STRVAR(throw_doc,
"throw(typ[,val[,tb]]) -> raise exception in the wrapped iterator,\n\
return next yielded value or raise StopIteration.");
/* Docstring for the "close" entry in anextawaitable_methods. */
PyDoc_STRVAR(close_doc,
"close() -> raise GeneratorExit inside generator.");
/* Method table for anext_awaitable objects.  Each entry forwards to the
 * method of the same name on the wrapped iterator via anextawaitable_proxy().
 * send takes exactly one argument (METH_O); throw and close receive their
 * positional arguments as a tuple (METH_VARARGS). */
static PyMethodDef anextawaitable_methods[] = {
{"send",(PyCFunction)anextawaitable_send, METH_O, send_doc},
{"throw",(PyCFunction)anextawaitable_throw, METH_VARARGS, throw_doc},
{"close",(PyCFunction)anextawaitable_close, METH_VARARGS, close_doc},
{NULL, NULL} /* Sentinel */
};
/* Async slots for anext_awaitable objects: only am_await is provided, and it
 * uses PyObject_SelfIter, the same function installed as tp_iter. */
static PyAsyncMethods anextawaitable_as_async = {
PyObject_SelfIter, /* am_await */
0, /* am_aiter */
0, /* am_anext */
0, /* am_send */
};
/* Type object for the awaitable created by PyAnextAwaitable_New()
 * (tp_name "anext_awaitable").  Instances take part in cyclic GC
 * (Py_TPFLAGS_HAVE_GC + anextawaitable_traverse), use PyObject_SelfIter for
 * both am_await and tp_iter, and are stepped by anextawaitable_iternext().
 * Slots after tp_methods are left zero-initialized. */
PyTypeObject _PyAnextAwaitable_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "anext_awaitable",                          /* tp_name */
    sizeof(anextawaitableobject),               /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)anextawaitable_dealloc,         /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    &anextawaitable_as_async,                   /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)anextawaitable_traverse,      /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    /* Cast to the slot's own function-pointer type, as PyCallIter_Type does. */
    (iternextfunc)anextawaitable_iternext,      /* tp_iternext */
    anextawaitable_methods,                     /* tp_methods */
};
/* Create an anext_awaitable wrapping `awaitable`.
 *
 * `default_value` is delivered through StopIteration when the wrapped
 * awaitable raises StopAsyncIteration.  Returns a new reference, or NULL
 * with an exception set if allocation fails.  The new object holds its own
 * strong references to both arguments.
 */
PyObject *
PyAnextAwaitable_New(PyObject *awaitable, PyObject *default_value)
{
    anextawaitableobject *self =
        PyObject_GC_New(anextawaitableobject, &_PyAnextAwaitable_Type);
    if (self == NULL) {
        return NULL;
    }

    Py_INCREF(awaitable);
    self->wrapped = awaitable;

    Py_INCREF(default_value);
    self->default_value = default_value;

    /* Only start GC tracking once every field is initialized. */
    _PyObject_GC_TRACK(self);
    return (PyObject *)self;
}