gh-112529: Implement GC for free-threaded builds (#114262)

* gh-112529: Implement GC for free-threaded builds

This implements a mark and sweep GC for the free-threaded builds of
CPython. The implementation relies on mimalloc to find GC tracked
objects (i.e., "containers").
This commit is contained in:
Sam Gross 2024-01-25 13:27:36 -05:00 committed by GitHub
parent 4850410b60
commit b52fc70d1a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 1952 additions and 22 deletions

View file

@ -20,6 +20,7 @@ extern "C" {
# define PyFloat_MAXFREELIST 100
# define PyContext_MAXFREELIST 255
# define _PyAsyncGen_MAXFREELIST 80
# define _PyObjectStackChunk_MAXFREELIST 4
#else
# define PyTuple_NFREELISTS 0
# define PyTuple_MAXFREELIST 0
@ -27,6 +28,7 @@ extern "C" {
# define PyFloat_MAXFREELIST 0
# define PyContext_MAXFREELIST 0
# define _PyAsyncGen_MAXFREELIST 0
# define _PyObjectStackChunk_MAXFREELIST 0
#endif
struct _Py_list_state {
@ -93,6 +95,13 @@ struct _Py_async_gen_state {
#endif
};
struct _PyObjectStackChunk;
struct _Py_object_stack_state {
struct _PyObjectStackChunk *free_list;
Py_ssize_t numfree;
};
typedef struct _Py_freelist_state {
struct _Py_float_state float_state;
struct _Py_tuple_state tuple_state;
@ -100,6 +109,7 @@ typedef struct _Py_freelist_state {
struct _Py_slice_state slice_state;
struct _Py_context_state context_state;
struct _Py_async_gen_state async_gen_state;
struct _Py_object_stack_state object_stack_state;
} _PyFreeListState;
#ifdef __cplusplus

View file

@ -37,10 +37,22 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) {
}
/* Bit flags for ob_gc_bits (in Py_GIL_DISABLED builds) */
#ifdef Py_GIL_DISABLED
# define _PyGC_BITS_TRACKED (1)
# define _PyGC_BITS_FINALIZED (2)
# define _PyGC_BITS_UNREACHABLE (4)
# define _PyGC_BITS_FROZEN (8)
#endif
/* True if the object is currently tracked by the GC. */
static inline int _PyObject_GC_IS_TRACKED(PyObject *op) {
#ifdef Py_GIL_DISABLED
return (op->ob_gc_bits & _PyGC_BITS_TRACKED) != 0;
#else
PyGC_Head *gc = _Py_AS_GC(op);
return (gc->_gc_next != 0);
#endif
}
#define _PyObject_GC_IS_TRACKED(op) _PyObject_GC_IS_TRACKED(_Py_CAST(PyObject*, op))
@ -107,24 +119,29 @@ static inline void _PyGCHead_SET_PREV(PyGC_Head *gc, PyGC_Head *prev) {
gc->_gc_prev = ((gc->_gc_prev & ~_PyGC_PREV_MASK) | uprev);
}
static inline int _PyGCHead_FINALIZED(PyGC_Head *gc) {
return ((gc->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0);
}
static inline void _PyGCHead_SET_FINALIZED(PyGC_Head *gc) {
gc->_gc_prev |= _PyGC_PREV_MASK_FINALIZED;
}
static inline int _PyGC_FINALIZED(PyObject *op) {
#ifdef Py_GIL_DISABLED
return (op->ob_gc_bits & _PyGC_BITS_FINALIZED) != 0;
#else
PyGC_Head *gc = _Py_AS_GC(op);
return _PyGCHead_FINALIZED(gc);
return ((gc->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0);
#endif
}
static inline void _PyGC_SET_FINALIZED(PyObject *op) {
#ifdef Py_GIL_DISABLED
op->ob_gc_bits |= _PyGC_BITS_FINALIZED;
#else
PyGC_Head *gc = _Py_AS_GC(op);
_PyGCHead_SET_FINALIZED(gc);
gc->_gc_prev |= _PyGC_PREV_MASK_FINALIZED;
#endif
}
static inline void _PyGC_CLEAR_FINALIZED(PyObject *op) {
#ifdef Py_GIL_DISABLED
op->ob_gc_bits &= ~_PyGC_BITS_FINALIZED;
#else
PyGC_Head *gc = _Py_AS_GC(op);
gc->_gc_prev &= ~_PyGC_PREV_MASK_FINALIZED;
#endif
}

View file

@ -322,6 +322,9 @@ static inline void _PyObject_GC_TRACK(
"object is in generation which is garbage collected",
filename, lineno, __func__);
#ifdef Py_GIL_DISABLED
op->ob_gc_bits |= _PyGC_BITS_TRACKED;
#else
PyInterpreterState *interp = _PyInterpreterState_GET();
PyGC_Head *generation0 = interp->gc.generation0;
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
@ -329,6 +332,7 @@ static inline void _PyObject_GC_TRACK(
_PyGCHead_SET_PREV(gc, last);
_PyGCHead_SET_NEXT(gc, generation0);
generation0->_gc_prev = (uintptr_t)gc;
#endif
}
/* Tell the GC to stop tracking this object.
@ -352,6 +356,9 @@ static inline void _PyObject_GC_UNTRACK(
"object not tracked by the garbage collector",
filename, lineno, __func__);
#ifdef Py_GIL_DISABLED
op->ob_gc_bits &= ~_PyGC_BITS_TRACKED;
#else
PyGC_Head *gc = _Py_AS_GC(op);
PyGC_Head *prev = _PyGCHead_PREV(gc);
PyGC_Head *next = _PyGCHead_NEXT(gc);
@ -359,6 +366,7 @@ static inline void _PyObject_GC_UNTRACK(
_PyGCHead_SET_PREV(next, prev);
gc->_gc_next = 0;
gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
#endif
}
// Macros to accept any type for the parameter, and to automatically pass

View file

@ -0,0 +1,84 @@
#ifndef Py_INTERNAL_OBJECT_STACK_H
#define Py_INTERNAL_OBJECT_STACK_H
#ifdef __cplusplus
extern "C" {
#endif
#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif
// _PyObjectStack is a stack of Python objects implemented as a linked list of
// fixed size buffers.
// Chosen so that _PyObjectStackChunk is a power-of-two size.
#define _Py_OBJECT_STACK_CHUNK_SIZE 254
typedef struct _PyObjectStackChunk {
struct _PyObjectStackChunk *prev;
Py_ssize_t n;
PyObject *objs[_Py_OBJECT_STACK_CHUNK_SIZE];
} _PyObjectStackChunk;
typedef struct _PyObjectStack {
_PyObjectStackChunk *head;
} _PyObjectStack;
extern _PyObjectStackChunk *
_PyObjectStackChunk_New(void);
extern void
_PyObjectStackChunk_Free(_PyObjectStackChunk *);
extern void
_PyObjectStackChunk_ClearFreeList(_PyFreeListState *state, int is_finalization);
// Push an item onto the stack. Return -1 on allocation failure, 0 on success.
static inline int
_PyObjectStack_Push(_PyObjectStack *stack, PyObject *obj)
{
_PyObjectStackChunk *buf = stack->head;
if (buf == NULL || buf->n == _Py_OBJECT_STACK_CHUNK_SIZE) {
buf = _PyObjectStackChunk_New();
if (buf == NULL) {
return -1;
}
buf->prev = stack->head;
buf->n = 0;
stack->head = buf;
}
assert(buf->n >= 0 && buf->n < _Py_OBJECT_STACK_CHUNK_SIZE);
buf->objs[buf->n] = obj;
buf->n++;
return 0;
}
// Pop the top item from the stack. Return NULL if the stack is empty.
static inline PyObject *
_PyObjectStack_Pop(_PyObjectStack *stack)
{
_PyObjectStackChunk *buf = stack->head;
if (buf == NULL) {
return NULL;
}
assert(buf->n > 0 && buf->n <= _Py_OBJECT_STACK_CHUNK_SIZE);
buf->n--;
PyObject *obj = buf->objs[buf->n];
if (buf->n == 0) {
stack->head = buf->prev;
_PyObjectStackChunk_Free(buf);
}
return obj;
}
// Remove all items from the stack
extern void
_PyObjectStack_Clear(_PyObjectStack *stack);
#ifdef __cplusplus
}
#endif
#endif // !Py_INTERNAL_OBJECT_STACK_H

View file

@ -212,7 +212,9 @@ struct _object {
struct _PyMutex { uint8_t v; };
struct _object {
uintptr_t ob_tid; // thread id (or zero)
// ob_tid stores the thread id (or zero). It is also used by the GC to
// store linked lists and the computed "gc_refs" refcount.
uintptr_t ob_tid;
uint16_t _padding;
struct _PyMutex ob_mutex; // per-object lock
uint8_t ob_gc_bits; // gc-related state

View file

@ -349,7 +349,7 @@ def closed(self):
def close(self):
fileobj = self.fileobj
if fileobj is None:
if fileobj is None or self._buffer.closed:
return
try:
if self.mode == WRITE:

View file

@ -1,7 +1,7 @@
import unittest
import unittest.mock
from test.support import (verbose, refcount_test,
cpython_only, requires_subprocess)
cpython_only, requires_subprocess, Py_GIL_DISABLED)
from test.support.import_helper import import_module
from test.support.os_helper import temp_dir, TESTFN, unlink
from test.support.script_helper import assert_python_ok, make_script
@ -815,6 +815,15 @@ def test_freeze(self):
self.assertEqual(gc.get_freeze_count(), 0)
def test_get_objects(self):
gc.collect()
l = []
l.append(l)
self.assertTrue(
any(l is element for element in gc.get_objects())
)
@unittest.skipIf(Py_GIL_DISABLED, 'need generational GC')
def test_get_objects_generations(self):
gc.collect()
l = []
l.append(l)
@ -1225,7 +1234,7 @@ def test_refcount_errors(self):
p.stderr.close()
# Verify that stderr has a useful error message:
self.assertRegex(stderr,
br'gc\.c:[0-9]+: gc_decref: Assertion "gc_get_refs\(g\) > 0" failed.')
br'gc.*\.c:[0-9]+: .*: Assertion "gc_get_refs\(.+\) .*" failed.')
self.assertRegex(stderr,
br'refcount is too small')
# "address : 0x7fb5062efc18"

View file

@ -3652,10 +3652,8 @@ def _check_create_at_shutdown(self, **kwargs):
codecs.lookup('utf-8')
class C:
def __init__(self):
self.buf = io.BytesIO()
def __del__(self):
io.TextIOWrapper(self.buf, **{kwargs})
io.TextIOWrapper(io.BytesIO(), **{kwargs})
print("ok")
c = C()
""".format(iomod=iomod, kwargs=kwargs)

View file

@ -439,6 +439,7 @@ PYTHON_OBJS= \
Python/modsupport.o \
Python/mysnprintf.o \
Python/mystrtoul.o \
Python/object_stack.o \
Python/optimizer.o \
Python/optimizer_analysis.o \
Python/parking_lot.o \
@ -1832,6 +1833,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_frame.h \
$(srcdir)/Include/internal/pycore_freelist.h \
$(srcdir)/Include/internal/pycore_function.h \
$(srcdir)/Include/internal/pycore_gc.h \
$(srcdir)/Include/internal/pycore_genobject.h \
$(srcdir)/Include/internal/pycore_getopt.h \
$(srcdir)/Include/internal/pycore_gil.h \
@ -1853,6 +1855,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_namespace.h \
$(srcdir)/Include/internal/pycore_object.h \
$(srcdir)/Include/internal/pycore_object_alloc.h \
$(srcdir)/Include/internal/pycore_object_stack.h \
$(srcdir)/Include/internal/pycore_object_state.h \
$(srcdir)/Include/internal/pycore_obmalloc.h \
$(srcdir)/Include/internal/pycore_obmalloc_init.h \

View file

@ -0,0 +1,3 @@
The free-threaded build now has its own thread-safe GC implementation that
uses mimalloc to find GC tracked objects. It is non-generational, unlike the
existing GC implementation.

View file

@ -230,6 +230,7 @@
<ClCompile Include="..\Python\modsupport.c" />
<ClCompile Include="..\Python\mysnprintf.c" />
<ClCompile Include="..\Python\mystrtoul.c" />
<ClCompile Include="..\Python\object_stack.c" />
<ClCompile Include="..\Python\optimizer.c" />
<ClCompile Include="..\Python\optimizer_analysis.c" />
<ClCompile Include="..\Python\parking_lot.c" />

View file

@ -289,6 +289,9 @@
<ClCompile Include="..\Objects\object.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Python\object_stack.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Objects\obmalloc.c">
<Filter>Source Files</Filter>
</ClCompile>

View file

@ -591,6 +591,7 @@
<ClCompile Include="..\Python\modsupport.c" />
<ClCompile Include="..\Python\mysnprintf.c" />
<ClCompile Include="..\Python\mystrtoul.c" />
<ClCompile Include="..\Python\object_stack.c" />
<ClCompile Include="..\Python\optimizer.c" />
<ClCompile Include="..\Python\optimizer_analysis.c" />
<ClCompile Include="..\Python\parking_lot.c" />

View file

@ -1355,6 +1355,9 @@
<ClCompile Include="..\Python\mystrtoul.c">
<Filter>Python</Filter>
</ClCompile>
<ClCompile Include="..\Python\object_stack.c">
<Filter>Python</Filter>
</ClCompile>
<ClCompile Include="..\Python\optimizer.c">
<Filter>Python</Filter>
</ClCompile>

View file

@ -15,6 +15,8 @@
#include "pycore_weakref.h" // _PyWeakref_ClearRef()
#include "pydtrace.h"
#ifndef Py_GIL_DISABLED
typedef struct _gc_runtime_state GCState;
#ifdef Py_DEBUG
@ -964,10 +966,10 @@ finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
PyGC_Head *gc = GC_NEXT(collectable);
PyObject *op = FROM_GC(gc);
gc_list_move(gc, &seen);
if (!_PyGCHead_FINALIZED(gc) &&
if (!_PyGC_FINALIZED(op) &&
(finalize = Py_TYPE(op)->tp_finalize) != NULL)
{
_PyGCHead_SET_FINALIZED(gc);
_PyGC_SET_FINALIZED(op);
Py_INCREF(op);
finalize(op);
assert(!_PyErr_Occurred(tstate));
@ -1942,3 +1944,5 @@ PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
done:
gcstate->enabled = origenstate;
}
#endif // Py_GIL_DISABLED

File diff suppressed because it is too large Load diff

87
Python/object_stack.c Normal file
View file

@ -0,0 +1,87 @@
// Stack of Python objects
#include "Python.h"
#include "pycore_freelist.h"
#include "pycore_pystate.h"
#include "pycore_object_stack.h"
extern _PyObjectStackChunk *_PyObjectStackChunk_New(void);
extern void _PyObjectStackChunk_Free(_PyObjectStackChunk *);
static struct _Py_object_stack_state *
get_state(void)
{
_PyFreeListState *state = _PyFreeListState_GET();
return &state->object_stack_state;
}
_PyObjectStackChunk *
_PyObjectStackChunk_New(void)
{
_PyObjectStackChunk *buf;
struct _Py_object_stack_state *state = get_state();
if (state->numfree > 0) {
buf = state->free_list;
state->free_list = buf->prev;
state->numfree--;
}
else {
// NOTE: we use PyMem_RawMalloc() here because this is used by the GC
// during mimalloc heap traversal. In that context, it is not safe to
// allocate mimalloc memory, such as via PyMem_Malloc().
buf = PyMem_RawMalloc(sizeof(_PyObjectStackChunk));
if (buf == NULL) {
return NULL;
}
}
buf->prev = NULL;
buf->n = 0;
return buf;
}
void
_PyObjectStackChunk_Free(_PyObjectStackChunk *buf)
{
assert(buf->n == 0);
struct _Py_object_stack_state *state = get_state();
if (state->numfree >= 0 &&
state->numfree < _PyObjectStackChunk_MAXFREELIST)
{
buf->prev = state->free_list;
state->free_list = buf;
state->numfree++;
}
else {
PyMem_RawFree(buf);
}
}
void
_PyObjectStack_Clear(_PyObjectStack *queue)
{
while (queue->head != NULL) {
_PyObjectStackChunk *buf = queue->head;
buf->n = 0;
queue->head = buf->prev;
_PyObjectStackChunk_Free(buf);
}
}
void
_PyObjectStackChunk_ClearFreeList(_PyFreeListState *free_lists, int is_finalization)
{
if (!is_finalization) {
// Ignore requests to clear the free list during GC. We use object
// stacks during GC, so emptying the free-list is counterproductive.
return;
}
struct _Py_object_stack_state *state = &free_lists->object_stack_state;
while (state->numfree > 0) {
_PyObjectStackChunk *buf = state->free_list;
state->free_list = buf->prev;
state->numfree--;
PyMem_RawFree(buf);
}
state->numfree = -1;
}

View file

@ -10,6 +10,7 @@
#include "pycore_frame.h"
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_object.h" // _PyType_InitCache()
#include "pycore_object_stack.h" // _PyObjectStackChunk_ClearFreeList()
#include "pycore_parking_lot.h" // _PyParkingLot_AfterFork()
#include "pycore_pyerrors.h" // _PyErr_Clear()
#include "pycore_pylifecycle.h" // _PyAST_Fini()
@ -1468,6 +1469,7 @@ _Py_ClearFreeLists(_PyFreeListState *state, int is_finalization)
_PyList_ClearFreeList(state, is_finalization);
_PyContext_ClearFreeList(state, is_finalization);
_PyAsyncGen_ClearFreeLists(state, is_finalization);
_PyObjectStackChunk_ClearFreeList(state, is_finalization);
}
void
@ -2055,7 +2057,6 @@ start_the_world(struct _stoptheworld_state *stw)
HEAD_LOCK(runtime);
stw->requested = 0;
stw->world_stopped = 0;
stw->requester = NULL;
// Switch threads back to the detached state.
PyInterpreterState *i;
PyThreadState *t;
@ -2066,6 +2067,7 @@ start_the_world(struct _stoptheworld_state *stw)
_PyParkingLot_UnparkAll(&t->state);
}
}
stw->requester = NULL;
HEAD_UNLOCK(runtime);
if (stw->is_global) {
_PyRWMutex_Unlock(&runtime->stoptheworld_mutex);