GH-108362: Incremental Cycle GC (GH-116206)

This commit is contained in:
Mark Shannon 2024-03-20 08:54:42 +00:00 committed by GitHub
parent d5ebf8b71f
commit 15309329b6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 743 additions and 447 deletions

View file

@ -111,6 +111,14 @@ Improved Error Messages
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
TypeError: split() got an unexpected keyword argument 'max_split'. Did you mean 'maxsplit'?
Incremental Garbage Collection
------------------------------
* The cycle garbage collector is now incremental.
This means that maximum pause times are reduced
by an order of magnitude or more for larger heaps.
Other Language Changes
======================
@ -350,6 +358,28 @@ fractions
sign handling, minimum width and grouping. (Contributed by Mark Dickinson
in :gh:`111320`.)
gc
--
* The cyclic garbage collector is now incremental, which changes the meanings
of the results of :meth:`gc.get_threshold` and :meth:`gc.set_threshold` as
well as :meth:`gc.get_count` and :meth:`gc.get_stats`.
* :meth:`gc.get_threshold` returns a three-tuple for backwards compatibility,
the first value is the threshold for young collections, as before, the second
value determines the rate at which the old collection is scanned; the
default is 10 and higher values mean that the old collection is scanned more slowly.
The third value is meaningless and is always zero.
* :meth:`gc.set_threshold` ignores any items after the second.
* :meth:`gc.get_count` and :meth:`gc.get_stats`.
These functions return the same format of results as before.
The only difference is that instead of the results referring to
the young, aging and old generations, the results refer to the
young generation and the aging and collecting spaces of the old generation.
In summary, code that attempted to manipulate the behavior of the cycle GC may
not work exactly as intended, but it is very unlikely to be harmful.
All other code will work just fine.
glob
----

View file

@ -109,11 +109,14 @@ static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) {
/* Bit flags for _gc_prev */
/* Bit 0 is set when tp_finalize is called */
#define _PyGC_PREV_MASK_FINALIZED (1)
#define _PyGC_PREV_MASK_FINALIZED 1
/* Bit 1 is set when the object is in generation which is GCed currently. */
#define _PyGC_PREV_MASK_COLLECTING (2)
/* The (N-2) most significant bits contain the real address. */
#define _PyGC_PREV_SHIFT (2)
#define _PyGC_PREV_MASK_COLLECTING 2
/* Bit 0 is set if the object belongs to old space 1 */
#define _PyGC_NEXT_MASK_OLD_SPACE_1 1
#define _PyGC_PREV_SHIFT 2
#define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT)
/* set for debugging information */
@ -139,11 +142,13 @@ typedef enum {
// Lowest bit of _gc_next is used for flags only in GC.
// But it is always 0 for normal code.
static inline PyGC_Head* _PyGCHead_NEXT(PyGC_Head *gc) {
uintptr_t next = gc->_gc_next;
uintptr_t next = gc->_gc_next & _PyGC_PREV_MASK;
return (PyGC_Head*)next;
}
static inline void _PyGCHead_SET_NEXT(PyGC_Head *gc, PyGC_Head *next) {
gc->_gc_next = (uintptr_t)next;
uintptr_t unext = (uintptr_t)next;
assert((unext & ~_PyGC_PREV_MASK) == 0);
gc->_gc_next = (gc->_gc_next & ~_PyGC_PREV_MASK) | unext;
}
// Lowest two bits of _gc_prev is used for _PyGC_PREV_MASK_* flags.
@ -151,6 +156,7 @@ static inline PyGC_Head* _PyGCHead_PREV(PyGC_Head *gc) {
uintptr_t prev = (gc->_gc_prev & _PyGC_PREV_MASK);
return (PyGC_Head*)prev;
}
static inline void _PyGCHead_SET_PREV(PyGC_Head *gc, PyGC_Head *prev) {
uintptr_t uprev = (uintptr_t)prev;
assert((uprev & ~_PyGC_PREV_MASK) == 0);
@ -236,6 +242,13 @@ struct gc_generation {
generations */
};
/* Object counts produced by a collection.
   NOTE(review): presumably filled in once per collection run, as opposed to
   the running per-generation totals kept in struct gc_generation_stats --
   confirm against the collector implementation. */
struct gc_collection_stats {
/* number of collected objects */
Py_ssize_t collected;
/* total number of uncollectable objects (put into gc.garbage) */
Py_ssize_t uncollectable;
};
/* Running stats per generation */
struct gc_generation_stats {
/* total number of collections */
@ -257,8 +270,8 @@ struct _gc_runtime_state {
int enabled;
int debug;
/* linked lists of container objects */
struct gc_generation generations[NUM_GENERATIONS];
PyGC_Head *generation0;
struct gc_generation young;
struct gc_generation old[2];
/* a permanent generation which won't be collected */
struct gc_generation permanent_generation;
struct gc_generation_stats generation_stats[NUM_GENERATIONS];
@ -268,6 +281,12 @@ struct _gc_runtime_state {
PyObject *garbage;
/* a list of callbacks to be invoked when collection is performed */
PyObject *callbacks;
Py_ssize_t work_to_do;
/* Which of the old spaces is the visited space */
int visited_space;
#ifdef Py_GIL_DISABLED
/* This is the number of objects that survived the last full
collection. It approximates the number of long lived objects
tracked by the GC.
@ -279,6 +298,7 @@ struct _gc_runtime_state {
collections, and are awaiting to undergo a full collection for
the first time. */
Py_ssize_t long_lived_pending;
#endif
};
#ifdef Py_GIL_DISABLED
@ -291,9 +311,8 @@ struct _gc_thread_state {
extern void _PyGC_InitState(struct _gc_runtime_state *);
extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation,
_PyGC_Reason reason);
extern Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate);
extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason);
extern void _PyGC_CollectNoFail(PyThreadState *tstate);
/* Freeze objects tracked by the GC and ignore them in future collections. */
extern void _PyGC_Freeze(PyInterpreterState *interp);

View file

@ -125,19 +125,8 @@ static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n)
}
#define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n)
static inline void _Py_SetImmortal(PyObject *op)
{
if (op) {
#ifdef Py_GIL_DISABLED
op->ob_tid = _Py_UNOWNED_TID;
op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
op->ob_ref_shared = 0;
#else
op->ob_refcnt = _Py_IMMORTAL_REFCNT;
#endif
}
}
#define _Py_SetImmortal(op) _Py_SetImmortal(_PyObject_CAST(op))
extern void _Py_SetImmortal(PyObject *op);
extern void _Py_SetImmortalUntracked(PyObject *op);
// Makes an immortal object mortal again with the specified refcnt. Should only
// be used during runtime finalization.
@ -325,11 +314,12 @@ static inline void _PyObject_GC_TRACK(
filename, lineno, __func__);
PyInterpreterState *interp = _PyInterpreterState_GET();
PyGC_Head *generation0 = interp->gc.generation0;
PyGC_Head *generation0 = &interp->gc.young.head;
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
_PyGCHead_SET_NEXT(last, gc);
_PyGCHead_SET_PREV(gc, last);
_PyGCHead_SET_NEXT(gc, generation0);
assert((gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1) == 0);
generation0->_gc_prev = (uintptr_t)gc;
#endif
}

View file

@ -168,12 +168,12 @@ extern PyTypeObject _PyExc_MemoryError;
}, \
.gc = { \
.enabled = 1, \
.generations = { \
/* .head is set in _PyGC_InitState(). */ \
{ .threshold = 700, }, \
{ .threshold = 10, }, \
.young = { .threshold = 2000, }, \
.old = { \
{ .threshold = 10, }, \
{ .threshold = 0, }, \
}, \
.work_to_do = -5000, \
}, \
.qsbr = { \
.wr_seq = QSBR_INITIAL, \

View file

@ -384,19 +384,11 @@ def test_collect_generations(self):
# each call to collect(N)
x = []
gc.collect(0)
# x is now in gen 1
# x is now in the old gen
a, b, c = gc.get_count()
gc.collect(1)
# x is now in gen 2
d, e, f = gc.get_count()
gc.collect(2)
# x is now in gen 3
g, h, i = gc.get_count()
# We don't check a, d, g since their exact values depends on
# We don't check a since its exact value depends on
# internal implementation details of the interpreter.
self.assertEqual((b, c), (1, 0))
self.assertEqual((e, f), (0, 1))
self.assertEqual((h, i), (0, 0))
def test_trashcan(self):
class Ouch:
@ -847,16 +839,6 @@ def test_get_objects_generations(self):
self.assertFalse(
any(l is element for element in gc.get_objects(generation=2))
)
gc.collect(generation=1)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=0))
)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=1))
)
self.assertTrue(
any(l is element for element in gc.get_objects(generation=2))
)
gc.collect(generation=2)
self.assertFalse(
any(l is element for element in gc.get_objects(generation=0))
@ -1076,6 +1058,56 @@ class Z:
callback.assert_not_called()
gc.enable()
@unittest.skipIf(Py_GIL_DISABLED, "Free threading does not support incremental GC")
def test_incremental_gc_handles_fast_cycle_creation(self):
class LinkedList:
#Use slots to reduce number of implicit objects
__slots__ = "next", "prev", "surprise"
def __init__(self, next=None, prev=None):
self.next = next
if next is not None:
next.prev = self
self.prev = prev
if prev is not None:
prev.next = self
def make_ll(depth):
head = LinkedList()
for i in range(depth):
head = LinkedList(head, head.prev)
return head
head = make_ll(10000)
count = 10000
# We expect the counts to go negative eventually
# as there will be some objects we aren't counting,
# e.g. the gc stats dicts. The test merely checks
# that the counts don't grow.
enabled = gc.isenabled()
gc.enable()
olds = []
for i in range(1000):
newhead = make_ll(200)
count += 200
newhead.surprise = head
olds.append(newhead)
if len(olds) == 50:
stats = gc.get_stats()
young = stats[0]
incremental = stats[1]
old = stats[2]
collected = young['collected'] + incremental['collected'] + old['collected']
live = count - collected
self.assertLess(live, 25000)
del olds[:]
if not enabled:
gc.disable()
class GCCallbackTests(unittest.TestCase):
def setUp(self):

View file

@ -0,0 +1,12 @@
Implement an incremental cyclic garbage collector. By collecting the old
generation in increments, there is no need for a full heap scan. This can
hugely reduce maximum pause time for programs with large heaps.
Reduce the number of generations from three to two. The old generation is
split into two spaces, "visited" and "pending".
Collection happens in two steps::
* An increment is formed from the young generation and a small part of the pending space.
* This increment is scanned and the survivors moved to the end of the visited space.
When the pending space becomes empty, the two spaces are swapped.

View file

@ -158,17 +158,12 @@ gc_set_threshold_impl(PyObject *module, int threshold0, int group_right_1,
{
GCState *gcstate = get_gc_state();
gcstate->generations[0].threshold = threshold0;
gcstate->young.threshold = threshold0;
if (group_right_1) {
gcstate->generations[1].threshold = threshold1;
gcstate->old[0].threshold = threshold1;
}
if (group_right_2) {
gcstate->generations[2].threshold = threshold2;
/* generations higher than 2 get the same threshold */
for (int i = 3; i < NUM_GENERATIONS; i++) {
gcstate->generations[i].threshold = gcstate->generations[2].threshold;
}
gcstate->old[1].threshold = threshold2;
}
Py_RETURN_NONE;
}
@ -185,9 +180,9 @@ gc_get_threshold_impl(PyObject *module)
{
GCState *gcstate = get_gc_state();
return Py_BuildValue("(iii)",
gcstate->generations[0].threshold,
gcstate->generations[1].threshold,
gcstate->generations[2].threshold);
gcstate->young.threshold,
gcstate->old[0].threshold,
0);
}
/*[clinic input]
@ -207,14 +202,14 @@ gc_get_count_impl(PyObject *module)
struct _gc_thread_state *gc = &tstate->gc;
// Flush the local allocation count to the global count
_Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
_Py_atomic_add_int(&gcstate->young.count, (int)gc->alloc_count);
gc->alloc_count = 0;
#endif
return Py_BuildValue("(iii)",
gcstate->generations[0].count,
gcstate->generations[1].count,
gcstate->generations[2].count);
gcstate->young.count,
gcstate->old[gcstate->visited_space].count,
gcstate->old[gcstate->visited_space^1].count);
}
/*[clinic input]

View file

@ -2401,6 +2401,27 @@ _Py_NewReferenceNoTotal(PyObject *op)
new_reference(op);
}
/* Mark `op` as immortal by overwriting its reference-count fields.
 *
 * Only the refcount (and, in Py_GIL_DISABLED builds, thread-id) fields are
 * written here; this function makes no GC tracking calls.
 * `op` is dereferenced unconditionally, so it must not be NULL.
 */
void
_Py_SetImmortalUntracked(PyObject *op)
{
#ifdef Py_GIL_DISABLED
// Py_GIL_DISABLED builds: set ob_tid to the unowned marker and put the
// immortal value in the local refcount; the shared refcount is zeroed.
op->ob_tid = _Py_UNOWNED_TID;
op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
op->ob_ref_shared = 0;
#else
// Default builds: a single refcount field holds the immortal value.
op->ob_refcnt = _Py_IMMORTAL_REFCNT;
#endif
}
/* Mark `op` as immortal, removing it from GC tracking first if needed.
 *
 * If `op` is a GC-capable object that is currently tracked, it is untracked
 * before its refcount fields are set by _Py_SetImmortalUntracked().
 * `op` is passed to PyObject_IS_GC() unconditionally, so it must not be NULL.
 */
void
_Py_SetImmortal(PyObject *op)
{
// Untrack before immortalizing; objects that are not GC-capable or not
// currently tracked skip this step.
if (PyObject_IS_GC(op) && _PyObject_GC_IS_TRACKED(op)) {
_PyObject_GC_UNTRACK(op);
}
_Py_SetImmortalUntracked(op);
}
void
_Py_ResurrectReference(PyObject *op)
{

View file

@ -603,6 +603,9 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp,
PyStructSequence_Desc *desc,
unsigned long tp_flags)
{
if (Py_TYPE(type) == NULL) {
Py_SET_TYPE(type, &PyType_Type);
}
Py_ssize_t n_unnamed_members;
Py_ssize_t n_members = count_members(desc, &n_unnamed_members);
PyMemberDef *members = NULL;
@ -618,7 +621,7 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp,
}
initialize_static_fields(type, desc, members, tp_flags);
_Py_SetImmortal(type);
_Py_SetImmortal((PyObject *)type);
}
#ifndef NDEBUG
else {

File diff suppressed because it is too large Load diff

View file

@ -675,7 +675,7 @@ void
_PyGC_InitState(GCState *gcstate)
{
// TODO: move to pycore_runtime_init.h once the incremental GC lands.
gcstate->generations[0].threshold = 2000;
gcstate->young.threshold = 2000;
}
@ -970,8 +970,8 @@ cleanup_worklist(struct worklist *worklist)
static bool
gc_should_collect(GCState *gcstate)
{
int count = _Py_atomic_load_int_relaxed(&gcstate->generations[0].count);
int threshold = gcstate->generations[0].threshold;
int count = _Py_atomic_load_int_relaxed(&gcstate->young.count);
int threshold = gcstate->young.threshold;
if (count <= threshold || threshold == 0 || !gcstate->enabled) {
return false;
}
@ -979,7 +979,7 @@ gc_should_collect(GCState *gcstate)
// objects. A few tests rely on immediate scheduling of the GC so we ignore
// the scaled threshold if old[0].threshold is set to zero.
return (count > gcstate->long_lived_total / 4 ||
gcstate->generations[1].threshold == 0);
gcstate->old[0].threshold == 0);
}
static void
@ -993,7 +993,7 @@ record_allocation(PyThreadState *tstate)
if (gc->alloc_count >= LOCAL_ALLOC_COUNT_THRESHOLD) {
// TODO: Use Py_ssize_t for the generation count.
GCState *gcstate = &tstate->interp->gc;
_Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
_Py_atomic_add_int(&gcstate->young.count, (int)gc->alloc_count);
gc->alloc_count = 0;
if (gc_should_collect(gcstate) &&
@ -1012,7 +1012,7 @@ record_deallocation(PyThreadState *tstate)
gc->alloc_count--;
if (gc->alloc_count <= -LOCAL_ALLOC_COUNT_THRESHOLD) {
GCState *gcstate = &tstate->interp->gc;
_Py_atomic_add_int(&gcstate->generations[0].count, (int)gc->alloc_count);
_Py_atomic_add_int(&gcstate->young.count, (int)gc->alloc_count);
gc->alloc_count = 0;
}
}
@ -1137,10 +1137,11 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
/* update collection and allocation counters */
if (generation+1 < NUM_GENERATIONS) {
gcstate->generations[generation+1].count += 1;
gcstate->old[generation].count += 1;
}
for (i = 0; i <= generation; i++) {
gcstate->generations[i].count = 0;
gcstate->young.count = 0;
for (i = 1; i <= generation; i++) {
gcstate->old[i-1].count = 0;
}
PyInterpreterState *interp = tstate->interp;
@ -1463,7 +1464,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
return gc_collect_main(tstate, generation, reason);
}
Py_ssize_t
void
_PyGC_CollectNoFail(PyThreadState *tstate)
{
/* Ideally, this function is only called on interpreter shutdown,
@ -1472,7 +1473,7 @@ _PyGC_CollectNoFail(PyThreadState *tstate)
during interpreter shutdown (and then never finish it).
See http://bugs.python.org/issue8713#msg195178 for an example.
*/
return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
}
void

View file

@ -1031,7 +1031,7 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def)
if (!already_set) {
/* We assume that all module defs are statically allocated
and will never be freed. Otherwise, we would incref here. */
_Py_SetImmortal(def);
_Py_SetImmortal((PyObject *)def);
}
res = 0;

View file

@ -983,7 +983,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, const _PyBloomFilter *depende
static int
init_cold_exit_executor(_PyExecutorObject *executor, int oparg)
{
_Py_SetImmortal(executor);
_Py_SetImmortalUntracked((PyObject *)executor);
Py_SET_TYPE(executor, &_PyUOpExecutor_Type);
executor->trace = (_PyUOpInstruction *)executor->exits;
executor->code_size = 1;

View file

@ -1753,8 +1753,11 @@ def is_waiting_for_gil(self):
return (name == 'take_gil')
def is_gc_collect(self):
'''Is this frame gc_collect_main() within the garbage-collector?'''
return self._gdbframe.name() in ('collect', 'gc_collect_main')
'''Is this frame a collector within the garbage-collector?'''
return self._gdbframe.name() in (
'collect', 'gc_collect_full', 'gc_collect_main',
'gc_collect_young', 'gc_collect_increment',
)
def get_pyop(self):
try: