gh-112532: Isolate abandoned segments by interpreter (#113717)

* gh-112532: Isolate abandoned segments by interpreter

Mimalloc segments are data structures that contain memory allocations along
with metadata. Each segment is "owned" by a thread. When a thread exits,
it abandons its segments to a global pool to be later reclaimed by other
threads. This changes the pool to be per-interpreter instead of process-wide.

This will be important for when we use mimalloc to find GC objects in the
`--disable-gil` builds. We want heaps to only store Python objects from a
single interpreter. Absent this change, the abandoning and reclaiming process
could break this isolation.

* Add missing '&_mi_abandoned_default' to 'tld_empty'
This commit is contained in:
Sam Gross 2024-01-04 17:21:40 -05:00 committed by GitHub
parent c2e8298eba
commit fcb3c2a444
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 102 additions and 76 deletions

View file

@ -23,23 +23,6 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_trace_message(...)
#endif
#define MI_CACHE_LINE 64
#if defined(_MSC_VER)
#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
#pragma warning(disable:26812) // unscoped enum warning
#define mi_decl_noinline __declspec(noinline)
#define mi_decl_thread __declspec(thread)
#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
#define mi_decl_noinline __attribute__((noinline))
#define mi_decl_thread __thread
#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
#else
#define mi_decl_noinline
#define mi_decl_thread __thread // hope for the best :-)
#define mi_decl_cache_align
#endif
#if defined(__EMSCRIPTEN__) && !defined(__wasi__)
#define __wasi__
#endif
@ -131,6 +114,7 @@ void _mi_segment_map_allocated_at(const mi_segment_t* segment);
void _mi_segment_map_freed_at(const mi_segment_t* segment);
// "segment.c"
extern mi_abandoned_pool_t _mi_abandoned_default; // global abandoned pool
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
@ -145,7 +129,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, m
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
void _mi_abandoned_await_readers(void);
void _mi_abandoned_await_readers(mi_abandoned_pool_t *pool);
void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
// "page.c"

View file

@ -33,6 +33,23 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
#endif
#define MI_CACHE_LINE 64
#if defined(_MSC_VER)
#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
#pragma warning(disable:26812) // unscoped enum warning
#define mi_decl_noinline __declspec(noinline)
#define mi_decl_thread __declspec(thread)
#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
#define mi_decl_noinline __attribute__((noinline))
#define mi_decl_thread __thread
#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
#else
#define mi_decl_noinline
#define mi_decl_thread __thread // hope for the best :-)
#define mi_decl_cache_align
#endif
// ------------------------------------------------------
// Variants
// ------------------------------------------------------
@ -445,6 +462,28 @@ typedef struct mi_segment_s {
mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment
} mi_segment_t;
typedef uintptr_t mi_tagged_segment_t;
// Segments unowned by any thread are put in a shared pool
typedef struct mi_abandoned_pool_s {
// This is a list of visited abandoned pages that were full at the time.
// this list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited.
mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL
// The abandoned page list (tagged as it supports pop)
mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL
// Maintain these for debug purposes (these counts may be a bit off)
mi_decl_cache_align _Atomic(size_t) abandoned_count;
mi_decl_cache_align _Atomic(size_t) abandoned_visited_count;
// We also maintain a count of current readers of the abandoned list
// in order to prevent resetting/decommitting segment memory if it might
// still be read.
mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0
} mi_abandoned_pool_t;
// ------------------------------------------------------
// Heaps
@ -654,6 +693,7 @@ typedef struct mi_segments_tld_s {
size_t peak_size; // peak size of all segments
mi_stats_t* stats; // points to tld stats
mi_os_tld_t* os; // points to os stats
mi_abandoned_pool_t* abandoned; // pool of abandoned segments
} mi_segments_tld_t;
// Thread local data

View file

@ -27,6 +27,7 @@ extern "C" {
#include "pycore_import.h" // struct _import_state
#include "pycore_instruments.h" // _PY_MONITORING_EVENTS
#include "pycore_list.h" // struct _Py_list_state
#include "pycore_mimalloc.h" // struct _mimalloc_interp_state
#include "pycore_object_state.h" // struct _py_object_state
#include "pycore_obmalloc.h" // struct _obmalloc_state
#include "pycore_tstate.h" // _PyThreadStateImpl
@ -166,6 +167,10 @@ struct _is {
struct _warnings_runtime_state warnings;
struct atexit_state atexit;
#if defined(Py_GIL_DISABLED)
struct _mimalloc_interp_state mimalloc;
#endif
struct _obmalloc_state obmalloc;
PyObject *audit_hooks;

View file

@ -35,6 +35,12 @@ typedef enum {
#endif
#ifdef Py_GIL_DISABLED
struct _mimalloc_interp_state {
// When exiting, threads place any segments with live blocks in this
// shared pool for other threads to claim and reuse.
mi_abandoned_pool_t abandoned_pool;
};
struct _mimalloc_thread_state {
mi_heap_t *current_object_heap;
mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT];

View file

@ -131,7 +131,7 @@ mi_decl_cache_align static const mi_tld_t tld_empty = {
0,
false,
NULL, NULL,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, tld_empty_stats, tld_empty_os, &_mi_abandoned_default }, // segments
{ 0, tld_empty_stats }, // os
{ MI_STATS_NULL } // stats
};
@ -148,7 +148,7 @@ extern mi_heap_t _mi_heap_main;
static mi_tld_t tld_main = {
0, false,
&_mi_heap_main, & _mi_heap_main,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, &tld_main.stats, &tld_main.os, &_mi_abandoned_default }, // segments
{ 0, &tld_main.stats }, // os
{ MI_STATS_NULL } // stats
};
@ -308,6 +308,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
_mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld));
tld->segments.stats = &tld->stats;
tld->segments.os = &tld->os;
tld->segments.abandoned = &_mi_abandoned_default;
tld->os.stats = &tld->stats;
tld->heap_backing = bheap;
tld->heaps = bheap;

View file

@ -395,7 +395,7 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
const size_t size = mi_segment_size(segment);
const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size);
_mi_abandoned_await_readers(); // wait until safe to free
_mi_abandoned_await_readers(tld->abandoned); // wait until safe to free
_mi_arena_free(segment, mi_segment_size(segment), csize, segment->memid, tld->stats);
}
@ -1059,7 +1059,6 @@ would be spread among all other segments in the arenas.
// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
// to put in a tag that increments on update to avoid the A-B-A problem.
#define MI_TAGGED_MASK MI_SEGMENT_MASK
typedef uintptr_t mi_tagged_segment_t;
static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) {
return (mi_segment_t*)(ts & ~MI_TAGGED_MASK);
@ -1071,55 +1070,40 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se
return ((uintptr_t)segment | tag);
}
// This is a list of visited abandoned pages that were full at the time.
// this list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited.
static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL
// The abandoned page list (tagged as it supports pop)
static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL
// Maintain these for debug purposes (these counts may be a bit off)
static mi_decl_cache_align _Atomic(size_t) abandoned_count;
static mi_decl_cache_align _Atomic(size_t) abandoned_visited_count;
// We also maintain a count of current readers of the abandoned list
// in order to prevent resetting/decommitting segment memory if it might
// still be read.
static mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0
mi_abandoned_pool_t _mi_abandoned_default;
// Push on the visited list
static void mi_abandoned_visited_push(mi_segment_t* segment) {
static void mi_abandoned_visited_push(mi_abandoned_pool_t *pool, mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL);
mi_assert_internal(segment->next == NULL);
mi_assert_internal(segment->used > 0);
mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited);
mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &pool->abandoned_visited);
do {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext);
} while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment));
mi_atomic_increment_relaxed(&abandoned_visited_count);
} while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &pool->abandoned_visited, &anext, segment));
mi_atomic_increment_relaxed(&pool->abandoned_visited_count);
}
// Move the visited list to the abandoned list.
static bool mi_abandoned_visited_revisit(void)
static bool mi_abandoned_visited_revisit(mi_abandoned_pool_t *pool)
{
// quick check if the visited list is empty
if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false;
if (mi_atomic_load_ptr_relaxed(mi_segment_t, &pool->abandoned_visited) == NULL) return false;
// grab the whole visited list
mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL);
mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &pool->abandoned_visited, NULL);
if (first == NULL) return false;
// first try to swap directly if the abandoned list happens to be NULL
mi_tagged_segment_t afirst;
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&pool->abandoned);
if (mi_tagged_segment_ptr(ts)==NULL) {
size_t count = mi_atomic_load_relaxed(&abandoned_visited_count);
size_t count = mi_atomic_load_relaxed(&pool->abandoned_visited_count);
afirst = mi_tagged_segment(first, ts);
if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) {
mi_atomic_add_relaxed(&abandoned_count, count);
mi_atomic_sub_relaxed(&abandoned_visited_count, count);
if (mi_atomic_cas_strong_acq_rel(&pool->abandoned, &ts, afirst)) {
mi_atomic_add_relaxed(&pool->abandoned_count, count);
mi_atomic_sub_relaxed(&pool->abandoned_visited_count, count);
return true;
}
}
@ -1133,51 +1117,51 @@ static bool mi_abandoned_visited_revisit(void)
// and atomically prepend to the abandoned list
// (no need to increase the readers as we don't access the abandoned segments)
mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned);
mi_tagged_segment_t anext = mi_atomic_load_relaxed(&pool->abandoned);
size_t count;
do {
count = mi_atomic_load_relaxed(&abandoned_visited_count);
count = mi_atomic_load_relaxed(&pool->abandoned_visited_count);
mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext));
afirst = mi_tagged_segment(first, anext);
} while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst));
mi_atomic_add_relaxed(&abandoned_count, count);
mi_atomic_sub_relaxed(&abandoned_visited_count, count);
} while (!mi_atomic_cas_weak_release(&pool->abandoned, &anext, afirst));
mi_atomic_add_relaxed(&pool->abandoned_count, count);
mi_atomic_sub_relaxed(&pool->abandoned_visited_count, count);
return true;
}
// Push on the abandoned list.
static void mi_abandoned_push(mi_segment_t* segment) {
static void mi_abandoned_push(mi_abandoned_pool_t* pool, mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
mi_assert_internal(segment->next == NULL);
mi_assert_internal(segment->used > 0);
mi_tagged_segment_t next;
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&pool->abandoned);
do {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts));
next = mi_tagged_segment(segment, ts);
} while (!mi_atomic_cas_weak_release(&abandoned, &ts, next));
mi_atomic_increment_relaxed(&abandoned_count);
} while (!mi_atomic_cas_weak_release(&pool->abandoned, &ts, next));
mi_atomic_increment_relaxed(&pool->abandoned_count);
}
// Wait until there are no more pending reads on segments that used to be in the abandoned list
// called for example from `arena.c` before decommitting
void _mi_abandoned_await_readers(void) {
void _mi_abandoned_await_readers(mi_abandoned_pool_t* pool) {
size_t n;
do {
n = mi_atomic_load_acquire(&abandoned_readers);
n = mi_atomic_load_acquire(&pool->abandoned_readers);
if (n != 0) mi_atomic_yield();
} while (n != 0);
}
// Pop from the abandoned list
static mi_segment_t* mi_abandoned_pop(void) {
static mi_segment_t* mi_abandoned_pop(mi_abandoned_pool_t* pool) {
mi_segment_t* segment;
// Check efficiently if it is empty (or if the visited list needs to be moved)
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&pool->abandoned);
segment = mi_tagged_segment_ptr(ts);
if mi_likely(segment == NULL) {
if mi_likely(!mi_abandoned_visited_revisit()) { // try to swap in the visited list on NULL
if mi_likely(!mi_abandoned_visited_revisit(pool)) { // try to swap in the visited list on NULL
return NULL;
}
}
@ -1186,20 +1170,20 @@ static mi_segment_t* mi_abandoned_pop(void) {
// a segment to be decommitted while a read is still pending,
// and a tagged pointer to prevent A-B-A link corruption.
// (this is called from `region.c:_mi_mem_free` for example)
mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted
mi_atomic_increment_relaxed(&pool->abandoned_readers); // ensure no segment gets decommitted
mi_tagged_segment_t next = 0;
ts = mi_atomic_load_acquire(&abandoned);
ts = mi_atomic_load_acquire(&pool->abandoned);
do {
segment = mi_tagged_segment_ptr(ts);
if (segment != NULL) {
mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next);
next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted
}
} while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next));
mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock
} while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&pool->abandoned, &ts, next));
mi_atomic_decrement_relaxed(&pool->abandoned_readers); // release reader lock
if (segment != NULL) {
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
mi_atomic_decrement_relaxed(&abandoned_count);
mi_atomic_decrement_relaxed(&pool->abandoned_count);
}
return segment;
}
@ -1237,7 +1221,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
segment->thread_id = 0;
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned
mi_abandoned_push(segment);
mi_abandoned_push(tld->abandoned, segment);
}
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@ -1381,7 +1365,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
mi_segment_t* segment;
while ((segment = mi_abandoned_pop()) != NULL) {
while ((segment = mi_abandoned_pop(tld->abandoned)) != NULL) {
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
}
@ -1391,7 +1375,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
*reclaimed = false;
mi_segment_t* segment;
long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop(tld->abandoned)) != NULL)) {
segment->abandoned_visits++;
// todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments
// and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way?
@ -1418,7 +1402,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
else {
// otherwise, push on the visited list so it gets not looked at too quickly again
mi_segment_try_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again
mi_abandoned_visited_push(segment);
mi_abandoned_visited_push(tld->abandoned, segment);
}
}
return NULL;
@ -1428,11 +1412,12 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
{
mi_segment_t* segment;
mi_abandoned_pool_t* pool = tld->abandoned;
int max_tries = (force ? 16*1024 : 1024); // limit latency
if (force) {
mi_abandoned_visited_revisit();
mi_abandoned_visited_revisit(pool);
}
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop(pool)) != NULL)) {
mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees)
if (segment->used == 0) {
// free the segment (by forced reclaim) to make it available to other threads.
@ -1444,7 +1429,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
// otherwise, purge if needed and push on the visited list
// note: forced purge can be expensive if many threads are destroyed/created as in mstress.
mi_segment_try_purge(segment, force, tld->stats);
mi_abandoned_visited_push(segment);
mi_abandoned_visited_push(pool, segment);
}
}
}

View file

@ -2533,6 +2533,11 @@ tstate_mimalloc_bind(PyThreadState *tstate)
mi_tld_t *tld = &mts->tld;
_mi_tld_init(tld, &mts->heaps[_Py_MIMALLOC_HEAP_MEM]);
// Exiting threads push any remaining in-use segments to the abandoned
// pool to be re-claimed later by other threads. We use per-interpreter
// pools to keep Python objects from different interpreters separate.
tld->segments.abandoned = &tstate->interp->mimalloc.abandoned_pool;
// Initialize each heap
for (Py_ssize_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) {
_mi_heap_init_ex(&mts->heaps[i], tld, _mi_arena_id_none());