GH-106176, GH-104702: Fix reference leak when importing across multiple threads (#108497)

This commit is contained in:
Brett Cannon 2023-08-29 00:17:25 -07:00 committed by GitHub
parent c780698e9b
commit 5f85b443f7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 107 additions and 12 deletions

View file

@ -51,18 +51,106 @@ def _new_module(name):
# Module-level locking ########################################################
# A dict mapping module names to weakrefs of _ModuleLock instances
# Dictionary protected by the global import lock
# For a list that can have a weakref to it.
class _List(list):
pass
# Copied from weakref.py with some simplifications and modifications unique to
# bootstrapping importlib. Many methods were simply deleting for simplicity, so if they
# are needed in the future they may work if simply copied back in.
class _WeakValueDictionary:
def __init__(self):
self_weakref = _weakref.ref(self)
# Inlined to avoid issues with inheriting from _weakref.ref before _weakref is
# set by _setup(). Since there's only one instance of this class, this is
# not expensive.
class KeyedRef(_weakref.ref):
__slots__ = "key",
def __new__(type, ob, key):
self = super().__new__(type, ob, type.remove)
self.key = key
return self
def __init__(self, ob, key):
super().__init__(ob, self.remove)
@staticmethod
def remove(wr):
nonlocal self_weakref
self = self_weakref()
if self is not None:
if self._iterating:
self._pending_removals.append(wr.key)
else:
_weakref._remove_dead_weakref(self.data, wr.key)
self._KeyedRef = KeyedRef
self.clear()
def clear(self):
self._pending_removals = []
self._iterating = set()
self.data = {}
def _commit_removals(self):
pop = self._pending_removals.pop
d = self.data
while True:
try:
key = pop()
except IndexError:
return
_weakref._remove_dead_weakref(d, key)
def get(self, key, default=None):
if self._pending_removals:
self._commit_removals()
try:
wr = self.data[key]
except KeyError:
return default
else:
if (o := wr()) is None:
return default
else:
return o
def setdefault(self, key, default=None):
try:
o = self.data[key]()
except KeyError:
o = None
if o is None:
if self._pending_removals:
self._commit_removals()
self.data[key] = self._KeyedRef(default, key)
return default
else:
return o
# A dict mapping module names to weakrefs of _ModuleLock instances.
# Dictionary protected by the global import lock.
_module_locks = {}
# A dict mapping thread IDs to lists of _ModuleLock instances. This maps a
# thread to the module locks it is blocking on acquiring. The values are
# lists because a single thread could perform a re-entrant import and be "in
# the process" of blocking on locks for more than one module. A thread can
# be "in the process" because a thread cannot actually block on acquiring
# more than one lock but it can have set up bookkeeping that reflects that
# it intends to block on acquiring more than one lock.
_blocking_on = {}
# A dict mapping thread IDs to weakref'ed lists of _ModuleLock instances.
# This maps a thread to the module locks it is blocking on acquiring. The
# values are lists because a single thread could perform a re-entrant import
# and be "in the process" of blocking on locks for more than one module. A
# thread can be "in the process" because a thread cannot actually block on
# acquiring more than one lock but it can have set up bookkeeping that reflects
# that it intends to block on acquiring more than one lock.
#
# The dictionary uses a WeakValueDictionary to avoid keeping unnecessary
# lists around, regardless of GC runs. This way there's no memory leak if
# the list is no longer needed (GH-106176).
_blocking_on = None
class _BlockingOnManager:
@ -79,7 +167,7 @@ def __enter__(self):
# re-entrant (i.e., a single thread may take it more than once) so it
# wouldn't help us be correct in the face of re-entrancy either.
self.blocked_on = _blocking_on.setdefault(self.thread_id, [])
self.blocked_on = _blocking_on.setdefault(self.thread_id, _List())
self.blocked_on.append(self.lock)
def __exit__(self, *args, **kwargs):
@ -1409,7 +1497,7 @@ def _setup(sys_module, _imp_module):
modules, those two modules must be explicitly passed in.
"""
global _imp, sys
global _imp, sys, _blocking_on
_imp = _imp_module
sys = sys_module
@ -1437,6 +1525,9 @@ def _setup(sys_module, _imp_module):
builtin_module = sys.modules[builtin_name]
setattr(self_module, builtin_name, builtin_module)
# Instantiation requires _weakref to have been set.
_blocking_on = _WeakValueDictionary()
def _install(sys_module, _imp_module):
"""Install importers for builtin and frozen modules"""

View file

@ -0,0 +1,4 @@
Use a ``WeakValueDictionary`` to track the lists containing the modules each
thread is currently importing. This helps avoid a reference leak from
keeping the list around longer than necessary. Weakrefs are used as GC can't
interrupt the cleanup.