bpo-1635741: Release Unicode interned strings at exit (GH-21269)

* PyUnicode_InternInPlace() now ensures that interned strings are
  ready.
* Add _PyUnicode_ClearInterned().
* Py_Finalize() now releases Unicode interned strings:
  call _PyUnicode_ClearInterned().
This commit is contained in:
Victor Stinner 2020-07-02 01:19:57 +02:00 committed by GitHub
parent 90db4653ae
commit 666ecfb095
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 34 additions and 36 deletions

View file

@ -78,6 +78,7 @@ extern void _PyGC_Fini(PyThreadState *tstate);
extern void _PyType_Fini(void);
extern void _Py_HashRandomization_Fini(void);
extern void _PyUnicode_Fini(PyThreadState *tstate);
extern void _PyUnicode_ClearInterned(PyThreadState *tstate);
extern void _PyLong_Fini(PyThreadState *tstate);
extern void _PyFaulthandler_Fini(void);
extern void _PyHash_Fini(void);

View file

@ -55,8 +55,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include <windows.h>
#endif
/* Uncomment to display statistics on interned strings at exit when
using Valgrind or Insecure++. */
/* Uncomment to display statistics on interned strings at exit
in _PyUnicode_ClearInterned(). */
/* #define INTERNED_STATS 1 */
@ -15681,6 +15681,11 @@ PyUnicode_InternInPlace(PyObject **p)
}
#ifdef INTERNED_STRINGS
if (PyUnicode_READY(s) == -1) {
PyErr_Clear();
return;
}
if (interned == NULL) {
interned = PyDict_New();
if (interned == NULL) {
@ -15733,23 +15738,29 @@ PyUnicode_InternFromString(const char *cp)
}
#if defined(WITH_VALGRIND) || defined(__INSURE__)
static void
unicode_release_interned(void)
void
_PyUnicode_ClearInterned(PyThreadState *tstate)
{
if (interned == NULL || !PyDict_Check(interned)) {
return;
}
PyObject *keys = PyDict_Keys(interned);
if (keys == NULL || !PyList_Check(keys)) {
PyErr_Clear();
if (!_Py_IsMainInterpreter(tstate)) {
// interned dict is shared by all interpreters
return;
}
/* Since unicode_release_interned() is intended to help a leak
detector, interned unicode strings are not forcibly deallocated;
rather, we give them their stolen references back, and then clear
and DECREF the interned dict. */
if (interned == NULL) {
return;
}
assert(PyDict_CheckExact(interned));
PyObject *keys = PyDict_Keys(interned);
if (keys == NULL) {
PyErr_Clear();
return;
}
assert(PyList_CheckExact(keys));
/* Interned unicode strings are not forcibly deallocated; rather, we give
them their stolen references back, and then clear and DECREF the
interned dict. */
Py_ssize_t n = PyList_GET_SIZE(keys);
#ifdef INTERNED_STATS
@ -15759,9 +15770,8 @@ unicode_release_interned(void)
#endif
for (Py_ssize_t i = 0; i < n; i++) {
PyObject *s = PyList_GET_ITEM(keys, i);
if (PyUnicode_READY(s) == -1) {
Py_UNREACHABLE();
}
assert(PyUnicode_IS_READY(s));
switch (PyUnicode_CHECK_INTERNED(s)) {
case SSTATE_INTERNED_IMMORTAL:
Py_SET_REFCNT(s, Py_REFCNT(s) + 1);
@ -15788,10 +15798,10 @@ unicode_release_interned(void)
mortal_size, immortal_size);
#endif
Py_DECREF(keys);
PyDict_Clear(interned);
Py_CLEAR(interned);
}
#endif
/********************* Unicode Iterator **************************/
@ -16160,23 +16170,9 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
void
_PyUnicode_Fini(PyThreadState *tstate)
{
struct _Py_unicode_state *state = &tstate->interp->unicode;
// _PyUnicode_ClearInterned() must be called before
int is_main_interp = _Py_IsMainInterpreter(tstate);
if (is_main_interp) {
#if defined(WITH_VALGRIND) || defined(__INSURE__)
/* Insure++ is a memory analysis tool that aids in discovering
* memory leaks and other memory problems. On Python exit, the
* interned string dictionaries are flagged as being in use at exit
* (which it is). Under normal circumstances, this is fine because
* the memory will be automatically reclaimed by the system. Under
* memory debugging, it's a huge source of useless noise, so we
* trade off slower shutdown for less distraction in the memory
* reports. -baw
*/
unicode_release_interned();
#endif /* __INSURE__ */
}
struct _Py_unicode_state *state = &tstate->interp->unicode;
Py_CLEAR(state->empty_string);
@ -16184,7 +16180,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
Py_CLEAR(state->latin1[i]);
}
if (is_main_interp) {
if (_Py_IsMainInterpreter(tstate)) {
unicode_clear_static_strings();
}

View file

@ -1263,6 +1263,7 @@ finalize_interp_types(PyThreadState *tstate)
_PyFrame_Fini(tstate);
_PyAsyncGen_Fini(tstate);
_PyContext_Fini(tstate);
_PyUnicode_ClearInterned(tstate);
_PyDict_Fini(tstate);
_PyList_Fini(tstate);