gh-111262: Add PyDict_Pop() function (#112028)

_PyDict_Pop_KnownHash(): remove the default value and the return type
becomes an int.

Co-authored-by: Stefan Behnel <stefan_ml@behnel.de>
Co-authored-by: Antoine Pitrou <pitrou@free.fr>
This commit is contained in:
Victor Stinner 2023-11-14 13:51:00 +01:00 committed by GitHub
parent f44d6ff6e0
commit 4f04172c92
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 338 additions and 76 deletions

View file

@ -173,6 +173,33 @@ Dictionary Objects
.. versionadded:: 3.4
.. c:function:: int PyDict_Pop(PyObject *p, PyObject *key, PyObject **result)
Remove *key* from dictionary *p* and optionally return the removed value.
Do not raise :exc:`KeyError` if the key missing.
- If the key is present, set *\*result* to a new reference to the removed
value if *result* is not ``NULL``, and return ``1``.
- If the key is missing, set *\*result* to ``NULL`` if *result* is not
``NULL``, and return ``0``.
- On error, raise an exception and return ``-1``.
This is similar to :meth:`dict.pop`, but without the default value and
not raising :exc:`KeyError` if the key missing.
.. versionadded:: 3.13
.. c:function:: int PyDict_PopString(PyObject *p, const char *key, PyObject **result)
Similar to :c:func:`PyDict_Pop`, but *key* is specified as a
:c:expr:`const char*` UTF-8 encoded bytes string, rather than a
:c:expr:`PyObject*`.
.. versionadded:: 3.13
.. c:function:: PyObject* PyDict_Items(PyObject *p)
Return a :c:type:`PyListObject` containing all the items from the dictionary.

View file

@ -1175,6 +1175,12 @@ New Features
Python ``list.extend()`` and ``list.clear()`` methods.
(Contributed by Victor Stinner in :gh:`111138`.)
* Add :c:func:`PyDict_Pop` and :c:func:`PyDict_PopString` functions: remove a
key from a dictionary and optionally return the removed value. This is
similar to :meth:`dict.pop`, but without the default value and not raising
:exc:`KeyError` if the key missing.
(Contributed by Stefan Behnel and Victor Stinner in :gh:`111262`.)
Porting to Python 3.13
----------------------

View file

@ -46,6 +46,8 @@ static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) {
PyAPI_FUNC(int) PyDict_ContainsString(PyObject *mp, const char *key);
PyAPI_FUNC(int) PyDict_Pop(PyObject *dict, PyObject *key, PyObject **result);
PyAPI_FUNC(int) PyDict_PopString(PyObject *dict, const char *key, PyObject **result);
PyAPI_FUNC(PyObject *) _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *default_value);
/* Dictionary watchers */

View file

@ -116,7 +116,11 @@ extern PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
extern int _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value);
extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
extern PyObject *_PyDict_Pop_KnownHash(PyObject *, PyObject *, Py_hash_t, PyObject *);
extern int _PyDict_Pop_KnownHash(
PyDictObject *dict,
PyObject *key,
Py_hash_t hash,
PyObject **result);
#define DKIX_EMPTY (-1)
#define DKIX_DUMMY (-2) /* Used internally */

View file

@ -432,6 +432,93 @@ def test_dict_mergefromseq2(self):
# CRASHES mergefromseq2({}, NULL, 0)
# CRASHES mergefromseq2(NULL, {}, 0)
def test_dict_pop(self):
# Test PyDict_Pop()
dict_pop = _testcapi.dict_pop
dict_pop_null = _testcapi.dict_pop_null
# key present, get removed value
mydict = {"key": "value", "key2": "value2"}
self.assertEqual(dict_pop(mydict, "key"), (1, "value"))
self.assertEqual(mydict, {"key2": "value2"})
self.assertEqual(dict_pop(mydict, "key2"), (1, "value2"))
self.assertEqual(mydict, {})
# key present, ignore removed value
mydict = {"key": "value", "key2": "value2"}
self.assertEqual(dict_pop_null(mydict, "key"), 1)
self.assertEqual(mydict, {"key2": "value2"})
self.assertEqual(dict_pop_null(mydict, "key2"), 1)
self.assertEqual(mydict, {})
# key missing, expect removed value; empty dict has a fast path
self.assertEqual(dict_pop({}, "key"), (0, NULL))
self.assertEqual(dict_pop({"a": 1}, "key"), (0, NULL))
# key missing, ignored removed value; empty dict has a fast path
self.assertEqual(dict_pop_null({}, "key"), 0)
self.assertEqual(dict_pop_null({"a": 1}, "key"), 0)
# dict error
not_dict = UserDict({1: 2})
self.assertRaises(SystemError, dict_pop, not_dict, "key")
self.assertRaises(SystemError, dict_pop_null, not_dict, "key")
# key error; don't hash key if dict is empty
not_hashable_key = ["list"]
self.assertEqual(dict_pop({}, not_hashable_key), (0, NULL))
with self.assertRaises(TypeError):
dict_pop({'key': 1}, not_hashable_key)
dict_pop({}, NULL) # key is not checked if dict is empty
# CRASHES dict_pop(NULL, "key")
# CRASHES dict_pop({"a": 1}, NULL)
def test_dict_popstring(self):
# Test PyDict_PopString()
dict_popstring = _testcapi.dict_popstring
dict_popstring_null = _testcapi.dict_popstring_null
# key present, get removed value
mydict = {"key": "value", "key2": "value2"}
self.assertEqual(dict_popstring(mydict, "key"), (1, "value"))
self.assertEqual(mydict, {"key2": "value2"})
self.assertEqual(dict_popstring(mydict, "key2"), (1, "value2"))
self.assertEqual(mydict, {})
# key present, ignore removed value
mydict = {"key": "value", "key2": "value2"}
self.assertEqual(dict_popstring_null(mydict, "key"), 1)
self.assertEqual(mydict, {"key2": "value2"})
self.assertEqual(dict_popstring_null(mydict, "key2"), 1)
self.assertEqual(mydict, {})
# key missing; empty dict has a fast path
self.assertEqual(dict_popstring({}, "key"), (0, NULL))
self.assertEqual(dict_popstring_null({}, "key"), 0)
self.assertEqual(dict_popstring({"a": 1}, "key"), (0, NULL))
self.assertEqual(dict_popstring_null({"a": 1}, "key"), 0)
# non-ASCII key
non_ascii = '\U0001f40d'
dct = {'\U0001f40d': 123}
self.assertEqual(dict_popstring(dct, '\U0001f40d'.encode()), (1, 123))
dct = {'\U0001f40d': 123}
self.assertEqual(dict_popstring_null(dct, '\U0001f40d'.encode()), 1)
# dict error
not_dict = UserDict({1: 2})
self.assertRaises(SystemError, dict_popstring, not_dict, "key")
self.assertRaises(SystemError, dict_popstring_null, not_dict, "key")
# key error
self.assertRaises(UnicodeDecodeError, dict_popstring, {1: 2}, INVALID_UTF8)
self.assertRaises(UnicodeDecodeError, dict_popstring_null, {1: 2}, INVALID_UTF8)
# CRASHES dict_popstring(NULL, "key")
# CRASHES dict_popstring({}, NULL)
# CRASHES dict_popstring({"a": 1}, NULL)
if __name__ == "__main__":
unittest.main()

View file

@ -0,0 +1,4 @@
Add :c:func:`PyDict_Pop` and :c:func:`PyDict_PopString` functions: remove a key
from a dictionary and optionally return the removed value. This is similar to
:meth:`dict.pop`, but without the default value and not raising :exc:`KeyError`
if the key missing. Patch by Stefan Behnel and Victor Stinner.

View file

@ -1087,19 +1087,9 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds
The cache dict holds one reference to the link.
We created one other reference when the link was created.
The linked list only has borrowed references. */
popresult = _PyDict_Pop_KnownHash(self->cache, link->key,
link->hash, Py_None);
if (popresult == Py_None) {
/* Getting here means that the user function call or another
thread has already removed the old key from the dictionary.
This link is now an orphan. Since we don't want to leave the
cache in an inconsistent state, we don't restore the link. */
Py_DECREF(popresult);
Py_DECREF(link);
Py_DECREF(key);
return result;
}
if (popresult == NULL) {
int res = _PyDict_Pop_KnownHash((PyDictObject*)self->cache, link->key,
link->hash, &popresult);
if (res < 0) {
/* An error arose while trying to remove the oldest key (the one
being evicted) from the cache. We restore the link to its
original position as the oldest link. Then we allow the
@ -1110,10 +1100,22 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds
Py_DECREF(result);
return NULL;
}
if (res == 0) {
/* Getting here means that the user function call or another
thread has already removed the old key from the dictionary.
This link is now an orphan. Since we don't want to leave the
cache in an inconsistent state, we don't restore the link. */
assert(popresult == NULL);
Py_DECREF(link);
Py_DECREF(key);
return result;
}
/* Keep a reference to the old key and old result to prevent their
ref counts from going to zero during the update. That will
prevent potentially arbitrary object clean-up code (i.e. __del__)
from running while we're still adjusting the links. */
assert(popresult != NULL);
oldkey = link->key;
oldresult = link->result;

View file

@ -331,6 +331,88 @@ dict_mergefromseq2(PyObject *self, PyObject *args)
}
static PyObject *
dict_pop(PyObject *self, PyObject *args)
{
// Test PyDict_Pop(dict, key, &value)
PyObject *dict, *key;
if (!PyArg_ParseTuple(args, "OO", &dict, &key)) {
return NULL;
}
NULLABLE(dict);
NULLABLE(key);
PyObject *result = UNINITIALIZED_PTR;
int res = PyDict_Pop(dict, key, &result);
if (res < 0) {
assert(result == NULL);
return NULL;
}
if (res == 0) {
assert(result == NULL);
result = Py_NewRef(Py_None);
}
else {
assert(result != NULL);
}
return Py_BuildValue("iN", res, result);
}
static PyObject *
dict_pop_null(PyObject *self, PyObject *args)
{
// Test PyDict_Pop(dict, key, NULL)
PyObject *dict, *key;
if (!PyArg_ParseTuple(args, "OO", &dict, &key)) {
return NULL;
}
NULLABLE(dict);
NULLABLE(key);
RETURN_INT(PyDict_Pop(dict, key, NULL));
}
static PyObject *
dict_popstring(PyObject *self, PyObject *args)
{
PyObject *dict;
const char *key;
Py_ssize_t key_size;
if (!PyArg_ParseTuple(args, "Oz#", &dict, &key, &key_size)) {
return NULL;
}
NULLABLE(dict);
PyObject *result = UNINITIALIZED_PTR;
int res = PyDict_PopString(dict, key, &result);
if (res < 0) {
assert(result == NULL);
return NULL;
}
if (res == 0) {
assert(result == NULL);
result = Py_NewRef(Py_None);
}
else {
assert(result != NULL);
}
return Py_BuildValue("iN", res, result);
}
static PyObject *
dict_popstring_null(PyObject *self, PyObject *args)
{
PyObject *dict;
const char *key;
Py_ssize_t key_size;
if (!PyArg_ParseTuple(args, "Oz#", &dict, &key, &key_size)) {
return NULL;
}
NULLABLE(dict);
RETURN_INT(PyDict_PopString(dict, key, NULL));
}
static PyMethodDef test_methods[] = {
{"dict_check", dict_check, METH_O},
{"dict_checkexact", dict_checkexact, METH_O},
@ -358,7 +440,10 @@ static PyMethodDef test_methods[] = {
{"dict_merge", dict_merge, METH_VARARGS},
{"dict_update", dict_update, METH_VARARGS},
{"dict_mergefromseq2", dict_mergefromseq2, METH_VARARGS},
{"dict_pop", dict_pop, METH_VARARGS},
{"dict_pop_null", dict_pop_null, METH_VARARGS},
{"dict_popstring", dict_popstring, METH_VARARGS},
{"dict_popstring_null", dict_popstring_null, METH_VARARGS},
{NULL},
};

View file

@ -967,11 +967,8 @@ local_clear(localobject *self)
HEAD_UNLOCK(runtime);
while (tstate) {
if (tstate->dict) {
PyObject *v = _PyDict_Pop(tstate->dict, self->key, Py_None);
if (v != NULL) {
Py_DECREF(v);
}
else {
if (PyDict_Pop(tstate->dict, self->key, NULL) < 0) {
// Silently ignore error
PyErr_Clear();
}
}

View file

@ -392,16 +392,10 @@ remove_unusable_flags(PyObject *m)
break;
}
else {
PyObject *flag_name = PyUnicode_FromString(win_runtime_flags[i].flag_name);
if (flag_name == NULL) {
if (PyDict_PopString(dict, win_runtime_flags[i].flag_name,
NULL) < 0) {
return -1;
}
PyObject *v = _PyDict_Pop(dict, flag_name, Py_None);
Py_DECREF(flag_name);
if (v == NULL) {
return -1;
}
Py_DECREF(v);
}
}
return 0;

View file

@ -2226,64 +2226,119 @@ PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
return _PyDict_Next(op, ppos, pkey, pvalue, NULL);
}
/* Internal version of dict.pop(). */
PyObject *
_PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt)
{
Py_ssize_t ix;
PyObject *old_value;
PyDictObject *mp;
PyInterpreterState *interp = _PyInterpreterState_GET();
assert(PyDict_Check(dict));
mp = (PyDictObject *)dict;
/* Internal version of dict.pop(). */
int
_PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash,
PyObject **result)
{
assert(PyDict_Check(mp));
if (mp->ma_used == 0) {
if (deflt) {
return Py_NewRef(deflt);
if (result) {
*result = NULL;
}
_PyErr_SetKeyError(key);
return NULL;
return 0;
}
ix = _Py_dict_lookup(mp, key, hash, &old_value);
if (ix == DKIX_ERROR)
return NULL;
PyObject *old_value;
Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value);
if (ix == DKIX_ERROR) {
if (result) {
*result = NULL;
}
return -1;
}
if (ix == DKIX_EMPTY || old_value == NULL) {
if (deflt) {
return Py_NewRef(deflt);
if (result) {
*result = NULL;
}
_PyErr_SetKeyError(key);
return NULL;
return 0;
}
assert(old_value != NULL);
PyInterpreterState *interp = _PyInterpreterState_GET();
uint64_t new_version = _PyDict_NotifyEvent(
interp, PyDict_EVENT_DELETED, mp, key, NULL);
delitem_common(mp, hash, ix, Py_NewRef(old_value), new_version);
ASSERT_CONSISTENT(mp);
return old_value;
if (result) {
*result = old_value;
}
else {
Py_DECREF(old_value);
}
return 1;
}
PyObject *
_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
{
Py_hash_t hash;
if (((PyDictObject *)dict)->ma_used == 0) {
if (deflt) {
return Py_NewRef(deflt);
int
PyDict_Pop(PyObject *op, PyObject *key, PyObject **result)
{
if (!PyDict_Check(op)) {
if (result) {
*result = NULL;
}
PyErr_BadInternalCall();
return -1;
}
PyDictObject *dict = (PyDictObject *)op;
if (dict->ma_used == 0) {
if (result) {
*result = NULL;
}
return 0;
}
Py_hash_t hash;
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
hash = PyObject_Hash(key);
if (hash == -1) {
if (result) {
*result = NULL;
}
return -1;
}
}
return _PyDict_Pop_KnownHash(dict, key, hash, result);
}
int
PyDict_PopString(PyObject *op, const char *key, PyObject **result)
{
PyObject *key_obj = PyUnicode_FromString(key);
if (key_obj == NULL) {
if (result != NULL) {
*result = NULL;
}
return -1;
}
int res = PyDict_Pop(op, key_obj, result);
Py_DECREF(key_obj);
return res;
}
PyObject *
_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *default_value)
{
PyObject *result;
if (PyDict_Pop(dict, key, &result) == 0) {
if (default_value != NULL) {
return Py_NewRef(default_value);
}
_PyErr_SetKeyError(key);
return NULL;
}
if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) {
hash = PyObject_Hash(key);
if (hash == -1)
return NULL;
}
return _PyDict_Pop_KnownHash(dict, key, hash, deflt);
return result;
}
/* Internal version of dict.from_keys(). It is subclass-friendly. */
PyObject *
_PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)

View file

@ -1049,7 +1049,10 @@ _odict_popkey_hash(PyObject *od, PyObject *key, PyObject *failobj,
return NULL;
}
/* Now delete the value from the dict. */
value = _PyDict_Pop_KnownHash(od, key, hash, failobj);
if (_PyDict_Pop_KnownHash((PyDictObject *)od, key, hash,
&value) == 0) {
value = Py_NewRef(failobj);
}
}
else if (value == NULL && !PyErr_Occurred()) {
/* Apply the fallback value, if necessary. */

View file

@ -8,7 +8,6 @@
*/
#include "Python.h"
#include "pycore_dict.h" // _PyDict_Pop()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_modsupport.h" // _PyArg_NoPositional()
#include "pycore_object.h" // _PyObject_GC_TRACK()
@ -417,14 +416,13 @@ structseq_replace(PyStructSequence *self, PyObject *args, PyObject *kwargs)
// We do not support types with unnamed fields, so we can iterate over
// i >= n_visible_fields case without slicing with (i - n_unnamed_fields).
for (i = 0; i < n_fields; ++i) {
PyObject *key = PyUnicode_FromString(Py_TYPE(self)->tp_members[i].name);
if (!key) {
PyObject *ob;
if (PyDict_PopString(kwargs, Py_TYPE(self)->tp_members[i].name,
&ob) < 0) {
goto error;
}
PyObject *ob = _PyDict_Pop(kwargs, key, self->ob_item[i]);
Py_DECREF(key);
if (!ob) {
goto error;
if (ob == NULL) {
ob = Py_NewRef(self->ob_item[i]);
}
result->ob_item[i] = ob;
}

View file

@ -395,8 +395,8 @@ remove_module(PyThreadState *tstate, PyObject *name)
PyObject *modules = MODULES(tstate->interp);
if (PyDict_CheckExact(modules)) {
PyObject *mod = _PyDict_Pop(modules, name, Py_None);
Py_XDECREF(mod);
// Error is reported to the caller
(void)PyDict_Pop(modules, name, NULL);
}
else if (PyMapping_DelItem(modules, name) < 0) {
if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) {

View file

@ -125,11 +125,9 @@ sys_set_object(PyInterpreterState *interp, PyObject *key, PyObject *v)
}
PyObject *sd = interp->sysdict;
if (v == NULL) {
v = _PyDict_Pop(sd, key, Py_None);
if (v == NULL) {
if (PyDict_Pop(sd, key, NULL) < 0) {
return -1;
}
Py_DECREF(v);
return 0;
}
else {