gh-89850: Add default C implementations of persistent_id() and persistent_load() (GH-113579)

Previously the C implementation of pickle.Pickler and pickle.Unpickler
classes did not have such methods and they could only be used if
they were overloaded in subclasses or set as instance attributes.

Fixed calling super().persistent_id() and super().persistent_load() in
subclasses of the C implementation of pickle.Pickler and pickle.Unpickler
classes. It no longer causes an infinite recursion.
This commit is contained in:
Serhiy Storchaka 2024-01-10 15:30:37 +02:00 committed by GitHub
parent b3d2427f22
commit 89cee94b31
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 191 additions and 215 deletions

View file

@ -345,6 +345,10 @@ The :mod:`pickle` module exports three classes, :class:`Pickler`,
See :ref:`pickle-persistent` for details and examples of uses.
.. versionchanged:: 3.13
Add the default implementation of this method in the C implementation
of :class:`!Pickler`.
.. attribute:: dispatch_table
A pickler object's dispatch table is a registry of *reduction
@ -446,6 +450,10 @@ The :mod:`pickle` module exports three classes, :class:`Pickler`,
See :ref:`pickle-persistent` for details and examples of uses.
.. versionchanged:: 3.13
Add the default implementation of this method in the C implementation
of :class:`!Unpickler`.
.. method:: find_class(module, name)
Import *module* if necessary and return the object called *name* from it,

View file

@ -122,6 +122,7 @@ class PyIdPersPicklerTests(AbstractIdentityPersistentPicklerTests,
pickler = pickle._Pickler
unpickler = pickle._Unpickler
persistent_load_error = pickle.UnpicklingError
@support.cpython_only
def test_pickler_reference_cycle(self):
@ -176,7 +177,6 @@ class DispatchTable:
support.gc_collect()
self.assertIsNone(table_ref())
@support.cpython_only
def test_unpickler_reference_cycle(self):
def check(Unpickler):
@ -206,6 +206,28 @@ def persistent_load(pid):
return pid
check(PersUnpickler)
def test_pickler_super(self):
class PersPickler(self.pickler):
def persistent_id(subself, obj):
self.assertIsNone(super().persistent_id(obj))
return obj
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
f = io.BytesIO()
pickler = PersPickler(f, proto)
pickler.dump('abc')
self.assertEqual(self.loads(f.getvalue()), 'abc')
def test_unpickler_super(self):
class PersUnpickler(self.unpickler):
def persistent_load(subself, pid):
with self.assertRaises(self.persistent_load_error):
super().persistent_load(pid)
return pid
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
unpickler = PersUnpickler(io.BytesIO(self.dumps('abc', proto)))
self.assertEqual(unpickler.load(), 'abc')
class PyPicklerUnpicklerObjectTests(AbstractPicklerUnpicklerObjectTests, unittest.TestCase):
@ -256,6 +278,7 @@ class CPersPicklerTests(PyPersPicklerTests):
class CIdPersPicklerTests(PyIdPersPicklerTests):
pickler = _pickle.Pickler
unpickler = _pickle.Unpickler
persistent_load_error = _pickle.UnpicklingError
class CDumpPickle_LoadPickle(PyPicklerTests):
pickler = _pickle.Pickler
@ -326,7 +349,7 @@ class SizeofTests(unittest.TestCase):
check_sizeof = support.check_sizeof
def test_pickler(self):
basesize = support.calcobjsize('7P2n3i2n3i2P')
basesize = support.calcobjsize('6P2n3i2n3i2P')
p = _pickle.Pickler(io.BytesIO())
self.assertEqual(object.__sizeof__(p), basesize)
MT_size = struct.calcsize('3nP0n')
@ -343,7 +366,7 @@ def test_pickler(self):
0) # Write buffer is cleared after every dump().
def test_unpickler(self):
basesize = support.calcobjsize('2P2n2P 2P2n2i5P 2P3n8P2n2i')
basesize = support.calcobjsize('2P2nP 2P2n2i5P 2P3n8P2n2i')
unpickler = _pickle.Unpickler
P = struct.calcsize('P') # Size of memo table entry.
n = struct.calcsize('n') # Size of mark table entry.

View file

@ -0,0 +1,5 @@
Add default implementations of :meth:`pickle.Pickler.persistent_id` and
:meth:`pickle.Unpickler.persistent_load` methods in the C implementation.
Calling ``super().persistent_id()`` and ``super().persistent_load()`` in
subclasses of the C implementation of :class:`pickle.Pickler` and
:class:`pickle.Unpickler` classes no longer causes infinite recursion.

View file

@ -399,64 +399,6 @@ _Pickle_FastCall(PyObject *func, PyObject *obj)
/*************************************************************************/
/* Retrieve and deconstruct a method for avoiding a reference cycle
(pickler -> bound method of pickler -> pickler) */
static int
init_method_ref(PyObject *self, PyObject *name,
PyObject **method_func, PyObject **method_self)
{
PyObject *func, *func2;
int ret;
/* *method_func and *method_self should be consistent. All refcount decrements
should be occurred after setting *method_self and *method_func. */
ret = PyObject_GetOptionalAttr(self, name, &func);
if (func == NULL) {
*method_self = NULL;
Py_CLEAR(*method_func);
return ret;
}
if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
/* Deconstruct a bound Python method */
*method_self = self; /* borrowed */
func2 = PyMethod_GET_FUNCTION(func);
Py_XSETREF(*method_func, Py_NewRef(func2));
Py_DECREF(func);
return 0;
}
else {
*method_self = NULL;
Py_XSETREF(*method_func, func);
return 0;
}
}
/* Bind a method if it was deconstructed */
static PyObject *
reconstruct_method(PyObject *func, PyObject *self)
{
if (self) {
return PyMethod_New(func, self);
}
else {
return Py_NewRef(func);
}
}
static PyObject *
call_method(PyObject *func, PyObject *self, PyObject *obj)
{
if (self) {
return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
}
else {
return PyObject_CallOneArg(func, obj);
}
}
/*************************************************************************/
/* Internal data type used as the unpickling stack. */
typedef struct {
PyObject_VAR_HEAD
@ -668,9 +610,7 @@ typedef struct PicklerObject {
PyMemoTable *memo; /* Memo table, keep track of the seen
objects to support self-referential objects
pickling. */
PyObject *pers_func; /* persistent_id() method, can be NULL */
PyObject *pers_func_self; /* borrowed reference to self if pers_func
is an unbound method, NULL otherwise */
PyObject *persistent_id; /* persistent_id() method, can be NULL */
PyObject *dispatch_table; /* private dispatch_table, can be NULL */
PyObject *reducer_override; /* hook for invoking user-defined callbacks
instead of save_global when pickling
@ -712,9 +652,7 @@ typedef struct UnpicklerObject {
size_t memo_size; /* Capacity of the memo array */
size_t memo_len; /* Number of objects in the memo */
PyObject *pers_func; /* persistent_load() method, can be NULL. */
PyObject *pers_func_self; /* borrowed reference to self if pers_func
is an unbound method, NULL otherwise */
PyObject *persistent_load; /* persistent_load() method, can be NULL. */
Py_buffer buffer;
char *input_buffer;
@ -1167,8 +1105,7 @@ _Pickler_New(PickleState *st)
}
self->memo = memo;
self->pers_func = NULL;
self->pers_func_self = NULL;
self->persistent_id = NULL;
self->dispatch_table = NULL;
self->reducer_override = NULL;
self->write = NULL;
@ -1662,8 +1599,7 @@ _Unpickler_New(PyObject *module)
self->memo = memo;
self->memo_size = MEMO_SIZE;
self->memo_len = 0;
self->pers_func = NULL;
self->pers_func_self = NULL;
self->persistent_load = NULL;
memset(&self->buffer, 0, sizeof(Py_buffer));
self->input_buffer = NULL;
self->input_line = NULL;
@ -3929,7 +3865,7 @@ save_pers(PickleState *state, PicklerObject *self, PyObject *obj)
const char persid_op = PERSID;
const char binpersid_op = BINPERSID;
pid = call_method(self->pers_func, self->pers_func_self, obj);
pid = PyObject_CallOneArg(self->persistent_id, obj);
if (pid == NULL)
return -1;
@ -4317,7 +4253,7 @@ save(PickleState *st, PicklerObject *self, PyObject *obj, int pers_save)
/* The extra pers_save argument is necessary to avoid calling save_pers()
on its returned object. */
if (!pers_save && self->pers_func) {
if (!pers_save && self->persistent_id) {
/* save_pers() returns:
-1 to signal an error;
0 if it did nothing successfully;
@ -4522,6 +4458,12 @@ save(PickleState *st, PicklerObject *self, PyObject *obj, int pers_save)
return status;
}
static PyObject *
persistent_id(PyObject *self, PyObject *obj)
{
Py_RETURN_NONE;
}
static int
dump(PickleState *state, PicklerObject *self, PyObject *obj)
{
@ -4529,17 +4471,25 @@ dump(PickleState *state, PicklerObject *self, PyObject *obj)
int status = -1;
PyObject *tmp;
/* Cache the persistent_id method. */
tmp = PyObject_GetAttr((PyObject *)self, &_Py_ID(persistent_id));
if (tmp == NULL) {
goto error;
}
if (PyCFunction_Check(tmp) &&
PyCFunction_GET_SELF(tmp) == (PyObject *)self &&
PyCFunction_GET_FUNCTION(tmp) == persistent_id)
{
Py_CLEAR(tmp);
}
Py_XSETREF(self->persistent_id, tmp);
/* Cache the reducer_override method, if it exists. */
if (PyObject_GetOptionalAttr((PyObject *)self, &_Py_ID(reducer_override),
&tmp) < 0) {
goto error;
}
/* Cache the reducer_override method, if it exists. */
if (tmp != NULL) {
Py_XSETREF(self->reducer_override, tmp);
}
else {
Py_CLEAR(self->reducer_override);
goto error;
}
Py_XSETREF(self->reducer_override, tmp);
if (self->proto >= 2) {
char header[2];
@ -4565,11 +4515,12 @@ dump(PickleState *state, PicklerObject *self, PyObject *obj)
self->framing = 0;
/* Break the reference cycle we generated at the beginning this function
* call when setting the reducer_override attribute of the Pickler instance
* to a bound method of the same instance. This is important as the Pickler
* instance holds a reference to each object it has pickled (through its
* memo): thus, these objects won't be garbage-collected as long as the
* Pickler itself is not collected. */
* call when setting the persistent_id and the reducer_override attributes
* of the Pickler instance to a bound method of the same instance.
* This is important as the Pickler instance holds a reference to each
* object it has pickled (through its memo): thus, these objects won't
* be garbage-collected as long as the Pickler itself is not collected. */
Py_CLEAR(self->persistent_id);
Py_CLEAR(self->reducer_override);
return status;
}
@ -4662,6 +4613,8 @@ _pickle_Pickler___sizeof___impl(PicklerObject *self)
}
static struct PyMethodDef Pickler_methods[] = {
{"persistent_id", persistent_id, METH_O,
PyDoc_STR("persistent_id($self, obj, /)\n--\n\n")},
_PICKLE_PICKLER_DUMP_METHODDEF
_PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
_PICKLE_PICKLER___SIZEOF___METHODDEF
@ -4673,7 +4626,7 @@ Pickler_clear(PicklerObject *self)
{
Py_CLEAR(self->output_buffer);
Py_CLEAR(self->write);
Py_CLEAR(self->pers_func);
Py_CLEAR(self->persistent_id);
Py_CLEAR(self->dispatch_table);
Py_CLEAR(self->fast_memo);
Py_CLEAR(self->reducer_override);
@ -4702,7 +4655,7 @@ Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
Py_VISIT(Py_TYPE(self));
Py_VISIT(self->write);
Py_VISIT(self->pers_func);
Py_VISIT(self->persistent_id);
Py_VISIT(self->dispatch_table);
Py_VISIT(self->fast_memo);
Py_VISIT(self->reducer_override);
@ -4799,11 +4752,6 @@ _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
self->fast_nesting = 0;
self->fast_memo = NULL;
if (init_method_ref((PyObject *)self, &_Py_ID(persistent_id),
&self->pers_func, &self->pers_func_self) < 0)
{
return -1;
}
if (self->dispatch_table != NULL) {
return 0;
}
@ -5052,36 +5000,6 @@ Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
return -1;
}
static PyObject *
Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
{
if (self->pers_func == NULL) {
PyErr_SetString(PyExc_AttributeError, "persistent_id");
return NULL;
}
return reconstruct_method(self->pers_func, self->pers_func_self);
}
static int
Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
{
if (value == NULL) {
PyErr_SetString(PyExc_TypeError,
"attribute deletion is not supported");
return -1;
}
if (!PyCallable_Check(value)) {
PyErr_SetString(PyExc_TypeError,
"persistent_id must be a callable taking one argument");
return -1;
}
self->pers_func_self = NULL;
Py_XSETREF(self->pers_func, Py_NewRef(value));
return 0;
}
static PyMemberDef Pickler_members[] = {
{"bin", Py_T_INT, offsetof(PicklerObject, bin)},
{"fast", Py_T_INT, offsetof(PicklerObject, fast)},
@ -5092,8 +5010,6 @@ static PyMemberDef Pickler_members[] = {
static PyGetSetDef Pickler_getsets[] = {
{"memo", (getter)Pickler_get_memo,
(setter)Pickler_set_memo},
{"persistent_id", (getter)Pickler_get_persid,
(setter)Pickler_set_persid},
{NULL}
};
@ -6056,36 +5972,28 @@ load_persid(PickleState *st, UnpicklerObject *self)
Py_ssize_t len;
char *s;
if (self->pers_func) {
if ((len = _Unpickler_Readline(st, self, &s)) < 0)
return -1;
if (len < 1)
return bad_readline(st);
if ((len = _Unpickler_Readline(st, self, &s)) < 0)
return -1;
if (len < 1)
return bad_readline(st);
pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
if (pid == NULL) {
if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
PyErr_SetString(st->UnpicklingError,
"persistent IDs in protocol 0 must be "
"ASCII strings");
}
return -1;
pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
if (pid == NULL) {
if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
PyErr_SetString(st->UnpicklingError,
"persistent IDs in protocol 0 must be "
"ASCII strings");
}
obj = call_method(self->pers_func, self->pers_func_self, pid);
Py_DECREF(pid);
if (obj == NULL)
return -1;
PDATA_PUSH(self->stack, obj, -1);
return 0;
}
else {
PyErr_SetString(st->UnpicklingError,
"A load persistent id instruction was encountered, "
"but no persistent_load function was specified.");
return -1;
}
obj = PyObject_CallOneArg(self->persistent_load, pid);
Py_DECREF(pid);
if (obj == NULL)
return -1;
PDATA_PUSH(self->stack, obj, -1);
return 0;
}
static int
@ -6093,25 +6001,17 @@ load_binpersid(PickleState *st, UnpicklerObject *self)
{
PyObject *pid, *obj;
if (self->pers_func) {
PDATA_POP(st, self->stack, pid);
if (pid == NULL)
return -1;
obj = call_method(self->pers_func, self->pers_func_self, pid);
Py_DECREF(pid);
if (obj == NULL)
return -1;
PDATA_PUSH(self->stack, obj, -1);
return 0;
}
else {
PyErr_SetString(st->UnpicklingError,
"A load persistent id instruction was encountered, "
"but no persistent_load function was specified.");
PDATA_POP(st, self->stack, pid);
if (pid == NULL)
return -1;
}
obj = PyObject_CallOneArg(self->persistent_load, pid);
Py_DECREF(pid);
if (obj == NULL)
return -1;
PDATA_PUSH(self->stack, obj, -1);
return 0;
}
static int
@ -6837,6 +6737,7 @@ static PyObject *
load(PickleState *st, UnpicklerObject *self)
{
PyObject *value = NULL;
PyObject *tmp;
char *s = NULL;
self->num_marks = 0;
@ -6846,6 +6747,13 @@ load(PickleState *st, UnpicklerObject *self)
if (Py_SIZE(self->stack))
Pdata_clear(self->stack, 0);
/* Cache the persistent_load method. */
tmp = PyObject_GetAttr((PyObject *)self, &_Py_ID(persistent_load));
if (tmp == NULL) {
goto error;
}
Py_XSETREF(self->persistent_load, tmp);
/* Convenient macros for the dispatch while-switch loop just below. */
#define OP(opcode, load_func) \
case opcode: if (load_func(st, self) < 0) break; continue;
@ -6858,7 +6766,7 @@ load(PickleState *st, UnpicklerObject *self)
if (PyErr_ExceptionMatches(st->UnpicklingError)) {
PyErr_Format(PyExc_EOFError, "Ran out of input");
}
return NULL;
goto error;
}
switch ((enum opcode)s[0]) {
@ -6944,7 +6852,7 @@ load(PickleState *st, UnpicklerObject *self)
PyErr_Format(st->UnpicklingError,
"invalid load key, '\\x%02x'.", c);
}
return NULL;
goto error;
}
}
@ -6952,14 +6860,41 @@ load(PickleState *st, UnpicklerObject *self)
}
if (PyErr_Occurred()) {
return NULL;
goto error;
}
if (_Unpickler_SkipConsumed(self) < 0)
return NULL;
goto error;
Py_CLEAR(self->persistent_load);
PDATA_POP(st, self->stack, value);
return value;
error:
Py_CLEAR(self->persistent_load);
return NULL;
}
/*[clinic input]
_pickle.Unpickler.persistent_load
cls: defining_class
pid: object
/
[clinic start generated code]*/
static PyObject *
_pickle_Unpickler_persistent_load_impl(UnpicklerObject *self,
PyTypeObject *cls, PyObject *pid)
/*[clinic end generated code: output=9f4706f1330cb14d input=2f9554fae051276e]*/
{
PickleState *st = _Pickle_GetStateByClass(cls);
PyErr_SetString(st->UnpicklingError,
"A load persistent id instruction was encountered, "
"but no persistent_load function was specified.");
return NULL;
}
/*[clinic input]
@ -7128,6 +7063,7 @@ _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
}
static struct PyMethodDef Unpickler_methods[] = {
_PICKLE_UNPICKLER_PERSISTENT_LOAD_METHODDEF
_PICKLE_UNPICKLER_LOAD_METHODDEF
_PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
_PICKLE_UNPICKLER___SIZEOF___METHODDEF
@ -7142,7 +7078,7 @@ Unpickler_clear(UnpicklerObject *self)
Py_CLEAR(self->read);
Py_CLEAR(self->peek);
Py_CLEAR(self->stack);
Py_CLEAR(self->pers_func);
Py_CLEAR(self->persistent_load);
Py_CLEAR(self->buffers);
if (self->buffer.buf != NULL) {
PyBuffer_Release(&self->buffer);
@ -7181,7 +7117,7 @@ Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
Py_VISIT(self->read);
Py_VISIT(self->peek);
Py_VISIT(self->stack);
Py_VISIT(self->pers_func);
Py_VISIT(self->persistent_load);
Py_VISIT(self->buffers);
PyObject **memo = self->memo;
if (memo) {
@ -7247,12 +7183,6 @@ _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
self->fix_imports = fix_imports;
if (init_method_ref((PyObject *)self, &_Py_ID(persistent_load),
&self->pers_func, &self->pers_func_self) < 0)
{
return -1;
}
PyTypeObject *tp = Py_TYPE(self);
PickleState *state = _Pickle_FindStateByType(tp);
self->stack = (Pdata *)Pdata_New(state);
@ -7521,41 +7451,8 @@ Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored
return -1;
}
static PyObject *
Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
{
if (self->pers_func == NULL) {
PyErr_SetString(PyExc_AttributeError, "persistent_load");
return NULL;
}
return reconstruct_method(self->pers_func, self->pers_func_self);
}
static int
Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
{
if (value == NULL) {
PyErr_SetString(PyExc_TypeError,
"attribute deletion is not supported");
return -1;
}
if (!PyCallable_Check(value)) {
PyErr_SetString(PyExc_TypeError,
"persistent_load must be a callable taking "
"one argument");
return -1;
}
self->pers_func_self = NULL;
Py_XSETREF(self->pers_func, Py_NewRef(value));
return 0;
}
static PyGetSetDef Unpickler_getsets[] = {
{"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
{"persistent_load", (getter)Unpickler_get_persload,
(setter)Unpickler_set_persload},
{NULL}
};

View file

@ -266,6 +266,49 @@ _pickle_PicklerMemoProxy___reduce__(PicklerMemoProxyObject *self, PyObject *Py_U
return _pickle_PicklerMemoProxy___reduce___impl(self);
}
PyDoc_STRVAR(_pickle_Unpickler_persistent_load__doc__,
"persistent_load($self, pid, /)\n"
"--\n"
"\n");
#define _PICKLE_UNPICKLER_PERSISTENT_LOAD_METHODDEF \
{"persistent_load", _PyCFunction_CAST(_pickle_Unpickler_persistent_load), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _pickle_Unpickler_persistent_load__doc__},
static PyObject *
_pickle_Unpickler_persistent_load_impl(UnpicklerObject *self,
PyTypeObject *cls, PyObject *pid);
static PyObject *
_pickle_Unpickler_persistent_load(UnpicklerObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
# define KWTUPLE (PyObject *)&_Py_SINGLETON(tuple_empty)
#else
# define KWTUPLE NULL
#endif
static const char * const _keywords[] = {"", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "persistent_load",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
PyObject *argsbuf[1];
PyObject *pid;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
if (!args) {
goto exit;
}
pid = args[0];
return_value = _pickle_Unpickler_persistent_load_impl(self, cls, pid);
exit:
return return_value;
}
PyDoc_STRVAR(_pickle_Unpickler_load__doc__,
"load($self, /)\n"
"--\n"
@ -1034,4 +1077,4 @@ skip_optional_kwonly:
exit:
return return_value;
}
/*[clinic end generated code: output=7f0564b5fb5410a8 input=a9049054013a1b77]*/
/*[clinic end generated code: output=ebe78653233827a6 input=a9049054013a1b77]*/