bpo-41930: Add support for SQLite serialise/deserialise API (GH-26728)

Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com>
Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com>
This commit is contained in:
Erlend Egeberg Aasland 2022-04-05 16:15:25 +02:00 committed by GitHub
parent aa0f056a00
commit a7551247e7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 435 additions and 1 deletions

View file

@ -748,6 +748,44 @@ Connection Objects
.. versionadded:: 3.11
.. method:: serialize(*, name="main")
This method serializes a database into a :class:`bytes` object. For an
ordinary on-disk database file, the serialization is just a copy of the
disk file. For an in-memory database or a "temp" database, the
serialization is the same sequence of bytes which would be written to
disk if that database were backed up to disk.
*name* is the database to be serialized, and defaults to the main
database.
.. note::
This method is only available if the underlying SQLite library has the
serialize API.
.. versionadded:: 3.11
.. method:: deserialize(data, /, *, name="main")
This method causes the database connection to disconnect from database
*name*, and reopen *name* as an in-memory database based on the
serialization contained in *data*. Deserialization will raise
:exc:`OperationalError` if the database connection is currently involved
in a read transaction or a backup operation. :exc:`OverflowError` will be
raised if ``len(data)`` is larger than ``2**63 - 1``, and
:exc:`DatabaseError` will be raised if *data* does not contain a valid
SQLite database.
.. note::
This method is only available if the underlying SQLite library has the
deserialize API.
.. versionadded:: 3.11
.. _sqlite3-cursor-objects:
Cursor Objects

View file

@ -366,6 +366,11 @@ sqlite3
Instead we leave it to the SQLite library to handle these cases.
(Contributed by Erlend E. Aasland in :issue:`44092`.)
* Add :meth:`~sqlite3.Connection.serialize` and
:meth:`~sqlite3.Connection.deserialize` to :class:`sqlite3.Connection` for
serializing and deserializing databases.
(Contributed by Erlend E. Aasland in :issue:`41930`.)
sys
---

View file

@ -29,6 +29,7 @@
from test.support import (
SHORT_TIMEOUT,
bigmemtest,
check_disallow_instantiation,
threading_helper,
)
@ -603,6 +604,56 @@ def test_uninit_operations(self):
func)
# Tests for Connection.serialize() and Connection.deserialize().  The whole
# class is skipped when the sqlite3 module was built without the serialize API.
@unittest.skipUnless(hasattr(sqlite.Connection, "serialize"),
                     "Needs SQLite serialize API")
class SerializeTests(unittest.TestCase):
    # Round-trip: serialize a database, destroy its contents, then restore
    # them by deserializing the saved image.
    def test_serialize_deserialize(self):
        with memory_database() as cx:
            with cx:
                cx.execute("create table t(t)")
            data = cx.serialize()
            self.assertEqual(len(data), 8192)

            # Remove test table, verify that it was removed.
            with cx:
                cx.execute("drop table t")
            regex = "no such table"
            with self.assertRaisesRegex(sqlite.OperationalError, regex):
                cx.execute("select t from t")

            # Deserialize and verify that test table is restored.
            cx.deserialize(data)
            cx.execute("select t from t")

    # Arguments that are not contiguous bytes-like objects must be rejected
    # before anything is handed to SQLite.
    def test_deserialize_wrong_args(self):
        dataset = (
            (BufferError, memoryview(b"blob")[::2]),
            (TypeError, []),
            (TypeError, 1),
            (TypeError, None),
        )
        for exc, arg in dataset:
            with self.subTest(exc=exc, arg=arg):
                with memory_database() as cx:
                    self.assertRaises(exc, cx.deserialize, arg)

    def test_deserialize_corrupt_database(self):
        with memory_database() as cx:
            regex = "file is not a database"
            with self.assertRaisesRegex(sqlite.DatabaseError, regex):
                cx.deserialize(b"\0\1\3")
                # SQLite does not generate an error until you try to query the
                # deserialized database.
                cx.execute("create table fail(f)")

    # bigmemtest invokes the decorated test with the requested size as an
    # extra positional argument, so the method must accept `size`.
    @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
    @bigmemtest(size=2**63, memuse=3, dry_run=False)
    def test_deserialize_too_much_data_64bit(self, size):
        with memory_database() as cx:
            with self.assertRaisesRegex(OverflowError, "'data' is too large"):
                cx.deserialize(b"b" * size)
class OpenTests(unittest.TestCase):
_sql = "create table test(id integer)"
@ -1030,6 +1081,10 @@ def test_check_connection_thread(self):
lambda: self.con.setlimit(sqlite.SQLITE_LIMIT_LENGTH, -1),
lambda: self.con.getlimit(sqlite.SQLITE_LIMIT_LENGTH),
]
if hasattr(sqlite.Connection, "serialize"):
fns.append(lambda: self.con.serialize())
fns.append(lambda: self.con.deserialize(b""))
for fn in fns:
with self.subTest(fn=fn):
self._run_test(fn)

View file

@ -0,0 +1,3 @@
Add :meth:`~sqlite3.Connection.serialize` and
:meth:`~sqlite3.Connection.deserialize` support to :mod:`sqlite3`. Patch by
Erlend E. Aasland.

View file

@ -693,6 +693,156 @@ exit:
return return_value;
}
/* Argument Clinic generated glue for Connection.serialize().  Compiled only
 * when PY_SQLITE_HAVE_SERIALIZE is defined.  This is generated output:
 * change the clinic input next to serialize_impl() and regenerate rather
 * than editing by hand. */
#if defined(PY_SQLITE_HAVE_SERIALIZE)
/* Docstring exposed as Connection.serialize.__doc__. */
PyDoc_STRVAR(serialize__doc__,
"serialize($self, /, *, name=\'main\')\n"
"--\n"
"\n"
"Serialize a database into a byte string.\n"
"\n"
" name\n"
" Which database to serialize.\n"
"\n"
"For an ordinary on-disk database file, the serialization is just a copy of the\n"
"disk file. For an in-memory database or a \"temp\" database, the serialization is\n"
"the same sequence of bytes which would be written to disk if that database\n"
"were backed up to disk.");
/* Method-table entry; expands to nothing (see the fallback #define at the end
 * of this header) when the serialize API is unavailable. */
#define SERIALIZE_METHODDEF \
{"serialize", (PyCFunction)(void(*)(void))serialize, METH_FASTCALL|METH_KEYWORDS, serialize__doc__},
/* Hand-written implementation that receives the already-parsed arguments. */
static PyObject *
serialize_impl(pysqlite_Connection *self, const char *name);
/* Parse the fastcall arguments of Connection.serialize(*, name="main") and
 * forward them to serialize_impl().  Returns NULL with an exception set when
 * argument parsing fails; otherwise returns whatever serialize_impl()
 * returns. */
static PyObject *
serialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"name", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "serialize", 0};
PyObject *argsbuf[1];
/* Number of arguments actually supplied by the caller (positional + keyword). */
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
/* Default used when the caller does not pass `name`. */
const char *name = "main";
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 0, 0, argsbuf);
if (!args) {
goto exit;
}
/* Nothing was passed: keep the default database name. */
if (!noptargs) {
goto skip_optional_kwonly;
}
if (!PyUnicode_Check(args[0])) {
_PyArg_BadArgument("serialize", "argument 'name'", "str", args[0]);
goto exit;
}
Py_ssize_t name_length;
name = PyUnicode_AsUTF8AndSize(args[0], &name_length);
if (name == NULL) {
goto exit;
}
/* strlen() stops at the first NUL; a mismatch with the reported length means
 * the str contains an embedded NUL and would be truncated as a C string. */
if (strlen(name) != (size_t)name_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
skip_optional_kwonly:
return_value = serialize_impl(self, name);
exit:
return return_value;
}
#endif /* defined(PY_SQLITE_HAVE_SERIALIZE) */
/* Argument Clinic generated glue for Connection.deserialize().  Compiled only
 * when PY_SQLITE_HAVE_SERIALIZE is defined.  This is generated output:
 * change the clinic input next to deserialize_impl() and regenerate rather
 * than editing by hand. */
#if defined(PY_SQLITE_HAVE_SERIALIZE)
/* Docstring exposed as Connection.deserialize.__doc__. */
PyDoc_STRVAR(deserialize__doc__,
"deserialize($self, data, /, *, name=\'main\')\n"
"--\n"
"\n"
"Load a serialized database.\n"
"\n"
" data\n"
" The serialized database content.\n"
" name\n"
" Which database to reopen with the deserialization.\n"
"\n"
"The deserialize interface causes the database connection to disconnect from the\n"
"target database, and then reopen it as an in-memory database based on the given\n"
"serialized data.\n"
"\n"
"The deserialize interface will fail with SQLITE_BUSY if the database is\n"
"currently in a read transaction or is involved in a backup operation.");
/* Method-table entry; expands to nothing (see the fallback #define at the end
 * of this header) when the serialize API is unavailable. */
#define DESERIALIZE_METHODDEF \
{"deserialize", (PyCFunction)(void(*)(void))deserialize, METH_FASTCALL|METH_KEYWORDS, deserialize__doc__},
/* Hand-written implementation that receives the already-parsed arguments. */
static PyObject *
deserialize_impl(pysqlite_Connection *self, Py_buffer *data,
const char *name);
/* Parse the fastcall arguments of Connection.deserialize(data, /, *,
 * name="main") and forward them to deserialize_impl().  `data` may be a str
 * or an object exporting a C-contiguous buffer.  Returns NULL with an
 * exception set when argument parsing fails.  Any buffer acquired here is
 * released before returning, on both the success and the error path. */
static PyObject *
deserialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"", "name", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "deserialize", 0};
PyObject *argsbuf[2];
/* Number of supplied arguments beyond the one required `data` argument. */
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
/* data.obj stays NULL until a buffer is acquired; the cleanup code at `exit`
 * relies on that to decide whether a release is needed. */
Py_buffer data = {NULL, NULL};
/* Default used when the caller does not pass `name`. */
const char *name = "main";
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
if (!args) {
goto exit;
}
/* str input: expose its UTF-8 representation through `data`. */
if (PyUnicode_Check(args[0])) {
Py_ssize_t len;
const char *ptr = PyUnicode_AsUTF8AndSize(args[0], &len);
if (ptr == NULL) {
goto exit;
}
/* NOTE(review): the return value of PyBuffer_FillInfo() is not checked. */
PyBuffer_FillInfo(&data, args[0], (void *)ptr, len, 1, 0);
}
else { /* any bytes-like object */
if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
goto exit;
}
if (!PyBuffer_IsContiguous(&data, 'C')) {
_PyArg_BadArgument("deserialize", "argument 1", "contiguous buffer", args[0]);
goto exit;
}
}
/* Only `data` was passed: keep the default database name. */
if (!noptargs) {
goto skip_optional_kwonly;
}
if (!PyUnicode_Check(args[1])) {
_PyArg_BadArgument("deserialize", "argument 'name'", "str", args[1]);
goto exit;
}
Py_ssize_t name_length;
name = PyUnicode_AsUTF8AndSize(args[1], &name_length);
if (name == NULL) {
goto exit;
}
/* strlen() stops at the first NUL; a mismatch with the reported length means
 * the str contains an embedded NUL and would be truncated as a C string. */
if (strlen(name) != (size_t)name_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
skip_optional_kwonly:
return_value = deserialize_impl(self, &data, name);
exit:
/* Cleanup for data */
if (data.obj) {
PyBuffer_Release(&data);
}
return return_value;
}
#endif /* defined(PY_SQLITE_HAVE_SERIALIZE) */
PyDoc_STRVAR(pysqlite_connection_enter__doc__,
"__enter__($self, /)\n"
"--\n"
@ -832,4 +982,12 @@ exit:
#ifndef PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
#define PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
#endif /* !defined(PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF) */
/*[clinic end generated code: output=176c9095219b17c4 input=a9049054013a1b77]*/
#ifndef SERIALIZE_METHODDEF
#define SERIALIZE_METHODDEF
#endif /* !defined(SERIALIZE_METHODDEF) */
#ifndef DESERIALIZE_METHODDEF
#define DESERIALIZE_METHODDEF
#endif /* !defined(DESERIALIZE_METHODDEF) */
/*[clinic end generated code: output=d965a68f9229a56c input=a9049054013a1b77]*/

View file

@ -1818,6 +1818,125 @@ pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
Py_RETURN_NONE;
}
#ifdef PY_SQLITE_HAVE_SERIALIZE
/*[clinic input]
_sqlite3.Connection.serialize as serialize
*
name: str = "main"
Which database to serialize.
Serialize a database into a byte string.
For an ordinary on-disk database file, the serialization is just a copy of the
disk file. For an in-memory database or a "temp" database, the serialization is
the same sequence of bytes which would be written to disk if that database
were backed up to disk.
[clinic start generated code]*/
static PyObject *
serialize_impl(pysqlite_Connection *self, const char *name)
/*[clinic end generated code: output=97342b0e55239dd3 input=d2eb5194a65abe2b]*/
{
    /* Serialize database `name` of this connection into a new bytes object.
     * Returns NULL with an exception set on failure. */

    /* Bail out when called from the wrong thread or on an unusable
     * connection; the check helpers are expected to set the exception. */
    if (!pysqlite_check_thread(self)) {
        return NULL;
    }
    if (!pysqlite_check_connection(self)) {
        return NULL;
    }

    sqlite3_int64 nbytes;
    const char *blob;
    int must_free = 0;  /* set once SQLite hands us a private copy */

    Py_BEGIN_ALLOW_THREADS
    /* First ask for SQLite's own contiguous image (no allocation, memory
     * stays owned by SQLite).  If that is not available, request a copy
     * that we must release with sqlite3_free(). */
    blob = (const char *)sqlite3_serialize(self->db, name, &nbytes,
                                           SQLITE_SERIALIZE_NOCOPY);
    if (blob == NULL) {
        must_free = 1;
        blob = (const char *)sqlite3_serialize(self->db, name, &nbytes, 0);
    }
    Py_END_ALLOW_THREADS

    if (blob == NULL) {
        PyErr_Format(self->OperationalError, "unable to serialize '%s'",
                     name);
        return NULL;
    }

    /* PyBytes_FromStringAndSize() copies, so the SQLite buffer can be
     * released right away regardless of whether the copy succeeded. */
    PyObject *result = PyBytes_FromStringAndSize(blob, nbytes);
    if (must_free) {
        sqlite3_free((void *)blob);
    }
    return result;
}
/*[clinic input]
_sqlite3.Connection.deserialize as deserialize
data: Py_buffer(accept={buffer, str})
The serialized database content.
/
*
name: str = "main"
Which database to reopen with the deserialization.
Load a serialized database.
The deserialize interface causes the database connection to disconnect from the
target database, and then reopen it as an in-memory database based on the given
serialized data.
The deserialize interface will fail with SQLITE_BUSY if the database is
currently in a read transaction or is involved in a backup operation.
[clinic start generated code]*/
static PyObject *
deserialize_impl(pysqlite_Connection *self, Py_buffer *data,
                 const char *name)
/*[clinic end generated code: output=e394c798b98bad89 input=1be4ca1faacf28f2]*/
{
    /* Replace database `name` of this connection with an in-memory database
     * initialised from the bytes in `data`.  Returns None on success, or
     * NULL with an exception set: OverflowError if `data` is too large,
     * MemoryError if the copy cannot be allocated, or the exception chosen
     * by _pysqlite_seterror() when SQLite rejects the request. */
    if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) {
        return NULL;
    }

    /* Transfer ownership of the buffer to SQLite:
     * - Copy the caller's data into memory obtained from SQLite's allocator
     * - Tell SQLite to free buffer memory
     * - Tell SQLite that it is permitted to grow the resulting database
     *
     * Make sure we don't overflow sqlite3_deserialize(); it accepts a signed
     * 64-bit int as its data size argument.
     *
     * We can safely use sqlite3_malloc64 here, since it was introduced before
     * the serialize APIs.
     */
    if (data->len > 9223372036854775807) {  // (1 << 63) - 1
        PyErr_SetString(PyExc_OverflowError, "'data' is too large");
        return NULL;
    }

    const sqlite3_int64 size = (sqlite3_int64)data->len;

    /* sqlite3_malloc64(0) returns NULL by contract, which would be
     * indistinguishable from an allocation failure and turn empty input into
     * a bogus MemoryError.  Always request at least one byte; the real image
     * length is still passed to SQLite separately from the capacity. */
    const sqlite3_int64 capacity = (size > 0) ? size : 1;
    unsigned char *buf = sqlite3_malloc64((sqlite3_uint64)capacity);
    if (buf == NULL) {
        return PyErr_NoMemory();
    }

    const unsigned int flags = SQLITE_DESERIALIZE_FREEONCLOSE |
                               SQLITE_DESERIALIZE_RESIZEABLE;
    int rc;
    Py_BEGIN_ALLOW_THREADS
    if (size > 0) {
        (void)memcpy(buf, data->buf, (size_t)size);
    }
    /* With FREEONCLOSE set, SQLite owns `buf` from here on, including on
     * failure, so it must not be freed by us. */
    rc = sqlite3_deserialize(self->db, name, buf, size, capacity, flags);
    Py_END_ALLOW_THREADS

    if (rc != SQLITE_OK) {
        (void)_pysqlite_seterror(self->state, self->db);
        return NULL;
    }
    Py_RETURN_NONE;
}
#endif // PY_SQLITE_HAVE_SERIALIZE
/*[clinic input]
_sqlite3.Connection.__enter__ as pysqlite_connection_enter
@ -1971,6 +2090,8 @@ static PyMethodDef connection_methods[] = {
PYSQLITE_CONNECTION_SET_TRACE_CALLBACK_METHODDEF
SETLIMIT_METHODDEF
GETLIMIT_METHODDEF
SERIALIZE_METHODDEF
DESERIALIZE_METHODDEF
{NULL, NULL}
};

View file

@ -94,6 +94,7 @@
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(sqlite3Dir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>PY_SQLITE_HAVE_SERIALIZE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>

44
configure generated vendored
View file

@ -12902,6 +12902,50 @@ if test "x$ac_cv_lib_sqlite3_sqlite3_load_extension" = xyes; then :
else
have_sqlite3_load_extension=no
fi
# Probe whether -lsqlite3 provides sqlite3_serialize by link-testing a stub
# program.  The outcome is cached in ac_cv_lib_sqlite3_sqlite3_serialize, and
# PY_SQLITE_HAVE_SERIALIZE is appended to confdefs.h when the link succeeds.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sqlite3_serialize in -lsqlite3" >&5
$as_echo_n "checking for sqlite3_serialize in -lsqlite3... " >&6; }
if ${ac_cv_lib_sqlite3_sqlite3_serialize+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-lsqlite3 $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char sqlite3_serialize ();
int
main ()
{
return sqlite3_serialize ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_sqlite3_sqlite3_serialize=yes
else
ac_cv_lib_sqlite3_sqlite3_serialize=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_sqlite3_sqlite3_serialize" >&5
$as_echo "$ac_cv_lib_sqlite3_sqlite3_serialize" >&6; }
if test "x$ac_cv_lib_sqlite3_sqlite3_serialize" = xyes; then :
$as_echo "#define PY_SQLITE_HAVE_SERIALIZE 1" >>confdefs.h
fi

View file

@ -3605,6 +3605,12 @@ dnl hence CPPFLAGS instead of CFLAGS.
[have_sqlite3_load_extension=yes],
[have_sqlite3_load_extension=no]
)
AC_CHECK_LIB([sqlite3], [sqlite3_serialize], [
AC_DEFINE(
[PY_SQLITE_HAVE_SERIALIZE], [1],
[Define if SQLite was compiled with the serialize API]
)
])
], [
have_supported_sqlite3=no
])

View file

@ -1506,6 +1506,9 @@
/* Define to 1 to build the sqlite module with loadable extensions support. */
#undef PY_SQLITE_ENABLE_LOAD_EXTENSION
/* Define if SQLite was compiled with the serialize API */
#undef PY_SQLITE_HAVE_SERIALIZE
/* Default cipher suites list for ssl module. 1: Python's preferred selection,
2: leave OpenSSL defaults untouched, 0: custom string */
#undef PY_SSL_DEFAULT_CIPHERS