bpo-41930: Add support for SQLite serialise/deserialise API (GH-26728)

Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com>
Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com>
This commit is contained in:
Erlend Egeberg Aasland 2022-04-05 16:15:25 +02:00 committed by GitHub
parent aa0f056a00
commit a7551247e7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 435 additions and 1 deletions

View file

@ -748,6 +748,44 @@ Connection Objects
.. versionadded:: 3.11 .. versionadded:: 3.11
.. method:: serialize(*, name="main")
This method serializes a database into a :class:`bytes` object. For an
ordinary on-disk database file, the serialization is just a copy of the
disk file. For an in-memory database or a "temp" database, the
serialization is the same sequence of bytes which would be written to
disk if that database were backed up to disk.
*name* is the database to be serialized, and defaults to the main
database.
.. note::
This method is only available if the underlying SQLite library has the
serialize API.
.. versionadded:: 3.11
.. method:: deserialize(data, /, *, name="main")
This method causes the database connection to disconnect from database
*name*, and reopen *name* as an in-memory database based on the
serialization contained in *data*. Deserialization will raise
:exc:`OperationalError` if the database connection is currently involved
in a read transaction or a backup operation. :exc:`OverflowError` will be
raised if ``len(data)`` is larger than ``2**63 - 1``, and
:exc:`DatabaseError` will be raised if *data* does not contain a valid
SQLite database.
.. note::
This method is only available if the underlying SQLite library has the
deserialize API.
.. versionadded:: 3.11
.. _sqlite3-cursor-objects: .. _sqlite3-cursor-objects:
Cursor Objects Cursor Objects

View file

@ -366,6 +366,11 @@ sqlite3
Instead we leave it to the SQLite library to handle these cases. Instead we leave it to the SQLite library to handle these cases.
(Contributed by Erlend E. Aasland in :issue:`44092`.) (Contributed by Erlend E. Aasland in :issue:`44092`.)
* Add :meth:`~sqlite3.Connection.serialize` and
:meth:`~sqlite3.Connection.deserialize` to :class:`sqlite3.Connection` for
serializing and deserializing databases.
(Contributed by Erlend E. Aasland in :issue:`41930`.)
sys sys
--- ---

View file

@ -29,6 +29,7 @@
from test.support import ( from test.support import (
SHORT_TIMEOUT, SHORT_TIMEOUT,
bigmemtest,
check_disallow_instantiation, check_disallow_instantiation,
threading_helper, threading_helper,
) )
@ -603,6 +604,56 @@ def test_uninit_operations(self):
func) func)
# Tests for Connection.serialize() / Connection.deserialize().  The whole
# class is skipped when the sqlite3 module was built without these methods.
@unittest.skipUnless(hasattr(sqlite.Connection, "serialize"),
                     "Needs SQLite serialize API")
class SerializeTests(unittest.TestCase):
    # Round trip: serialize a database, drop its table, then restore the
    # table by deserializing the saved bytes.
    def test_serialize_deserialize(self):
        with memory_database() as cx:
            with cx:
                cx.execute("create table t(t)")
            data = cx.serialize()
            self.assertEqual(len(data), 8192)

            # Remove test table, verify that it was removed.
            with cx:
                cx.execute("drop table t")
            regex = "no such table"
            with self.assertRaisesRegex(sqlite.OperationalError, regex):
                cx.execute("select t from t")

            # Deserialize and verify that test table is restored.
            cx.deserialize(data)
            cx.execute("select t from t")

    # Arguments that are not usable as 'data' must be rejected with the
    # exception type paired with them in the table below.
    def test_deserialize_wrong_args(self):
        dataset = (
            (BufferError, memoryview(b"blob")[::2]),
            (TypeError, []),
            (TypeError, 1),
            (TypeError, None),
        )
        for exc, arg in dataset:
            with self.subTest(exc=exc, arg=arg):
                with memory_database() as cx:
                    self.assertRaises(exc, cx.deserialize, arg)

    # Bytes that do not form a valid database must surface as DatabaseError.
    def test_deserialize_corrupt_database(self):
        with memory_database() as cx:
            regex = "file is not a database"
            with self.assertRaisesRegex(sqlite.DatabaseError, regex):
                cx.deserialize(b"\0\1\3")
                # SQLite does not generate an error until you try to query the
                # deserialized database.
                cx.execute("create table fail(f)")

    # bigmemtest invokes the decorated test with the computed size as an
    # extra positional argument, so the method must accept 'size'.
    @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform')
    @bigmemtest(size=2**63, memuse=3, dry_run=False)
    def test_deserialize_too_much_data_64bit(self, size):
        with memory_database() as cx:
            with self.assertRaisesRegex(OverflowError, "'data' is too large"):
                cx.deserialize(b"b" * size)
class OpenTests(unittest.TestCase): class OpenTests(unittest.TestCase):
_sql = "create table test(id integer)" _sql = "create table test(id integer)"
@ -1030,6 +1081,10 @@ def test_check_connection_thread(self):
lambda: self.con.setlimit(sqlite.SQLITE_LIMIT_LENGTH, -1), lambda: self.con.setlimit(sqlite.SQLITE_LIMIT_LENGTH, -1),
lambda: self.con.getlimit(sqlite.SQLITE_LIMIT_LENGTH), lambda: self.con.getlimit(sqlite.SQLITE_LIMIT_LENGTH),
] ]
if hasattr(sqlite.Connection, "serialize"):
fns.append(lambda: self.con.serialize())
fns.append(lambda: self.con.deserialize(b""))
for fn in fns: for fn in fns:
with self.subTest(fn=fn): with self.subTest(fn=fn):
self._run_test(fn) self._run_test(fn)

View file

@ -0,0 +1,3 @@
Add :meth:`~sqlite3.Connection.serialize` and
:meth:`~sqlite3.Connection.deserialize` support to :mod:`sqlite3`. Patch by
Erlend E. Aasland.

View file

@ -693,6 +693,156 @@ exit:
return return_value; return return_value;
} }
#if defined(PY_SQLITE_HAVE_SERIALIZE)
/* Docstring exposed as Connection.serialize.__doc__. */
PyDoc_STRVAR(serialize__doc__,
"serialize($self, /, *, name=\'main\')\n"
"--\n"
"\n"
"Serialize a database into a byte string.\n"
"\n"
" name\n"
" Which database to serialize.\n"
"\n"
"For an ordinary on-disk database file, the serialization is just a copy of the\n"
"disk file. For an in-memory database or a \"temp\" database, the serialization is\n"
"the same sequence of bytes which would be written to disk if that database\n"
"were backed up to disk.");
/* Method table entry for serialize(); expands to nothing further down in this
 * file when PY_SQLITE_HAVE_SERIALIZE is not defined. */
#define SERIALIZE_METHODDEF \
{"serialize", (PyCFunction)(void(*)(void))serialize, METH_FASTCALL|METH_KEYWORDS, serialize__doc__},
static PyObject *
serialize_impl(pysqlite_Connection *self, const char *name);
/* Argument-parsing wrapper for Connection.serialize().
 *
 * Accepts no positional arguments and one optional keyword-only argument,
 * 'name', which must be a str without embedded null characters and defaults
 * to "main". On success the parsed name is forwarded to serialize_impl();
 * on a parsing error NULL is returned with an exception set.
 */
static PyObject *
serialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"name", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "serialize", 0};
PyObject *argsbuf[1];
/* Number of arguments supplied in total (there are no required ones). */
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
const char *name = "main";
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 0, 0, argsbuf);
if (!args) {
goto exit;
}
/* Nothing was passed: keep the default database name. */
if (!noptargs) {
goto skip_optional_kwonly;
}
if (!PyUnicode_Check(args[0])) {
_PyArg_BadArgument("serialize", "argument 'name'", "str", args[0]);
goto exit;
}
Py_ssize_t name_length;
name = PyUnicode_AsUTF8AndSize(args[0], &name_length);
if (name == NULL) {
goto exit;
}
/* A C-string length shorter than the reported UTF-8 length means the str
 * contains an embedded null and cannot be passed on as a C string. */
if (strlen(name) != (size_t)name_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
skip_optional_kwonly:
return_value = serialize_impl(self, name);
exit:
return return_value;
}
#endif /* defined(PY_SQLITE_HAVE_SERIALIZE) */
#if defined(PY_SQLITE_HAVE_SERIALIZE)
/* Docstring exposed as Connection.deserialize.__doc__. */
PyDoc_STRVAR(deserialize__doc__,
"deserialize($self, data, /, *, name=\'main\')\n"
"--\n"
"\n"
"Load a serialized database.\n"
"\n"
" data\n"
" The serialized database content.\n"
" name\n"
" Which database to reopen with the deserialization.\n"
"\n"
"The deserialize interface causes the database connection to disconnect from the\n"
"target database, and then reopen it as an in-memory database based on the given\n"
"serialized data.\n"
"\n"
"The deserialize interface will fail with SQLITE_BUSY if the database is\n"
"currently in a read transaction or is involved in a backup operation.");
/* Method table entry for deserialize(); expands to nothing further down in
 * this file when PY_SQLITE_HAVE_SERIALIZE is not defined. */
#define DESERIALIZE_METHODDEF \
{"deserialize", (PyCFunction)(void(*)(void))deserialize, METH_FASTCALL|METH_KEYWORDS, deserialize__doc__},
static PyObject *
deserialize_impl(pysqlite_Connection *self, Py_buffer *data,
const char *name);
/* Argument-parsing wrapper for Connection.deserialize().
 *
 * Accepts one required positional-only argument, 'data' (a str, which is
 * exposed through its UTF-8 encoding, or an object supporting the buffer
 * protocol), and one optional keyword-only argument, 'name', which must be a
 * str without embedded null characters and defaults to "main". On success
 * both are forwarded to deserialize_impl(); on a parsing error NULL is
 * returned with an exception set. The buffer acquired for 'data' is always
 * released before returning.
 */
static PyObject *
deserialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"", "name", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "deserialize", 0};
PyObject *argsbuf[2];
/* Number of arguments supplied beyond the one required positional. */
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
/* Starts out empty so the cleanup at 'exit' can tell whether a buffer was
 * actually acquired. */
Py_buffer data = {NULL, NULL};
const char *name = "main";
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
if (!args) {
goto exit;
}
if (PyUnicode_Check(args[0])) {
/* str input: view its UTF-8 representation as the data buffer. */
Py_ssize_t len;
const char *ptr = PyUnicode_AsUTF8AndSize(args[0], &len);
if (ptr == NULL) {
goto exit;
}
PyBuffer_FillInfo(&data, args[0], (void *)ptr, len, 1, 0);
}
else { /* any bytes-like object */
if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
goto exit;
}
if (!PyBuffer_IsContiguous(&data, 'C')) {
_PyArg_BadArgument("deserialize", "argument 1", "contiguous buffer", args[0]);
goto exit;
}
}
/* Only 'data' was passed: keep the default database name. */
if (!noptargs) {
goto skip_optional_kwonly;
}
if (!PyUnicode_Check(args[1])) {
_PyArg_BadArgument("deserialize", "argument 'name'", "str", args[1]);
goto exit;
}
Py_ssize_t name_length;
name = PyUnicode_AsUTF8AndSize(args[1], &name_length);
if (name == NULL) {
goto exit;
}
/* A C-string length shorter than the reported UTF-8 length means the str
 * contains an embedded null and cannot be passed on as a C string. */
if (strlen(name) != (size_t)name_length) {
PyErr_SetString(PyExc_ValueError, "embedded null character");
goto exit;
}
skip_optional_kwonly:
return_value = deserialize_impl(self, &data, name);
exit:
/* Cleanup for data */
if (data.obj) {
PyBuffer_Release(&data);
}
return return_value;
}
#endif /* defined(PY_SQLITE_HAVE_SERIALIZE) */
PyDoc_STRVAR(pysqlite_connection_enter__doc__, PyDoc_STRVAR(pysqlite_connection_enter__doc__,
"__enter__($self, /)\n" "__enter__($self, /)\n"
"--\n" "--\n"
@ -832,4 +982,12 @@ exit:
#ifndef PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF #ifndef PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
#define PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF #define PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF
#endif /* !defined(PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF) */ #endif /* !defined(PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF) */
/*[clinic end generated code: output=176c9095219b17c4 input=a9049054013a1b77]*/
#ifndef SERIALIZE_METHODDEF
#define SERIALIZE_METHODDEF
#endif /* !defined(SERIALIZE_METHODDEF) */
#ifndef DESERIALIZE_METHODDEF
#define DESERIALIZE_METHODDEF
#endif /* !defined(DESERIALIZE_METHODDEF) */
/*[clinic end generated code: output=d965a68f9229a56c input=a9049054013a1b77]*/

View file

@ -1818,6 +1818,125 @@ pysqlite_connection_create_collation_impl(pysqlite_Connection *self,
Py_RETURN_NONE; Py_RETURN_NONE;
} }
#ifdef PY_SQLITE_HAVE_SERIALIZE
/*[clinic input]
_sqlite3.Connection.serialize as serialize
*
name: str = "main"
Which database to serialize.
Serialize a database into a byte string.
For an ordinary on-disk database file, the serialization is just a copy of the
disk file. For an in-memory database or a "temp" database, the serialization is
the same sequence of bytes which would be written to disk if that database
were backed up to disk.
[clinic start generated code]*/
static PyObject *
serialize_impl(pysqlite_Connection *self, const char *name)
/*[clinic end generated code: output=97342b0e55239dd3 input=d2eb5194a65abe2b]*/
{
    if (!pysqlite_check_thread(self)) {
        return NULL;
    }
    if (!pysqlite_check_connection(self)) {
        return NULL;
    }

    /* Ask SQLite first for a pointer to its own contiguous image of the
     * database, which avoids a memory allocation.  If no such image exists,
     * ask again for a freshly allocated copy; that copy belongs to us and
     * has to be released with sqlite3_free() once it has been converted.
     */
    sqlite3_int64 nbytes;
    int owns_copy = 0;
    const char *image;

    Py_BEGIN_ALLOW_THREADS
    image = (const char *)sqlite3_serialize(self->db, name, &nbytes,
                                            SQLITE_SERIALIZE_NOCOPY);
    if (image == NULL) {
        owns_copy = 1;
        image = (const char *)sqlite3_serialize(self->db, name, &nbytes, 0);
    }
    Py_END_ALLOW_THREADS

    /* Both attempts failed: report it using the connection's
     * OperationalError type. */
    if (image == NULL) {
        PyErr_Format(self->OperationalError, "unable to serialize '%s'",
                     name);
        return NULL;
    }

    /* The bytes object holds its own copy, so an allocated image can be
     * freed whether or not the conversion succeeded. */
    PyObject *result = PyBytes_FromStringAndSize(image, nbytes);
    if (owns_copy) {
        sqlite3_free((void *)image);
    }
    return result;
}
/*[clinic input]
_sqlite3.Connection.deserialize as deserialize
data: Py_buffer(accept={buffer, str})
The serialized database content.
/
*
name: str = "main"
Which database to reopen with the deserialization.
Load a serialized database.
The deserialize interface causes the database connection to disconnect from the
target database, and then reopen it as an in-memory database based on the given
serialized data.
The deserialize interface will fail with SQLITE_BUSY if the database is
currently in a read transaction or is involved in a backup operation.
[clinic start generated code]*/
static PyObject *
deserialize_impl(pysqlite_Connection *self, Py_buffer *data,
                 const char *name)
/*[clinic end generated code: output=e394c798b98bad89 input=1be4ca1faacf28f2]*/
{
    if (!pysqlite_check_thread(self)) {
        return NULL;
    }
    if (!pysqlite_check_connection(self)) {
        return NULL;
    }

    /* The serialized image is handed over to SQLite:
     *   - the Python buffer is copied into memory obtained from SQLite,
     *   - SQLite is told to free that memory itself (FREEONCLOSE),
     *   - SQLite is allowed to grow the resulting database (RESIZEABLE).
     *
     * sqlite3_deserialize() takes the data size as a signed 64-bit integer,
     * so anything larger is rejected up front.
     *
     * sqlite3_malloc64() is safe to rely on here because it was introduced
     * before the serialize APIs.
     */
    if (data->len > 9223372036854775807) {  // (1 << 63) - 1
        PyErr_SetString(PyExc_OverflowError, "'data' is too large");
        return NULL;
    }

    const sqlite3_int64 nbytes = (sqlite3_int64)data->len;
    unsigned char *copy = sqlite3_malloc64(nbytes);
    if (copy == NULL) {
        return PyErr_NoMemory();
    }

    unsigned int flags = SQLITE_DESERIALIZE_FREEONCLOSE;
    flags |= SQLITE_DESERIALIZE_RESIZEABLE;

    /* Copy and deserialize with the GIL released. */
    int status;
    Py_BEGIN_ALLOW_THREADS
    (void)memcpy(copy, data->buf, data->len);
    status = sqlite3_deserialize(self->db, name, copy, nbytes, nbytes, flags);
    Py_END_ALLOW_THREADS

    if (status == SQLITE_OK) {
        Py_RETURN_NONE;
    }

    /* Translate the SQLite error state into a Python exception. */
    (void)_pysqlite_seterror(self->state, self->db);
    return NULL;
}
#endif // PY_SQLITE_HAVE_SERIALIZE
/*[clinic input] /*[clinic input]
_sqlite3.Connection.__enter__ as pysqlite_connection_enter _sqlite3.Connection.__enter__ as pysqlite_connection_enter
@ -1971,6 +2090,8 @@ static PyMethodDef connection_methods[] = {
PYSQLITE_CONNECTION_SET_TRACE_CALLBACK_METHODDEF PYSQLITE_CONNECTION_SET_TRACE_CALLBACK_METHODDEF
SETLIMIT_METHODDEF SETLIMIT_METHODDEF
GETLIMIT_METHODDEF GETLIMIT_METHODDEF
SERIALIZE_METHODDEF
DESERIALIZE_METHODDEF
{NULL, NULL} {NULL, NULL}
}; };

View file

@ -94,6 +94,7 @@
<ItemDefinitionGroup> <ItemDefinitionGroup>
<ClCompile> <ClCompile>
<AdditionalIncludeDirectories>$(sqlite3Dir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>$(sqlite3Dir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>PY_SQLITE_HAVE_SERIALIZE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile> </ClCompile>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup> <ItemGroup>

44
configure generated vendored
View file

@ -12902,6 +12902,50 @@ if test "x$ac_cv_lib_sqlite3_sqlite3_load_extension" = xyes; then :
else else
have_sqlite3_load_extension=no have_sqlite3_load_extension=no
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for sqlite3_serialize in -lsqlite3" >&5
$as_echo_n "checking for sqlite3_serialize in -lsqlite3... " >&6; }
if ${ac_cv_lib_sqlite3_sqlite3_serialize+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-lsqlite3 $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char sqlite3_serialize ();
int
main ()
{
return sqlite3_serialize ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_sqlite3_sqlite3_serialize=yes
else
ac_cv_lib_sqlite3_sqlite3_serialize=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_sqlite3_sqlite3_serialize" >&5
$as_echo "$ac_cv_lib_sqlite3_sqlite3_serialize" >&6; }
if test "x$ac_cv_lib_sqlite3_sqlite3_serialize" = xyes; then :
$as_echo "#define PY_SQLITE_HAVE_SERIALIZE 1" >>confdefs.h
fi fi

View file

@ -3605,6 +3605,12 @@ dnl hence CPPFLAGS instead of CFLAGS.
[have_sqlite3_load_extension=yes], [have_sqlite3_load_extension=yes],
[have_sqlite3_load_extension=no] [have_sqlite3_load_extension=no]
) )
AC_CHECK_LIB([sqlite3], [sqlite3_serialize], [
AC_DEFINE(
[PY_SQLITE_HAVE_SERIALIZE], [1],
[Define if SQLite was compiled with the serialize API]
)
])
], [ ], [
have_supported_sqlite3=no have_supported_sqlite3=no
]) ])

View file

@ -1506,6 +1506,9 @@
/* Define to 1 to build the sqlite module with loadable extensions support. */ /* Define to 1 to build the sqlite module with loadable extensions support. */
#undef PY_SQLITE_ENABLE_LOAD_EXTENSION #undef PY_SQLITE_ENABLE_LOAD_EXTENSION
/* Define if SQLite was compiled with the serialize API */
#undef PY_SQLITE_HAVE_SERIALIZE
/* Default cipher suites list for ssl module. 1: Python's preferred selection, /* Default cipher suites list for ssl module. 1: Python's preferred selection,
2: leave OpenSSL defaults untouched, 0: custom string */ 2: leave OpenSSL defaults untouched, 0: custom string */
#undef PY_SSL_DEFAULT_CIPHERS #undef PY_SSL_DEFAULT_CIPHERS