bpo-42862: Use functools.lru_cache iso. _sqlite.Cache in sqlite3 module (GH-24203)

Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
This commit is contained in:
Erlend Egeberg Aasland 2021-06-03 21:59:26 +02:00 committed by GitHub
parent f3fa63ec75
commit f461a7fc3f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 77 additions and 438 deletions

View file

@ -213,7 +213,7 @@ Module functions and constants
The :mod:`sqlite3` module internally uses a statement cache to avoid SQL parsing
overhead. If you want to explicitly set the number of statements that are cached
for the connection, you can set the *cached_statements* parameter. The currently
implemented default is to cache 100 statements.
implemented default is to cache 128 statements.
If *uri* is true, *database* is interpreted as a URI. This allows you
to specify options. For example, to open a database in read-only mode

View file

@ -0,0 +1,4 @@
:mod:`sqlite3` now utilizes :meth:`functools.lru_cache` to implement the
connection statement cache. As a small optimisation, the default
statement cache size has been increased from 100 to 128.
Patch by Erlend E. Aasland.

View file

@ -1,344 +0,0 @@
/* cache .c - a LRU cache
*
* Copyright (C) 2004-2010 Gerhard Häring <gh@ghaering.de>
*
* This file is part of pysqlite.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
#include "cache.h"
#include <limits.h>
/* only used internally */
static pysqlite_Node *
pysqlite_new_node(PyObject *key, PyObject *data)
{
pysqlite_Node* node;
node = (pysqlite_Node*) (pysqlite_NodeType->tp_alloc(pysqlite_NodeType, 0));
if (!node) {
return NULL;
}
node->key = Py_NewRef(key);
node->data = Py_NewRef(data);
node->prev = NULL;
node->next = NULL;
return node;
}
static int
node_traverse(pysqlite_Node *self, visitproc visit, void *arg)
{
Py_VISIT(Py_TYPE(self));
Py_VISIT(self->key);
Py_VISIT(self->data);
return 0;
}
static int
node_clear(pysqlite_Node *self)
{
Py_CLEAR(self->key);
Py_CLEAR(self->data);
return 0;
}
static void
pysqlite_node_dealloc(pysqlite_Node *self)
{
PyTypeObject *tp = Py_TYPE(self);
PyObject_GC_UnTrack(self);
tp->tp_clear((PyObject *)self);
tp->tp_free(self);
Py_DECREF(tp);
}
static int
pysqlite_cache_init(pysqlite_Cache *self, PyObject *args, PyObject *kwargs)
{
PyObject* factory;
int size = 10;
self->factory = NULL;
if (!PyArg_ParseTuple(args, "O|i", &factory, &size)) {
return -1;
}
/* minimum cache size is 5 entries */
if (size < 5) {
size = 5;
}
self->size = size;
self->first = NULL;
self->last = NULL;
self->mapping = PyDict_New();
if (!self->mapping) {
return -1;
}
self->factory = Py_NewRef(factory);
return 0;
}
static int
cache_traverse(pysqlite_Cache *self, visitproc visit, void *arg)
{
Py_VISIT(Py_TYPE(self));
Py_VISIT(self->mapping);
Py_VISIT(self->factory);
pysqlite_Node *node = self->first;
while (node) {
Py_VISIT(node);
node = node->next;
}
return 0;
}
static int
cache_clear(pysqlite_Cache *self)
{
Py_CLEAR(self->mapping);
Py_CLEAR(self->factory);
/* iterate over all nodes and deallocate them */
pysqlite_Node *node = self->first;
self->first = NULL;
while (node) {
pysqlite_Node *delete_node = node;
node = node->next;
Py_CLEAR(delete_node);
}
return 0;
}
static void
pysqlite_cache_dealloc(pysqlite_Cache *self)
{
if (!self->factory) {
/* constructor failed, just get out of here */
return;
}
PyObject_GC_UnTrack(self);
PyTypeObject *tp = Py_TYPE(self);
tp->tp_clear((PyObject *)self);
tp->tp_free(self);
Py_DECREF(tp);
}
PyObject* pysqlite_cache_get(pysqlite_Cache* self, PyObject* key)
{
pysqlite_Node* node;
pysqlite_Node* ptr;
PyObject* data;
node = (pysqlite_Node*)PyDict_GetItemWithError(self->mapping, key);
if (node) {
/* an entry for this key already exists in the cache */
/* increase usage counter of the node found */
if (node->count < LONG_MAX) {
node->count++;
}
/* if necessary, reorder entries in the cache by swapping positions */
if (node->prev && node->count > node->prev->count) {
ptr = node->prev;
while (ptr->prev && node->count > ptr->prev->count) {
ptr = ptr->prev;
}
if (node->next) {
node->next->prev = node->prev;
} else {
self->last = node->prev;
}
if (node->prev) {
node->prev->next = node->next;
}
if (ptr->prev) {
ptr->prev->next = node;
} else {
self->first = node;
}
node->next = ptr;
node->prev = ptr->prev;
if (!node->prev) {
self->first = node;
}
ptr->prev = node;
}
}
else if (PyErr_Occurred()) {
return NULL;
}
else {
/* There is no entry for this key in the cache, yet. We'll insert a new
* entry in the cache, and make space if necessary by throwing the
* least used item out of the cache. */
if (PyDict_GET_SIZE(self->mapping) == self->size) {
if (self->last) {
node = self->last;
if (PyDict_DelItem(self->mapping, self->last->key) != 0) {
return NULL;
}
if (node->prev) {
node->prev->next = NULL;
}
self->last = node->prev;
node->prev = NULL;
Py_DECREF(node);
}
}
/* We cannot replace this by PyObject_CallOneArg() since
* PyObject_CallFunction() has a special case when using a
* single tuple as argument. */
data = PyObject_CallFunction(self->factory, "O", key);
if (!data) {
return NULL;
}
node = pysqlite_new_node(key, data);
if (!node) {
return NULL;
}
node->prev = self->last;
Py_DECREF(data);
if (PyDict_SetItem(self->mapping, key, (PyObject*)node) != 0) {
Py_DECREF(node);
return NULL;
}
if (self->last) {
self->last->next = node;
} else {
self->first = node;
}
self->last = node;
}
return Py_NewRef(node->data);
}
static PyObject *
pysqlite_cache_display(pysqlite_Cache *self, PyObject *args)
{
pysqlite_Node* ptr;
PyObject* prevkey;
PyObject* nextkey;
PyObject* display_str;
ptr = self->first;
while (ptr) {
if (ptr->prev) {
prevkey = ptr->prev->key;
} else {
prevkey = Py_None;
}
if (ptr->next) {
nextkey = ptr->next->key;
} else {
nextkey = Py_None;
}
display_str = PyUnicode_FromFormat("%S <- %S -> %S\n",
prevkey, ptr->key, nextkey);
if (!display_str) {
return NULL;
}
PyObject_Print(display_str, stdout, Py_PRINT_RAW);
Py_DECREF(display_str);
ptr = ptr->next;
}
Py_RETURN_NONE;
}
static PyType_Slot node_slots[] = {
{Py_tp_dealloc, pysqlite_node_dealloc},
{Py_tp_traverse, node_traverse},
{Py_tp_clear, node_clear},
{0, NULL},
};
static PyType_Spec node_spec = {
.name = MODULE_NAME ".Node",
.basicsize = sizeof(pysqlite_Node),
.flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
.slots = node_slots,
};
PyTypeObject *pysqlite_NodeType = NULL;
static PyMethodDef cache_methods[] = {
{"get", (PyCFunction)pysqlite_cache_get, METH_O,
PyDoc_STR("Gets an entry from the cache or calls the factory function to produce one.")},
{"display", (PyCFunction)pysqlite_cache_display, METH_NOARGS,
PyDoc_STR("For debugging only.")},
{NULL, NULL}
};
static PyType_Slot cache_slots[] = {
{Py_tp_dealloc, pysqlite_cache_dealloc},
{Py_tp_methods, cache_methods},
{Py_tp_init, pysqlite_cache_init},
{Py_tp_traverse, cache_traverse},
{Py_tp_clear, cache_clear},
{0, NULL},
};
static PyType_Spec cache_spec = {
.name = MODULE_NAME ".Cache",
.basicsize = sizeof(pysqlite_Cache),
.flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
.slots = cache_slots,
};
PyTypeObject *pysqlite_CacheType = NULL;
int
pysqlite_cache_setup_types(PyObject *mod)
{
pysqlite_NodeType = (PyTypeObject *)PyType_FromModuleAndSpec(mod, &node_spec, NULL);
if (pysqlite_NodeType == NULL) {
return -1;
}
pysqlite_CacheType = (PyTypeObject *)PyType_FromModuleAndSpec(mod, &cache_spec, NULL);
if (pysqlite_CacheType == NULL) {
return -1;
}
return 0;
}

View file

@ -1,64 +0,0 @@
/* cache.h - definitions for the LRU cache
*
* Copyright (C) 2004-2010 Gerhard Häring <gh@ghaering.de>
*
* This file is part of pysqlite.
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
#ifndef PYSQLITE_CACHE_H
#define PYSQLITE_CACHE_H
#include "module.h"
/* The LRU cache is implemented as a combination of a doubly-linked with a
* dictionary. The list items are of type 'Node' and the dictionary has the
* nodes as values. */
typedef struct _pysqlite_Node
{
PyObject_HEAD
PyObject* key;
PyObject* data;
long count;
struct _pysqlite_Node* prev;
struct _pysqlite_Node* next;
} pysqlite_Node;
typedef struct
{
PyObject_HEAD
int size;
/* a dictionary mapping keys to Node entries */
PyObject* mapping;
/* the factory callable */
PyObject* factory;
pysqlite_Node* first;
pysqlite_Node* last;
} pysqlite_Cache;
extern PyTypeObject *pysqlite_NodeType;
extern PyTypeObject *pysqlite_CacheType;
PyObject* pysqlite_cache_get(pysqlite_Cache* self, PyObject* args);
int pysqlite_cache_setup_types(PyObject *module);
#endif

View file

@ -21,7 +21,6 @@
* 3. This notice may not be removed or altered from any source distribution.
*/
#include "cache.h"
#include "module.h"
#include "structmember.h" // PyMemberDef
#include "connection.h"
@ -57,6 +56,26 @@ static const char * const begin_statements[] = {
static int pysqlite_connection_set_isolation_level(pysqlite_Connection* self, PyObject* isolation_level, void *Py_UNUSED(ignored));
static void _pysqlite_drop_unused_cursor_references(pysqlite_Connection* self);
static PyObject *
new_statement_cache(pysqlite_Connection *self, int maxsize)
{
PyObject *args[] = { PyLong_FromLong(maxsize), };
if (args[0] == NULL) {
return NULL;
}
pysqlite_state *state = pysqlite_get_state(NULL);
PyObject *inner = PyObject_Vectorcall(state->lru_cache, args, 1, NULL);
Py_DECREF(args[0]);
if (inner == NULL) {
return NULL;
}
args[0] = (PyObject *)self; // Borrowed ref.
PyObject *res = PyObject_Vectorcall(inner, args, 1, NULL);
Py_DECREF(inner);
return res;
}
static int
pysqlite_connection_init(pysqlite_Connection *self, PyObject *args,
PyObject *kwargs)
@ -73,7 +92,7 @@ pysqlite_connection_init(pysqlite_Connection *self, PyObject *args,
PyObject* isolation_level = NULL;
PyObject* factory = NULL;
int check_same_thread = 1;
int cached_statements = 100;
int cached_statements = 128;
int uri = 0;
double timeout = 5.0;
int rc;
@ -134,7 +153,10 @@ pysqlite_connection_init(pysqlite_Connection *self, PyObject *args,
}
Py_DECREF(isolation_level);
self->statement_cache = (pysqlite_Cache*)PyObject_CallFunction((PyObject*)pysqlite_CacheType, "Oi", self, cached_statements);
self->statement_cache = new_statement_cache(self, cached_statements);
if (self->statement_cache == NULL) {
return -1;
}
if (PyErr_Occurred()) {
return -1;
}

View file

@ -28,7 +28,6 @@
#include "pythread.h"
#include "structmember.h"
#include "cache.h"
#include "module.h"
#include "sqlite3.h"
@ -64,7 +63,7 @@ typedef struct
/* thread identification of the thread the connection was created in */
unsigned long thread_ident;
pysqlite_Cache* statement_cache;
PyObject *statement_cache;
/* Lists of weak references to statements and cursors used within this connection */
PyObject* statements;

View file

@ -415,6 +415,14 @@ static int check_cursor(pysqlite_Cursor* cur)
return pysqlite_check_thread(cur->connection) && pysqlite_check_connection(cur->connection);
}
static PyObject *
get_statement_from_cache(pysqlite_Cursor *self, PyObject *operation)
{
PyObject *args[] = { operation, };
PyObject *cache = self->connection->statement_cache;
return PyObject_Vectorcall(cache, args, 1, NULL);
}
static PyObject *
_pysqlite_query_execute(pysqlite_Cursor* self, int multiple, PyObject* operation, PyObject* second_argument)
{
@ -423,7 +431,6 @@ _pysqlite_query_execute(pysqlite_Cursor* self, int multiple, PyObject* operation
PyObject* parameters = NULL;
int i;
int rc;
PyObject* func_args;
PyObject* result;
int numcols;
PyObject* column_name;
@ -485,22 +492,12 @@ _pysqlite_query_execute(pysqlite_Cursor* self, int multiple, PyObject* operation
Py_SETREF(self->description, Py_None);
self->rowcount = 0L;
func_args = PyTuple_New(1);
if (!func_args) {
goto error;
}
if (PyTuple_SetItem(func_args, 0, Py_NewRef(operation)) != 0) {
goto error;
}
if (self->statement) {
(void)pysqlite_statement_reset(self->statement);
}
Py_XSETREF(self->statement,
(pysqlite_Statement *)pysqlite_cache_get(self->connection->statement_cache, func_args));
Py_DECREF(func_args);
PyObject *stmt = get_statement_from_cache(self, operation);
Py_XSETREF(self->statement, (pysqlite_Statement *)stmt);
if (!self->statement) {
goto error;
}

View file

@ -24,7 +24,6 @@
#include "connection.h"
#include "statement.h"
#include "cursor.h"
#include "cache.h"
#include "prepare_protocol.h"
#include "microprotocols.h"
#include "row.h"
@ -56,6 +55,14 @@ PyObject* _pysqlite_converters = NULL;
int _pysqlite_enable_callback_tracebacks = 0;
int pysqlite_BaseTypeAdapted = 0;
pysqlite_state pysqlite_global_state;
pysqlite_state *
pysqlite_get_state(PyObject *Py_UNUSED(module))
{
return &pysqlite_global_state;
}
static PyObject* module_connect(PyObject* self, PyObject* args, PyObject*
kwargs)
{
@ -261,6 +268,23 @@ static int converters_init(PyObject* module)
return res;
}
static int
load_functools_lru_cache(PyObject *module)
{
PyObject *functools = PyImport_ImportModule("functools");
if (functools == NULL) {
return -1;
}
pysqlite_state *state = pysqlite_get_state(module);
state->lru_cache = PyObject_GetAttrString(functools, "lru_cache");
Py_DECREF(functools);
if (state->lru_cache == NULL) {
return -1;
}
return 0;
}
static PyMethodDef module_methods[] = {
{"connect", (PyCFunction)(void(*)(void))module_connect,
METH_VARARGS | METH_KEYWORDS, module_connect_doc},
@ -373,7 +397,6 @@ PyMODINIT_FUNC PyInit__sqlite3(void)
(pysqlite_row_setup_types(module) < 0) ||
(pysqlite_cursor_setup_types(module) < 0) ||
(pysqlite_connection_setup_types(module) < 0) ||
(pysqlite_cache_setup_types(module) < 0) ||
(pysqlite_statement_setup_types(module) < 0) ||
(pysqlite_prepare_protocol_setup_types(module) < 0)
) {
@ -424,6 +447,10 @@ PyMODINIT_FUNC PyInit__sqlite3(void)
goto error;
}
if (load_functools_lru_cache(module) < 0) {
goto error;
}
return module;
error:

View file

@ -29,6 +29,12 @@
#define PYSQLITE_VERSION "2.6.0"
#define MODULE_NAME "sqlite3"
typedef struct {
PyObject *lru_cache;
} pysqlite_state;
extern pysqlite_state *pysqlite_get_state(PyObject *module);
extern PyObject* pysqlite_Error;
extern PyObject* pysqlite_Warning;
extern PyObject* pysqlite_InterfaceError;

View file

@ -97,7 +97,6 @@
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="..\Modules\_sqlite\cache.h" />
<ClInclude Include="..\Modules\_sqlite\connection.h" />
<ClInclude Include="..\Modules\_sqlite\cursor.h" />
<ClInclude Include="..\Modules\_sqlite\microprotocols.h" />
@ -108,7 +107,6 @@
<ClInclude Include="..\Modules\_sqlite\util.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Modules\_sqlite\cache.c" />
<ClCompile Include="..\Modules\_sqlite\connection.c" />
<ClCompile Include="..\Modules\_sqlite\cursor.c" />
<ClCompile Include="..\Modules\_sqlite\microprotocols.c" />

View file

@ -12,9 +12,6 @@
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\Modules\_sqlite\cache.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\Modules\_sqlite\connection.h">
<Filter>Header Files</Filter>
</ClInclude>
@ -41,9 +38,6 @@
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\Modules\_sqlite\cache.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\Modules\_sqlite\connection.c">
<Filter>Source Files</Filter>
</ClCompile>

View file

@ -1578,7 +1578,7 @@ def detect_sqlite(self):
sqlite_libdir = [os.path.abspath(os.path.dirname(sqlite_libfile))]
if sqlite_incdir and sqlite_libdir:
sqlite_srcs = ['_sqlite/cache.c',
sqlite_srcs = [
'_sqlite/connection.c',
'_sqlite/cursor.c',
'_sqlite/microprotocols.c',