bpo-44582: Accelerate mimetypes.init on Windows with a native accelerator (GH-27059)

This commit is contained in:
Steve Dower 2021-07-08 16:48:42 +01:00 committed by GitHub
parent af4a2dcc40
commit bbf2fb6c7a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 189 additions and 7 deletions

View file

@ -27,6 +27,12 @@
import sys
import posixpath
import urllib.parse
try:
from _winapi import _mimetypes_read_windows_registry
except ImportError:
_mimetypes_read_windows_registry = None
try:
import winreg as _winreg
except ImportError:
@ -237,10 +243,21 @@ def read_windows_registry(self, strict=True):
types.
"""
# Windows only
if not _winreg:
if not _mimetypes_read_windows_registry and not _winreg:
return
add_type = self.add_type
if strict:
add_type = lambda type, ext: self.add_type(type, ext, True)
# Accelerated function if it is available
if _mimetypes_read_windows_registry:
_mimetypes_read_windows_registry(add_type)
elif _winreg:
self._read_windows_registry(add_type)
@classmethod
def _read_windows_registry(cls, add_type):
def enum_types(mimedb):
i = 0
while True:
@ -265,7 +282,7 @@ def enum_types(mimedb):
subkey, 'Content Type')
if datatype != _winreg.REG_SZ:
continue
self.add_type(mimetype, subkeyname, strict)
add_type(mimetype, subkeyname)
except OSError:
continue
@ -349,8 +366,8 @@ def init(files=None):
if files is None or _db is None:
db = MimeTypes()
if _winreg:
db.read_windows_registry()
# Quick return if not supported
db.read_windows_registry()
if files is None:
files = knownfiles

View file

@ -9,6 +9,11 @@
from test.support import os_helper
from platform import win32_edition
try:
import _winapi
except ImportError:
_winapi = None
def setUpModule():
global knownfiles
@ -235,6 +240,21 @@ def test_registry_parsing(self):
eq(self.db.guess_type("image.jpg"), ("image/jpeg", None))
eq(self.db.guess_type("image.png"), ("image/png", None))
@unittest.skipIf(not hasattr(_winapi, "_mimetypes_read_windows_registry"),
                 "read_windows_registry accelerator unavailable")
def test_registry_accelerator(self):
    """The native registry reader must report the same (type, ext) pairs,
    with extensions in the same order, as the pure-Python reader."""
    def collector(target):
        # Build an add_type-style callback that groups MIME types by
        # extension inside *target*.
        def record(mimetype, ext):
            target.setdefault(ext, set()).add(mimetype)
        return record

    accel_types = {}
    winreg_types = {}
    _winapi._mimetypes_read_windows_registry(collector(accel_types))
    mimetypes.MimeTypes._read_windows_registry(collector(winreg_types))

    # Same extensions, discovered in the same order ...
    self.assertEqual(list(winreg_types), list(accel_types))
    # ... and the same set of MIME types for each extension.
    for ext in winreg_types:
        self.assertEqual(winreg_types[ext], accel_types[ext])
class MiscTestCase(unittest.TestCase):
def test__all__(self):
@ -288,6 +308,5 @@ def test_guess_type(self):
type_info = self.mimetypes_cmd("foo.pic")
eq(type_info, "I don't know anything about type foo.pic")
if __name__ == "__main__":
unittest.main()

View file

@ -0,0 +1,2 @@
Speed up :mod:`mimetypes` initialization on Windows by using a native
implementation of the registry scan.

View file

@ -1894,6 +1894,113 @@ _winapi_GetFileType_impl(PyObject *module, HANDLE handle)
return result;
}
/*[clinic input]
_winapi._mimetypes_read_windows_registry
on_type_read: object
Optimized function for reading all known MIME types from the registry.
*on_type_read* is a callable taking *type* and *ext* arguments, as for
MimeTypes.add_type.
[clinic start generated code]*/
static PyObject *
_winapi__mimetypes_read_windows_registry_impl(PyObject *module,
                                              PyObject *on_type_read)
/*[clinic end generated code: output=20829f00bebce55b input=cd357896d6501f68]*/
{
    /* Enumerate the subkeys of HKEY_CLASSES_ROOT whose names start with '.'
       and, for each one that has a REG_SZ "Content Type" value, call
       on_type_read(type, ext).

       The registry scan runs inside Py_BEGIN_ALLOW_THREADS, so results are
       buffered in a fixed-size array and the Python callback is only invoked
       in batches, either after Py_BLOCK_THREADS (buffer full) or after
       Py_END_ALLOW_THREADS (final partial batch).

       Returns None on success.  Returns NULL with an exception set if the
       callback raises or if the scan stops on an unexpected Windows error. */

/* Capacity of the extension (subkey name) buffer, in wchar_t units. */
#define CCH_EXT 128
/* Capacity offered to RegQueryValueExW for the type value, in bytes.  The
   buffer itself holds one extra wchar_t so a terminator can always be
   written after the returned data. */
#define CB_TYPE 510
    struct {
        wchar_t ext[CCH_EXT];
        wchar_t type[CB_TYPE / sizeof(wchar_t) + 1];
    } entries[64];
    int entry = 0;
    HKEY hkcr = NULL;
    LRESULT err;

    Py_BEGIN_ALLOW_THREADS
    err = RegOpenKeyExW(HKEY_CLASSES_ROOT, NULL, 0, KEY_READ, &hkcr);
    /* ERROR_MORE_DATA only means one name or value did not fit in our
       buffers; that item is skipped and enumeration continues.  Any other
       failure (including ERROR_NO_MORE_ITEMS) ends the loop. */
    for (DWORD i = 0; err == ERROR_SUCCESS || err == ERROR_MORE_DATA; ++i) {
        /* Read straight into the next free slot; the slot is only kept
           (entry incremented) once the whole record is valid. */
        LPWSTR ext = entries[entry].ext;
        LPWSTR type = entries[entry].type;
        DWORD cchExt = CCH_EXT;
        DWORD cbType = CB_TYPE;
        HKEY subkey;
        DWORD regType;

        err = RegEnumKeyExW(hkcr, i, ext, &cchExt, NULL, NULL, NULL, NULL);
        if (err != ERROR_SUCCESS || (cchExt && ext[0] != L'.')) {
            continue;
        }

        err = RegOpenKeyExW(hkcr, ext, 0, KEY_READ, &subkey);
        if (err == ERROR_FILE_NOT_FOUND || err == ERROR_ACCESS_DENIED) {
            /* The key vanished since it was enumerated, or we are not
               permitted to read it.  Skip just this key instead of
               aborting the scan, as the pure-Python reader does when a
               single key raises OSError. */
            err = ERROR_SUCCESS;
            continue;
        } else if (err != ERROR_SUCCESS) {
            continue;
        }

        err = RegQueryValueExW(subkey, L"Content Type", NULL,
                               &regType, (LPBYTE)type, &cbType);
        RegCloseKey(subkey);
        if (err == ERROR_FILE_NOT_FOUND) {
            /* No "Content Type" value on this extension: not an error. */
            err = ERROR_SUCCESS;
            continue;
        } else if (err != ERROR_SUCCESS) {
            continue;
        } else if (regType != REG_SZ || !cbType) {
            continue;
        }
        /* Terminate explicitly at the returned length rather than relying
           on the stored data containing a terminator. */
        type[cbType / sizeof(wchar_t)] = L'\0';

        entry += 1;

        /* Flush our cached entries if we are full */
        if (entry == sizeof(entries) / sizeof(entries[0])) {
            Py_BLOCK_THREADS
            for (int j = 0; j < entry; ++j) {
                PyObject *r = PyObject_CallFunction(
                    on_type_read, "uu", entries[j].type, entries[j].ext
                );
                if (!r) {
                    /* We blocked threads, so safe to return from here */
                    RegCloseKey(hkcr);
                    return NULL;
                }
                Py_DECREF(r);
            }
            Py_UNBLOCK_THREADS
            entry = 0;
        }
    }
    if (hkcr) {
        RegCloseKey(hkcr);
    }
    Py_END_ALLOW_THREADS

    /* ERROR_NO_MORE_ITEMS is the normal end of enumeration. */
    if (err != ERROR_SUCCESS && err != ERROR_NO_MORE_ITEMS) {
        PyErr_SetFromWindowsErr((int)err);
        return NULL;
    }

    /* Deliver whatever is left in the final, partially filled batch. */
    for (int j = 0; j < entry; ++j) {
        PyObject *r = PyObject_CallFunction(
            on_type_read, "uu", entries[j].type, entries[j].ext
        );
        if (!r) {
            return NULL;
        }
        Py_DECREF(r);
    }

    Py_RETURN_NONE;

#undef CCH_EXT
#undef CB_TYPE
}
static PyMethodDef winapi_functions[] = {
_WINAPI_CLOSEHANDLE_METHODDEF
@ -1926,6 +2033,7 @@ static PyMethodDef winapi_functions[] = {
_WINAPI_WRITEFILE_METHODDEF
_WINAPI_GETACP_METHODDEF
_WINAPI_GETFILETYPE_METHODDEF
_WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF
{NULL, NULL}
};

View file

@ -1148,4 +1148,40 @@ _winapi_GetFileType(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P
exit:
return return_value;
}
/*[clinic end generated code: output=1f10e03f64ff9777 input=a9049054013a1b77]*/
PyDoc_STRVAR(_winapi__mimetypes_read_windows_registry__doc__,
"_mimetypes_read_windows_registry($module, /, on_type_read)\n"
"--\n"
"\n"
"Optimized function for reading all known MIME types from the registry.\n"
"\n"
"*on_type_read* is a callable taking *type* and *ext* arguments, as for\n"
"MimeTypes.add_type.");
#define _WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF \
{"_mimetypes_read_windows_registry", (PyCFunction)(void(*)(void))_winapi__mimetypes_read_windows_registry, METH_FASTCALL|METH_KEYWORDS, _winapi__mimetypes_read_windows_registry__doc__},
static PyObject *
_winapi__mimetypes_read_windows_registry_impl(PyObject *module,
PyObject *on_type_read);
/* Argument-parsing wrapper for _mimetypes_read_windows_registry().
   Unpacks the single required "on_type_read" argument (accepted
   positionally or by keyword) and forwards it to the _impl function.
   Returns the _impl result, or NULL if argument parsing failed. */
static PyObject *
_winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"on_type_read", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "_mimetypes_read_windows_registry", 0};
PyObject *argsbuf[1];
PyObject *on_type_read;
/* Exactly one positional-or-keyword argument (min 1, max 1), with no
   keyword-only arguments. */
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
if (!args) {
/* return_value is still NULL here, so the failure propagates. */
goto exit;
}
on_type_read = args[0];
return_value = _winapi__mimetypes_read_windows_registry_impl(module, on_type_read);
exit:
return return_value;
}
/*[clinic end generated code: output=ac3623be6e42017c input=a9049054013a1b77]*/