From bbf2fb6c7ae78f40483606f467739a58cd747270 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 8 Jul 2021 16:48:42 +0100 Subject: [PATCH] bpo-44582: Accelerate mimetypes.init on Windows with a native accelerator (GH-27059) --- Lib/mimetypes.py | 27 ++++- Lib/test/test_mimetypes.py | 21 +++- .../2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst | 2 + Modules/_winapi.c | 108 ++++++++++++++++++ Modules/clinic/_winapi.c.h | 38 +++++- 5 files changed, 189 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 4cd94042126..b3d70e43b36 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -27,6 +27,12 @@ import sys import posixpath import urllib.parse + +try: + from _winapi import _mimetypes_read_windows_registry +except ImportError: + _mimetypes_read_windows_registry = None + try: import winreg as _winreg except ImportError: @@ -237,10 +243,21 @@ def read_windows_registry(self, strict=True): types. 
""" - # Windows only - if not _winreg: + if not _mimetypes_read_windows_registry and not _winreg: return + add_type = self.add_type + if strict: + add_type = lambda type, ext: self.add_type(type, ext, True) + + # Accelerated function if it is available + if _mimetypes_read_windows_registry: + _mimetypes_read_windows_registry(add_type) + elif _winreg: + self._read_windows_registry(add_type) + + @classmethod + def _read_windows_registry(cls, add_type): def enum_types(mimedb): i = 0 while True: @@ -265,7 +282,7 @@ def enum_types(mimedb): subkey, 'Content Type') if datatype != _winreg.REG_SZ: continue - self.add_type(mimetype, subkeyname, strict) + add_type(mimetype, subkeyname) except OSError: continue @@ -349,8 +366,8 @@ def init(files=None): if files is None or _db is None: db = MimeTypes() - if _winreg: - db.read_windows_registry() + # Quick return if not supported + db.read_windows_registry() if files is None: files = knownfiles diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index f5c040a97ad..fb9cb04452c 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -9,6 +9,11 @@ from test.support import os_helper from platform import win32_edition +try: + import _winapi +except ImportError: + _winapi = None + def setUpModule(): global knownfiles @@ -235,6 +240,21 @@ def test_registry_parsing(self): eq(self.db.guess_type("image.jpg"), ("image/jpeg", None)) eq(self.db.guess_type("image.png"), ("image/png", None)) + @unittest.skipIf(not hasattr(_winapi, "_mimetypes_read_windows_registry"), + "read_windows_registry accelerator unavailable") + def test_registry_accelerator(self): + from_accel = {} + from_reg = {} + _winapi._mimetypes_read_windows_registry( + lambda v, k: from_accel.setdefault(k, set()).add(v) + ) + mimetypes.MimeTypes._read_windows_registry( + lambda v, k: from_reg.setdefault(k, set()).add(v) + ) + self.assertEqual(list(from_reg), list(from_accel)) + for k in from_reg: + self.assertEqual(from_reg[k], from_accel[k]) 
+ class MiscTestCase(unittest.TestCase): def test__all__(self): @@ -288,6 +308,5 @@ def test_guess_type(self): type_info = self.mimetypes_cmd("foo.pic") eq(type_info, "I don't know anything about type foo.pic") - if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst b/Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst new file mode 100644 index 00000000000..f79c88931c5 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2021-07-07-21-07-18.bpo-44582.4Mm6Hh.rst @@ -0,0 +1,2 @@ +Accelerate speed of :mod:`mimetypes` initialization using a native +implementation of the registry scan. diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 1b85d7dd7ee..f341493503c 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1894,6 +1894,113 @@ _winapi_GetFileType_impl(PyObject *module, HANDLE handle) return result; } +/*[clinic input] +_winapi._mimetypes_read_windows_registry + + on_type_read: object + +Optimized function for reading all known MIME types from the registry. + +*on_type_read* is a callable taking *type* and *ext* arguments, as for +MimeTypes.add_type. 
+[clinic start generated code]*/ + +static PyObject * +_winapi__mimetypes_read_windows_registry_impl(PyObject *module, + PyObject *on_type_read) +/*[clinic end generated code: output=20829f00bebce55b input=cd357896d6501f68]*/ +{ +#define CCH_EXT 128 +#define CB_TYPE 510 + struct { + wchar_t ext[CCH_EXT]; + wchar_t type[CB_TYPE / sizeof(wchar_t) + 1]; + } entries[64]; + int entry = 0; + HKEY hkcr = NULL; + LRESULT err; + + Py_BEGIN_ALLOW_THREADS + err = RegOpenKeyExW(HKEY_CLASSES_ROOT, NULL, 0, KEY_READ, &hkcr); + for (DWORD i = 0; err == ERROR_SUCCESS || err == ERROR_MORE_DATA; ++i) { + LPWSTR ext = entries[entry].ext; + LPWSTR type = entries[entry].type; + DWORD cchExt = CCH_EXT; + DWORD cbType = CB_TYPE; + HKEY subkey; + DWORD regType; + + err = RegEnumKeyExW(hkcr, i, ext, &cchExt, NULL, NULL, NULL, NULL); + if (err != ERROR_SUCCESS || (cchExt && ext[0] != L'.')) { + continue; + } + + err = RegOpenKeyExW(hkcr, ext, 0, KEY_READ, &subkey); + if (err == ERROR_FILE_NOT_FOUND) { + err = ERROR_SUCCESS; + continue; + } else if (err != ERROR_SUCCESS) { + continue; + } + + err = RegQueryValueExW(subkey, L"Content Type", NULL, + &regType, (LPBYTE)type, &cbType); + RegCloseKey(subkey); + if (err == ERROR_FILE_NOT_FOUND) { + err = ERROR_SUCCESS; + continue; + } else if (err != ERROR_SUCCESS) { + continue; + } else if (regType != REG_SZ || !cbType) { + continue; + } + type[cbType / sizeof(wchar_t)] = L'\0'; + + entry += 1; + + /* Flush our cached entries if we are full */ + if (entry == sizeof(entries) / sizeof(entries[0])) { + Py_BLOCK_THREADS + for (int j = 0; j < entry; ++j) { + PyObject *r = PyObject_CallFunction( + on_type_read, "uu", entries[j].type, entries[j].ext + ); + if (!r) { + /* We blocked threads, so safe to return from here */ + RegCloseKey(hkcr); + return NULL; + } + Py_DECREF(r); + } + Py_UNBLOCK_THREADS + entry = 0; + } + } + if (hkcr) { + RegCloseKey(hkcr); + } + Py_END_ALLOW_THREADS + + if (err != ERROR_SUCCESS && err != ERROR_NO_MORE_ITEMS) { + 
PyErr_SetFromWindowsErr((int)err); + return NULL; + } + + for (int j = 0; j < entry; ++j) { + PyObject *r = PyObject_CallFunction( + on_type_read, "uu", entries[j].type, entries[j].ext + ); + if (!r) { + return NULL; + } + Py_DECREF(r); + } + + Py_RETURN_NONE; +#undef CCH_EXT +#undef CB_TYPE +} + static PyMethodDef winapi_functions[] = { _WINAPI_CLOSEHANDLE_METHODDEF @@ -1926,6 +2033,7 @@ static PyMethodDef winapi_functions[] = { _WINAPI_WRITEFILE_METHODDEF _WINAPI_GETACP_METHODDEF _WINAPI_GETFILETYPE_METHODDEF + _WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF {NULL, NULL} }; diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index a9630d55998..5bda156d7aa 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -1148,4 +1148,40 @@ _winapi_GetFileType(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P exit: return return_value; } -/*[clinic end generated code: output=1f10e03f64ff9777 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_winapi__mimetypes_read_windows_registry__doc__, +"_mimetypes_read_windows_registry($module, /, on_type_read)\n" +"--\n" +"\n" +"Optimized function for reading all known MIME types from the registry.\n" +"\n" +"*on_type_read* is a callable taking *type* and *ext* arguments, as for\n" +"MimeTypes.add_type."); + +#define _WINAPI__MIMETYPES_READ_WINDOWS_REGISTRY_METHODDEF \ + {"_mimetypes_read_windows_registry", (PyCFunction)(void(*)(void))_winapi__mimetypes_read_windows_registry, METH_FASTCALL|METH_KEYWORDS, _winapi__mimetypes_read_windows_registry__doc__}, + +static PyObject * +_winapi__mimetypes_read_windows_registry_impl(PyObject *module, + PyObject *on_type_read); + +static PyObject * +_winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"on_type_read", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "_mimetypes_read_windows_registry", 0}; + 
PyObject *argsbuf[1]; + PyObject *on_type_read; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + on_type_read = args[0]; + return_value = _winapi__mimetypes_read_windows_registry_impl(module, on_type_read); + +exit: + return return_value; +} +/*[clinic end generated code: output=ac3623be6e42017c input=a9049054013a1b77]*/