From 4c8f09d7cef8c7aa07d5b5232b5b64f63819a743 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 10 Jul 2020 23:26:06 +0300 Subject: [PATCH] bpo-36346: Make using the legacy Unicode C API optional (GH-21437) Add compile time option USE_UNICODE_WCHAR_CACHE. Setting it to 0 makes the interpreter not using the wchar_t cache and the legacy Unicode C API. --- Include/cpython/unicodeobject.h | 4 +- Lib/test/support/__init__.py | 8 ++++ Lib/test/test_csv.py | 2 +- Lib/test/test_decimal.py | 5 +- Lib/test/test_getargs2.py | 4 ++ Lib/test/test_unicode.py | 4 ++ Modules/_io/fileio.c | 12 +++++ Modules/_testcapimodule.c | 16 ++++++- Modules/_winapi.c | 17 +++---- Modules/clinic/_winapi.c.h | 64 +++++++++++++++++++++----- Modules/overlapped.c | 64 ++++++++++++++++++++------ Modules/posixmodule.c | 70 ++++++++++++++++++++++++---- Objects/unicodeobject.c | 81 +++++++++++++++++++++++---------- PC/clinic/winreg.c.h | 14 +++--- PC/winreg.c | 61 +++++++++++++++++++------ Python/dynload_win.c | 10 +++- Python/fileutils.c | 23 ++++++++-- 17 files changed, 360 insertions(+), 99 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 49ad32d5d19..615b4a971d5 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -11,7 +11,9 @@ /* --- Internal Unicode Operations ---------------------------------------- */ -#define USE_UNICODE_WCHAR_CACHE 1 +#ifndef USE_UNICODE_WCHAR_CACHE +# define USE_UNICODE_WCHAR_CACHE 1 +#endif /* USE_UNICODE_WCHAR_CACHE */ /* Since splitting on whitespace is an important use case, and whitespace in most situations is solely ASCII whitespace, we diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index b21978a61cd..1ce3a78fdbb 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -36,6 +36,11 @@ from .testresult import get_test_runner +try: + from _testcapi import unicode_legacy_string +except ImportError: + unicode_legacy_string = None + __all__ = [ # globals "PIPE_MAX_SIZE", "verbose", "max_memuse", "use_resources", "failfast", @@ -426,6 +431,9 @@ def requires_lzma(reason='requires lzma'): lzma = None return unittest.skipUnless(lzma, reason) +requires_legacy_unicode_capi = unittest.skipUnless(unicode_legacy_string, + 'requires legacy Unicode C API') + is_jython = sys.platform.startswith('java') is_android = hasattr(sys, 'getandroidapilevel') diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index d421be075ca..a92870c24a1 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -250,9 +250,9 @@ def test_writerows_errors(self): self.assertRaises(OSError, writer.writerows, BadIterable()) @support.cpython_only + @support.requires_legacy_unicode_capi def test_writerows_legacy_strings(self): import _testcapi - c = _testcapi.unicode_legacy_string('a') with TemporaryFile("w+", newline='') as fileobj: writer = csv.writer(fileobj) diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 716e6eb7fb1..9dbae449fb6 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -33,7 +33,8 @@ import numbers import locale from test.support import (run_unittest, run_doctest, is_resource_enabled, - requires_IEEE_754, requires_docstrings) + requires_IEEE_754, requires_docstrings, + requires_legacy_unicode_capi) from test.support import (TestFailed, run_with_locale, cpython_only) from test.support.import_helper import import_fresh_module @@ -582,6 +583,7 @@ def test_explicit_from_string(self): self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003") @cpython_only + @requires_legacy_unicode_capi def test_from_legacy_strings(self): import _testcapi Decimal = self.decimal.Decimal @@ -2817,6 +2819,7 @@ def test_none_args(self): Overflow]) @cpython_only + @requires_legacy_unicode_capi def test_from_legacy_strings(self): import _testcapi c = self.decimal.Context() diff --git a/Lib/test/test_getargs2.py b/Lib/test/test_getargs2.py index d39ea56ae9e..09560197913 100644 --- a/Lib/test/test_getargs2.py +++ b/Lib/test/test_getargs2.py @@ -976,6 +976,7 @@ def test_et_hash(self): buf = bytearray() self.assertRaises(ValueError, getargs_et_hash, 'abc\xe9', 'latin1', buf) + @support.requires_legacy_unicode_capi def test_u(self): from _testcapi import getargs_u self.assertEqual(getargs_u('abc\xe9'), 'abc\xe9') @@ -985,6 +986,7 @@ def test_u(self): self.assertRaises(TypeError, getargs_u, memoryview(b'memoryview')) self.assertRaises(TypeError, getargs_u, None) + @support.requires_legacy_unicode_capi def test_u_hash(self): from _testcapi import getargs_u_hash self.assertEqual(getargs_u_hash('abc\xe9'), 'abc\xe9') @@ -994,6 +996,7 @@ def test_u_hash(self): self.assertRaises(TypeError, getargs_u_hash, memoryview(b'memoryview')) self.assertRaises(TypeError, getargs_u_hash, None) + @support.requires_legacy_unicode_capi def test_Z(self): from _testcapi import getargs_Z self.assertEqual(getargs_Z('abc\xe9'), 'abc\xe9') @@ -1003,6 +1006,7 @@ def test_Z(self): self.assertRaises(TypeError, getargs_Z, memoryview(b'memoryview')) self.assertIsNone(getargs_Z(None)) + @support.requires_legacy_unicode_capi def test_Z_hash(self): from _testcapi import getargs_Z_hash self.assertEqual(getargs_Z_hash('abc\xe9'), 'abc\xe9') diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index afc95555db0..d485bc7ede2 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -723,6 +723,7 @@ def test_isidentifier(self): self.assertFalse("0".isidentifier()) @support.cpython_only + @support.requires_legacy_unicode_capi def test_isidentifier_legacy(self): import _testcapi u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊' @@ -2350,6 +2351,7 @@ def test_getnewargs(self): self.assertEqual(len(args), 1) @support.cpython_only + @support.requires_legacy_unicode_capi def test_resize(self): from _testcapi import getargs_u for length in range(1, 100, 7): @@ -2920,6 +2922,7 @@ def test_copycharacters(self): self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0) @support.cpython_only + @support.requires_legacy_unicode_capi def test_encode_decimal(self): from _testcapi import unicode_encodedecimal self.assertEqual(unicode_encodedecimal('123'), @@ -2936,6 +2939,7 @@ def test_encode_decimal(self): unicode_encodedecimal, "123\u20ac", "replace") @support.cpython_only + @support.requires_legacy_unicode_capi def test_transform_decimal(self): from _testcapi import unicode_transformdecimaltoascii as transform_decimal self.assertEqual(transform_decimal('123'), diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 7c8ba37c4fe..b9856b3b631 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -270,7 +270,14 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, if (!PyUnicode_FSDecoder(nameobj, &stringobj)) { return -1; } +#if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS widename = PyUnicode_AsUnicode(stringobj); +_Py_COMP_DIAG_POP +#else /* USE_UNICODE_WCHAR_CACHE */ + widename = PyUnicode_AsWideCharString(stringobj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (widename == NULL) return -1; #else @@ -491,6 +498,11 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode, internal_close(self); done: +#ifdef MS_WINDOWS +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(widename); +#endif /* USE_UNICODE_WCHAR_CACHE */ +#endif Py_CLEAR(stringobj); return ret; } diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 1e4c31fefb2..fca94a83a5d 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1668,6 +1668,7 @@ parse_tuple_and_keywords(PyObject *self, PyObject *args) static volatile int x; +#if USE_UNICODE_WCHAR_CACHE /* Ignore use of deprecated APIs */ _Py_COMP_DIAG_PUSH _Py_COMP_DIAG_IGNORE_DEPR_DECLS @@ -1772,6 +1773,8 @@ test_Z_code(PyObject *self, PyObject *Py_UNUSED(ignored)) Py_DECREF(tuple); Py_RETURN_NONE; } +_Py_COMP_DIAG_POP +#endif /* USE_UNICODE_WCHAR_CACHE */ static PyObject * test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) @@ -1824,6 +1827,10 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) return raiseTestError("test_widechar", "PyUnicode_FromWideChar(L\"\\U00110000\", 1) didn't fail"); +#if USE_UNICODE_WCHAR_CACHE +/* Ignore use of deprecated APIs */ +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS wide = PyUnicode_FromUnicode(invalid, 1); if (wide == NULL) PyErr_Clear(); @@ -1844,11 +1851,12 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored)) return raiseTestError("test_widechar", "PyUnicode_Ready() didn't fail"); } +_Py_COMP_DIAG_POP +#endif /* USE_UNICODE_WCHAR_CACHE */ #endif Py_RETURN_NONE; } -_Py_COMP_DIAG_POP static PyObject * unicode_aswidechar(PyObject *self, PyObject *args) @@ -2024,6 +2032,7 @@ unicode_copycharacters(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", to_copy, copied); } +#if USE_UNICODE_WCHAR_CACHE /* Ignore use of deprecated APIs */ _Py_COMP_DIAG_PUSH _Py_COMP_DIAG_IGNORE_DEPR_DECLS @@ -2096,6 +2105,7 @@ unicode_legacy_string(PyObject *self, PyObject *args) return u; } _Py_COMP_DIAG_POP +#endif /* USE_UNICODE_WCHAR_CACHE */ static PyObject * getargs_w_star(PyObject *self, PyObject *args) @@ -5398,8 +5408,10 @@ static PyMethodDef TestMethods[] = { {"codec_incrementaldecoder", (PyCFunction)codec_incrementaldecoder, METH_VARARGS}, {"test_s_code", test_s_code, METH_NOARGS}, +#if USE_UNICODE_WCHAR_CACHE {"test_u_code", test_u_code, METH_NOARGS}, {"test_Z_code", test_Z_code, METH_NOARGS}, +#endif /* USE_UNICODE_WCHAR_CACHE */ {"test_widechar", test_widechar, METH_NOARGS}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, @@ -5408,9 +5420,11 @@ static PyMethodDef TestMethods[] = { {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS}, {"unicode_findchar", unicode_findchar, METH_VARARGS}, {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, +#if USE_UNICODE_WCHAR_CACHE {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS}, +#endif /* USE_UNICODE_WCHAR_CACHE */ {"_test_thread_state", test_thread_state, METH_VARARGS}, {"_pending_threadfunc", pending_threadfunc, METH_VARARGS}, #ifdef HAVE_GETTIMEOFDAY diff --git a/Modules/_winapi.c b/Modules/_winapi.c index e1672c47852..ddb11aa5a82 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -164,10 +164,11 @@ create_converter('LPCVOID', '" F_POINTER "') create_converter('BOOL', 'i') # F_BOOL used previously (always 'i') create_converter('DWORD', 'k') # F_DWORD is always "k" (which is much shorter) create_converter('LPCTSTR', 's') -create_converter('LPCWSTR', 'u') -create_converter('LPWSTR', 'u') create_converter('UINT', 'I') # F_UINT used previously (always 'I') +class LPCWSTR_converter(Py_UNICODE_converter): + type = 'LPCWSTR' + class HANDLE_return_converter(CReturnConverter): type = 'HANDLE' @@ -197,7 +198,7 @@ class LPVOID_return_converter(CReturnConverter): data.return_conversion.append( 'return_value = HANDLE_TO_PYNUM(_return_value);\n') [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=79464c61a31ae932]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=011ee0c3a2244bfe]*/ #include "clinic/_winapi.c.h" @@ -520,15 +521,15 @@ _winapi_CreateFileMapping_impl(PyObject *module, HANDLE file_handle, /*[clinic input] _winapi.CreateJunction - src_path: LPWSTR - dst_path: LPWSTR + src_path: LPCWSTR + dst_path: LPCWSTR / [clinic start generated code]*/ static PyObject * -_winapi_CreateJunction_impl(PyObject *module, LPWSTR src_path, - LPWSTR dst_path) -/*[clinic end generated code: output=66b7eb746e1dfa25 input=8cd1f9964b6e3d36]*/ +_winapi_CreateJunction_impl(PyObject *module, LPCWSTR src_path, + LPCWSTR dst_path) +/*[clinic end generated code: output=44b3f5e9bbcc4271 input=963d29b44b9384a7]*/ { /* Privilege adjustment */ HANDLE token = NULL; diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index 6022dfe0db4..a9630d55998 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -195,8 +195,8 @@ _winapi_CreateFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t na LPCWSTR name; HANDLE _return_value; - if (!_PyArg_ParseStack(args, nargs, "" F_HANDLE "" F_POINTER "kkku:CreateFileMapping", - &file_handle, &security_attributes, &protect, &max_size_high, &max_size_low, &name)) { + if (!_PyArg_ParseStack(args, nargs, "" F_HANDLE "" F_POINTER "kkkO&:CreateFileMapping", + &file_handle, &security_attributes, &protect, &max_size_high, &max_size_low, _PyUnicode_WideCharString_Converter, &name)) { goto exit; } _return_value = _winapi_CreateFileMapping_impl(module, file_handle, security_attributes, protect, max_size_high, max_size_low, name); @@ -209,6 +209,11 @@ _winapi_CreateFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t na return_value = HANDLE_TO_PYNUM(_return_value); exit: + /* Cleanup for name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -221,23 +226,55 @@ PyDoc_STRVAR(_winapi_CreateJunction__doc__, {"CreateJunction", (PyCFunction)(void(*)(void))_winapi_CreateJunction, METH_FASTCALL, _winapi_CreateJunction__doc__}, static PyObject * -_winapi_CreateJunction_impl(PyObject *module, LPWSTR src_path, - LPWSTR dst_path); +_winapi_CreateJunction_impl(PyObject *module, LPCWSTR src_path, + LPCWSTR dst_path); static PyObject * _winapi_CreateJunction(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - LPWSTR src_path; - LPWSTR dst_path; + LPCWSTR src_path; + LPCWSTR dst_path; - if (!_PyArg_ParseStack(args, nargs, "uu:CreateJunction", - &src_path, &dst_path)) { + if (!_PyArg_CheckPositional("CreateJunction", nargs, 2, 2)) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("CreateJunction", "argument 1", "str", args[0]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + src_path = _PyUnicode_AsUnicode(args[0]); + #else /* USE_UNICODE_WCHAR_CACHE */ + src_path = PyUnicode_AsWideCharString(args[0], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (src_path == NULL) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("CreateJunction", "argument 2", "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + dst_path = _PyUnicode_AsUnicode(args[1]); + #else /* USE_UNICODE_WCHAR_CACHE */ + dst_path = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (dst_path == NULL) { goto exit; } return_value = _winapi_CreateJunction_impl(module, src_path, dst_path); exit: + /* Cleanup for src_path */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)src_path); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for dst_path */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)dst_path); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -715,8 +752,8 @@ _winapi_OpenFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t narg LPCWSTR name; HANDLE _return_value; - if (!_PyArg_ParseStack(args, nargs, "kiu:OpenFileMapping", - &desired_access, &inherit_handle, &name)) { + if (!_PyArg_ParseStack(args, nargs, "kiO&:OpenFileMapping", + &desired_access, &inherit_handle, _PyUnicode_WideCharString_Converter, &name)) { goto exit; } _return_value = _winapi_OpenFileMapping_impl(module, desired_access, inherit_handle, name); @@ -729,6 +766,11 @@ _winapi_OpenFileMapping(PyObject *module, PyObject *const *args, Py_ssize_t narg return_value = HANDLE_TO_PYNUM(_return_value); exit: + /* Cleanup for name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1106,4 +1148,4 @@ _winapi_GetFileType(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P exit: return return_value; } -/*[clinic end generated code: output=db87076a32fa7abe input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1f10e03f64ff9777 input=a9049054013a1b77]*/ diff --git a/Modules/overlapped.c b/Modules/overlapped.c index 9c4e2da9dfb..4f0ba85d798 100644 --- a/Modules/overlapped.c +++ b/Modules/overlapped.c @@ -1291,6 +1291,7 @@ _overlapped_Overlapped_AcceptEx_impl(OverlappedObject *self, static int parse_address(PyObject *obj, SOCKADDR *Address, int Length) { + PyObject *Host_obj; Py_UNICODE *Host; unsigned short Port; unsigned long FlowInfo; @@ -1298,33 +1299,66 @@ parse_address(PyObject *obj, SOCKADDR *Address, int Length) memset(Address, 0, Length); - if (PyArg_ParseTuple(obj, "uH", &Host, &Port)) - { + switch (PyTuple_GET_SIZE(obj)) { + case 2: { + if (!PyArg_ParseTuple(obj, "UH", &Host_obj, &Port)) { + return -1; + } +#if USE_UNICODE_WCHAR_CACHE + Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); +#else /* USE_UNICODE_WCHAR_CACHE */ + Host = PyUnicode_AsWideCharString(Host_obj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (Host == NULL) { + return -1; + } Address->sa_family = AF_INET; if (WSAStringToAddressW(Host, AF_INET, NULL, Address, &Length) < 0) { SetFromWindowsErr(WSAGetLastError()); - return -1; + Length = -1; } - ((SOCKADDR_IN*)Address)->sin_port = htons(Port); + else { + ((SOCKADDR_IN*)Address)->sin_port = htons(Port); + } +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(Host); +#endif /* USE_UNICODE_WCHAR_CACHE */ return Length; } - else if (PyArg_ParseTuple(obj, - "uHkk;ConnectEx(): illegal address_as_bytes " - "argument", &Host, &Port, &FlowInfo, &ScopeId)) - { - PyErr_Clear(); + case 4: { + if (!PyArg_ParseTuple(obj, + "UHkk;ConnectEx(): illegal address_as_bytes argument", + &Host_obj, &Port, &FlowInfo, &ScopeId)) + { + return -1; + } +#if USE_UNICODE_WCHAR_CACHE + Host = (wchar_t *)_PyUnicode_AsUnicode(Host_obj); +#else /* USE_UNICODE_WCHAR_CACHE */ + Host = PyUnicode_AsWideCharString(Host_obj, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (Host == NULL) { + return -1; + } Address->sa_family = AF_INET6; if (WSAStringToAddressW(Host, AF_INET6, NULL, Address, &Length) < 0) { SetFromWindowsErr(WSAGetLastError()); - return -1; + Length = -1; } - ((SOCKADDR_IN6*)Address)->sin6_port = htons(Port); - ((SOCKADDR_IN6*)Address)->sin6_flowinfo = FlowInfo; - ((SOCKADDR_IN6*)Address)->sin6_scope_id = ScopeId; + else { + ((SOCKADDR_IN6*)Address)->sin6_port = htons(Port); + ((SOCKADDR_IN6*)Address)->sin6_flowinfo = FlowInfo; + ((SOCKADDR_IN6*)Address)->sin6_scope_id = ScopeId; + } +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(Host); +#endif /* USE_UNICODE_WCHAR_CACHE */ return Length; } - - return -1; + default: + PyErr_SetString(PyExc_ValueError, "illegal address_as_bytes argument"); + return -1; + } } /*[clinic input] diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index a411f28987e..efd99544f5a 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -988,6 +988,11 @@ typedef struct { static void path_cleanup(path_t *path) { +#if !USE_UNICODE_WCHAR_CACHE + wchar_t *wide = (wchar_t *)path->wide; + path->wide = NULL; + PyMem_Free(wide); +#endif /* USE_UNICODE_WCHAR_CACHE */ Py_CLEAR(path->object); Py_CLEAR(path->cleanup); } @@ -1002,7 +1007,7 @@ path_converter(PyObject *o, void *p) const char *narrow; #ifdef MS_WINDOWS PyObject *wo = NULL; - const wchar_t *wide; + wchar_t *wide = NULL; #endif #define FORMAT_EXCEPTION(exc, fmt) \ @@ -1075,7 +1080,14 @@ path_converter(PyObject *o, void *p) if (is_unicode) { #ifdef MS_WINDOWS +#if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS wide = PyUnicode_AsUnicodeAndSize(o, &length); +_Py_COMP_DIAG_POP +#else /* USE_UNICODE_WCHAR_CACHE */ + wide = PyUnicode_AsWideCharString(o, &length); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (!wide) { goto error_exit; } @@ -1091,6 +1103,9 @@ path_converter(PyObject *o, void *p) path->wide = wide; path->narrow = FALSE; path->fd = -1; +#if !USE_UNICODE_WCHAR_CACHE + wide = NULL; +#endif /* USE_UNICODE_WCHAR_CACHE */ goto success_exit; #else if (!PyUnicode_FSConverter(o, &bytes)) { @@ -1166,7 +1181,15 @@ path_converter(PyObject *o, void *p) goto error_exit; } +#if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS wide = PyUnicode_AsUnicodeAndSize(wo, &length); +_Py_COMP_DIAG_POP +#else /* USE_UNICODE_WCHAR_CACHE */ + wide = PyUnicode_AsWideCharString(wo, &length); + Py_DECREF(wo); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (!wide) { goto error_exit; } @@ -1180,8 +1203,12 @@ path_converter(PyObject *o, void *p) } path->wide = wide; path->narrow = TRUE; - path->cleanup = wo; Py_DECREF(bytes); +#if USE_UNICODE_WCHAR_CACHE + path->cleanup = wo; +#else /* USE_UNICODE_WCHAR_CACHE */ + wide = NULL; +#endif /* USE_UNICODE_WCHAR_CACHE */ #else path->wide = NULL; path->narrow = narrow; @@ -1205,7 +1232,11 @@ path_converter(PyObject *o, void *p) Py_XDECREF(o); Py_XDECREF(bytes); #ifdef MS_WINDOWS +#if USE_UNICODE_WCHAR_CACHE Py_XDECREF(wo); +#else /* USE_UNICODE_WCHAR_CACHE */ + PyMem_Free(wide); +#endif /* USE_UNICODE_WCHAR_CACHE */ #endif return 0; } @@ -12824,7 +12855,15 @@ DirEntry_fetch_stat(PyObject *module, DirEntry *self, int follow_symlinks) #ifdef MS_WINDOWS if (!PyUnicode_FSDecoder(self->path, &ub)) return NULL; +#if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS const wchar_t *path = PyUnicode_AsUnicode(ub); +_Py_COMP_DIAG_POP +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *path = PyUnicode_AsWideCharString(ub, NULL); + Py_DECREF(ub); +#endif /* USE_UNICODE_WCHAR_CACHE */ #else /* POSIX */ if (!PyUnicode_FSConverter(self->path, &ub)) return NULL; @@ -12834,6 +12873,7 @@ DirEntry_fetch_stat(PyObject *module, DirEntry *self, int follow_symlinks) result = fstatat(self->dir_fd, path, &st, follow_symlinks ? 0 : AT_SYMLINK_NOFOLLOW); #else + Py_DECREF(ub); PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat"); return NULL; #endif /* HAVE_FSTATAT */ @@ -12846,7 +12886,11 @@ DirEntry_fetch_stat(PyObject *module, DirEntry *self, int follow_symlinks) else result = LSTAT(path, &st); } +#if defined(MS_WINDOWS) && !USE_UNICODE_WCHAR_CACHE + PyMem_Free(path); +#else /* USE_UNICODE_WCHAR_CACHE */ Py_DECREF(ub); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (result != 0) return path_object_error(self->path); @@ -13035,15 +13079,24 @@ os_DirEntry_inode_impl(DirEntry *self) #ifdef MS_WINDOWS if (!self->got_file_index) { PyObject *unicode; - const wchar_t *path; STRUCT_STAT stat; int result; if (!PyUnicode_FSDecoder(self->path, &unicode)) return NULL; - path = PyUnicode_AsUnicode(unicode); +#if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS + const wchar_t *path = PyUnicode_AsUnicode(unicode); result = LSTAT(path, &stat); Py_DECREF(unicode); +_Py_COMP_DIAG_POP +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *path = PyUnicode_AsWideCharString(unicode, NULL); + Py_DECREF(unicode); + result = LSTAT(path, &stat); + PyMem_Free(path); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (result != 0) return path_object_error(self->path); @@ -13597,10 +13650,9 @@ os_scandir_impl(PyObject *module, path_t *path) iterator->dirp = NULL; #endif - memcpy(&iterator->path, path, sizeof(path_t)); /* Move the ownership to iterator->path */ - path->object = NULL; - path->cleanup = NULL; + memcpy(&iterator->path, path, sizeof(path_t)); + memset(path, 0, sizeof(path_t)); #ifdef MS_WINDOWS iterator->first_time = 1; @@ -13622,9 +13674,9 @@ os_scandir_impl(PyObject *module, path_t *path) #else /* POSIX */ errno = 0; #ifdef HAVE_FDOPENDIR - if (path->fd != -1) { + if (iterator->path.fd != -1) { /* closedir() closes the FD, so we duplicate it */ - fd = _Py_dup(path->fd); + fd = _Py_dup(iterator->path.fd); if (fd == -1) goto error; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 648dd15ca09..2e1045ad3a7 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3150,9 +3150,11 @@ unicode_get_widechar_size(PyObject *unicode) assert(unicode != NULL); assert(_PyUnicode_CHECK(unicode)); +#if USE_UNICODE_WCHAR_CACHE if (_PyUnicode_WSTR(unicode) != NULL) { return PyUnicode_WSTR_LENGTH(unicode); } +#endif /* USE_UNICODE_WCHAR_CACHE */ assert(PyUnicode_IS_READY(unicode)); res = _PyUnicode_LENGTH(unicode); @@ -3173,16 +3175,21 @@ unicode_get_widechar_size(PyObject *unicode) static void unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size) { - const wchar_t *wstr; - assert(unicode != NULL); assert(_PyUnicode_CHECK(unicode)); - wstr = _PyUnicode_WSTR(unicode); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wstr = _PyUnicode_WSTR(unicode); if (wstr != NULL) { memcpy(w, wstr, size * sizeof(wchar_t)); return; } +#else /* USE_UNICODE_WCHAR_CACHE */ + if (PyUnicode_KIND(unicode) == sizeof(wchar_t)) { + memcpy(w, PyUnicode_DATA(unicode), size * sizeof(wchar_t)); + return; + } +#endif /* USE_UNICODE_WCHAR_CACHE */ assert(PyUnicode_IS_READY(unicode)); if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) { @@ -4378,7 +4385,6 @@ unicode_decode_call_errorhandler_wchar( Py_ssize_t requiredsize; Py_ssize_t newpos; PyObject *inputobj = NULL; - wchar_t *repwstr; Py_ssize_t repwlen; if (*errorHandler == NULL) { @@ -4424,9 +4430,19 @@ unicode_decode_call_errorhandler_wchar( goto onError; } - repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen); - if (repwstr == NULL) +#if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS + repwlen = PyUnicode_GetSize(repunicode); + if (repwlen < 0) goto onError; +_Py_COMP_DIAG_POP +#else /* USE_UNICODE_WCHAR_CACHE */ + repwlen = PyUnicode_AsWideChar(repunicode, NULL, 0); + if (repwlen < 0) + goto onError; + repwlen--; +#endif /* USE_UNICODE_WCHAR_CACHE */ /* need more space? (at least enough for what we have+the replacement+the rest of the string (starting at the new input position), so we won't have to check space @@ -4446,7 +4462,7 @@ unicode_decode_call_errorhandler_wchar( goto onError; } } - wcsncpy(*buf + *outpos, repwstr, repwlen); + PyUnicode_AsWideChar(repunicode, *buf + *outpos, repwlen); *outpos += repwlen; *endinpos = newpos; *inptr = *input + newpos; @@ -7748,6 +7764,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, /* Create a substring so that we can get the UTF-16 representation of just the slice under consideration. */ PyObject *substring; + int ret = -1; assert(len > 0); @@ -7759,11 +7776,22 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, substring = PyUnicode_Substring(unicode, offset, offset+len); if (substring == NULL) return -1; +#if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS p = PyUnicode_AsUnicodeAndSize(substring, &size); if (p == NULL) { Py_DECREF(substring); return -1; } +_Py_COMP_DIAG_POP +#else /* USE_UNICODE_WCHAR_CACHE */ + p = PyUnicode_AsWideCharString(substring, &size); + Py_CLEAR(substring); + if (p == NULL) { + return -1; + } +#endif /* USE_UNICODE_WCHAR_CACHE */ assert(size <= INT_MAX); /* First get the size of the result */ @@ -7775,16 +7803,15 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, goto error; /* If we used a default char, then we failed! */ if (pusedDefaultChar && *pusedDefaultChar) { - Py_DECREF(substring); - return -2; + ret = -2; + goto done; } if (*outbytes == NULL) { /* Create string object */ *outbytes = PyBytes_FromStringAndSize(NULL, outsize); if (*outbytes == NULL) { - Py_DECREF(substring); - return -1; + goto done; } out = PyBytes_AS_STRING(*outbytes); } @@ -7793,12 +7820,10 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, const Py_ssize_t n = PyBytes_Size(*outbytes); if (outsize > PY_SSIZE_T_MAX - n) { PyErr_NoMemory(); - Py_DECREF(substring); - return -1; + goto done; } if (_PyBytes_Resize(outbytes, n + outsize) < 0) { - Py_DECREF(substring); - return -1; + goto done; } out = PyBytes_AS_STRING(*outbytes) + n; } @@ -7808,19 +7833,29 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes, p, (int)size, out, outsize, NULL, pusedDefaultChar); - Py_CLEAR(substring); if (outsize <= 0) goto error; - if (pusedDefaultChar && *pusedDefaultChar) - return -2; - return 0; + if (pusedDefaultChar && *pusedDefaultChar) { + ret = -2; + goto done; + } + ret = 0; + +done: +#if USE_UNICODE_WCHAR_CACHE + Py_DECREF(substring); +#else /* USE_UNICODE_WCHAR_CACHE */ + PyMem_Free(p); +#endif /* USE_UNICODE_WCHAR_CACHE */ + return ret; error: - Py_XDECREF(substring); - if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) - return -2; + if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION) { + ret = -2; + goto done; + } PyErr_SetFromWindowsErr(0); - return -1; + goto done; } /* diff --git a/PC/clinic/winreg.c.h b/PC/clinic/winreg.c.h index 3301bed9713..183301f0618 100644 --- a/PC/clinic/winreg.c.h +++ b/PC/clinic/winreg.c.h @@ -1143,8 +1143,7 @@ PyDoc_STRVAR(winreg_SetValue__doc__, static PyObject * winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, - DWORD type, const Py_UNICODE *value, - Py_ssize_clean_t value_length); + DWORD type, PyObject *value_obj); static PyObject * winreg_SetValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) @@ -1153,14 +1152,13 @@ winreg_SetValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *sub_key; DWORD type; - const Py_UNICODE *value; - Py_ssize_clean_t value_length; + PyObject *value_obj; - if (!_PyArg_ParseStack(args, nargs, "O&O&ku#:SetValue", - clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &type, &value, &value_length)) { + if (!_PyArg_ParseStack(args, nargs, "O&O&kU:SetValue", + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &type, &value_obj)) { goto exit; } - return_value = winreg_SetValue_impl(module, key, sub_key, type, value, value_length); + return_value = winreg_SetValue_impl(module, key, sub_key, type, value_obj); exit: /* Cleanup for sub_key */ @@ -1348,4 +1346,4 @@ winreg_QueryReflectionKey(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=30b1311886c13907 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=497a2e804821d5c9 input=a9049054013a1b77]*/ diff --git a/PC/winreg.c b/PC/winreg.c index b2725b857d0..a24d784c773 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -640,16 +640,25 @@ Py2Reg(PyObject *value, DWORD typ, BYTE **retDataBuf, DWORD *retDataSize) for (j = 0; j < i; j++) { PyObject *t; - wchar_t *wstr; Py_ssize_t len; t = PyList_GET_ITEM(value, j); if (!PyUnicode_Check(t)) return FALSE; - wstr = PyUnicode_AsUnicodeAndSize(t, &len); - if (wstr == NULL) +#if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS + len = PyUnicode_GetSize(t); + if (len < 0) return FALSE; - size += Py_SAFE_DOWNCAST((len + 1) * sizeof(wchar_t), + len++; +_Py_COMP_DIAG_POP +#else /* USE_UNICODE_WCHAR_CACHE */ + len = PyUnicode_AsWideChar(t, NULL, 0); + if (len < 0) + return FALSE; +#endif /* USE_UNICODE_WCHAR_CACHE */ + size += Py_SAFE_DOWNCAST(len * sizeof(wchar_t), size_t, DWORD); } @@ -665,17 +674,18 @@ Py2Reg(PyObject *value, DWORD typ, BYTE **retDataBuf, DWORD *retDataSize) for (j = 0; j < i; j++) { PyObject *t; - wchar_t *wstr; Py_ssize_t len; t = PyList_GET_ITEM(value, j); - wstr = PyUnicode_AsUnicodeAndSize(t, &len); - assert(wstr); - wcscpy(P, wstr); - P += (len + 1); + assert(size > 0); + len = PyUnicode_AsWideChar(t, P, size); + assert(len >= 0); + assert(len < size); + size -= (DWORD)len + 1; + P += len + 1; } /* And doubly-terminate the list... */ - *P = '\0'; + *P = L'\0'; break; } case REG_BINARY: @@ -1669,7 +1679,7 @@ winreg.SetValue type: DWORD An integer that specifies the type of the data. Currently this must be REG_SZ, meaning only strings are supported. - value: Py_UNICODE(zeroes=True) + value as value_obj: unicode A string that specifies the new value. / @@ -1688,30 +1698,51 @@ KEY_SET_VALUE access. static PyObject * winreg_SetValue_impl(PyObject *module, HKEY key, const Py_UNICODE *sub_key, - DWORD type, const Py_UNICODE *value, - Py_ssize_clean_t value_length) -/*[clinic end generated code: output=686bedb1cbb4367b input=2cd2adab79339c53]*/ + DWORD type, PyObject *value_obj) +/*[clinic end generated code: output=d4773dc9c372311a input=bf088494ae2d24fd]*/ { + Py_ssize_t value_length; long rc; if (type != REG_SZ) { PyErr_SetString(PyExc_TypeError, "type must be winreg.REG_SZ"); return NULL; } - if ((size_t)value_length >= PY_DWORD_MAX) { + +#if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS + const wchar_t *value = PyUnicode_AsUnicodeAndSize(value_obj, &value_length); +_Py_COMP_DIAG_POP +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *value = PyUnicode_AsWideCharString(value_obj, &value_length); +#endif /* USE_UNICODE_WCHAR_CACHE */ + if (value == NULL) { + return NULL; + } + if ((Py_ssize_t)(DWORD)value_length != value_length) { PyErr_SetString(PyExc_OverflowError, "value is too long"); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(value); +#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } if (PySys_Audit("winreg.SetValue", "nunu#", (Py_ssize_t)key, sub_key, (Py_ssize_t)type, value, value_length) < 0) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(value); +#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } Py_BEGIN_ALLOW_THREADS rc = RegSetValueW(key, sub_key, REG_SZ, value, (DWORD)(value_length + 1)); Py_END_ALLOW_THREADS +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(value); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (rc != ERROR_SUCCESS) return PyErr_SetFromWindowsErrWithFunction(rc, "RegSetValue"); Py_RETURN_NONE; diff --git a/Python/dynload_win.c b/Python/dynload_win.c index 8431c5b3b2f..5702ab2cd71 100644 --- a/Python/dynload_win.c +++ b/Python/dynload_win.c @@ -166,11 +166,14 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, { dl_funcptr p; char funcname[258], *import_python; - const wchar_t *wpathname; _Py_CheckPython3(); - wpathname = _PyUnicode_AsUnicode(pathname); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wpathname = _PyUnicode_AsUnicode(pathname); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *wpathname = PyUnicode_AsWideCharString(pathname, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpathname == NULL) return NULL; @@ -192,6 +195,9 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR); Py_END_ALLOW_THREADS +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpathname); +#endif /* USE_UNICODE_WCHAR_CACHE */ /* restore old error mode settings */ SetErrorMode(old_mode); diff --git a/Python/fileutils.c b/Python/fileutils.c index 2c86828ba98..50ef3c174ac 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1008,15 +1008,21 @@ _Py_stat(PyObject *path, struct stat *statbuf) #ifdef MS_WINDOWS int err; struct _stat wstatbuf; - const wchar_t *wpath; - wpath = _PyUnicode_AsUnicode(path); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wpath = _PyUnicode_AsUnicode(path); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpath == NULL) return -2; err = _wstat(wpath, &wstatbuf); if (!err) statbuf->st_mode = wstatbuf.st_mode; +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpath); +#endif /* USE_UNICODE_WCHAR_CACHE */ return err; #else int ret; @@ -1433,7 +1439,6 @@ _Py_fopen_obj(PyObject *path, const char *mode) FILE *f; int async_err = 0; #ifdef MS_WINDOWS - const wchar_t *wpath; wchar_t wmode[10]; int usize; @@ -1448,7 +1453,11 @@ _Py_fopen_obj(PyObject *path, const char *mode) Py_TYPE(path)); return NULL; } - wpath = _PyUnicode_AsUnicode(path); +#if USE_UNICODE_WCHAR_CACHE + const wchar_t *wpath = _PyUnicode_AsUnicode(path); +#else /* USE_UNICODE_WCHAR_CACHE */ + wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL); +#endif /* USE_UNICODE_WCHAR_CACHE */ if (wpath == NULL) return NULL; @@ -1456,6 +1465,9 @@ _Py_fopen_obj(PyObject *path, const char *mode) wmode, Py_ARRAY_LENGTH(wmode)); if (usize == 0) { PyErr_SetFromWindowsErr(0); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpath); +#endif /* USE_UNICODE_WCHAR_CACHE */ return NULL; } @@ -1465,6 +1477,9 @@ _Py_fopen_obj(PyObject *path, const char *mode) Py_END_ALLOW_THREADS } while (f == NULL && errno == EINTR && !(async_err = PyErr_CheckSignals())); +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(wpath); +#endif /* USE_UNICODE_WCHAR_CACHE */ #else PyObject *bytes; const char *path_bytes;