bpo-42658: Use LCMapStringEx in ntpath.normcase to match OS behaviour for case-folding (GH-32010)

This commit is contained in:
AN Long 2022-06-07 07:47:27 +08:00 committed by GitHub
parent bb0b768946
commit 3256b178ed
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 137 additions and 8 deletions

View file

@ -23,6 +23,7 @@
import genericpath
from genericpath import *
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
"basename","dirname","commonprefix","getsize","getmtime",
"getatime","getctime", "islink","exists","lexists","isdir","isfile",
@ -41,14 +42,39 @@ def _get_bothseps(path):
# Other normalizations (such as optimizing '../' away) are not done
# (this is done by normpath).
def normcase(s):
"""Normalize case of pathname.
try:
from _winapi import (
LCMapStringEx as _LCMapStringEx,
LOCALE_NAME_INVARIANT as _LOCALE_NAME_INVARIANT,
LCMAP_LOWERCASE as _LCMAP_LOWERCASE)
Makes all characters lowercase and all slashes into backslashes."""
s = os.fspath(s)
if isinstance(s, bytes):
return s.replace(b'/', b'\\').lower()
else:
def normcase(s):
"""Normalize case of pathname.
Makes all characters lowercase and all slashes into backslashes.
"""
s = os.fspath(s)
if not s:
return s
if isinstance(s, bytes):
encoding = sys.getfilesystemencoding()
s = s.decode(encoding, 'surrogateescape').replace('/', '\\')
s = _LCMapStringEx(_LOCALE_NAME_INVARIANT,
_LCMAP_LOWERCASE, s)
return s.encode(encoding, 'surrogateescape')
else:
return _LCMapStringEx(_LOCALE_NAME_INVARIANT,
_LCMAP_LOWERCASE,
s.replace('/', '\\'))
except ImportError:
def normcase(s):
"""Normalize case of pathname.
Makes all characters lowercase and all slashes into backslashes.
"""
s = os.fspath(s)
if isinstance(s, bytes):
return os.fsencode(os.fsdecode(s).replace('/', '\\').lower())
return s.replace('/', '\\').lower()

View file

@ -852,6 +852,8 @@ def _check_function(self, func):
def test_path_normcase(self):
self._check_function(self.path.normcase)
if sys.platform == 'win32':
self.assertEqual(ntpath.normcase('\u03a9\u2126'), 'ωΩ')
def test_path_isabs(self):
self._check_function(self.path.isabs)

View file

@ -0,0 +1,3 @@
Support native Windows case-insensitive path comparisons by using
``LCMapStringEx`` instead of :func:`str.lower` in :func:`ntpath.normcase`.
Add ``LCMapStringEx`` to the :mod:`_winapi` module.

View file

@ -1512,6 +1512,50 @@ _winapi_PeekNamedPipe_impl(PyObject *module, HANDLE handle, int size)
}
}
/*[clinic input]
_winapi.LCMapStringEx
locale: LPCWSTR
flags: DWORD
src: LPCWSTR
[clinic start generated code]*/
static PyObject *
_winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags,
LPCWSTR src)
/*[clinic end generated code: output=cf4713d80e2b47c9 input=9fe26f95d5ab0001]*/
{
if (flags & (LCMAP_SORTHANDLE | LCMAP_HASH | LCMAP_BYTEREV |
LCMAP_SORTKEY)) {
return PyErr_Format(PyExc_ValueError, "unsupported flags");
}
int dest_size = LCMapStringEx(locale, flags, src, -1, NULL, 0,
NULL, NULL, 0);
if (dest_size == 0) {
return PyErr_SetFromWindowsErr(0);
}
wchar_t* dest = PyMem_NEW(wchar_t, dest_size);
if (dest == NULL) {
return PyErr_NoMemory();
}
int nmapped = LCMapStringEx(locale, flags, src, -1, dest, dest_size,
NULL, NULL, 0);
if (nmapped == 0) {
DWORD error = GetLastError();
PyMem_DEL(dest);
return PyErr_SetFromWindowsErr(error);
}
PyObject *ret = PyUnicode_FromWideChar(dest, dest_size - 1);
PyMem_DEL(dest);
return ret;
}
/*[clinic input]
_winapi.ReadFile
@ -2023,6 +2067,7 @@ static PyMethodDef winapi_functions[] = {
_WINAPI_OPENFILEMAPPING_METHODDEF
_WINAPI_OPENPROCESS_METHODDEF
_WINAPI_PEEKNAMEDPIPE_METHODDEF
_WINAPI_LCMAPSTRINGEX_METHODDEF
_WINAPI_READFILE_METHODDEF
_WINAPI_SETNAMEDPIPEHANDLESTATE_METHODDEF
_WINAPI_TERMINATEPROCESS_METHODDEF
@ -2160,6 +2205,22 @@ static int winapi_exec(PyObject *m)
WINAPI_CONSTANT(F_DWORD, FILE_TYPE_PIPE);
WINAPI_CONSTANT(F_DWORD, FILE_TYPE_REMOTE);
WINAPI_CONSTANT("u", LOCALE_NAME_INVARIANT);
WINAPI_CONSTANT(F_DWORD, LOCALE_NAME_MAX_LENGTH);
WINAPI_CONSTANT("u", LOCALE_NAME_SYSTEM_DEFAULT);
WINAPI_CONSTANT("u", LOCALE_NAME_USER_DEFAULT);
WINAPI_CONSTANT(F_DWORD, LCMAP_FULLWIDTH);
WINAPI_CONSTANT(F_DWORD, LCMAP_HALFWIDTH);
WINAPI_CONSTANT(F_DWORD, LCMAP_HIRAGANA);
WINAPI_CONSTANT(F_DWORD, LCMAP_KATAKANA);
WINAPI_CONSTANT(F_DWORD, LCMAP_LINGUISTIC_CASING);
WINAPI_CONSTANT(F_DWORD, LCMAP_LOWERCASE);
WINAPI_CONSTANT(F_DWORD, LCMAP_SIMPLIFIED_CHINESE);
WINAPI_CONSTANT(F_DWORD, LCMAP_TITLECASE);
WINAPI_CONSTANT(F_DWORD, LCMAP_TRADITIONAL_CHINESE);
WINAPI_CONSTANT(F_DWORD, LCMAP_UPPERCASE);
WINAPI_CONSTANT("i", NULL);
return 0;

View file

@ -820,6 +820,43 @@ exit:
return return_value;
}
PyDoc_STRVAR(_winapi_LCMapStringEx__doc__,
"LCMapStringEx($module, /, locale, flags, src)\n"
"--\n"
"\n");
#define _WINAPI_LCMAPSTRINGEX_METHODDEF \
{"LCMapStringEx", _PyCFunction_CAST(_winapi_LCMapStringEx), METH_FASTCALL|METH_KEYWORDS, _winapi_LCMapStringEx__doc__},
static PyObject *
_winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags,
LPCWSTR src);
static PyObject *
_winapi_LCMapStringEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
static const char * const _keywords[] = {"locale", "flags", "src", NULL};
static _PyArg_Parser _parser = {"O&kO&:LCMapStringEx", _keywords, 0};
LPCWSTR locale;
DWORD flags;
LPCWSTR src;
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
_PyUnicode_WideCharString_Converter, &locale, &flags, _PyUnicode_WideCharString_Converter, &src)) {
goto exit;
}
return_value = _winapi_LCMapStringEx_impl(module, locale, flags, src);
exit:
/* Cleanup for locale */
PyMem_Free((void *)locale);
/* Cleanup for src */
PyMem_Free((void *)src);
return return_value;
}
PyDoc_STRVAR(_winapi_ReadFile__doc__,
"ReadFile($module, /, handle, size, overlapped=False)\n"
"--\n"
@ -1164,4 +1201,4 @@ _winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args
exit:
return return_value;
}
/*[clinic end generated code: output=b007dde2e7f2fff8 input=a9049054013a1b77]*/
/*[clinic end generated code: output=6cdefec63a1d7f12 input=a9049054013a1b77]*/