bpo-46659: Fix the MBCS codec alias on Windows (GH-31218)

This commit is contained in:
Victor Stinner 2022-02-22 22:04:07 +01:00 committed by GitHub
parent 8fb94893e4
commit ccbe8045fa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 11 deletions

View file

@ -152,7 +152,14 @@ def search_function(encoding):
# Return the registry entry
return entry
# Register the search_function in the Python codec registry
codecs.register(search_function)
if sys.platform == 'win32':
# bpo-671666, bpo-46668: If Python does not implement a codec for the
# current Windows ANSI code page, use the "mbcs" codec instead:
# WideCharToMultiByte() and MultiByteToWideChar() functions with CP_ACP.
# Python does not support custom code pages.
def _alias_mbcs(encoding):
try:
import _winapi
@ -164,8 +171,4 @@ def _alias_mbcs(encoding):
# Imports may fail while we are shutting down
pass
# It must be registered before search_function()
codecs.register(_alias_mbcs)
# Register the search_function in the Python codec registry
codecs.register(search_function)

View file

@ -3191,13 +3191,16 @@ def test_incremental(self):
self.assertEqual(decoded, ('abc', 3))
def test_mbcs_alias(self):
# On Windows, the encoding name must be the ANSI code page
encoding = locale.getpreferredencoding(False)
self.assertTrue(encoding.startswith('cp'), encoding)
# The encodings module creates an "mbcs" alias to the ANSI code page
codec = codecs.lookup(encoding)
self.assertEqual(codec.name, "mbcs")
# Check that looking up our 'default' codepage will return
# mbcs when we don't have a more specific one available
code_page = 99_999
name = f'cp{code_page}'
with mock.patch('_winapi.GetACP', return_value=code_page):
try:
codec = codecs.lookup(name)
self.assertEqual(codec.name, 'mbcs')
finally:
codecs.unregister(name)
@support.bigmemtest(size=2**31, memuse=7, dry_run=False)
def test_large_input(self, size):