bpo-42208: Add _locale._get_locale_encoding() (GH-23052)

* Add a new _locale._get_locale_encoding() function to get the
  current locale encoding.
* Modify locale.getpreferredencoding() to use it.
* Remove the _bootlocale module.
This commit is contained in:
Victor Stinner 2020-10-31 01:32:11 +01:00 committed by GitHub
parent 710e826307
commit b62bdf71ea
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 86 additions and 103 deletions

View file

@ -1,46 +0,0 @@
"""A minimal subset of the locale module used at interpreter startup
(imported by the _io module), in order to reduce startup time.

Don't import directly from third-party code; use the `locale` module instead!
"""

import sys
import _locale

# Exactly one getpreferredencoding() is defined, chosen once at import time
# from the platform and from what the _locale extension module exposes.
if sys.platform.startswith("win"):
    def getpreferredencoding(do_setlocale=True):
        """Return 'UTF-8' in UTF-8 mode, otherwise the second item of
        _locale._getdefaultlocale().

        *do_setlocale* is accepted but not used by this variant.
        """
        if sys.flags.utf8_mode:
            return 'UTF-8'
        return _locale._getdefaultlocale()[1]
else:
    try:
        _locale.CODESET
    except AttributeError:
        if hasattr(sys, 'getandroidapilevel'):
            # On Android langinfo.h and CODESET are missing, and UTF-8 is
            # always used in mbstowcs() and wcstombs().
            def getpreferredencoding(do_setlocale=True):
                """Return 'UTF-8' unconditionally; *do_setlocale* is not
                used by this variant."""
                return 'UTF-8'
        else:
            def getpreferredencoding(do_setlocale=True):
                """Return 'UTF-8' in UTF-8 mode, otherwise defer to
                locale.getpreferredencoding(do_setlocale)."""
                if sys.flags.utf8_mode:
                    return 'UTF-8'
                # This path for legacy systems needs the more complex
                # getdefaultlocale() function, import the full locale module.
                import locale
                return locale.getpreferredencoding(do_setlocale)
    else:
        def getpreferredencoding(do_setlocale=True):
            """Return 'UTF-8' in UTF-8 mode, otherwise the value of
            nl_langinfo(CODESET).

            This variant never changes the locale itself, so callers must
            pass a false *do_setlocale* (checked with an assertion).
            """
            assert not do_setlocale
            if sys.flags.utf8_mode:
                return 'UTF-8'
            result = _locale.nl_langinfo(_locale.CODESET)
            if not result and sys.platform == 'darwin':
                # nl_langinfo can return an empty string
                # when the setting has an invalid value.
                # Default to UTF-8 in that case because
                # UTF-8 is the default charset on OSX and
                # returning nothing will crash the
                # interpreter.
                result = 'UTF-8'
            return result

View file

@ -619,53 +619,49 @@ def resetlocale(category=LC_ALL):
"""
_setlocale(category, _build_localename(getdefaultlocale()))
if sys.platform.startswith("win"):
# On Win32, this will return the ANSI code page
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using."""
if sys.flags.utf8_mode:
return 'UTF-8'
import _bootlocale
return _bootlocale.getpreferredencoding(False)
else:
# On Unix, if CODESET is available, use that.
try:
CODESET
except NameError:
try:
from _locale import _get_locale_encoding
except ImportError:
def _get_locale_encoding():
if hasattr(sys, 'getandroidapilevel'):
# On Android langinfo.h and CODESET are missing, and UTF-8 is
# always used in mbstowcs() and wcstombs().
def getpreferredencoding(do_setlocale = True):
return 'UTF-8'
else:
# Fall back to parsing environment variables :-(
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using,
by looking at environment variables."""
if sys.flags.utf8_mode:
return 'UTF-8'
res = getdefaultlocale()[1]
if res is None:
# LANG not set, default conservatively to ASCII
res = 'ascii'
return res
else:
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using,
according to the system configuration."""
if sys.flags.utf8_mode:
return 'UTF-8'
import _bootlocale
if do_setlocale:
oldloc = setlocale(LC_CTYPE)
try:
setlocale(LC_CTYPE, "")
except Error:
pass
result = _bootlocale.getpreferredencoding(False)
if do_setlocale:
setlocale(LC_CTYPE, oldloc)
return result
return 'UTF-8'
if sys.flags.utf8_mode:
return 'UTF-8'
encoding = getdefaultlocale()[1]
if encoding is None:
# LANG not set, default conservatively to ASCII
encoding = 'ascii'
return encoding
try:
    CODESET
except NameError:
    def getpreferredencoding(do_setlocale=True):
        """Return the charset that the user is likely using.

        *do_setlocale* is accepted but not used by this variant; the
        result comes straight from _get_locale_encoding().
        """
        return _get_locale_encoding()
else:
    # On Unix, if CODESET is available, use that.
    def getpreferredencoding(do_setlocale=True):
        """Return the charset that the user is likely using,
        according to the system configuration.

        In UTF-8 mode the result is always 'UTF-8'.  Otherwise, when
        *do_setlocale* is true, LC_CTYPE is temporarily set to "" while
        the encoding is queried and is restored afterwards.
        """
        if sys.flags.utf8_mode:
            return 'UTF-8'

        if not do_setlocale:
            return _get_locale_encoding()

        # Remember the current LC_CTYPE so it can be put back in ``finally``
        # regardless of how the query below ends.
        old_loc = setlocale(LC_CTYPE)
        try:
            try:
                setlocale(LC_CTYPE, "")
            except Error:
                # Best effort: if the locale cannot be switched, query the
                # encoding under the locale that is currently active.
                pass
            return _get_locale_encoding()
        finally:
            setlocale(LC_CTYPE, old_loc)

View file

@ -3,7 +3,7 @@
import mimetypes
import pathlib
import sys
import unittest
import unittest.mock
from test import support
from test.support import os_helper
@ -71,14 +71,14 @@ def test_read_mime_types(self):
# bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding.
# Not with locale encoding. _bootlocale has been imported because io.open(...)
# uses it.
with os_helper.temp_dir() as directory:
data = "application/no-mans-land Fran\u00E7ais"
file = pathlib.Path(directory, "sample.mimetype")
file.write_text(data, encoding='utf-8')
import _bootlocale
with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'):
mime_dict = mimetypes.read_mime_types(file)
eq(mime_dict[".Français"], "application/no-mans-land")
data = "application/no-mans-land Fran\u00E7ais"
filename = "filename"
fp = io.StringIO(data)
with unittest.mock.patch.object(mimetypes, 'open',
return_value=fp) as mock_open:
mime_dict = mimetypes.read_mime_types(filename)
mock_open.assert_called_with(filename, encoding='utf-8')
eq(mime_dict[".Français"], "application/no-mans-land")
def test_non_standard_types(self):
eq = self.assertEqual

View file

@ -768,9 +768,24 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain,
}
Py_RETURN_NONE;
}
#endif
#endif // HAVE_BIND_TEXTDOMAIN_CODESET
#endif // HAVE_LIBINTL_H
/*[clinic input]
_locale._get_locale_encoding
Get the current locale encoding.
[clinic start generated code]*/
static PyObject *
_locale__get_locale_encoding_impl(PyObject *module)
/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/
{
    /* Thin wrapper: the module argument is not used; the result is
       whatever _Py_GetLocaleEncoding() returns, passed through unchanged. */
    return _Py_GetLocaleEncoding();
}
#endif
static struct PyMethodDef PyLocale_Methods[] = {
_LOCALE_SETLOCALE_METHODDEF
@ -797,6 +812,7 @@ static struct PyMethodDef PyLocale_Methods[] = {
_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#endif
#endif
_LOCALE__GET_LOCALE_ENCODING_METHODDEF
{NULL, NULL}
};

View file

@ -545,6 +545,24 @@ exit:
#endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */
PyDoc_STRVAR(_locale__get_locale_encoding__doc__,
"_get_locale_encoding($module, /)\n"
"--\n"
"\n"
"Get the current locale encoding.");
#define _LOCALE__GET_LOCALE_ENCODING_METHODDEF \
{"_get_locale_encoding", (PyCFunction)_locale__get_locale_encoding, METH_NOARGS, _locale__get_locale_encoding__doc__},
static PyObject *
_locale__get_locale_encoding_impl(PyObject *module);
/* Entry point registered (METH_NOARGS) for _locale._get_locale_encoding():
   the second argument is unused and the call is forwarded to the _impl
   function with only the module object. */
static PyObject *
_locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored))
{
    return _locale__get_locale_encoding_impl(module);
}
#ifndef _LOCALE_STRCOLL_METHODDEF
#define _LOCALE_STRCOLL_METHODDEF
#endif /* !defined(_LOCALE_STRCOLL_METHODDEF) */
@ -584,4 +602,4 @@ exit:
#ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */
/*[clinic end generated code: output=fe944779cd572d8e input=a9049054013a1b77]*/
/*[clinic end generated code: output=cd703c8a3a75fcf4 input=a9049054013a1b77]*/

View file

@ -1572,7 +1572,6 @@
<Compile Include="zoneinfo\__init__.py" />
<Compile Include="zoneinfo\_tzpath.py" />
<Compile Include="zoneinfo\_zoneinfo.py" />
<Compile Include="_bootlocale.py" />
<Compile Include="_collections_abc.py" />
<Compile Include="_compat_pickle.py" />
<Compile Include="_compression.py" />