diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 258a06f7217..75db433abd6 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -706,15 +706,15 @@ Glossary locale encoding On Unix, it is the encoding of the LC_CTYPE locale. It can be set with - ``locale.setlocale(locale.LC_CTYPE, new_locale)``. + :func:`locale.setlocale(locale.LC_CTYPE, new_locale) <locale.setlocale>`. - On Windows, it is the ANSI code page (ex: ``cp1252``). + On Windows, it is the ANSI code page (ex: ``"cp1252"``). - ``locale.getpreferredencoding(False)`` can be used to get the locale - encoding. + On Android and VxWorks, Python uses ``"utf-8"`` as the locale encoding. - Python uses the :term:`filesystem encoding and error handler` to convert - between Unicode filenames and bytes filenames. + ``locale.getencoding()`` can be used to get the locale encoding. + + See also the :term:`filesystem encoding and error handler`. list A built-in Python :term:`sequence`. Despite its name it is more akin diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 01e14a151d2..77a3e036841 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -327,17 +327,37 @@ The :mod:`locale` module defines the following exception and functions: is not necessary or desired, *do_setlocale* should be set to ``False``. On Android or if the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled, always - return ``'UTF-8'``, the :term:`locale encoding` and the *do_setlocale* + return ``'utf-8'``, the :term:`locale encoding` and the *do_setlocale* argument are ignored. The :ref:`Python preinitialization <c-preinit>` configures the LC_CTYPE locale. See also the :term:`filesystem encoding and error handler`. .. versionchanged:: 3.7 - The function now always returns ``UTF-8`` on Android or if the + The function now always returns ``"utf-8"`` on Android or if the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled. +.. function:: getencoding() + + Get the current :term:`locale encoding`: + + * On Android and VxWorks, return ``"utf-8"``.
+ * On Unix, return the encoding of the current :data:`LC_CTYPE` locale. + Return ``"utf-8"`` if ``nl_langinfo(CODESET)`` returns an empty string: + for example, if the current LC_CTYPE locale is not supported. + * On Windows, return the ANSI code page. + + The :ref:`Python preinitialization <c-preinit>` configures the LC_CTYPE + locale. See also the :term:`filesystem encoding and error handler`. + + This function is similar to + :func:`getpreferredencoding(False) <getpreferredencoding>` except this + function ignores the :ref:`Python UTF-8 Mode <utf8-mode>`. + + .. versionadded:: 3.11 + + .. function:: normalize(localename) Returns a normalized locale code for the given locale name. The returned locale diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 2758a268e95..870330c5796 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -285,6 +285,13 @@ inspect * Add :func:`inspect.ismethodwrapper` for checking if the type of an object is a :class:`~types.MethodWrapperType`. (Contributed by Hakan Çelik in :issue:`29418`.) +locale +------ + +* Add :func:`locale.getencoding` to get the current locale encoding. It is similar to + ``locale.getpreferredencoding(False)`` but ignores the + :ref:`Python UTF-8 Mode <utf8-mode>`. + math ---- diff --git a/Lib/locale.py b/Lib/locale.py index a710f27a807..496cc803c88 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -28,7 +28,7 @@ "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm", "str", "atof", "atoi", "format", "format_string", "currency", "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", - "LC_NUMERIC", "LC_ALL", "CHAR_MAX"] + "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"] def _strcoll(a,b): """ strcoll(string,string) -> int.
@@ -637,19 +637,17 @@ def resetlocale(category=LC_ALL): try: - from _locale import _get_locale_encoding + from _locale import getencoding except ImportError: - def _get_locale_encoding(): + def getencoding(): if hasattr(sys, 'getandroidapilevel'): # On Android langinfo.h and CODESET are missing, and UTF-8 is # always used in mbstowcs() and wcstombs(). - return 'UTF-8' - if sys.flags.utf8_mode: - return 'UTF-8' + return 'utf-8' encoding = getdefaultlocale()[1] if encoding is None: - # LANG not set, default conservatively to ASCII - encoding = 'ascii' + # LANG not set, default to UTF-8 + encoding = 'utf-8' return encoding try: @@ -657,17 +655,19 @@ def _get_locale_encoding(): except NameError: def getpreferredencoding(do_setlocale=True): """Return the charset that the user is likely using.""" - return _get_locale_encoding() + if sys.flags.utf8_mode: + return 'utf-8' + return getencoding() else: # On Unix, if CODESET is available, use that. def getpreferredencoding(do_setlocale=True): """Return the charset that the user is likely using, according to the system configuration.""" if sys.flags.utf8_mode: - return 'UTF-8' + return 'utf-8' if not do_setlocale: - return _get_locale_encoding() + return getencoding() old_loc = setlocale(LC_CTYPE) try: @@ -675,7 +675,7 @@ def getpreferredencoding(do_setlocale=True): setlocale(LC_CTYPE, "") except Error: pass - return _get_locale_encoding() + return getencoding() finally: setlocale(LC_CTYPE, old_loc) diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index 308e8e8aea6..ec29ba6d51b 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -203,12 +203,12 @@ def test_pyio_encoding(self): def test_locale_getpreferredencoding(self): code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))' out = self.get_output('-X', 'utf8', '-c', code) - self.assertEqual(out, 'UTF-8 UTF-8') + self.assertEqual(out, 'utf-8 utf-8') for loc in POSIX_LOCALES: with 
self.subTest(LC_ALL=loc): out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc) - self.assertEqual(out, 'UTF-8 UTF-8') + self.assertEqual(out, 'utf-8 utf-8') @unittest.skipIf(MS_WINDOWS, 'test specific to Unix') def test_cmd_line(self): @@ -276,7 +276,7 @@ def test_device_encoding(self): # In UTF-8 Mode, device_encoding(fd) returns "UTF-8" if fd is a TTY with open(filename, encoding="utf8") as fp: out = fp.read().rstrip() - self.assertEqual(out, 'True UTF-8') + self.assertEqual(out, 'True utf-8') if __name__ == "__main__": diff --git a/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst b/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst new file mode 100644 index 00000000000..0dd3d416c51 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst @@ -0,0 +1,3 @@ +Add :func:`locale.getencoding` to get the current locale encoding. +It is similar to ``locale.getpreferredencoding(False)`` but ignores the +:ref:`Python UTF-8 Mode <utf8-mode>`. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index d9d1c881418..0e207413257 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1145,7 +1145,13 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, } } if (encoding == NULL && self->encoding == NULL) { - self->encoding = _Py_GetLocaleEncodingObject(); + if (_PyRuntime.preconfig.utf8_mode) { + _Py_DECLARE_STR(utf_8, "utf-8"); + self->encoding = Py_NewRef(&_Py_STR(utf_8)); + } + else { + self->encoding = _Py_GetLocaleEncodingObject(); + } if (self->encoding == NULL) { goto error; } diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 564f5598edc..23c38e14d99 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -773,14 +773,14 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain, /*[clinic input] -_locale._get_locale_encoding +_locale.getencoding Get the current locale encoding.
[clinic start generated code]*/ static PyObject * -_locale__get_locale_encoding_impl(PyObject *module) -/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/ +_locale_getencoding_impl(PyObject *module) +/*[clinic end generated code: output=86b326b971872e46 input=6503d11e5958b360]*/ { return _Py_GetLocaleEncodingObject(); } @@ -811,7 +811,7 @@ static struct PyMethodDef PyLocale_Methods[] = { _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #endif #endif - _LOCALE__GET_LOCALE_ENCODING_METHODDEF + _LOCALE_GETENCODING_METHODDEF {NULL, NULL} }; diff --git a/Modules/clinic/_localemodule.c.h b/Modules/clinic/_localemodule.c.h index 703d034c32e..2958127e430 100644 --- a/Modules/clinic/_localemodule.c.h +++ b/Modules/clinic/_localemodule.c.h @@ -545,22 +545,22 @@ exit: #endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */ -PyDoc_STRVAR(_locale__get_locale_encoding__doc__, -"_get_locale_encoding($module, /)\n" +PyDoc_STRVAR(_locale_getencoding__doc__, +"getencoding($module, /)\n" "--\n" "\n" "Get the current locale encoding."); -#define _LOCALE__GET_LOCALE_ENCODING_METHODDEF \ - {"_get_locale_encoding", (PyCFunction)_locale__get_locale_encoding, METH_NOARGS, _locale__get_locale_encoding__doc__}, +#define _LOCALE_GETENCODING_METHODDEF \ + {"getencoding", (PyCFunction)_locale_getencoding, METH_NOARGS, _locale_getencoding__doc__}, static PyObject * -_locale__get_locale_encoding_impl(PyObject *module); +_locale_getencoding_impl(PyObject *module); static PyObject * -_locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored)) +_locale_getencoding(PyObject *module, PyObject *Py_UNUSED(ignored)) { - return _locale__get_locale_encoding_impl(module); + return _locale_getencoding_impl(module); } #ifndef _LOCALE_STRCOLL_METHODDEF @@ -602,4 +602,4 @@ _locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF 
#endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */ -/*[clinic end generated code: output=cd703c8a3a75fcf4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ea71e9b94bdaa47d input=a9049054013a1b77]*/ diff --git a/Python/fileutils.c b/Python/fileutils.c index d1d62dce5da..582c6bafd80 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -93,6 +93,10 @@ _Py_device_encoding(int fd) return PyUnicode_FromFormat("cp%u", (unsigned int)cp); #else + if (_PyRuntime.preconfig.utf8_mode) { + _Py_DECLARE_STR(utf_8, "utf-8"); + return Py_NewRef(&_Py_STR(utf_8)); + } return _Py_GetLocaleEncodingObject(); #endif } @@ -873,10 +877,10 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str, // Get the current locale encoding name: // -// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android) -// - Return "UTF-8" if the UTF-8 Mode is enabled +// - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android) +// - Ignore the Python UTF-8 Mode: callers that honor it check it themselves // - On Windows, return the ANSI code page (ex: "cp1250") -// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string. +// - Return "utf-8" if nl_langinfo(CODESET) returns an empty string. // - Otherwise, return nl_langinfo(CODESET). // // Return NULL on memory allocation failure. @@ -888,12 +892,8 @@ _Py_GetLocaleEncoding(void) #ifdef _Py_FORCE_UTF8_LOCALE // On Android langinfo.h and CODESET are missing, // and UTF-8 is always used in mbstowcs() and wcstombs(). - return _PyMem_RawWcsdup(L"UTF-8"); + return _PyMem_RawWcsdup(L"utf-8"); #else - const PyPreConfig *preconfig = &_PyRuntime.preconfig; - if (preconfig->utf8_mode) { - return _PyMem_RawWcsdup(L"UTF-8"); - } #ifdef MS_WINDOWS wchar_t encoding[23]; @@ -906,7 +906,7 @@ _Py_GetLocaleEncoding(void) if (!encoding || encoding[0] == '\0') { // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on // macOS if the LC_CTYPE locale is not supported.
- return _PyMem_RawWcsdup(L"UTF-8"); + return _PyMem_RawWcsdup(L"utf-8"); } wchar_t *wstr; diff --git a/Python/initconfig.c b/Python/initconfig.c index 47ebc64c847..d2e74f5878a 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -1779,7 +1779,13 @@ static PyStatus config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig, wchar_t **locale_encoding) { - wchar_t *encoding = _Py_GetLocaleEncoding(); + wchar_t *encoding; + if (preconfig->utf8_mode) { + encoding = _PyMem_RawWcsdup(L"utf-8"); + } + else { + encoding = _Py_GetLocaleEncoding(); + } if (encoding == NULL) { return _PyStatus_NO_MEMORY(); }