bpo-47000: Make io.text_encoding() respect UTF-8 mode (GH-32003)

Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
This commit is contained in:
Inada Naoki 2022-04-04 11:46:57 +09:00 committed by GitHub
parent 6db2db91b9
commit 4216dce04b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 52 additions and 17 deletions

View file

@ -198,12 +198,13 @@ High-level Module Interface
This is a helper function for callables that use :func:`open` or
:class:`TextIOWrapper` and have an ``encoding=None`` parameter.
This function returns *encoding* if it is not ``None`` and ``"locale"`` if
*encoding* is ``None``.
This function returns *encoding* if it is not ``None``.
Otherwise, it returns ``"locale"`` or ``"utf-8"`` depending on
:ref:`UTF-8 Mode <utf8-mode>`.
This function emits an :class:`EncodingWarning` if
:data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding*
is None. *stacklevel* specifies where the warning is emitted.
is ``None``. *stacklevel* specifies where the warning is emitted.
For example::
def read_text(path, encoding=None):
@ -218,6 +219,10 @@ High-level Module Interface
.. versionadded:: 3.10
.. versionchanged:: 3.11
:func:`text_encoding` returns ``"utf-8"`` when UTF-8 mode is enabled and
*encoding* is ``None``.
.. exception:: BlockingIOError

View file

@ -48,6 +48,7 @@ struct _Py_global_strings {
STRUCT_FOR_STR(newline, "\n")
STRUCT_FOR_STR(open_br, "{")
STRUCT_FOR_STR(percent, "%")
STRUCT_FOR_STR(utf_8, "utf-8")
} literals;
struct {

View file

@ -672,6 +672,7 @@ extern "C" {
INIT_STR(newline, "\n"), \
INIT_STR(open_br, "{"), \
INIT_STR(percent, "%"), \
INIT_STR(utf_8, "utf-8"), \
}, \
.identifiers = { \
INIT_ID(False), \

View file

@ -44,8 +44,9 @@ def text_encoding(encoding, stacklevel=2):
"""
A helper function to choose the text encoding.
When encoding is not None, just return it.
Otherwise, return the default text encoding (i.e. "locale").
When encoding is not None, this function returns it.
Otherwise, this function returns the default text encoding
(i.e. "locale" or "utf-8" depends on UTF-8 mode).
This function emits an EncodingWarning if *encoding* is None and
sys.flags.warn_default_encoding is true.
@ -55,7 +56,10 @@ def text_encoding(encoding, stacklevel=2):
However, please consider using encoding="utf-8" for new APIs.
"""
if encoding is None:
encoding = "locale"
if sys.flags.utf8_mode:
encoding = "utf-8"
else:
encoding = "locale"
if sys.flags.warn_default_encoding:
import warnings
warnings.warn("'encoding' argument not specified.",

View file

@ -4289,6 +4289,17 @@ def test_check_encoding_warning(self):
self.assertTrue(
warnings[1].startswith(b"<string>:8: EncodingWarning: "))
def test_text_encoding(self):
    # PEP 597 / bpo-47000: with encoding=None, io.text_encoding() reports
    # "locale" when UTF-8 mode is off and "utf-8" when it is on.  Each case
    # runs in a fresh interpreter so the -X utf8 option takes effect.
    script = "import io; print(io.text_encoding(None))"
    cases = (
        ('utf8=0', b"locale"),
        ('utf8=1', b"utf-8"),
    )
    for xoption, expected in cases:
        result = assert_python_ok('-X', xoption, '-c', script)
        self.assertEqual(expected, result.out.strip())
@support.cpython_only
# Depending if OpenWrapper was already created or not, the warning is
# emitted or not. For example, the attribute is already created when this

View file

@ -161,7 +161,7 @@ def test_io(self):
filename = __file__
out = self.get_output('-c', code, filename, PYTHONUTF8='1')
self.assertEqual(out, 'UTF-8/strict')
self.assertEqual(out.lower(), 'utf-8/strict')
def _check_io_encoding(self, module, encoding=None, errors=None):
filename = __file__
@ -183,10 +183,10 @@ def _check_io_encoding(self, module, encoding=None, errors=None):
PYTHONUTF8='1')
if not encoding:
encoding = 'UTF-8'
encoding = 'utf-8'
if not errors:
errors = 'strict'
self.assertEqual(out, f'{encoding}/{errors}')
self.assertEqual(out.lower(), f'{encoding}/{errors}')
def check_io_encoding(self, module):
self._check_io_encoding(module, encoding="latin1")

View file

@ -0,0 +1 @@
Make :func:`io.text_encoding` return "utf-8" when UTF-8 mode is enabled.

View file

@ -457,8 +457,9 @@ _io.text_encoding
A helper function to choose the text encoding.
When encoding is not None, just return it.
Otherwise, return the default text encoding (i.e. "locale").
When encoding is not None, this function returns it.
Otherwise, this function returns the default text encoding
(i.e. "locale" or "utf-8" depends on UTF-8 mode).
This function emits an EncodingWarning if encoding is None and
sys.flags.warn_default_encoding is true.
@ -469,7 +470,7 @@ However, please consider using encoding="utf-8" for new APIs.
static PyObject *
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/
/*[clinic end generated code: output=91b2cfea6934cc0c input=4999aa8b3d90f3d4]*/
{
if (encoding == NULL || encoding == Py_None) {
PyInterpreterState *interp = _PyInterpreterState_GET();
@ -479,7 +480,14 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
return NULL;
}
}
return &_Py_ID(locale);
const PyPreConfig *preconfig = &_PyRuntime.preconfig;
if (preconfig->utf8_mode) {
_Py_DECLARE_STR(utf_8, "utf-8");
encoding = &_Py_STR(utf_8);
}
else {
encoding = &_Py_ID(locale);
}
}
Py_INCREF(encoding);
return encoding;

View file

@ -273,8 +273,9 @@ PyDoc_STRVAR(_io_text_encoding__doc__,
"\n"
"A helper function to choose the text encoding.\n"
"\n"
"When encoding is not None, just return it.\n"
"Otherwise, return the default text encoding (i.e. \"locale\").\n"
"When encoding is not None, this function returns it.\n"
"Otherwise, this function returns the default text encoding\n"
"(i.e. \"locale\" or \"utf-8\" depends on UTF-8 mode).\n"
"\n"
"This function emits an EncodingWarning if encoding is None and\n"
"sys.flags.warn_default_encoding is true.\n"
@ -354,4 +355,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
exit:
return return_value;
}
/*[clinic end generated code: output=6ea315343f6a94ba input=a9049054013a1b77]*/
/*[clinic end generated code: output=1a7fd7755c9a9609 input=a9049054013a1b77]*/

View file

@ -841,7 +841,10 @@ static PyObject *
sys_getdefaultencoding_impl(PyObject *module)
/*[clinic end generated code: output=256d19dfcc0711e6 input=d416856ddbef6909]*/
{
return PyUnicode_FromString(PyUnicode_GetDefaultEncoding());
_Py_DECLARE_STR(utf_8, "utf-8");
PyObject *ret = &_Py_STR(utf_8);
Py_INCREF(ret);
return ret;
}
/*[clinic input]