bpo-36297: remove "unicode_internal" codec (GH-12342)

2024-09-04 15:56:13 +00:00 · 2019-03-18 15:44:11 +09:00 · 2019-03-18 15:44:11 +09:00 · 6a16b18224
parent 6fb544d8bc
commit 6a16b18224
12 changed files with 40 additions and 529 deletions
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@ -1316,16 +1316,10 @@ encodings.
 |                    |         | code actually uses UTF-8  |
 |                    |         | by default.               |
 +--------------------+---------+---------------------------+
-| unicode_internal   |         | Return the internal       |
-|                    |         | representation of the     |
-|                    |         | operand. Stateful codecs  |
-|                    |         | are not supported.        |
-|                    |         |                           |
-|                    |         | .. deprecated:: 3.3       |
-|                    |         |    This representation is |
-|                    |         |    obsoleted by           |
-|                    |         |    :pep:`393`.            |
-+--------------------+---------+---------------------------+
+
+.. versionchanged:: 3.8
+   The "unicode_internal" codec has been removed.
+

 .. _binary-transforms:

--- a/Doc/whatsnew/3.8.rst
+++ b/Doc/whatsnew/3.8.rst
@ -573,6 +573,9 @@ The following features and APIs have been removed from Python 3.8:
 * Removed the ``doctype()`` method of :class:`~xml.etree.ElementTree.XMLParser`.
  (Contributed by Serhiy Storchaka in :issue:`29209`.)

+* The "unicode_internal" codec has been removed.
+  (Contributed by Inada Naoki in :issue:`36297`.)
+

 Porting to Python 3.8
 =====================
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@ -896,15 +896,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
    Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
    ) Py_DEPRECATED(3.3);

-/* --- Unicode Internal Codec --------------------------------------------- */
-
-/* Only for internal use in _codecsmodule.c */
-PyObject *_PyUnicode_DecodeUnicodeInternal(
-    const char *string,
-    Py_ssize_t length,
-    const char *errors
-    );
-
 /* --- Latin-1 Codecs ----------------------------------------------------- */

 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
--- a/Lib/encodings/unicode_internal.py
+++ b/Lib/encodings/unicode_internal.py
@ -1,45 +0,0 @@
-""" Python 'unicode-internal' Codec
-
-
-Written by Marc-Andre Lemburg (mal@lemburg.com).
-
-(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
-
-"""
-import codecs
-
-### Codec APIs
-
-class Codec(codecs.Codec):
-
-    # Note: Binding these as C functions will result in the class not
-    # converting them to methods. This is intended.
-    encode = codecs.unicode_internal_encode
-    decode = codecs.unicode_internal_decode
-
-class IncrementalEncoder(codecs.IncrementalEncoder):
-    def encode(self, input, final=False):
-        return codecs.unicode_internal_encode(input, self.errors)[0]
-
-class IncrementalDecoder(codecs.IncrementalDecoder):
-    def decode(self, input, final=False):
-        return codecs.unicode_internal_decode(input, self.errors)[0]
-
-class StreamWriter(Codec,codecs.StreamWriter):
-    pass
-
-class StreamReader(Codec,codecs.StreamReader):
-    pass
-
-### encodings module API
-
-def getregentry():
-    return codecs.CodecInfo(
-        name='unicode-internal',
-        encode=Codec.encode,
-        decode=Codec.decode,
-        incrementalencoder=IncrementalEncoder,
-        incrementaldecoder=IncrementalDecoder,
-        streamwriter=StreamWriter,
-        streamreader=StreamReader,
-    )
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@ -211,42 +211,6 @@ def test_charmapencode(self):
        charmap[ord("?")] = "XYZ" # wrong type in mapping
        self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)

-    def test_decodeunicodeinternal(self):
-        with test.support.check_warnings(('unicode_internal codec has been '
-                                          'deprecated', DeprecationWarning)):
-            self.assertRaises(
-                UnicodeDecodeError,
-                b"\x00\x00\x00\x00\x00".decode,
-                "unicode-internal",
-            )
-            if len('\0'.encode('unicode-internal')) == 4:
-                def handler_unicodeinternal(exc):
-                    if not isinstance(exc, UnicodeDecodeError):
-                        raise TypeError("don't know how to handle %r" % exc)
-                    return ("\x01", 1)
-
-                self.assertEqual(
-                    b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
-                    "\u0000"
-                )
-
-                self.assertEqual(
-                    b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
-                    "\u0000\ufffd"
-                )
-
-                self.assertEqual(
-                    b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"),
-                    "\u0000\\x00"
-                )
-
-                codecs.register_error("test.hui", handler_unicodeinternal)
-
-                self.assertEqual(
-                    b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
-                    "\u0000\u0001\u0000"
-                )
-
    def test_callbacks(self):
        def handler1(exc):
            r = range(exc.start, exc.end)
@ -794,16 +758,13 @@ def test_badhandlerresults(self):
                ("ascii", b"\xff"),
                ("utf-8", b"\xff"),
                ("utf-7", b"+x-"),
-                ("unicode-internal", b"\x00"),
            ):
-                with test.support.check_warnings():
-                    # unicode-internal has been deprecated
-                    self.assertRaises(
-                        TypeError,
-                        bytes.decode,
-                        enc,
-                        "test.badhandler"
-                    )
+                self.assertRaises(
+                    TypeError,
+                    bytes.decode,
+                    enc,
+                    "test.badhandler"
+                )

    def test_lookup(self):
        self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
@ -1013,7 +974,6 @@ def test_mutatingdecodehandler(self):
            ("utf-32", b"\xff"),
            ("unicode-escape", b"\\u123g"),
            ("raw-unicode-escape", b"\\u123g"),
-            ("unicode-internal", b"\xff"),
        ]

        def replacing(exc):
@ -1024,11 +984,9 @@ def replacing(exc):
                raise TypeError("don't know how to handle %r" % exc)
        codecs.register_error("test.replacing", replacing)

-        with test.support.check_warnings():
-            # unicode-internal has been deprecated
-            for (encoding, data) in baddata:
-                with self.assertRaises(TypeError):
-                    data.decode(encoding, "test.replacing")
+        for (encoding, data) in baddata:
+            with self.assertRaises(TypeError):
+                data.decode(encoding, "test.replacing")

        def mutating(exc):
            if isinstance(exc, UnicodeDecodeError):
@ -1039,10 +997,8 @@ def mutating(exc):
        codecs.register_error("test.mutating", mutating)
        # If the decoder doesn't pick up the modified input the following
        # will lead to an endless loop
-        with test.support.check_warnings():
-            # unicode-internal has been deprecated
-            for (encoding, data) in baddata:
-                self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
+        for (encoding, data) in baddata:
+            self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")

    # issue32583
    def test_crashing_decode_handler(self):
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@ -1239,16 +1239,6 @@ def test_errors(self):
        self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8))


-class RecodingTest(unittest.TestCase):
-    def test_recoding(self):
-        f = io.BytesIO()
-        with codecs.EncodedFile(f, "unicode_internal", "utf-8") as f2:
-            f2.write("a")
-        # Python used to crash on this at exit because of a refcount
-        # bug in _codecsmodule.c
-
-        self.assertTrue(f.closed)
-
 # From RFC 3492
 punycode_testcases = [
    # A Arabic (Egyptian):
@ -1378,87 +1368,6 @@ def test_decode(self):
            self.assertEqual(uni, puny.decode("punycode"))


-class UnicodeInternalTest(unittest.TestCase):
-    @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
-    def test_bug1251300(self):
-        # Decoding with unicode_internal used to not correctly handle "code
-        # points" above 0x10ffff on UCS-4 builds.
-        ok = [
-            (b"\x00\x10\xff\xff", "\U0010ffff"),
-            (b"\x00\x00\x01\x01", "\U00000101"),
-            (b"", ""),
-        ]
-        not_ok = [
-            b"\x7f\xff\xff\xff",
-            b"\x80\x00\x00\x00",
-            b"\x81\x00\x00\x00",
-            b"\x00",
-            b"\x00\x00\x00\x00\x00",
-        ]
-        for internal, uni in ok:
-            if sys.byteorder == "little":
-                internal = bytes(reversed(internal))
-            with support.check_warnings():
-                self.assertEqual(uni, internal.decode("unicode_internal"))
-        for internal in not_ok:
-            if sys.byteorder == "little":
-                internal = bytes(reversed(internal))
-            with support.check_warnings(('unicode_internal codec has been '
-                                         'deprecated', DeprecationWarning)):
-                self.assertRaises(UnicodeDecodeError, internal.decode,
-                                  "unicode_internal")
-        if sys.byteorder == "little":
-            invalid = b"\x00\x00\x11\x00"
-            invalid_backslashreplace = r"\x00\x00\x11\x00"
-        else:
-            invalid = b"\x00\x11\x00\x00"
-            invalid_backslashreplace = r"\x00\x11\x00\x00"
-        with support.check_warnings():
-            self.assertRaises(UnicodeDecodeError,
-                              invalid.decode, "unicode_internal")
-        with support.check_warnings():
-            self.assertEqual(invalid.decode("unicode_internal", "replace"),
-                             '\ufffd')
-        with support.check_warnings():
-            self.assertEqual(invalid.decode("unicode_internal", "backslashreplace"),
-                             invalid_backslashreplace)
-
-    @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
-    def test_decode_error_attributes(self):
-        try:
-            with support.check_warnings(('unicode_internal codec has been '
-                                         'deprecated', DeprecationWarning)):
-                b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
-        except UnicodeDecodeError as ex:
-            self.assertEqual("unicode_internal", ex.encoding)
-            self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
-            self.assertEqual(4, ex.start)
-            self.assertEqual(8, ex.end)
-        else:
-            self.fail()
-
-    @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
-    def test_decode_callback(self):
-        codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
-        decoder = codecs.getdecoder("unicode_internal")
-        with support.check_warnings(('unicode_internal codec has been '
-                                     'deprecated', DeprecationWarning)):
-            ab = "ab".encode("unicode_internal").decode()
-            ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
-                                    "ascii"),
-                              "UnicodeInternalTest")
-        self.assertEqual(("ab", 12), ignored)
-
-    def test_encode_length(self):
-        with support.check_warnings(('unicode_internal codec has been '
-                                     'deprecated', DeprecationWarning)):
-            # Issue 3739
-            encoder = codecs.getencoder("unicode_internal")
-            self.assertEqual(encoder("a")[1], 1)
-            self.assertEqual(encoder("\xe9\u0142")[1], 2)
-
-            self.assertEqual(codecs.escape_encode(br'\x00')[1], 4)
-
 # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
 nameprep_tests = [
    # 3.1 Map to nothing.
@ -1949,7 +1858,6 @@ def test_basic(self):
    "shift_jisx0213",
    "tis_620",
    "unicode_escape",
-    "unicode_internal",
    "utf_16",
    "utf_16_be",
    "utf_16_le",
@ -1969,7 +1877,6 @@ def test_basic(self):
 # The following encodings don't work in stateful mode
 broken_unicode_with_stateful = [
    "punycode",
-    "unicode_internal"
 ]


@ -1984,12 +1891,10 @@ def test_basics(self):
                name = "latin_1"
            self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))

-            with support.check_warnings():
-                # unicode-internal has been deprecated
-                (b, size) = codecs.getencoder(encoding)(s)
-                self.assertEqual(size, len(s), "encoding=%r" % encoding)
-                (chars, size) = codecs.getdecoder(encoding)(b)
-                self.assertEqual(chars, s, "encoding=%r" % encoding)
+            (b, size) = codecs.getencoder(encoding)(s)
+            self.assertEqual(size, len(s), "encoding=%r" % encoding)
+            (chars, size) = codecs.getdecoder(encoding)(b)
+            self.assertEqual(chars, s, "encoding=%r" % encoding)

            if encoding not in broken_unicode_with_stateful:
                # check stream reader/writer
@ -2116,9 +2021,7 @@ def test_bad_decode_args(self):
    def test_bad_encode_args(self):
        for encoding in all_unicode_encodings:
            encoder = codecs.getencoder(encoding)
-            with support.check_warnings():
-                # unicode-internal has been deprecated
-                self.assertRaises(TypeError, encoder)
+            self.assertRaises(TypeError, encoder)

    def test_encoding_map_type_initialized(self):
        from encodings import cp1140
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@ -2104,12 +2104,8 @@ def test_codecs(self):
            u = chr(c)
            for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
                             'utf-16-be', 'raw_unicode_escape',
-                             'unicode_escape', 'unicode_internal'):
-                with warnings.catch_warnings():
-                    # unicode-internal has been deprecated
-                    warnings.simplefilter("ignore", DeprecationWarning)
-
-                    self.assertEqual(str(u.encode(encoding),encoding), u)
+                             'unicode_escape'):
+                self.assertEqual(str(u.encode(encoding),encoding), u)

        # Roundtrip safety for BMP (just the first 256 chars)
        for c in range(256):
@ -2125,13 +2121,9 @@ def test_codecs(self):

        # Roundtrip safety for non-BMP (just a few chars)
        with warnings.catch_warnings():
-            # unicode-internal has been deprecated
-            warnings.simplefilter("ignore", DeprecationWarning)
-
            u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
            for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
-                             'raw_unicode_escape',
-                             'unicode_escape', 'unicode_internal'):
+                             'raw_unicode_escape', 'unicode_escape'):
                self.assertEqual(str(u.encode(encoding),encoding), u)

        # UTF-8 must be roundtrip safe for all code points
@ -2349,22 +2341,22 @@ def test_getnewargs(self):
        self.assertEqual(args[0], text)
        self.assertEqual(len(args), 1)

+    @support.cpython_only
    def test_resize(self):
+        from _testcapi import getargs_u
        for length in range(1, 100, 7):
            # generate a fresh string (refcount=1)
            text = 'a' * length + 'b'

-            with support.check_warnings(('unicode_internal codec has been '
-                                         'deprecated', DeprecationWarning)):
-                # fill wstr internal field
-                abc = text.encode('unicode_internal')
-                self.assertEqual(abc.decode('unicode_internal'), text)
+            # fill wstr internal field
+            abc = getargs_u(text)
+            self.assertEqual(abc, text)

-                # resize text: wstr field must be cleared and then recomputed
-                text += 'c'
-                abcdef = text.encode('unicode_internal')
-                self.assertNotEqual(abc, abcdef)
-                self.assertEqual(abcdef.decode('unicode_internal'), text)
+            # resize text: wstr field must be cleared and then recomputed
+            text += 'c'
+            abcdef = getargs_u(text)
+            self.assertNotEqual(abc, abcdef)
+            self.assertEqual(abcdef, text)

    def test_compare(self):
        # Issue #17615
--- a/Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst
+++ b/Misc/NEWS.d/next/Library/2019-03-15-21-41-22.bpo-36297.Gz9ZfU.rst
@ -0,0 +1,2 @@
+The "unicode_internal" codec has been removed.  It had been deprecated since Python 3.3.
+Patch by Inada Naoki.
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@ -21,8 +21,7 @@
        (Unicode object, bytes consumed)

   These <encoding>s are available: utf_8, unicode_escape,
-   raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
-   mbcs (on win32).
+   raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).


 Written by Marc-Andre Lemburg (mal@lemburg.com).
@ -250,38 +249,6 @@ _codecs_escape_encode_impl(PyObject *module, PyObject *data,
 }

 /* --- Decoder ------------------------------------------------------------ */
-/*[clinic input]
-_codecs.unicode_internal_decode
-    obj: object
-    errors: str(accept={str, NoneType}) = NULL
-    /
-[clinic start generated code]*/
-
-static PyObject *
-_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj,
-                                     const char *errors)
-/*[clinic end generated code: output=edbfe175e09eff9a input=8d57930aeda170c6]*/
-{
-    if (PyUnicode_Check(obj)) {
-        if (PyUnicode_READY(obj) < 0)
-            return NULL;
-        Py_INCREF(obj);
-        return codec_tuple(obj, PyUnicode_GET_LENGTH(obj));
-    }
-    else {
-        Py_buffer view;
-        PyObject *result;
-        if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
-            return NULL;
-
-        result = codec_tuple(
-                _PyUnicode_DecodeUnicodeInternal(view.buf, view.len, errors),
-                view.len);
-        PyBuffer_Release(&view);
-        return result;
-    }
-}
-
 /*[clinic input]
 _codecs.utf_7_decode
    data: Py_buffer
@ -686,51 +653,6 @@ _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
    return codec_tuple(result, data->len);
 }

-/*[clinic input]
-_codecs.unicode_internal_encode
-    obj: object
-    errors: str(accept={str, NoneType}) = NULL
-    /
-[clinic start generated code]*/
-
-static PyObject *
-_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
-                                     const char *errors)
-/*[clinic end generated code: output=a72507dde4ea558f input=8628f0280cf5ba61]*/
-{
-    if (PyErr_WarnEx(PyExc_DeprecationWarning,
-                     "unicode_internal codec has been deprecated",
-                     1))
-        return NULL;
-
-    if (PyUnicode_Check(obj)) {
-        Py_UNICODE *u;
-        Py_ssize_t len, size;
-
-        if (PyUnicode_READY(obj) < 0)
-            return NULL;
-
-        u = PyUnicode_AsUnicodeAndSize(obj, &len);
-        if (u == NULL)
-            return NULL;
-        if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
-            return PyErr_NoMemory();
-        size = len * sizeof(Py_UNICODE);
-        return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
-                           PyUnicode_GET_LENGTH(obj));
-    }
-    else {
-        Py_buffer view;
-        PyObject *result;
-        if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) != 0)
-            return NULL;
-        result = codec_tuple(PyBytes_FromStringAndSize(view.buf, view.len),
-                             view.len);
-        PyBuffer_Release(&view);
-        return result;
-    }
-}
-
 /*[clinic input]
 _codecs.utf_7_encode
    str: unicode
@ -1095,8 +1017,6 @@ static PyMethodDef _codecs_functions[] = {
    _CODECS_UTF_32_EX_DECODE_METHODDEF
    _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
-    _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF
-    _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
    _CODECS_LATIN_1_ENCODE_METHODDEF
--- a/Modules/clinic/_codecsmodule.c.h
+++ b/Modules/clinic/_codecsmodule.c.h
@ -370,57 +370,6 @@ exit:
    return return_value;
 }

-PyDoc_STRVAR(_codecs_unicode_internal_decode__doc__,
-"unicode_internal_decode($module, obj, errors=None, /)\n"
-"--\n"
-"\n");
-
-#define _CODECS_UNICODE_INTERNAL_DECODE_METHODDEF    \
-    {"unicode_internal_decode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_decode, METH_FASTCALL, _codecs_unicode_internal_decode__doc__},
-
-static PyObject *
-_codecs_unicode_internal_decode_impl(PyObject *module, PyObject *obj,
-                                     const char *errors);
-
-static PyObject *
-_codecs_unicode_internal_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
-{
-    PyObject *return_value = NULL;
-    PyObject *obj;
-    const char *errors = NULL;
-
-    if (!_PyArg_CheckPositional("unicode_internal_decode", nargs, 1, 2)) {
-        goto exit;
-    }
-    obj = args[0];
-    if (nargs < 2) {
-        goto skip_optional;
-    }
-    if (args[1] == Py_None) {
-        errors = NULL;
-    }
-    else if (PyUnicode_Check(args[1])) {
-        Py_ssize_t errors_length;
-        errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length);
-        if (errors == NULL) {
-            goto exit;
-        }
-        if (strlen(errors) != (size_t)errors_length) {
-            PyErr_SetString(PyExc_ValueError, "embedded null character");
-            goto exit;
-        }
-    }
-    else {
-        _PyArg_BadArgument("unicode_internal_decode", 2, "str or None", args[1]);
-        goto exit;
-    }
-skip_optional:
-    return_value = _codecs_unicode_internal_decode_impl(module, obj, errors);
-
-exit:
-    return return_value;
-}
-
 PyDoc_STRVAR(_codecs_utf_7_decode__doc__,
 "utf_7_decode($module, data, errors=None, final=False, /)\n"
 "--\n"
@ -1853,57 +1802,6 @@ exit:
    return return_value;
 }

-PyDoc_STRVAR(_codecs_unicode_internal_encode__doc__,
-"unicode_internal_encode($module, obj, errors=None, /)\n"
-"--\n"
-"\n");
-
-#define _CODECS_UNICODE_INTERNAL_ENCODE_METHODDEF    \
-    {"unicode_internal_encode", (PyCFunction)(void(*)(void))_codecs_unicode_internal_encode, METH_FASTCALL, _codecs_unicode_internal_encode__doc__},
-
-static PyObject *
-_codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
-                                     const char *errors);
-
-static PyObject *
-_codecs_unicode_internal_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
-{
-    PyObject *return_value = NULL;
-    PyObject *obj;
-    const char *errors = NULL;
-
-    if (!_PyArg_CheckPositional("unicode_internal_encode", nargs, 1, 2)) {
-        goto exit;
-    }
-    obj = args[0];
-    if (nargs < 2) {
-        goto skip_optional;
-    }
-    if (args[1] == Py_None) {
-        errors = NULL;
-    }
-    else if (PyUnicode_Check(args[1])) {
-        Py_ssize_t errors_length;
-        errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length);
-        if (errors == NULL) {
-            goto exit;
-        }
-        if (strlen(errors) != (size_t)errors_length) {
-            PyErr_SetString(PyExc_ValueError, "embedded null character");
-            goto exit;
-        }
-    }
-    else {
-        _PyArg_BadArgument("unicode_internal_encode", 2, "str or None", args[1]);
-        goto exit;
-    }
-skip_optional:
-    return_value = _codecs_unicode_internal_encode_impl(module, obj, errors);
-
-exit:
-    return return_value;
-}
-
 PyDoc_STRVAR(_codecs_utf_7_encode__doc__,
 "utf_7_encode($module, str, errors=None, /)\n"
 "--\n"
@ -3024,4 +2922,4 @@ exit:
 #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
    #define _CODECS_CODE_PAGE_ENCODE_METHODDEF
 #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
-/*[clinic end generated code: output=02bd0f0cf9a28150 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=da3c47709a55a05e input=a9049054013a1b77]*/
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -6551,108 +6551,6 @@ PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
    return result;
 }

-/* --- Unicode Internal Codec ------------------------------------------- */
-
-PyObject *
-_PyUnicode_DecodeUnicodeInternal(const char *s,
-                                 Py_ssize_t size,
-                                 const char *errors)
-{
-    const char *starts = s;
-    Py_ssize_t startinpos;
-    Py_ssize_t endinpos;
-    _PyUnicodeWriter writer;
-    const char *end;
-    const char *reason;
-    PyObject *errorHandler = NULL;
-    PyObject *exc = NULL;
-
-    if (PyErr_WarnEx(PyExc_DeprecationWarning,
-                     "unicode_internal codec has been deprecated",
-                     1))
-        return NULL;
-
-    if (size < 0) {
-        PyErr_BadInternalCall();
-        return NULL;
-    }
-    if (size == 0)
-        _Py_RETURN_UNICODE_EMPTY();
-
-    _PyUnicodeWriter_Init(&writer);
-    if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) {
-        PyErr_NoMemory();
-        goto onError;
-    }
-    writer.min_length = (size + (Py_UNICODE_SIZE - 1)) / Py_UNICODE_SIZE;
-
-    end = s + size;
-    while (s < end) {
-        Py_UNICODE uch;
-        Py_UCS4 ch;
-        if (end - s < Py_UNICODE_SIZE) {
-            endinpos = end-starts;
-            reason = "truncated input";
-            goto error;
-        }
-        /* We copy the raw representation one byte at a time because the
-           pointer may be unaligned (see test_codeccallbacks). */
-        ((char *) &uch)[0] = s[0];
-        ((char *) &uch)[1] = s[1];
-#ifdef Py_UNICODE_WIDE
-        ((char *) &uch)[2] = s[2];
-        ((char *) &uch)[3] = s[3];
-#endif
-        ch = uch;
-#ifdef Py_UNICODE_WIDE
-        /* We have to sanity check the raw data, otherwise doom looms for
-           some malformed UCS-4 data. */
-        if (ch > 0x10ffff) {
-            endinpos = s - starts + Py_UNICODE_SIZE;
-            reason = "illegal code point (> 0x10FFFF)";
-            goto error;
-        }
-#endif
-        s += Py_UNICODE_SIZE;
-#ifndef Py_UNICODE_WIDE
-        if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
-        {
-            Py_UNICODE uch2;
-            ((char *) &uch2)[0] = s[0];
-            ((char *) &uch2)[1] = s[1];
-            if (Py_UNICODE_IS_LOW_SURROGATE(uch2))
-            {
-                ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2);
-                s += Py_UNICODE_SIZE;
-            }
-        }
-#endif
-
-        if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
-            goto onError;
-        continue;
-
-  error:
-        startinpos = s - starts;
-        if (unicode_decode_call_errorhandler_writer(
-                errors, &errorHandler,
-                "unicode_internal", reason,
-                &starts, &end, &startinpos, &endinpos, &exc, &s,
-                &writer))
-            goto onError;
-    }
-
-    Py_XDECREF(errorHandler);
-    Py_XDECREF(exc);
-    return _PyUnicodeWriter_Finish(&writer);
-
-  onError:
-    _PyUnicodeWriter_Dealloc(&writer);
-    Py_XDECREF(errorHandler);
-    Py_XDECREF(exc);
-    return NULL;
-}
-
 /* --- Latin-1 Codec ------------------------------------------------------ */

 PyObject *
--- a/PCbuild/lib.pyproj
+++ b/PCbuild/lib.pyproj
@ -392,7 +392,6 @@
    <Compile Include="encodings\tis_620.py" />
    <Compile Include="encodings\undefined.py" />
    <Compile Include="encodings\unicode_escape.py" />
-    <Compile Include="encodings\unicode_internal.py" />
    <Compile Include="encodings\utf_16.py" />
    <Compile Include="encodings\utf_16_be.py" />
    <Compile Include="encodings\utf_16_le.py" />