From 8a73b57b9b5f6e36dd5a4c279f4d606d9e71a31f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Jul 2023 10:59:09 +0200 Subject: [PATCH] gh-106320: Remove _PyUnicode_TransformDecimalAndSpaceToASCII() (#106398) Remove private _PyUnicode_TransformDecimalAndSpaceToASCII() and other private _PyUnicode C API functions: move them to the internal C API (pycore_unicodeobject.h). No longer export most of these functions. Replace _testcapi.unicode_transformdecimalandspacetoascii() with _testinternalcapi._PyUnicode_TransformDecimalAndSpaceToASCII(). --- Include/cpython/unicodeobject.h | 37 ----------------------- Include/internal/pycore_unicodeobject.h | 39 +++++++++++++++++++++++-- Lib/test/test_capi/test_unicode.py | 8 +++-- Modules/_testcapi/unicode.c | 9 ------ Modules/_testinternalcapi.c | 12 ++++++++ Python/pystrhex.c | 1 + 6 files changed, 56 insertions(+), 50 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index dc8f6437c0e..e75b5e15494 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -167,10 +167,6 @@ typedef struct { } data; /* Canonical, smallest-form Unicode buffer */ } PyUnicodeObject; -PyAPI_FUNC(int) _PyUnicode_CheckConsistency( - PyObject *op, - int check_content); - #define _PyASCIIObject_CAST(op) \ (assert(PyUnicode_Check(op)), \ @@ -461,19 +457,6 @@ PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); #define _PyUnicode_AsString PyUnicode_AsUTF8 -/* --- Decimal Encoder ---------------------------------------------------- */ - -/* Coverts a Unicode object holding a decimal value to an ASCII string - for using in int, float and complex parsers. - Transforms code points that have decimal digit property to the - corresponding ASCII digit code points. Transforms spaces to ASCII. - Transforms code points starting from the first non-ASCII code point that - is neither a decimal digit nor a space to the end into '?'. 
*/ - -PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( - PyObject *unicode /* Unicode object */ - ); - /* === Characters Type APIs =============================================== */ /* These should not be used directly. Use the Py_UNICODE_IS* and @@ -623,23 +606,3 @@ static inline int Py_UNICODE_ISALNUM(Py_UCS4 ch) { || Py_UNICODE_ISDIGIT(ch) || Py_UNICODE_ISNUMERIC(ch)); } - - -/* === Misc functions ===================================================== */ - -PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int); - -/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/ -PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); - -/* Fast equality check when the inputs are known to be exact unicode types - and where the hash values are equal (i.e. a very probable match) */ -PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); - -/* Equality check. */ -PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *); - -PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *); -PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *); - -PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *); diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index dd20ac19d41..ad59c3e385f 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -11,8 +11,12 @@ extern "C" { #include "pycore_fileutils.h" // _Py_error_handler #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI -void _PyUnicode_ExactDealloc(PyObject *op); -Py_ssize_t _PyUnicode_InternedSize(void); +PyAPI_FUNC(int) _PyUnicode_CheckConsistency( + PyObject *op, + int check_content); + +extern void _PyUnicode_ExactDealloc(PyObject *op); +extern Py_ssize_t _PyUnicode_InternedSize(void); /* Get a copy of a Unicode string. 
*/ PyAPI_FUNC(PyObject*) _PyUnicode_Copy( @@ -277,6 +281,18 @@ extern PyObject* _PyUnicode_EncodeCharmap( PyObject *mapping, /* encoding mapping */ const char *errors); /* error handling */ +/* --- Decimal Encoder ---------------------------------------------------- */ + +/* Coverts a Unicode object holding a decimal value to an ASCII string + for using in int, float and complex parsers. + Transforms code points that have decimal digit property to the + corresponding ASCII digit code points. Transforms spaces to ASCII. + Transforms code points starting from the first non-ASCII code point that + is neither a decimal digit nor a space to the end into '?'. */ + +PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( + PyObject *unicode); /* Unicode object */ + /* --- Methods & Slots ---------------------------------------------------- */ extern PyObject* _PyUnicode_JoinArray( @@ -323,6 +339,25 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( PyObject *thousands_sep, Py_UCS4 *maxchar); +/* --- Misc functions ----------------------------------------------------- */ + +extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int); + +/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/ +PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); + +/* Fast equality check when the inputs are known to be exact unicode types + and where the hash values are equal (i.e. a very probable match) */ +extern int _PyUnicode_EQ(PyObject *, PyObject *); + +/* Equality check. 
*/ +PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *); + +extern int _PyUnicode_WideCharString_Converter(PyObject *, void *); +extern int _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *); + +PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *); + /* --- Runtime lifecycle -------------------------------------------------- */ extern void _PyUnicode_InitState(PyInterpreterState *); diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index ca914459a62..622ee899390 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -7,6 +7,10 @@ import _testcapi except ImportError: _testcapi = None +try: + import _testinternalcapi +except ImportError: + _testinternalcapi = None NULL = None @@ -913,10 +917,10 @@ def test_getdefaultencoding(self): self.assertEqual(getdefaultencoding(), b'utf-8') @support.cpython_only - @unittest.skipIf(_testcapi is None, 'need _testcapi module') + @unittest.skipIf(_testinternalcapi is None, 'need _testinternalcapi module') def test_transform_decimal_and_space(self): """Test _PyUnicode_TransformDecimalAndSpaceToASCII()""" - from _testcapi import unicode_transformdecimalandspacetoascii as transform_decimal + from _testinternalcapi import _PyUnicode_TransformDecimalAndSpaceToASCII as transform_decimal self.assertEqual(transform_decimal('123'), '123') diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index 9c2760c3f76..51d741a6b5f 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -660,14 +660,6 @@ unicode_getdefaultencoding(PyObject *self, PyObject *Py_UNUSED(ignored)) return PyBytes_FromString(s); } -/* Test _PyUnicode_TransformDecimalAndSpaceToASCII() */ -static PyObject * -unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg) -{ - NULLABLE(arg); - return _PyUnicode_TransformDecimalAndSpaceToASCII(arg); -} - /* Test PyUnicode_DecodeUTF8() */ static PyObject * unicode_decodeutf8(PyObject *self, 
PyObject *args) @@ -1544,7 +1536,6 @@ static PyMethodDef TestMethods[] = { {"unicode_decodeutf8", unicode_decodeutf8, METH_VARARGS}, {"unicode_decodeutf8stateful",unicode_decodeutf8stateful, METH_VARARGS}, {"unicode_getdefaultencoding",unicode_getdefaultencoding, METH_NOARGS}, - {"unicode_transformdecimalandspacetoascii", unicode_transformdecimalandspacetoascii, METH_O}, {"unicode_concat", unicode_concat, METH_VARARGS}, {"unicode_splitlines", unicode_splitlines, METH_VARARGS}, {"unicode_split", unicode_split, METH_VARARGS}, diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 4875ee7bed1..14f91e8da17 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1253,6 +1253,17 @@ test_tstate_capi(PyObject *self, PyObject *Py_UNUSED(args)) } +/* Test _PyUnicode_TransformDecimalAndSpaceToASCII() */ +static PyObject * +unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg) +{ + if (arg == Py_None) { + arg = NULL; + } + return _PyUnicode_TransformDecimalAndSpaceToASCII(arg); +} + + static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, @@ -1304,6 +1315,7 @@ static PyMethodDef module_functions[] = { {"_PyTime_ObjectToTimeval", test_pytime_object_to_timeval, METH_VARARGS}, {"_PyTraceMalloc_GetTraceback", tracemalloc_get_traceback, METH_VARARGS}, {"test_tstate_capi", test_tstate_capi, METH_NOARGS, NULL}, + {"_PyUnicode_TransformDecimalAndSpaceToASCII", unicode_transformdecimalandspacetoascii, METH_O}, {NULL, NULL} /* sentinel */ }; diff --git a/Python/pystrhex.c b/Python/pystrhex.c index f798256e18e..ce456b79f16 100644 --- a/Python/pystrhex.c +++ b/Python/pystrhex.c @@ -2,6 +2,7 @@ #include "Python.h" #include "pycore_strhex.h" // _Py_strhex_with_sep() +#include "pycore_unicodeobject.h" // _PyUnicode_CheckConsistency() #include // abs() static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,