From 578ebc5d5fab2e5e0b87636361c6aeb8d2b287fa Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 1 Sep 2023 18:36:53 +0200 Subject: [PATCH] gh-108767: Replace ctype.h functions with pyctype.h functions (#108772) Replace locale dependent functions with Python "pyctype.h" locale independent functions: * Replace isalpha() with Py_ISALPHA(). * Replace isdigit() with Py_ISDIGIT(). * Replace isxdigit() with Py_ISXDIGIT(). * Replace tolower() with Py_TOLOWER(). Leave Modules/_sre/sre.c unchanged, it uses locale dependent functions on purpose. Include <ctype.h> explicitly in _decimal.c to get isascii(). --- Modules/_decimal/_decimal.c | 1 + Modules/_zoneinfo.c | 14 +++++++------- Modules/getaddrinfo.c | 4 ++-- Objects/bytesobject.c | 8 ++++---- Parser/tokenizer.c | 37 ++++++++++++++++++------------------- Python/pystrcmp.c | 8 ++++---- 6 files changed, 36 insertions(+), 36 deletions(-) diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index 585214cc45d..b49ea3cbb41 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -35,6 +35,7 @@ #include "complexobject.h" #include "mpdecimal.h" +#include <ctype.h> // isascii() #include #include "docstrings.h" diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index 09f5fd4b2ef..3f7b2851c5b 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -1701,7 +1701,7 @@ parse_tz_str(zoneinfo_state *state, PyObject *tz_str_obj, _tzrule *out) static int parse_uint(const char *const p, uint8_t *value) { - if (!isdigit(*p)) { + if (!Py_ISDIGIT(*p)) { return -1; } @@ -1732,7 +1732,7 @@ parse_abbr(const char *const p, PyObject **abbr) // '+' ) character, or the minus-sign ( '-' ) character. The std // and dst fields in this case shall not include the quoting // characters. 
- if (!isalpha(buff) && !isdigit(buff) && buff != '+' && + if (!Py_ISALPHA(buff) && !Py_ISDIGIT(buff) && buff != '+' && buff != '-') { return -1; } @@ -1748,7 +1748,7 @@ parse_abbr(const char *const p, PyObject **abbr) // In the unquoted form, all characters in these fields shall be // alphabetic characters from the portable character set in the // current locale. - while (isalpha(*ptr)) { + while (Py_ISALPHA(*ptr)) { ptr++; } str_end = ptr; @@ -1802,7 +1802,7 @@ parse_tz_delta(const char *const p, long *total_seconds) // The hour can be 1 or 2 numeric characters for (size_t i = 0; i < 2; ++i) { buff = *ptr; - if (!isdigit(buff)) { + if (!Py_ISDIGIT(buff)) { if (i == 0) { return -1; } @@ -1830,7 +1830,7 @@ parse_tz_delta(const char *const p, long *total_seconds) for (size_t j = 0; j < 2; ++j) { buff = *ptr; - if (!isdigit(buff)) { + if (!Py_ISDIGIT(buff)) { return -1; } *(outputs[i]) *= 10; @@ -1932,7 +1932,7 @@ parse_transition_rule(const char *const p, TransitionRuleType **out) } for (size_t i = 0; i < 3; ++i) { - if (!isdigit(*ptr)) { + if (!Py_ISDIGIT(*ptr)) { if (i == 0) { return -1; } @@ -2007,7 +2007,7 @@ parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, uint8_t buff = 0; for (size_t j = 0; j < 2; j++) { - if (!isdigit(*ptr)) { + if (!Py_ISDIGIT(*ptr)) { if (i == 0 && j > 0) { break; } diff --git a/Modules/getaddrinfo.c b/Modules/getaddrinfo.c index f1c28d7d931..6fb6062a652 100644 --- a/Modules/getaddrinfo.c +++ b/Modules/getaddrinfo.c @@ -51,7 +51,6 @@ #include #include #include -#include <ctype.h> #include #include "addrinfo.h" @@ -228,8 +227,9 @@ str_isnumber(const char *p) { unsigned char *q = (unsigned char *)p; while (*q) { - if (! 
isdigit(*q)) + if (!Py_ISDIGIT(*q)) { return NO; + } q++; } return YES; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index c3a31bec822..26227dd2511 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -722,11 +722,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, if (--fmtcnt >= 0) c = *fmt++; } - else if (c >= 0 && isdigit(c)) { + else if (c >= 0 && Py_ISDIGIT(c)) { width = c - '0'; while (--fmtcnt >= 0) { c = Py_CHARMASK(*fmt++); - if (!isdigit(c)) + if (!Py_ISDIGIT(c)) break; if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { PyErr_SetString( @@ -761,11 +761,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, if (--fmtcnt >= 0) c = *fmt++; } - else if (c >= 0 && isdigit(c)) { + else if (c >= 0 && Py_ISDIGIT(c)) { prec = c - '0'; while (--fmtcnt >= 0) { c = Py_CHARMASK(*fmt++); - if (!isdigit(c)) + if (!Py_ISDIGIT(c)) break; if (prec > (INT_MAX - ((int)c - '0')) / 10) { PyErr_SetString( diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index b10c9f1f8ea..6ec24895785 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -4,7 +4,6 @@ #include "Python.h" #include "pycore_call.h" // _PyObject_CallNoArgs() -#include <ctype.h> #include #include "tokenizer.h" @@ -158,7 +157,7 @@ get_normal_name(const char *s) /* for utf-8 and latin-1 */ else if (c == '_') buf[i] = '-'; else - buf[i] = tolower(c); + buf[i] = Py_TOLOWER(c); } buf[i] = '\0'; if (strcmp(buf, "utf-8") == 0 || @@ -1715,12 +1714,12 @@ tok_decimal_tail(struct tok_state *tok) while (1) { do { c = tok_nextc(tok); - } while (isdigit(c)); + } while (Py_ISDIGIT(c)); if (c != '_') { break; } c = tok_nextc(tok); - if (!isdigit(c)) { + if (!Py_ISDIGIT(c)) { tok_backup(tok, c); syntaxerror(tok, "invalid decimal literal"); return 0; @@ -2108,7 +2107,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t /* Period or number starting with period? 
*/ if (c == '.') { c = tok_nextc(tok); - if (isdigit(c)) { + if (Py_ISDIGIT(c)) { goto fraction; } else if (c == '.') { c = tok_nextc(tok); @@ -2131,7 +2130,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t } /* Number */ - if (isdigit(c)) { + if (Py_ISDIGIT(c)) { if (c == '0') { /* Hex, octal or binary -- maybe. */ c = tok_nextc(tok); @@ -2142,13 +2141,13 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t if (c == '_') { c = tok_nextc(tok); } - if (!isxdigit(c)) { + if (!Py_ISXDIGIT(c)) { tok_backup(tok, c); return MAKE_TOKEN(syntaxerror(tok, "invalid hexadecimal literal")); } do { c = tok_nextc(tok); - } while (isxdigit(c)); + } while (Py_ISXDIGIT(c)); } while (c == '_'); if (!verify_end_of_number(tok, c, "hexadecimal")) { return MAKE_TOKEN(ERRORTOKEN); @@ -2162,7 +2161,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t c = tok_nextc(tok); } if (c < '0' || c >= '8') { - if (isdigit(c)) { + if (Py_ISDIGIT(c)) { return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in octal literal", c)); } @@ -2175,7 +2174,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t c = tok_nextc(tok); } while ('0' <= c && c < '8'); } while (c == '_'); - if (isdigit(c)) { + if (Py_ISDIGIT(c)) { return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in octal literal", c)); } @@ -2191,7 +2190,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t c = tok_nextc(tok); } if (c != '0' && c != '1') { - if (isdigit(c)) { + if (Py_ISDIGIT(c)) { return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in binary literal", c)); } else { @@ -2203,7 +2202,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t c = tok_nextc(tok); } while (c == '0' || c == '1'); } while (c == '_'); - if (isdigit(c)) { + if (Py_ISDIGIT(c)) { return MAKE_TOKEN(syntaxerror(tok, "invalid digit '%c' in binary literal", c)); } if 
(!verify_end_of_number(tok, c, "binary")) { @@ -2217,7 +2216,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t while (1) { if (c == '_') { c = tok_nextc(tok); - if (!isdigit(c)) { + if (!Py_ISDIGIT(c)) { tok_backup(tok, c); return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal")); } @@ -2228,7 +2227,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t c = tok_nextc(tok); } char* zeros_end = tok->cur; - if (isdigit(c)) { + if (Py_ISDIGIT(c)) { nonzero = 1; c = tok_decimal_tail(tok); if (c == 0) { @@ -2272,7 +2271,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t c = tok_nextc(tok); fraction: /* Fraction */ - if (isdigit(c)) { + if (Py_ISDIGIT(c)) { c = tok_decimal_tail(tok); if (c == 0) { return MAKE_TOKEN(ERRORTOKEN); @@ -2287,11 +2286,11 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t c = tok_nextc(tok); if (c == '+' || c == '-') { c = tok_nextc(tok); - if (!isdigit(c)) { + if (!Py_ISDIGIT(c)) { tok_backup(tok, c); return MAKE_TOKEN(syntaxerror(tok, "invalid decimal literal")); } - } else if (!isdigit(c)) { + } else if (!Py_ISDIGIT(c)) { tok_backup(tok, c); if (!verify_end_of_number(tok, e, "decimal")) { return MAKE_TOKEN(ERRORTOKEN); @@ -2326,7 +2325,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t } f_string_quote: - if (((tolower(*tok->start) == 'f' || tolower(*tok->start) == 'r') && (c == '\'' || c == '"'))) { + if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r') && (c == '\'' || c == '"'))) { int quote = c; int quote_size = 1; /* 1 or 3 */ @@ -2377,7 +2376,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t switch (*tok->start) { case 'F': case 'f': - the_current_tok->f_string_raw = tolower(*(tok->start + 1)) == 'r'; + the_current_tok->f_string_raw = Py_TOLOWER(*(tok->start + 1)) == 'r'; break; case 'R': case 'r': diff --git 
a/Python/pystrcmp.c b/Python/pystrcmp.c index 9224ce4c706..9796cb013ad 100644 --- a/Python/pystrcmp.c +++ b/Python/pystrcmp.c @@ -11,11 +11,11 @@ PyOS_mystrnicmp(const char *s1, const char *s2, Py_ssize_t size) return 0; p1 = (const unsigned char *)s1; p2 = (const unsigned char *)s2; - for (; (--size > 0) && *p1 && *p2 && (tolower(*p1) == tolower(*p2)); + for (; (--size > 0) && *p1 && *p2 && (Py_TOLOWER(*p1) == Py_TOLOWER(*p2)); p1++, p2++) { ; } - return tolower(*p1) - tolower(*p2); + return Py_TOLOWER(*p1) - Py_TOLOWER(*p2); } int @@ -23,8 +23,8 @@ PyOS_mystricmp(const char *s1, const char *s2) { const unsigned char *p1 = (const unsigned char *)s1; const unsigned char *p2 = (const unsigned char *)s2; - for (; *p1 && *p2 && (tolower(*p1) == tolower(*p2)); p1++, p2++) { + for (; *p1 && *p2 && (Py_TOLOWER(*p1) == Py_TOLOWER(*p2)); p1++, p2++) { ; } - return (tolower(*p1) - tolower(*p2)); + return (Py_TOLOWER(*p1) - Py_TOLOWER(*p2)); }