Merge pull request #33364 from YHNdnzj/utf8-modernization

basic/utf8: some modernizations
This commit is contained in:
Luca Boccassi 2024-06-16 22:33:08 +01:00 committed by GitHub
commit 65da79953b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 24 additions and 30 deletions

View file

@ -8,6 +8,7 @@
#include "alloc-util.h"
#include "macro.h"
#include "string-util-fundamental.h"
#include "utf8.h"
/* What is interpreted as whitespace? */
#define WHITESPACE " \t\n\r"
@ -234,6 +235,9 @@ static inline int strdup_to(char **ret, const char *src) {
}
bool string_is_safe(const char *p) _pure_;
/* Combined check: returns true only if ascii_is_valid() accepts p and string_is_safe() accepts it too.
 * string_is_safe() is not consulted when the ASCII check already failed. */
static inline bool string_is_safe_ascii(const char *p) {
        if (!ascii_is_valid(p))
                return false;

        return string_is_safe(p);
}
DISABLE_WARNING_STRINGOP_TRUNCATION;
static inline void strncpy_exact(char *buf, const char *src, size_t buf_len) {

View file

@ -130,24 +130,24 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newlin
return true;
}
char *utf8_is_valid_n(const char *str, size_t len_bytes) {
char* utf8_is_valid_n(const char *str, size_t len_bytes) {
/* Check if the string is composed of valid utf8 characters. If length len_bytes is given, stop after
* len_bytes. Otherwise, stop at NUL. */
assert(str);
for (const char *p = str; len_bytes != SIZE_MAX ? (size_t) (p - str) < len_bytes : *p != '\0'; ) {
for (size_t i = 0; len_bytes != SIZE_MAX ? i < len_bytes : str[i] != '\0'; ) {
int len;
if (_unlikely_(*p == '\0') && len_bytes != SIZE_MAX)
if (_unlikely_(str[i] == '\0'))
return NULL; /* embedded NUL */
len = utf8_encoded_valid_unichar(p,
len_bytes != SIZE_MAX ? len_bytes - (p - str) : SIZE_MAX);
len = utf8_encoded_valid_unichar(str + i,
len_bytes != SIZE_MAX ? len_bytes - i : SIZE_MAX);
if (_unlikely_(len < 0))
return NULL; /* invalid character */
p += len;
i += len;
}
return (char*) str;
@ -271,27 +271,14 @@ char *utf8_escape_non_printable_full(const char *str, size_t console_width, bool
return str_realloc(p);
}
char *ascii_is_valid(const char *str) {
/* Check whether the string consists of valid ASCII bytes,
* i.e values between 0 and 127, inclusive. */
char* ascii_is_valid_n(const char *str, size_t len) {
/* Check whether the string consists of valid ASCII bytes, i.e. values between 1 and 127, inclusive.
* Checks exactly len bytes and rejects any NUL among them; if len is SIZE_MAX, stops at the first NUL byte instead. */
assert(str);
for (const char *p = str; *p; p++)
if ((unsigned char) *p >= 128)
return NULL;
return (char*) str;
}
char *ascii_is_valid_n(const char *str, size_t len) {
/* Very similar to ascii_is_valid(), but checks exactly len
* bytes and rejects any NULs in that range. */
assert(str);
for (size_t i = 0; i < len; i++)
if ((unsigned char) str[i] >= 128 || str[i] == 0)
for (size_t i = 0; len != SIZE_MAX ? i < len : str[i] != '\0'; i++)
if ((unsigned char) str[i] >= 128 || str[i] == '\0')
return NULL;
return (char*) str;

View file

@ -14,12 +14,15 @@
bool unichar_is_valid(char32_t c);
char *utf8_is_valid_n(const char *str, size_t len_bytes) _pure_;
static inline char *utf8_is_valid(const char *s) {
return utf8_is_valid_n(s, SIZE_MAX);
char* utf8_is_valid_n(const char *str, size_t len_bytes) _pure_;
static inline char* utf8_is_valid(const char *str) {
return utf8_is_valid_n(str, SIZE_MAX);
}
char* ascii_is_valid_n(const char *str, size_t len) _pure_;
/* Convenience wrapper around ascii_is_valid_n(): passes SIZE_MAX as the length and hands back
 * that function's result unchanged. */
static inline char* ascii_is_valid(const char *str) {
        char *checked = ascii_is_valid_n(str, SIZE_MAX);

        return checked;
}
char *ascii_is_valid(const char *s) _pure_;
char *ascii_is_valid_n(const char *str, size_t len);
int utf8_to_ascii(const char *str, char replacement_char, char **ret);

View file

@ -271,7 +271,7 @@ int sd_dhcp_server_set_boot_server_name(sd_dhcp_server *server, const char *name
int sd_dhcp_server_set_boot_filename(sd_dhcp_server *server, const char *filename) {
assert_return(server, -EINVAL);
if (filename && (!string_is_safe(filename) || !ascii_is_valid(filename)))
if (filename && !string_is_safe_ascii(filename))
return -EINVAL;
return free_and_strdup(&server->boot_filename, filename);