From c8367df74639a399a2195a2a07ff7ac74b88d426 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan
Date: Mon, 4 Oct 2021 16:59:13 +0200
Subject: [PATCH] LibC: Implement wcrtomb

This function converts a single wide character into its multibyte
representation (UTF-8 in our case). It is called from libc++'s
`std::basic_ostream::flush`, which gets called at program exit from a
global destructor in order to flush `std::wcout`.
---
 AK/UnicodeUtils.h                 | 25 +++++++++++++++++++++++++
 Tests/LibC/TestWchar.cpp          | 31 +++++++++++++++++++++++++++++++
 Userland/Libraries/LibC/wchar.cpp | 19 ++++++++++++++++---
 3 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/AK/UnicodeUtils.h b/AK/UnicodeUtils.h
index e7211deaea..18ce9a7daa 100644
--- a/AK/UnicodeUtils.h
+++ b/AK/UnicodeUtils.h
@@ -17,4 +17,29 @@ constexpr bool is_unicode_control_code_point(u32 code_point)
 
 Optional get_unicode_control_code_point_alias(u32);
 
+template<typename Callback>
+[[nodiscard]] constexpr int code_point_to_utf8(u32 code_point, Callback callback)
+{
+    if (code_point <= 0x7f) {
+        callback((char)code_point);
+        return 1;
+    } else if (code_point <= 0x07ff) {
+        callback((char)(((code_point >> 6) & 0x1f) | 0xc0));
+        callback((char)(((code_point >> 0) & 0x3f) | 0x80));
+        return 2;
+    } else if (code_point <= 0xffff) {
+        callback((char)(((code_point >> 12) & 0x0f) | 0xe0));
+        callback((char)(((code_point >> 6) & 0x3f) | 0x80));
+        callback((char)(((code_point >> 0) & 0x3f) | 0x80));
+        return 3;
+    } else if (code_point <= 0x10ffff) {
+        callback((char)(((code_point >> 18) & 0x07) | 0xf0));
+        callback((char)(((code_point >> 12) & 0x3f) | 0x80));
+        callback((char)(((code_point >> 6) & 0x3f) | 0x80));
+        callback((char)(((code_point >> 0) & 0x3f) | 0x80));
+        return 4;
+    }
+    return -1;
+}
+
 }
diff --git a/Tests/LibC/TestWchar.cpp b/Tests/LibC/TestWchar.cpp
index ea3bcd5eda..f893969a62 100644
--- a/Tests/LibC/TestWchar.cpp
+++ b/Tests/LibC/TestWchar.cpp
@@ -285,3 +285,34 @@ TEST_CASE(mbrtowc)
     EXPECT_EQ(ret, -1ul);
     EXPECT_EQ(errno, EILSEQ);
 }
+
+TEST_CASE(wcrtomb)
+{
+    char buf[MB_LEN_MAX];
+    size_t ret = 0;
+
+    // Ensure that `wc` is ignored when buf is a nullptr.
+    ret = wcrtomb(nullptr, L'a', nullptr);
+    EXPECT_EQ(ret, 1ul);
+
+    ret = wcrtomb(nullptr, L'\U0001F41E', nullptr);
+    EXPECT_EQ(ret, 1ul);
+
+    // When the buffer is non-null, the multibyte representation is written into it.
+    ret = wcrtomb(buf, L'a', nullptr);
+    EXPECT_EQ(ret, 1ul);
+    EXPECT_EQ(memcmp(buf, "a", ret), 0);
+
+    ret = wcrtomb(buf, L'\U0001F41E', nullptr);
+    EXPECT_EQ(ret, 4ul);
+    EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e", ret), 0);
+
+    // When the wide character is invalid, -1 is returned and errno is set to EILSEQ.
+    ret = wcrtomb(buf, 0x110000, nullptr);
+    EXPECT_EQ(ret, (size_t)-1);
+    EXPECT_EQ(errno, EILSEQ);
+
+    // Replacement characters and conversion errors are not confused.
+    ret = wcrtomb(buf, L'\uFFFD', nullptr);
+    EXPECT_NE(ret, (size_t)-1);
+}
diff --git a/Userland/Libraries/LibC/wchar.cpp b/Userland/Libraries/LibC/wchar.cpp
index 37e6202b34..a0e1b361b6 100644
--- a/Userland/Libraries/LibC/wchar.cpp
+++ b/Userland/Libraries/LibC/wchar.cpp
@@ -6,6 +6,7 @@
 
 #include
 #include
+#include <AK/UnicodeUtils.h>
 #include
 #include
 
@@ -292,10 +293,22 @@ size_t mbrlen(const char*, size_t, mbstate_t*)
     TODO();
 }
 
-size_t wcrtomb(char*, wchar_t, mbstate_t*)
+size_t wcrtomb(char* s, wchar_t wc, mbstate_t*)
 {
-    dbgln("FIXME: Implement wcrtomb()");
-    TODO();
+    if (s == nullptr)
+        wc = L'\0';
+
+    auto nwritten = AK::UnicodeUtils::code_point_to_utf8(wc, [&s](char byte) {
+        if (s != nullptr)
+            *s++ = byte;
+    });
+
+    if (nwritten < 0) {
+        errno = EILSEQ;
+        return (size_t)-1;
+    } else {
+        return nwritten;
+    }
 }
 
 int wcscoll(const wchar_t* ws1, const wchar_t* ws2)