From 420bdccf0b528152b29aa3a1cdfc1abf889655ac Mon Sep 17 00:00:00 2001
From: Tim Schumacher
Date: Sun, 10 Oct 2021 21:34:00 +0200
Subject: [PATCH] LibC: Implement mbsrtowcs

---
 Tests/LibC/TestWchar.cpp          | 51 +++++++++++++++++++++++++++++++
 Userland/Libraries/LibC/wchar.cpp | 47 ++++++++++++++++++++++++++++
 Userland/Libraries/LibC/wchar.h   |  1 +
 3 files changed, 99 insertions(+)

diff --git a/Tests/LibC/TestWchar.cpp b/Tests/LibC/TestWchar.cpp
index b96c432ab3..a158f33748 100644
--- a/Tests/LibC/TestWchar.cpp
+++ b/Tests/LibC/TestWchar.cpp
@@ -363,3 +363,54 @@ TEST_CASE(wcsrtombs)
     EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e\xf0\x9f\x90\x9e", 9), 0);
     EXPECT_EQ(src, nullptr);
 }
+
+TEST_CASE(mbsrtowcs)
+{
+    mbstate_t state = {};
+    wchar_t buf[4];
+    const char good_chars[] = "\xf0\x9f\x90\x9e\xf0\x9f\x90\x9e";
+    const char bad_chars[] = "\xf0\x9f\x90\x9e\xf0\xff\x90\x9e";
+    const char* src;
+    size_t ret = 0;
+
+    // Convert normal and valid multibyte sequences.
+    src = good_chars;
+    ret = mbsrtowcs(buf, &src, 3, &state);
+    EXPECT_EQ(ret, 2ul);
+    EXPECT_EQ(buf[0], L'\U0001F41E');
+    EXPECT_EQ(buf[1], L'\U0001F41E');
+    EXPECT_EQ(buf[2], L'\0');
+    EXPECT_EQ(src, nullptr);
+    EXPECT_NE(mbsinit(&state), 0);
+
+    // Stop on invalid multibyte sequences.
+    src = bad_chars;
+    ret = mbsrtowcs(buf, &src, 3, &state);
+    EXPECT_EQ(ret, (size_t)-1);
+    EXPECT_EQ(buf[0], L'\U0001F41E');
+    EXPECT_EQ(errno, EILSEQ);
+    EXPECT_EQ(src, bad_chars + 4);
+
+    // Valid sequence but not enough space.
+    src = good_chars;
+    ret = mbsrtowcs(buf, &src, 1, &state);
+    EXPECT_EQ(ret, 1ul);
+    EXPECT_EQ(buf[0], L'\U0001F41E');
+    EXPECT_EQ(src, good_chars + 4);
+
+    // No destination and too short length: only measures, src must stay untouched.
+    src = good_chars;
+    ret = mbsrtowcs(nullptr, &src, 1, &state);
+    EXPECT_EQ(ret, 2ul);
+    EXPECT_EQ(src, good_chars);
+    EXPECT_NE(mbsinit(&state), 0);
+
+    // Try a conversion using the internal anonymous state.
+    src = good_chars;
+    ret = mbsrtowcs(buf, &src, 3, nullptr);
+    EXPECT_EQ(ret, 2ul);
+    EXPECT_EQ(buf[0], L'\U0001F41E');
+    EXPECT_EQ(buf[1], L'\U0001F41E');
+    EXPECT_EQ(buf[2], L'\0');
+    EXPECT_EQ(src, nullptr);
+}
diff --git a/Userland/Libraries/LibC/wchar.cpp b/Userland/Libraries/LibC/wchar.cpp
index 7bed4b5518..dd7ca983cf 100644
--- a/Userland/Libraries/LibC/wchar.cpp
+++ b/Userland/Libraries/LibC/wchar.cpp
@@ -505,4 +505,51 @@ size_t wcsrtombs(char* dest, const wchar_t** src, size_t len, mbstate_t* ps)
         written += ret;
     }
 }
+
+// Converts the null-terminated multibyte string `*src` to wide characters, storing at most
+// `len` of them in `dst`. With a null `dst`, nothing is stored, `len` is ignored and `*src`
+// is left untouched, so the call only measures the string. Returns the number of wide
+// characters converted (excluding the terminating null), or (size_t)-1 with errno set to
+// EILSEQ on an invalid sequence. If `ps` is null, an internal static state is used.
+size_t mbsrtowcs(wchar_t* dst, const char** src, size_t len, mbstate_t* ps)
+{
+    static mbstate_t _anonymous_state = {};
+
+    if (ps == nullptr)
+        ps = &_anonymous_state;
+
+    // Walk the input with a local cursor: `*src` is only written back when there is a
+    // destination, so "measure with a null dst, then convert" can reuse the same pointer.
+    const char* cursor = *src;
+    size_t written = 0;
+    while (written < len || !dst) {
+        // Convert next multibyte to wchar.
+        size_t ret = mbrtowc(dst, cursor, MB_LEN_MAX, ps);
+
+        // Multibyte sequence is invalid, or still incomplete after MB_LEN_MAX bytes.
+        if (ret == (size_t)-1 || ret == (size_t)-2) {
+            if (dst)
+                *src = cursor;
+            errno = EILSEQ;
+            return (size_t)-1;
+        }
+
+        // Null byte has been reached.
+        if (ret == 0) {
+            if (dst)
+                *src = nullptr;
+            return written;
+        }
+
+        cursor += ret;
+        written += 1;
+        if (dst)
+            dst += 1;
+    }
+
+    // If we are here, we have written `len` wchars, but not reached the null byte.
+    // The loop only ends with a non-null `dst`, so updating `*src` is allowed.
+    *src = cursor;
+    return written;
+}
 }
diff --git a/Userland/Libraries/LibC/wchar.h b/Userland/Libraries/LibC/wchar.h
index b7f2008415..543540e00a 100644
--- a/Userland/Libraries/LibC/wchar.h
+++ b/Userland/Libraries/LibC/wchar.h
@@ -57,5 +57,6 @@ long double wcstold(const wchar_t*, wchar_t**);
 int swprintf(wchar_t*, size_t, const wchar_t*, ...);
 int wcwidth(wchar_t);
 size_t wcsrtombs(char*, const wchar_t**, size_t, mbstate_t*);
+size_t mbsrtowcs(wchar_t*, const char**, size_t, mbstate_t*);
 
 __END_DECLS