From c8367df74639a399a2195a2a07ff7ac74b88d426 Mon Sep 17 00:00:00 2001
From: Daniel Bertalan <dani@danielbertalan.dev>
Date: Mon, 4 Oct 2021 16:59:13 +0200
Subject: [PATCH] LibC: Implement wcrtomb

This function converts a single wide character into its multibyte
representation (UTF-8 in our case). It is called from libc++'s
`std::basic_ostream<wchar_t>::flush`, which gets called at program exit
from a global destructor in order to flush `std::wcout`.
---
 AK/UnicodeUtils.h                 | 25 +++++++++++++++++++++++++
 Tests/LibC/TestWchar.cpp          | 31 +++++++++++++++++++++++++++++++
 Userland/Libraries/LibC/wchar.cpp | 19 ++++++++++++++++---
 3 files changed, 72 insertions(+), 3 deletions(-)
diff --git a/AK/UnicodeUtils.h b/AK/UnicodeUtils.h
index e7211deaea..18ce9a7daa 100644
--- a/AK/UnicodeUtils.h
+++ b/AK/UnicodeUtils.h
@@ -17,4 +17,29 @@ constexpr bool is_unicode_control_code_point(u32 code_point)
 
 Optional<StringView> get_unicode_control_code_point_alias(u32);
 
+template<typename Callback> // Callback is invoked as callback(char), once per encoded byte, in output order.
+[[nodiscard]] constexpr int code_point_to_utf8(u32 code_point, Callback callback)
+{ // Encodes code_point as UTF-8; returns the number of bytes emitted (1-4), or -1 without calling callback when code_point > 0x10ffff.
+    if (code_point <= 0x7f) { // 1 byte: 0xxxxxxx
+        callback((char)code_point);
+        return 1;
+    } else if (code_point <= 0x07ff) { // 2 bytes: 110xxxxx 10xxxxxx
+        callback((char)(((code_point >> 6) & 0x1f) | 0xc0));
+        callback((char)(((code_point >> 0) & 0x3f) | 0x80));
+        return 2;
+    } else if (code_point <= 0xffff) { // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx (0xd800..0xdfff is not special-cased and is encoded here too)
+        callback((char)(((code_point >> 12) & 0x0f) | 0xe0));
+        callback((char)(((code_point >> 6) & 0x3f) | 0x80));
+        callback((char)(((code_point >> 0) & 0x3f) | 0x80));
+        return 3;
+    } else if (code_point <= 0x10ffff) { // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        callback((char)(((code_point >> 18) & 0x07) | 0xf0));
+        callback((char)(((code_point >> 12) & 0x3f) | 0x80));
+        callback((char)(((code_point >> 6) & 0x3f) | 0x80));
+        callback((char)(((code_point >> 0) & 0x3f) | 0x80));
+        return 4;
+    }
+    return -1; // Above 0x10ffff: nothing has been passed to callback.
+}
+
 }
diff --git a/Tests/LibC/TestWchar.cpp b/Tests/LibC/TestWchar.cpp
index ea3bcd5eda..f893969a62 100644
--- a/Tests/LibC/TestWchar.cpp
+++ b/Tests/LibC/TestWchar.cpp
@@ -285,3 +285,34 @@ TEST_CASE(mbrtowc)
     EXPECT_EQ(ret, -1ul);
     EXPECT_EQ(errno, EILSEQ);
 }
+
+TEST_CASE(wcrtomb)
+{
+    char buf[MB_LEN_MAX];
+
+    // Ensure that `wc` is ignored when buf is a nullptr.
+    size_t ret = wcrtomb(nullptr, L'a', nullptr);
+    EXPECT_EQ(ret, 1ul);
+
+    ret = wcrtomb(nullptr, L'\U0001F41E', nullptr); // Would need 4 bytes if `wc` were actually converted.
+    EXPECT_EQ(ret, 1ul);
+
+    // When the buffer is non-null, the multibyte representation is written into it.
+    ret = wcrtomb(buf, L'a', nullptr);
+    EXPECT_EQ(ret, 1ul);
+    EXPECT_EQ(memcmp(buf, "a", ret), 0);
+
+    ret = wcrtomb(buf, L'\U0001F41E', nullptr);
+    EXPECT_EQ(ret, 4ul);
+    EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e", ret), 0);
+
+    // When the wide character is invalid, -1 is returned and errno is set to EILSEQ.
+    errno = 0; // A stale EILSEQ (e.g. the one the mbrtowc test above ends with) must not satisfy the errno check.
+    ret = wcrtomb(buf, 0x110000, nullptr);
+    EXPECT_EQ(ret, (size_t)-1);
+    EXPECT_EQ(errno, EILSEQ);
+
+    // Replacement characters and conversion errors are not confused.
+    ret = wcrtomb(buf, L'\uFFFD', nullptr);
+    EXPECT_NE(ret, (size_t)-1);
+}
diff --git a/Userland/Libraries/LibC/wchar.cpp b/Userland/Libraries/LibC/wchar.cpp
index 37e6202b34..a0e1b361b6 100644
--- a/Userland/Libraries/LibC/wchar.cpp
+++ b/Userland/Libraries/LibC/wchar.cpp
@@ -6,6 +6,7 @@
 
 #include <AK/Assertions.h>
 #include <AK/Format.h>
+#include <AK/UnicodeUtils.h>
 #include <errno.h>
 #include <wchar.h>
 
@@ -292,10 +293,22 @@ size_t mbrlen(const char*, size_t, mbstate_t*)
     TODO();
 }
 
-size_t wcrtomb(char*, wchar_t, mbstate_t*)
+size_t wcrtomb(char* s, wchar_t wc, mbstate_t*) // Stores the UTF-8 encoding of wc at s and returns its length in bytes; the mbstate_t argument is not used.
 {
-    dbgln("FIXME: Implement wcrtomb()");
-    TODO();
+    if (s == nullptr) // With no output buffer, L'\0' is converted in place of wc, so the result is always 1.
+        wc = L'\0';
+
+    auto nwritten = AK::UnicodeUtils::code_point_to_utf8(wc, [&s](char byte) { // s is captured by reference so each call can advance it.
+        if (s != nullptr)
+            *s++ = byte;
+    });
+
+    if (nwritten < 0) { // A negative count means code_point_to_utf8 could not encode wc.
+        errno = EILSEQ;
+        return (size_t)-1;
+    } else {
+        return nwritten; // Byte count of the encoding; nothing was stored if s was null.
+    }
 }
 
 int wcscoll(const wchar_t* ws1, const wchar_t* ws2)