From 86242f9c18103afa7364ef40a19dd27cf3c26f9e Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Sun, 17 May 2020 20:03:03 +0200
Subject: [PATCH] AK: Add StringBuilder::append(Utf32View)

This encodes the incoming UTF-32 sequence as UTF-8. Values that are not
Unicode scalar values (UTF-16 surrogates and anything above U+10FFFF) are
replaced with U+FFFD so the output is always well-formed UTF-8.
---
 AK/StringBuilder.cpp | 38 ++++++++++++++++++++++++++++++++++++++
 AK/StringBuilder.h   |  1 +
 2 files changed, 39 insertions(+)

diff --git a/AK/StringBuilder.cpp b/AK/StringBuilder.cpp
index ffae4e76da..f69077c6ee 100644
--- a/AK/StringBuilder.cpp
+++ b/AK/StringBuilder.cpp
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include <AK/Utf32View.h>

 namespace AK {

@@ -112,4 +113,41 @@ void StringBuilder::clear()
     m_length = 0;
 }

+// Appends every code point of utf32_view to this builder, encoded as UTF-8.
+//
+// Code points up to U+10FFFF are written as 1 to 4 bytes. UTF-16 surrogates
+// (U+D800..U+DFFF) and values above U+10FFFF have no well-formed UTF-8
+// encoding, so U+FFFD REPLACEMENT CHARACTER (EF BF BD) is appended instead.
+void StringBuilder::append(const Utf32View& utf32_view)
+{
+    for (size_t i = 0; i < utf32_view.length(); ++i) {
+        auto codepoint = utf32_view.codepoints()[i];
+        if (codepoint <= 0x7f) {
+            // 1 byte: 0xxxxxxx
+            append((char)codepoint);
+        } else if (codepoint <= 0x07ff) {
+            // 2 bytes: 110xxxxx 10xxxxxx
+            append((char)(((codepoint >> 6) & 0x1f) | 0xc0));
+            append((char)(((codepoint >> 0) & 0x3f) | 0x80));
+        } else if ((codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff) {
+            // Not a Unicode scalar value: emitting it verbatim would produce
+            // ill-formed UTF-8, so substitute U+FFFD.
+            append((char)0xef);
+            append((char)0xbf);
+            append((char)0xbd);
+        } else if (codepoint <= 0xffff) {
+            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+            append((char)(((codepoint >> 12) & 0x0f) | 0xe0));
+            append((char)(((codepoint >> 6) & 0x3f) | 0x80));
+            append((char)(((codepoint >> 0) & 0x3f) | 0x80));
+        } else {
+            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+            append((char)(((codepoint >> 18) & 0x07) | 0xf0));
+            append((char)(((codepoint >> 12) & 0x3f) | 0x80));
+            append((char)(((codepoint >> 6) & 0x3f) | 0x80));
+            append((char)(((codepoint >> 0) & 0x3f) | 0x80));
+        }
+    }
+}
+
 }
diff --git a/AK/StringBuilder.h b/AK/StringBuilder.h
index fc627b230b..53665b86ab 100644
--- a/AK/StringBuilder.h
+++ b/AK/StringBuilder.h
@@ -40,6 +40,7 @@ public:
     ~StringBuilder() {}

     void append(const StringView&);
+    void append(const Utf32View&);
     void append(char);
     void append(const char*, size_t);
     void appendf(const char*, ...);