diff --git a/AK/String.cpp b/AK/String.cpp index 8aa3d7dbcd..c42b089105 100644 --- a/AK/String.cpp +++ b/AK/String.cpp @@ -4,6 +4,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include #include #include #include @@ -221,6 +222,44 @@ ErrorOr String::from_utf8(StringView view) return String { move(data) }; } +ErrorOr String::repeated(u32 code_point, size_t count) +{ + VERIFY(is_unicode(code_point)); + + Array code_point_as_utf8; + size_t i = 0; + + size_t code_point_byte_length = UnicodeUtils::code_point_to_utf8(code_point, [&](auto byte) { + code_point_as_utf8[i++] = static_cast(byte); + }); + + auto copy_to_buffer = [&](u8* buffer) { + if (code_point_byte_length == 1) { + memset(buffer, code_point_as_utf8[0], count); + return; + } + + for (i = 0; i < count; ++i) + memcpy(buffer + (i * code_point_byte_length), code_point_as_utf8.data(), code_point_byte_length); + }; + + auto total_byte_count = code_point_byte_length * count; + + if (total_byte_count <= MAX_SHORT_STRING_BYTE_COUNT) { + ShortString short_string; + copy_to_buffer(short_string.storage); + short_string.byte_count_and_short_string_flag = (total_byte_count << 1) | SHORT_STRING_FLAG; + + return String { short_string }; + } + + u8* buffer = nullptr; + auto new_string_data = TRY(Detail::StringData::create_uninitialized(total_byte_count, buffer)); + copy_to_buffer(buffer); + + return String { move(new_string_data) }; +} + StringView String::bytes_as_string_view() const { return StringView(bytes()); diff --git a/AK/String.h b/AK/String.h index 1a7c01b1a9..05ceb42f86 100644 --- a/AK/String.h +++ b/AK/String.h @@ -93,6 +93,9 @@ public: return String { short_string }; } + // Creates a new String with a single code point repeated N times. + static ErrorOr repeated(u32 code_point, size_t count); + // Creates a new String by case-transforming this String. Using these methods require linking LibUnicode into your application. ErrorOr to_lowercase(Optional const& locale = {}) const; ErrorOr to_uppercase(Optional const& locale = {}) const; diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp index 872a944667..86218d694d 100644 --- a/Tests/AK/TestString.cpp +++ b/Tests/AK/TestString.cpp @@ -357,3 +357,81 @@ TEST_CASE(find_byte_offset) EXPECT_EQ(index5, 8u); } } + +TEST_CASE(repeated) +{ + { + auto string1 = MUST(String::repeated('a', 0)); + EXPECT(string1.is_short_string()); + EXPECT(string1.is_empty()); + + auto string2 = MUST(String::repeated(0x03C9U, 0)); + EXPECT(string2.is_short_string()); + EXPECT(string2.is_empty()); + + auto string3 = MUST(String::repeated(0x10300, 0)); + EXPECT(string3.is_short_string()); + EXPECT(string3.is_empty()); + } + { + auto string1 = MUST(String::repeated('a', 1)); + EXPECT(string1.is_short_string()); + EXPECT_EQ(string1.bytes_as_string_view().length(), 1u); + EXPECT_EQ(string1, "a"sv); + + auto string2 = MUST(String::repeated(0x03C9U, 1)); + EXPECT(string2.is_short_string()); + EXPECT_EQ(string2.bytes_as_string_view().length(), 2u); + EXPECT_EQ(string2, "ω"sv); + + auto string3 = MUST(String::repeated(0x10300, 1)); +#ifdef AK_ARCH_64_BIT + EXPECT(string3.is_short_string()); +#else + EXPECT(!string3.is_short_string()); +#endif + EXPECT_EQ(string3.bytes_as_string_view().length(), 4u); + EXPECT_EQ(string3, "𐌀"sv); + } + { + auto string1 = MUST(String::repeated('a', 3)); + EXPECT(string1.is_short_string()); + EXPECT_EQ(string1.bytes_as_string_view().length(), 3u); + EXPECT_EQ(string1, "aaa"sv); + + auto string2 = MUST(String::repeated(0x03C9U, 3)); +#ifdef AK_ARCH_64_BIT + EXPECT(string2.is_short_string()); +#else + EXPECT(!string2.is_short_string()); +#endif + EXPECT_EQ(string2.bytes_as_string_view().length(), 6u); + EXPECT_EQ(string2, "ωωω"sv); + + auto string3 = MUST(String::repeated(0x10300, 3)); + EXPECT(!string3.is_short_string()); + EXPECT_EQ(string3.bytes_as_string_view().length(), 12u); + EXPECT_EQ(string3, "𐌀𐌀𐌀"sv); + } + { + auto string1 = MUST(String::repeated('a', 10)); + EXPECT(!string1.is_short_string()); + EXPECT_EQ(string1.bytes_as_string_view().length(), 10u); + EXPECT_EQ(string1, "aaaaaaaaaa"sv); + + auto string2 = MUST(String::repeated(0x03C9U, 10)); + EXPECT(!string2.is_short_string()); + EXPECT_EQ(string2.bytes_as_string_view().length(), 20u); + EXPECT_EQ(string2, "ωωωωωωωωωω"sv); + + auto string3 = MUST(String::repeated(0x10300, 10)); + EXPECT(!string3.is_short_string()); + EXPECT_EQ(string3.bytes_as_string_view().length(), 40u); + EXPECT_EQ(string3, "𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀𐌀"sv); + } + + EXPECT_CRASH("Creating a string from an invalid code point", [] { + (void)String::repeated(0xffffffff, 1); + return Test::Crash::Failure::DidNotCrash; + }); +}