AK: Add a method to find the byte offset of a code point

This commit is contained in:
Timothy Flynn 2023-01-22 09:24:12 -05:00 committed by Tim Flynn
parent 5e44b93af2
commit d50724956e
3 changed files with 58 additions and 0 deletions

View file

@ -275,6 +275,20 @@ ErrorOr<Vector<String>> String::split_limit(u32 separator, size_t limit, SplitBe
return result;
}
Optional<size_t> String::find_byte_offset(u32 code_point, size_t from_byte_offset) const
{
auto code_points = this->code_points();
if (from_byte_offset >= code_points.byte_length())
return {};
for (auto it = code_points.iterator_at_byte_offset(from_byte_offset); it != code_points.end(); ++it) {
if (*it == code_point)
return code_points.byte_offset_of(it);
}
return {};
}
bool String::operator==(String const& other) const
{
if (is_short_string())

View file

@ -127,6 +127,8 @@ public:
ErrorOr<Vector<String>> split_limit(u32 separator, size_t limit, SplitBehavior = SplitBehavior::Nothing) const;
ErrorOr<Vector<String>> split(u32 separator, SplitBehavior = SplitBehavior::Nothing) const;
Optional<size_t> find_byte_offset(u32 code_point, size_t from_byte_offset = 0) const;
[[nodiscard]] bool operator==(String const&) const;
[[nodiscard]] bool operator!=(String const& other) const { return !(*this == other); }

View file

@ -315,3 +315,45 @@ TEST_CASE(split)
EXPECT_EQ(parts[2], "ω"sv);
}
}
TEST_CASE(find_byte_offset)
{
{
String string {};
auto index = string.find_byte_offset(0);
EXPECT(!index.has_value());
}
{
auto string = MUST(String::from_utf8("foo"sv));
auto index1 = string.find_byte_offset('f');
EXPECT_EQ(index1, 0u);
auto index2 = string.find_byte_offset('o');
EXPECT_EQ(index2, 1u);
auto index3 = string.find_byte_offset('o', *index2 + 1);
EXPECT_EQ(index3, 2u);
auto index4 = string.find_byte_offset('b');
EXPECT(!index4.has_value());
}
{
auto string = MUST(String::from_utf8("ωΣωΣω"sv));
auto index1 = string.find_byte_offset(0x03C9U);
EXPECT_EQ(index1, 0u);
auto index2 = string.find_byte_offset(0x03A3u);
EXPECT_EQ(index2, 2u);
auto index3 = string.find_byte_offset(0x03C9U, 2);
EXPECT_EQ(index3, 4u);
auto index4 = string.find_byte_offset(0x03A3u, 4);
EXPECT_EQ(index4, 6u);
auto index5 = string.find_byte_offset(0x03C9U, 6);
EXPECT_EQ(index5, 8u);
}
}