AK: Make String::{starts,ends}_with(code_point) handle non-ASCII

We currently pass the code point to StringView::{starts,ends}_with,
which accepts only a single char and therefore cannot handle non-ASCII
code points.
This commit is contained in:
Timothy Flynn 2023-03-08 08:56:02 -05:00 committed by Linus Groh
parent f61e65a609
commit f882581e91
3 changed files with 98 additions and 6 deletions

View file

@ -496,7 +496,10 @@ bool String::contains(char needle, CaseSensitivity case_sensitivity) const
// Returns true if the first code point of this string equals `code_point`.
// An empty string never starts with any code point, so it yields false.
bool String::starts_with(u32 code_point) const
{
    // Compare decoded code points instead of forwarding to the StringView
    // overload: that one accepts a single char and therefore cannot match
    // non-ASCII code points.
    if (is_empty())
        return false;

    return *code_points().begin() == code_point;
}
bool String::starts_with_bytes(StringView bytes) const
@ -506,7 +509,14 @@ bool String::starts_with_bytes(StringView bytes) const
// Returns true if the last code point of this string equals `code_point`.
// An empty string never ends with any code point, so it yields false.
bool String::ends_with(u32 code_point) const
{
    // Compare decoded code points instead of forwarding to the StringView
    // overload: that one accepts a single char and therefore cannot match
    // non-ASCII code points.
    if (is_empty())
        return false;

    // Walk the code point view once, remembering the most recent value seen;
    // after the loop this holds the final code point of the string.
    u32 last_code_point = 0;
    for (u32 current : code_points())
        last_code_point = current;

    return last_code_point == code_point;
}
bool String::ends_with_bytes(StringView bytes) const

View file

@ -111,11 +111,11 @@ public:
// Compare this String against another string with caseless matching. Using this method requires linking LibUnicode into your application.
ErrorOr<bool> equals_ignoring_case(String const&) const;
// Checks whether the first code point of this string equals the given code point.
[[nodiscard]] bool starts_with(u32 code_point) const;
// Checks whether this string's bytes begin with the given byte sequence.
[[nodiscard]] bool starts_with_bytes(StringView) const;
// Checks whether the last code point of this string equals the given code point.
[[nodiscard]] bool ends_with(u32 code_point) const;
// Checks whether this string's bytes end with the given byte sequence.
[[nodiscard]] bool ends_with_bytes(StringView) const;
// Creates a substring with a deep copy of the specified data window.
ErrorOr<String> substring_from_byte_offset(size_t start, size_t byte_count) const;

View file

@ -711,3 +711,85 @@ TEST_CASE(trim)
EXPECT(result.is_empty());
}
}
// Covers String::starts_with_bytes() and String::starts_with() for an empty
// string, ASCII strings, and a string made of two 4-byte UTF-8 code points.
TEST_CASE(starts_with)
{
    // Empty string: only the empty byte sequence is a prefix.
    auto const empty = String {};
    EXPECT(empty.starts_with_bytes({}));
    EXPECT(!empty.starts_with_bytes(" "sv));
    EXPECT(!empty.starts_with(0));

    // One ASCII code point.
    auto const single = "a"_short_string;
    EXPECT(single.starts_with_bytes({}));
    EXPECT(single.starts_with_bytes("a"sv));
    EXPECT(!single.starts_with_bytes("b"sv));
    EXPECT(!single.starts_with_bytes("ab"sv));
    EXPECT(single.starts_with(0x0061));
    EXPECT(!single.starts_with(0x0062));

    // Several ASCII code points.
    auto const triple = "abc"_short_string;
    EXPECT(triple.starts_with_bytes({}));
    EXPECT(triple.starts_with_bytes("a"sv));
    EXPECT(triple.starts_with_bytes("ab"sv));
    EXPECT(triple.starts_with_bytes("abc"sv));
    EXPECT(!triple.starts_with_bytes("b"sv));
    EXPECT(!triple.starts_with_bytes("bc"sv));
    EXPECT(triple.starts_with(0x0061));
    EXPECT(!triple.starts_with(0x0062));
    EXPECT(!triple.starts_with(0x0063));

    // Two non-ASCII code points: U+1F600 followed by U+1F643.
    // Byte-wise prefixes may end in the middle of a code point.
    auto const faces = MUST("😀🙃"_string);
    EXPECT(faces.starts_with_bytes("\xF0"sv));
    EXPECT(faces.starts_with_bytes("\xF0\x9F"sv));
    EXPECT(faces.starts_with_bytes("\xF0\x9F\x98"sv));
    EXPECT(faces.starts_with_bytes("\xF0\x9F\x98\x80"sv));
    EXPECT(faces.starts_with_bytes("\xF0\x9F\x98\x80\xF0"sv));
    EXPECT(faces.starts_with_bytes("\xF0\x9F\x98\x80\xF0\x9F"sv));
    EXPECT(faces.starts_with_bytes("\xF0\x9F\x98\x80\xF0\x9F\x99"sv));
    EXPECT(faces.starts_with_bytes("\xF0\x9F\x98\x80\xF0\x9F\x99\x83"sv));
    EXPECT(!faces.starts_with_bytes("a"sv));
    EXPECT(!faces.starts_with_bytes("🙃"sv));
    EXPECT(faces.starts_with(0x1F600));
    EXPECT(!faces.starts_with(0x1F643));
}
// Covers String::ends_with_bytes() and String::ends_with() for an empty
// string, ASCII strings, and a string made of two 4-byte UTF-8 code points.
TEST_CASE(ends_with)
{
    // Empty string: only the empty byte sequence is a suffix.
    auto const empty = String {};
    EXPECT(empty.ends_with_bytes({}));
    EXPECT(!empty.ends_with_bytes(" "sv));
    EXPECT(!empty.ends_with(0));

    // One ASCII code point.
    auto const single = "a"_short_string;
    EXPECT(single.ends_with_bytes({}));
    EXPECT(single.ends_with_bytes("a"sv));
    EXPECT(!single.ends_with_bytes("b"sv));
    EXPECT(!single.ends_with_bytes("ba"sv));
    EXPECT(single.ends_with(0x0061));
    EXPECT(!single.ends_with(0x0062));

    // Several ASCII code points.
    auto const triple = "abc"_short_string;
    EXPECT(triple.ends_with_bytes({}));
    EXPECT(triple.ends_with_bytes("c"sv));
    EXPECT(triple.ends_with_bytes("bc"sv));
    EXPECT(triple.ends_with_bytes("abc"sv));
    EXPECT(!triple.ends_with_bytes("b"sv));
    EXPECT(!triple.ends_with_bytes("ab"sv));
    EXPECT(triple.ends_with(0x0063));
    EXPECT(!triple.ends_with(0x0062));
    EXPECT(!triple.ends_with(0x0061));

    // Two non-ASCII code points: U+1F600 followed by U+1F643.
    // Byte-wise suffixes may begin in the middle of a code point.
    auto const faces = MUST("😀🙃"_string);
    EXPECT(faces.ends_with_bytes("\x83"sv));
    EXPECT(faces.ends_with_bytes("\x99\x83"sv));
    EXPECT(faces.ends_with_bytes("\x9F\x99\x83"sv));
    EXPECT(faces.ends_with_bytes("\xF0\x9F\x99\x83"sv));
    EXPECT(faces.ends_with_bytes("\x80\xF0\x9F\x99\x83"sv));
    EXPECT(faces.ends_with_bytes("\x98\x80\xF0\x9F\x99\x83"sv));
    EXPECT(faces.ends_with_bytes("\x9F\x98\x80\xF0\x9F\x99\x83"sv));
    EXPECT(faces.ends_with_bytes("\xF0\x9F\x98\x80\xF0\x9F\x99\x83"sv));
    EXPECT(!faces.ends_with_bytes("a"sv));
    EXPECT(!faces.ends_with_bytes("😀"sv));
    EXPECT(faces.ends_with(0x1F643));
    EXPECT(!faces.ends_with(0x1F600));
}