From f882581e91c256a0400bbd262676580037dc30f4 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 8 Mar 2023 08:56:02 -0500 Subject: [PATCH] AK: Make String::{starts,ends}_with(code_point) handle non-ASCII We currently pass the code point to StringView::{starts,ends}_with, which actually accepts a single char, thus cannot handle non-ASCII code points. --- AK/String.cpp | 14 ++++++- AK/String.h | 8 ++-- Tests/AK/TestString.cpp | 82 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 6 deletions(-) diff --git a/AK/String.cpp b/AK/String.cpp index a03fcdefc4..428684f9ca 100644 --- a/AK/String.cpp +++ b/AK/String.cpp @@ -496,7 +496,10 @@ bool String::contains(char needle, CaseSensitivity case_sensitivity) const bool String::starts_with(u32 code_point) const { - return bytes_as_string_view().starts_with(code_point); + if (is_empty()) + return false; + + return *code_points().begin() == code_point; } bool String::starts_with_bytes(StringView bytes) const @@ -506,7 +509,14 @@ bool String::starts_with_bytes(StringView bytes) const bool String::ends_with(u32 code_point) const { - return bytes_as_string_view().ends_with(code_point); + if (is_empty()) + return false; + + u32 last_code_point = 0; + for (auto it = code_points().begin(); it != code_points().end(); ++it) + last_code_point = *it; + + return last_code_point == code_point; } bool String::ends_with_bytes(StringView bytes) const diff --git a/AK/String.h b/AK/String.h index 32959e26ab..adbcd6da5b 100644 --- a/AK/String.h +++ b/AK/String.h @@ -111,11 +111,11 @@ public: // Compare this String against another string with caseless matching. Using this method requires linking LibUnicode into your application. 
ErrorOr<bool> equals_ignoring_case(String const&) const; - bool starts_with(u32 code_point) const; - bool starts_with_bytes(StringView) const; + [[nodiscard]] bool starts_with(u32 code_point) const; + [[nodiscard]] bool starts_with_bytes(StringView) const; - bool ends_with(u32 code_point) const; - bool ends_with_bytes(StringView) const; + [[nodiscard]] bool ends_with(u32 code_point) const; + [[nodiscard]] bool ends_with_bytes(StringView) const; // Creates a substring with a deep copy of the specified data window. ErrorOr<String> substring_from_byte_offset(size_t start, size_t byte_count) const; diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp index d841bd56dd..305f22ee92 100644 --- a/Tests/AK/TestString.cpp +++ b/Tests/AK/TestString.cpp @@ -711,3 +711,85 @@ TEST_CASE(trim) EXPECT(result.is_empty()); } } + +TEST_CASE(starts_with) +{ + EXPECT(String {}.starts_with_bytes({})); + EXPECT(!String {}.starts_with_bytes(" "sv)); + EXPECT(!String {}.starts_with(0)); + + EXPECT("a"_short_string.starts_with_bytes({})); + EXPECT("a"_short_string.starts_with_bytes("a"sv)); + EXPECT(!"a"_short_string.starts_with_bytes("b"sv)); + EXPECT(!"a"_short_string.starts_with_bytes("ab"sv)); + + EXPECT("a"_short_string.starts_with(0x0061)); + EXPECT(!"a"_short_string.starts_with(0x0062)); + + EXPECT("abc"_short_string.starts_with_bytes({})); + EXPECT("abc"_short_string.starts_with_bytes("a"sv)); + EXPECT("abc"_short_string.starts_with_bytes("ab"sv)); + EXPECT("abc"_short_string.starts_with_bytes("abc"sv)); + EXPECT(!"abc"_short_string.starts_with_bytes("b"sv)); + EXPECT(!"abc"_short_string.starts_with_bytes("bc"sv)); + + EXPECT("abc"_short_string.starts_with(0x0061)); + EXPECT(!"abc"_short_string.starts_with(0x0062)); + EXPECT(!"abc"_short_string.starts_with(0x0063)); + + auto emoji = MUST("😀🙃"_string); + EXPECT(emoji.starts_with_bytes("\xF0"sv)); + EXPECT(emoji.starts_with_bytes("\xF0\x9F"sv)); + EXPECT(emoji.starts_with_bytes("\xF0\x9F\x98"sv)); + 
EXPECT(emoji.starts_with_bytes("\xF0\x9F\x98\x80"sv)); + EXPECT(emoji.starts_with_bytes("\xF0\x9F\x98\x80\xF0"sv)); + EXPECT(emoji.starts_with_bytes("\xF0\x9F\x98\x80\xF0\x9F"sv)); + EXPECT(emoji.starts_with_bytes("\xF0\x9F\x98\x80\xF0\x9F\x99"sv)); + EXPECT(emoji.starts_with_bytes("\xF0\x9F\x98\x80\xF0\x9F\x99\x83"sv)); + EXPECT(!emoji.starts_with_bytes("a"sv)); + EXPECT(!emoji.starts_with_bytes("🙃"sv)); + + EXPECT(emoji.starts_with(0x1F600)); + EXPECT(!emoji.starts_with(0x1F643)); +} + +TEST_CASE(ends_with) +{ + EXPECT(String {}.ends_with_bytes({})); + EXPECT(!String {}.ends_with_bytes(" "sv)); + EXPECT(!String {}.ends_with(0)); + + EXPECT("a"_short_string.ends_with_bytes({})); + EXPECT("a"_short_string.ends_with_bytes("a"sv)); + EXPECT(!"a"_short_string.ends_with_bytes("b"sv)); + EXPECT(!"a"_short_string.ends_with_bytes("ba"sv)); + + EXPECT("a"_short_string.ends_with(0x0061)); + EXPECT(!"a"_short_string.ends_with(0x0062)); + + EXPECT("abc"_short_string.ends_with_bytes({})); + EXPECT("abc"_short_string.ends_with_bytes("c"sv)); + EXPECT("abc"_short_string.ends_with_bytes("bc"sv)); + EXPECT("abc"_short_string.ends_with_bytes("abc"sv)); + EXPECT(!"abc"_short_string.ends_with_bytes("b"sv)); + EXPECT(!"abc"_short_string.ends_with_bytes("ab"sv)); + + EXPECT("abc"_short_string.ends_with(0x0063)); + EXPECT(!"abc"_short_string.ends_with(0x0062)); + EXPECT(!"abc"_short_string.ends_with(0x0061)); + + auto emoji = MUST("😀🙃"_string); + EXPECT(emoji.ends_with_bytes("\x83"sv)); + EXPECT(emoji.ends_with_bytes("\x99\x83"sv)); + EXPECT(emoji.ends_with_bytes("\x9F\x99\x83"sv)); + EXPECT(emoji.ends_with_bytes("\xF0\x9F\x99\x83"sv)); + EXPECT(emoji.ends_with_bytes("\x80\xF0\x9F\x99\x83"sv)); + EXPECT(emoji.ends_with_bytes("\x98\x80\xF0\x9F\x99\x83"sv)); + EXPECT(emoji.ends_with_bytes("\x9F\x98\x80\xF0\x9F\x99\x83"sv)); + EXPECT(emoji.ends_with_bytes("\xF0\x9F\x98\x80\xF0\x9F\x99\x83"sv)); + EXPECT(!emoji.ends_with_bytes("a"sv)); + EXPECT(!emoji.ends_with_bytes("😀"sv)); + + 
EXPECT(emoji.ends_with(0x1F643)); + EXPECT(!emoji.ends_with(0x1F600)); +}