AK+LibUnicode: Provide Unicode-aware caseless String matching

The Unicode spec defines much more complicated caseless matching
algorithms in its Collation spec. This implements the "basic" case
folding comparison.
This commit is contained in:
Timothy Flynn 2023-01-17 11:30:10 -05:00 committed by Linus Groh
parent 8f2589b3b0
commit 537fcaf59e
3 changed files with 79 additions and 0 deletions

View file

@ -49,6 +49,10 @@ public:
ErrorOr<String> to_lowercase(Optional<StringView> const& locale = {}) const;
ErrorOr<String> to_uppercase(Optional<StringView> const& locale = {}) const;
ErrorOr<String> to_titlecase(Optional<StringView> const& locale = {}) const;
// Creates a new String holding the case-folded form of this String's code points.
// NOTE(review): the implementation calls into Unicode::Detail, so this presumably also requires linking LibUnicode - confirm.
ErrorOr<String> to_casefold() const;
// Compare this String against another string with caseless matching. Using this method requires linking LibUnicode into your application.
// Two strings are a caseless match if and only if their case-folded forms (see to_casefold()) are equal.
ErrorOr<bool> equals_ignoring_case(String const&) const;
// Creates a substring with a deep copy of the specified data window.
ErrorOr<String> substring_from_byte_offset(size_t start, size_t byte_count) const;

View file

@ -187,6 +187,66 @@ TEST_CASE(to_titlecase)
}
}
TEST_CASE(equals_ignoring_case)
{
    // Two default-constructed strings are a caseless match for each other.
    {
        String first_empty {};
        String second_empty {};

        EXPECT(MUST(first_empty.equals_ignoring_case(second_empty)));
    }

    // ASCII: any mix of upper/lower case matches; a different letter order does not.
    {
        auto lower = MUST(String::from_utf8("abcd"sv));
        auto upper = MUST(String::from_utf8("ABCD"sv));
        auto mixed = MUST(String::from_utf8("AbCd"sv));
        auto reversed = MUST(String::from_utf8("dcba"sv));

        EXPECT(MUST(lower.equals_ignoring_case(upper)));
        EXPECT(MUST(lower.equals_ignoring_case(mixed)));
        EXPECT(!MUST(lower.equals_ignoring_case(reversed)));

        EXPECT(MUST(upper.equals_ignoring_case(lower)));
        EXPECT(MUST(upper.equals_ignoring_case(mixed)));
        EXPECT(!MUST(upper.equals_ignoring_case(reversed)));

        EXPECT(MUST(mixed.equals_ignoring_case(lower)));
        EXPECT(MUST(mixed.equals_ignoring_case(upper)));
        EXPECT(!MUST(mixed.equals_ignoring_case(reversed)));
    }

    // Sharp S is expected to match every casing of a double "s", but never a single "s".
    {
        auto sharp_s = MUST(String::from_utf8("\u00DF"sv)); // LATIN SMALL LETTER SHARP S
        auto upper_pair = MUST(String::from_utf8("SS"sv));
        auto title_pair = MUST(String::from_utf8("Ss"sv));
        auto lower_pair = MUST(String::from_utf8("ss"sv));
        auto upper_single = MUST(String::from_utf8("S"sv));
        auto lower_single = MUST(String::from_utf8("s"sv));

        EXPECT(MUST(sharp_s.equals_ignoring_case(upper_pair)));
        EXPECT(MUST(sharp_s.equals_ignoring_case(title_pair)));
        EXPECT(MUST(sharp_s.equals_ignoring_case(lower_pair)));
        EXPECT(!MUST(sharp_s.equals_ignoring_case(upper_single)));
        EXPECT(!MUST(sharp_s.equals_ignoring_case(lower_single)));

        EXPECT(MUST(upper_pair.equals_ignoring_case(sharp_s)));
        EXPECT(MUST(upper_pair.equals_ignoring_case(title_pair)));
        EXPECT(MUST(upper_pair.equals_ignoring_case(lower_pair)));
        EXPECT(!MUST(upper_pair.equals_ignoring_case(upper_single)));
        EXPECT(!MUST(upper_pair.equals_ignoring_case(lower_single)));

        EXPECT(MUST(title_pair.equals_ignoring_case(sharp_s)));
        EXPECT(MUST(title_pair.equals_ignoring_case(upper_pair)));
        EXPECT(MUST(title_pair.equals_ignoring_case(lower_pair)));
        EXPECT(!MUST(title_pair.equals_ignoring_case(upper_single)));
        EXPECT(!MUST(title_pair.equals_ignoring_case(lower_single)));

        EXPECT(MUST(lower_pair.equals_ignoring_case(sharp_s)));
        EXPECT(MUST(lower_pair.equals_ignoring_case(upper_pair)));
        EXPECT(MUST(lower_pair.equals_ignoring_case(title_pair)));
        EXPECT(!MUST(lower_pair.equals_ignoring_case(upper_single)));
        EXPECT(!MUST(lower_pair.equals_ignoring_case(lower_single)));
    }
}
TEST_CASE(is_one_of)
{
auto foo = MUST(String::from_utf8("foo"sv));

View file

@ -33,4 +33,19 @@ ErrorOr<String> String::to_titlecase(Optional<StringView> const& locale) const
return builder.to_string();
}
ErrorOr<String> String::to_casefold() const
{
    // Feed this string's code points through the case-folding helper, collecting
    // the folded output in a builder, then materialize the result as a new String.
    StringBuilder folded;
    auto const source = code_points();

    TRY(Unicode::Detail::build_casefold_string(source, folded));
    return folded.to_string();
}
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34145
ErrorOr<bool> String::equals_ignoring_case(String const& other) const
{
    // Per the definition linked above, X is a caseless match for Y exactly when
    // toCasefold(X) = toCasefold(Y), so fold both sides and compare the results.
    auto const folded_self = TRY(to_casefold());
    auto const folded_other = TRY(other.to_casefold());

    return folded_self == folded_other;
}
}