From b9dc0b7d1becd5d482854fa3dfd2ddbe14ad55b4 Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Thu, 20 Oct 2022 08:44:18 -0400
Subject: [PATCH] AK: Do not append string bytes as code points when title-casing a string

By appending individual bytes as code points, we were "breaking apart"
multi-byte UTF-8 code points. This now behaves the same way as the
invert_case() helper in StringUtils.
---
 AK/StringUtils.cpp           | 4 ++--
 Tests/AK/TestStringUtils.cpp | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/AK/StringUtils.cpp b/AK/StringUtils.cpp
index b9aed9c9aa..42887cce05 100644
--- a/AK/StringUtils.cpp
+++ b/AK/StringUtils.cpp
@@ -465,9 +465,9 @@ String to_titlecase(StringView str)
 
     for (auto ch : str) {
         if (next_is_upper)
-            builder.append_code_point(to_ascii_uppercase(ch));
+            builder.append(to_ascii_uppercase(ch));
         else
-            builder.append_code_point(to_ascii_lowercase(ch));
+            builder.append(to_ascii_lowercase(ch));
         next_is_upper = ch == ' ';
     }
 
diff --git a/Tests/AK/TestStringUtils.cpp b/Tests/AK/TestStringUtils.cpp
index a06227c137..98011b59bf 100644
--- a/Tests/AK/TestStringUtils.cpp
+++ b/Tests/AK/TestStringUtils.cpp
@@ -387,4 +387,6 @@ TEST_CASE(to_titlecase)
     EXPECT_EQ(AK::StringUtils::to_titlecase("foo bar"sv), "Foo Bar"sv);
     EXPECT_EQ(AK::StringUtils::to_titlecase("foo bar"sv), "Foo Bar"sv);
     EXPECT_EQ(AK::StringUtils::to_titlecase(" foo bar "sv), " Foo Bar "sv);
+    EXPECT_EQ(AK::StringUtils::to_titlecase("\xc3\xa7"sv), "\xc3\xa7"sv); // U+00E7 LATIN SMALL LETTER C WITH CEDILLA
+    EXPECT_EQ(AK::StringUtils::to_titlecase("\xe1\x80\x80"sv), "\xe1\x80\x80"sv); // U+1000 MYANMAR LETTER KA
 }