AK+LibUnicode: Provide Unicode-aware String case transformations

Since AK can't refer to LibUnicode directly, the strategy here is that
if you need case transformations, you can link LibUnicode and receive
them. If you try to use either of these methods without linking it, then
you'll of course get a linker error (note we don't do any fallbacks to
e.g. ASCII case transformations). If you don't need these methods, you
don't have to link LibUnicode.
This commit is contained in:
Timothy Flynn 2023-01-08 16:33:30 -05:00 committed by Andrew Kaster
parent 12f6793223
commit 6fcc1c7426
6 changed files with 77 additions and 0 deletions

View file

@ -9,6 +9,7 @@
#include <AK/Concepts.h>
#include <AK/Format.h>
#include <AK/Forward.h>
#include <AK/Optional.h>
#include <AK/RefCounted.h>
#include <AK/Span.h>
#include <AK/StringView.h>
@ -43,6 +44,11 @@ public:
// Creates a new String from a sequence of UTF-8 encoded code points.
static ErrorOr<String> from_utf8(StringView);
// Creates a new String by transforming this String to lower- or uppercase. Using these methods
// requires linking LibUnicode into your application, as their definitions live there rather than in AK.
// The optional locale is handed to LibUnicode's case transformation; it defaults to an empty Optional.
ErrorOr<String> to_lowercase(Optional<StringView> const& locale = {}) const;
ErrorOr<String> to_uppercase(Optional<StringView> const& locale = {}) const;
// Creates a substring with a deep copy of the specified data window.
ErrorOr<String> substring_from_byte_offset(size_t start, size_t byte_count) const;

View file

@ -558,6 +558,7 @@ if (BUILD_LAGOM)
# Hand every AK test source to lagom_test(), using Tests/AK as the working directory.
foreach(source ${AK_TEST_SOURCES})
lagom_test(${source} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../Tests/AK)
endforeach()
# TestString calls String::to_lowercase() and String::to_uppercase(), which are defined in
# LibUnicode, so that library has to be linked into this one test explicitly.
target_link_libraries(TestString LibUnicode)
# LibAudio
file(GLOB LIBAUDIO_TEST_SOURCES CONFIGURE_DEPENDS "../../Tests/LibAudio/*.cpp")

View file

@ -86,3 +86,5 @@ set(AK_TEST_SOURCES
# Hand every AK test source to serenity_test() under the AK test group.
foreach(source IN LISTS AK_TEST_SOURCES)
serenity_test("${source}" AK)
endforeach()
# TestString calls String::to_lowercase() and String::to_uppercase(), which are defined in
# LibUnicode, so that library has to be linked into this one test explicitly.
target_link_libraries(TestString PRIVATE LibUnicode)

View file

@ -107,3 +107,41 @@ TEST_CASE(replace)
EXPECT_EQ(result, "anon@courage:~"sv);
}
}
// Exercises String::to_lowercase() without passing a locale.
TEST_CASE(to_lowercase)
{
    // Mixed-case ASCII input.
    {
        auto input = MUST(String::from_utf8("Aa"sv));
        auto lowercased = MUST(input.to_lowercase());
        EXPECT_EQ(lowercased, "aa"sv);
    }

    // Mixed-case Greek input.
    {
        auto input = MUST(String::from_utf8("Ωω"sv));
        auto lowercased = MUST(input.to_lowercase());
        EXPECT_EQ(lowercased, "ωω"sv);
    }

    // Dotted capital I: the expected output spells it as 'i' followed by a combining dot above.
    {
        auto input = MUST(String::from_utf8("İi̇"sv));
        auto lowercased = MUST(input.to_lowercase());
        EXPECT_EQ(lowercased, "i̇i̇"sv);
    }
}
// Exercises String::to_uppercase() without passing a locale.
TEST_CASE(to_uppercase)
{
    // Mixed-case ASCII input.
    {
        auto input = MUST(String::from_utf8("Aa"sv));
        auto uppercased = MUST(input.to_uppercase());
        EXPECT_EQ(uppercased, "AA"sv);
    }

    // Mixed-case Greek input.
    {
        auto input = MUST(String::from_utf8("Ωω"sv));
        auto uppercased = MUST(input.to_uppercase());
        EXPECT_EQ(uppercased, "ΩΩ"sv);
    }

    // Apostrophe-n: the expected output is a modifier apostrophe followed by a capital 'N'.
    {
        auto input = MUST(String::from_utf8("ʼn"sv));
        auto uppercased = MUST(input.to_uppercase());
        EXPECT_EQ(uppercased, "ʼN"sv);
    }
}

View file

@ -5,6 +5,7 @@ set(SOURCES
CurrencyCode.cpp
Emoji.cpp
Normalize.cpp
String.cpp
UnicodeUtils.cpp
${UNICODE_DATA_SOURCES}
)

View file

@ -0,0 +1,29 @@
/*
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <LibUnicode/UnicodeUtils.h>
// This file contains definitions of AK::String methods which require UCD data.
namespace AK {
// Builds a new String from this String's code points using Unicode::Detail::build_lowercase_string().
// The locale is passed through to that routine unchanged. An error from the transformation, or from
// turning the builder into a String, is returned to the caller.
ErrorOr<String> String::to_lowercase(Optional<StringView> const& locale) const
{
    StringBuilder lowercased;
    auto source = code_points();

    TRY(Unicode::Detail::build_lowercase_string(source, lowercased, locale));

    return lowercased.to_string();
}
// Builds a new String from this String's code points using Unicode::Detail::build_uppercase_string().
// The locale is passed through to that routine unchanged. An error from the transformation, or from
// turning the builder into a String, is returned to the caller.
ErrorOr<String> String::to_uppercase(Optional<StringView> const& locale) const
{
    StringBuilder uppercased;
    auto source = code_points();

    TRY(Unicode::Detail::build_uppercase_string(source, uppercased, locale));

    return uppercased.to_string();
}
}