1
0
mirror of https://github.com/SerenityOS/serenity synced 2024-07-09 11:00:46 +00:00

AK: Add Utf16View::to_utf8 to convert the view to a UTF-8 AK::String

This commit is contained in:
Timothy Flynn 2023-01-08 18:56:53 -05:00 committed by Linus Groh
parent d0403ec14f
commit 2eacc7aec1
3 changed files with 17 additions and 10 deletions

View File

@ -82,6 +82,11 @@ u32 Utf16View::decode_surrogate_pair(u16 high_surrogate, u16 low_surrogate)
}
// Produces a DeprecatedString from this view by first converting to a UTF-8
// String via to_utf8() and then converting that result. Any error reported by
// to_utf8() is propagated to the caller through TRY.
ErrorOr<DeprecatedString> Utf16View::to_deprecated_string(AllowInvalidCodeUnits allow_invalid_code_units) const
{
    auto utf8 = TRY(to_utf8(allow_invalid_code_units));
    return utf8.to_deprecated_string();
}
ErrorOr<String> Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
{
StringBuilder builder;
@ -105,7 +110,7 @@ ErrorOr<DeprecatedString> Utf16View::to_deprecated_string(AllowInvalidCodeUnits
TRY(builder.try_append_code_point(code_point));
}
return builder.build();
return builder.to_string();
}
size_t Utf16View::length_in_code_points() const

View File

@ -12,6 +12,7 @@
#include <AK/Forward.h>
#include <AK/Optional.h>
#include <AK/Span.h>
#include <AK/String.h>
#include <AK/Types.h>
#include <AK/Vector.h>
@ -76,6 +77,7 @@ public:
};
// Converts the view to a DeprecatedString. The definition forwards to to_utf8()
// with the same AllowInvalidCodeUnits argument and converts the resulting String.
// When no argument is given, AllowInvalidCodeUnits::No is used.
ErrorOr<DeprecatedString> to_deprecated_string(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
// Converts the view to a UTF-8 encoded String, or returns an error if appending
// a code point to the underlying StringBuilder fails.
// The accompanying tests pin the handling of a lone 0xd83d code unit:
//   AllowInvalidCodeUnits::Yes -> the bytes "\xed\xa0\xbd"
//   AllowInvalidCodeUnits::No  -> "\ufffd" (the default)
ErrorOr<String> to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
// Forwards to m_code_units.is_null().
bool is_null() const
{
    return m_code_units.is_null();
}
// Forwards to m_code_units.is_empty().
bool is_empty() const
{
    return m_code_units.is_empty();
}

View File

@ -7,7 +7,7 @@
#include <LibTest/TestCase.h>
#include <AK/Array.h>
#include <AK/DeprecatedString.h>
#include <AK/String.h>
#include <AK/StringView.h>
#include <AK/Types.h>
#include <AK/Utf16View.h>
@ -53,17 +53,17 @@ TEST_CASE(decode_utf8)
// Verifies that UTF-16 code units held by a Utf16View convert back to the expected UTF-8 bytes.
// NOTE(review): `utf8_string` is declared twice in the first scope, and every expectation below
// appears once via to_deprecated_string() and once via to_utf8(). This looks like the removed and
// added sides of a change shown together; confirm which lines belong to the current file.
TEST_CASE(encode_utf8)
{
{
// Round trip: UTF-8 text -> UTF-16 (AK::utf8_to_utf16) -> UTF-8. Both AllowInvalidCodeUnits
// modes are expected to reproduce the original text.
DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
auto utf8_string = MUST(String::from_utf8("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv));
auto string = MUST(AK::utf8_to_utf16(utf8_string));
Utf16View view { string };
EXPECT_EQ(MUST(view.to_deprecated_string(Utf16View::AllowInvalidCodeUnits::Yes)), utf8_string);
EXPECT_EQ(MUST(view.to_deprecated_string(Utf16View::AllowInvalidCodeUnits::No)), utf8_string);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), utf8_string);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), utf8_string);
}
{
// The input is the single code unit 0xd83d, a surrogate with no partner.
// Yes: expected output is the three bytes ED A0 BD (the code unit encoded as-is).
// No:  expected output is U+FFFD.
auto encoded = Array { (u16)0xd83d };
Utf16View view { encoded };
EXPECT_EQ(MUST(view.to_deprecated_string(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
EXPECT_EQ(MUST(view.to_deprecated_string(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
}
}
@ -269,14 +269,14 @@ TEST_CASE(substring_view)
view = view.substring_view(7, 2);
EXPECT(view.length_in_code_units() == 2);
EXPECT_EQ(MUST(view.to_deprecated_string()), "😀"sv);
EXPECT_EQ(MUST(view.to_utf8()), "😀"sv);
}
{
Utf16View view { string };
view = view.substring_view(7, 1);
EXPECT(view.length_in_code_units() == 1);
EXPECT_EQ(MUST(view.to_deprecated_string(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
EXPECT_EQ(MUST(view.to_deprecated_string(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
}
}