AK+Everywhere: Make UTF-16 to UTF-8 converter fallible

The UTF-16 to UTF-8 conversion could fail to allocate the underlying
storage needed to hold the UTF-8 data. Propagate this error to callers.
This commit is contained in:
Timothy Flynn 2023-01-07 13:59:10 -05:00 committed by Linus Groh
parent 1edb96376b
commit d793262beb
10 changed files with 25 additions and 22 deletions

View file

@ -81,7 +81,7 @@ u32 Utf16View::decode_surrogate_pair(u16 high_surrogate, u16 low_surrogate)
return ((high_surrogate - high_surrogate_min) << 10) + (low_surrogate - low_surrogate_min) + first_supplementary_plane_code_point;
}
DeprecatedString Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
ErrorOr<DeprecatedString> Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
{
StringBuilder builder;
@ -92,17 +92,17 @@ DeprecatedString Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_uni
if ((next < end_ptr()) && is_low_surrogate(*next)) {
auto code_point = decode_surrogate_pair(*ptr, *next);
builder.append_code_point(code_point);
TRY(builder.try_append_code_point(code_point));
++ptr;
continue;
}
}
builder.append_code_point(static_cast<u32>(*ptr));
TRY(builder.try_append_code_point(static_cast<u32>(*ptr)));
}
} else {
for (auto code_point : *this)
builder.append_code_point(code_point);
TRY(builder.try_append_code_point(code_point));
}
return builder.build();

View file

@ -75,7 +75,7 @@ public:
No,
};
DeprecatedString to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
ErrorOr<DeprecatedString> to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
bool is_null() const { return m_code_units.is_null(); }
bool is_empty() const { return m_code_units.is_empty(); }

View file

@ -56,14 +56,14 @@ TEST_CASE(encode_utf8)
DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
auto string = MUST(AK::utf8_to_utf16(utf8_string));
Utf16View view { string };
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string);
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), utf8_string);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), utf8_string);
}
{
auto encoded = Array { (u16)0xd83d };
Utf16View view { encoded };
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), "\xed\xa0\xbd"sv);
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), "\ufffd"sv);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
}
}
@ -269,14 +269,14 @@ TEST_CASE(substring_view)
view = view.substring_view(7, 2);
EXPECT(view.length_in_code_units() == 2);
EXPECT_EQ(view.to_utf8(), "😀"sv);
EXPECT_EQ(MUST(view.to_utf8()), "😀"sv);
}
{
Utf16View view { string };
view = view.substring_view(7, 1);
EXPECT(view.length_in_code_units() == 1);
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), "\xed\xa0\xbd"sv);
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), "\ufffd"sv);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
}
}

View file

@ -372,7 +372,7 @@ void HexEditorWidget::update_inspector_values(size_t position)
if (valid_code_units == 0)
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
else
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, utf16_view.unicode_substring_view(0, 1).to_utf8());
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, utf16_view.unicode_substring_view(0, 1).to_utf8().release_value_but_fixme_should_propagate_errors());
} else {
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
}

View file

@ -1265,7 +1265,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
} else if (is_ascii_digit(next)) {
bool is_two_digits = (i + 2 < replace_view.length_in_code_units()) && is_ascii_digit(replace_view.code_unit_at(i + 2));
auto capture_position_string = replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8();
auto capture_position_string = TRY_OR_THROW_OOM(vm, replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8());
auto capture_position = capture_position_string.to_uint();
if (capture_position.has_value() && (*capture_position > 0) && (*capture_position <= captures.size())) {
@ -1295,7 +1295,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
result.append(curr);
} else {
auto group_name_view = replace_view.substring_view(start_position, *end_position - start_position);
auto group_name = group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
auto group_name = TRY_OR_THROW_OOM(vm, group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
auto capture = TRY(named_captures.as_object().get(group_name));
@ -1311,7 +1311,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
}
}
return Utf16String(move(result)).to_utf8();
return TRY_OR_THROW_OOM(vm, Utf16View { result }.to_utf8());
}
}

View file

@ -718,7 +718,7 @@ ThrowCompletionOr<Vector<PatternPartition>> format_date_time_pattern(VM& vm, Dat
if (formatted_value.length() > 2) {
Utf16String utf16_formatted_value { formatted_value };
if (utf16_formatted_value.length_in_code_units() > 2)
formatted_value = utf16_formatted_value.substring_view(utf16_formatted_value.length_in_code_units() - 2).to_utf8();
formatted_value = TRY_OR_THROW_OOM(vm, utf16_formatted_value.substring_view(utf16_formatted_value.length_in_code_units() - 2).to_utf8());
}
break;

View file

@ -68,7 +68,8 @@ DeprecatedString const& PrimitiveString::deprecated_string() const
{
resolve_rope_if_needed();
if (!m_has_utf8_string) {
m_utf8_string = m_utf16_string.to_utf8();
// FIXME: Propagate this error.
m_utf8_string = MUST(m_utf16_string.to_utf8(vm()));
m_has_utf8_string = true;
}
return m_utf8_string;

View file

@ -6,6 +6,7 @@
#include <AK/StringView.h>
#include <LibJS/Runtime/Utf16String.h>
#include <LibJS/Runtime/VM.h>
namespace JS {
namespace Detail {
@ -96,9 +97,9 @@ Utf16View Utf16String::substring_view(size_t code_unit_offset) const
return view().substring_view(code_unit_offset);
}
DeprecatedString Utf16String::to_utf8() const
ThrowCompletionOr<DeprecatedString> Utf16String::to_utf8(VM& vm) const
{
return view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
return TRY_OR_THROW_OOM(vm, view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
}
u16 Utf16String::code_unit_at(size_t index) const

View file

@ -12,6 +12,7 @@
#include <AK/Types.h>
#include <AK/Utf16View.h>
#include <AK/Vector.h>
#include <LibJS/Runtime/Completion.h>
namespace JS {
namespace Detail {
@ -49,7 +50,7 @@ public:
Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const;
Utf16View substring_view(size_t code_unit_offset) const;
DeprecatedString to_utf8() const;
ThrowCompletionOr<DeprecatedString> to_utf8(VM&) const;
u16 code_unit_at(size_t index) const;
size_t length_in_code_units() const;

View file

@ -385,7 +385,7 @@ public:
{
return m_view.visit(
[](StringView view) { return view.to_deprecated_string(); },
[](Utf16View view) { return view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes); },
[](Utf16View view) { return view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes).release_value_but_fixme_should_propagate_errors(); },
[](auto& view) {
StringBuilder builder;
for (auto it = view.begin(); it != view.end(); ++it)