mirror of
https://github.com/SerenityOS/serenity
synced 2024-07-21 10:05:32 +00:00
AK+Everywhere: Make UTF-16 to UTF-8 converter fallible
The conversion could fail to allocate the underlying storage needed to store the UTF-8 data. Propagate this error to callers.
This commit is contained in:
parent
1edb96376b
commit
d793262beb
|
@ -81,7 +81,7 @@ u32 Utf16View::decode_surrogate_pair(u16 high_surrogate, u16 low_surrogate)
|
|||
return ((high_surrogate - high_surrogate_min) << 10) + (low_surrogate - low_surrogate_min) + first_supplementary_plane_code_point;
|
||||
}
|
||||
|
||||
DeprecatedString Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
|
||||
ErrorOr<DeprecatedString> Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
|
||||
{
|
||||
StringBuilder builder;
|
||||
|
||||
|
@ -92,17 +92,17 @@ DeprecatedString Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_uni
|
|||
|
||||
if ((next < end_ptr()) && is_low_surrogate(*next)) {
|
||||
auto code_point = decode_surrogate_pair(*ptr, *next);
|
||||
builder.append_code_point(code_point);
|
||||
TRY(builder.try_append_code_point(code_point));
|
||||
++ptr;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
builder.append_code_point(static_cast<u32>(*ptr));
|
||||
TRY(builder.try_append_code_point(static_cast<u32>(*ptr)));
|
||||
}
|
||||
} else {
|
||||
for (auto code_point : *this)
|
||||
builder.append_code_point(code_point);
|
||||
TRY(builder.try_append_code_point(code_point));
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
|
|
|
@ -75,7 +75,7 @@ public:
|
|||
No,
|
||||
};
|
||||
|
||||
DeprecatedString to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
|
||||
ErrorOr<DeprecatedString> to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
|
||||
|
||||
bool is_null() const { return m_code_units.is_null(); }
|
||||
bool is_empty() const { return m_code_units.is_empty(); }
|
||||
|
|
|
@ -56,14 +56,14 @@ TEST_CASE(encode_utf8)
|
|||
DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
|
||||
auto string = MUST(AK::utf8_to_utf16(utf8_string));
|
||||
Utf16View view { string };
|
||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string);
|
||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string);
|
||||
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), utf8_string);
|
||||
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), utf8_string);
|
||||
}
|
||||
{
|
||||
auto encoded = Array { (u16)0xd83d };
|
||||
Utf16View view { encoded };
|
||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), "\xed\xa0\xbd"sv);
|
||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), "\ufffd"sv);
|
||||
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
|
||||
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -269,14 +269,14 @@ TEST_CASE(substring_view)
|
|||
view = view.substring_view(7, 2);
|
||||
|
||||
EXPECT(view.length_in_code_units() == 2);
|
||||
EXPECT_EQ(view.to_utf8(), "😀"sv);
|
||||
EXPECT_EQ(MUST(view.to_utf8()), "😀"sv);
|
||||
}
|
||||
{
|
||||
Utf16View view { string };
|
||||
view = view.substring_view(7, 1);
|
||||
|
||||
EXPECT(view.length_in_code_units() == 1);
|
||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), "\xed\xa0\xbd"sv);
|
||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), "\ufffd"sv);
|
||||
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
|
||||
EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), "\ufffd"sv);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -372,7 +372,7 @@ void HexEditorWidget::update_inspector_values(size_t position)
|
|||
if (valid_code_units == 0)
|
||||
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
|
||||
else
|
||||
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, utf16_view.unicode_substring_view(0, 1).to_utf8());
|
||||
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, utf16_view.unicode_substring_view(0, 1).to_utf8().release_value_but_fixme_should_propagate_errors());
|
||||
} else {
|
||||
value_inspector_model->set_parsed_value(ValueInspectorModel::ValueType::UTF16, "");
|
||||
}
|
||||
|
|
|
@ -1265,7 +1265,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
|
|||
} else if (is_ascii_digit(next)) {
|
||||
bool is_two_digits = (i + 2 < replace_view.length_in_code_units()) && is_ascii_digit(replace_view.code_unit_at(i + 2));
|
||||
|
||||
auto capture_position_string = replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8();
|
||||
auto capture_position_string = TRY_OR_THROW_OOM(vm, replace_view.substring_view(i + 1, is_two_digits ? 2 : 1).to_utf8());
|
||||
auto capture_position = capture_position_string.to_uint();
|
||||
|
||||
if (capture_position.has_value() && (*capture_position > 0) && (*capture_position <= captures.size())) {
|
||||
|
@ -1295,7 +1295,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
|
|||
result.append(curr);
|
||||
} else {
|
||||
auto group_name_view = replace_view.substring_view(start_position, *end_position - start_position);
|
||||
auto group_name = group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
|
||||
auto group_name = TRY_OR_THROW_OOM(vm, group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
|
||||
auto capture = TRY(named_captures.as_object().get(group_name));
|
||||
|
||||
|
@ -1311,7 +1311,7 @@ ThrowCompletionOr<DeprecatedString> get_substitution(VM& vm, Utf16View const& ma
|
|||
}
|
||||
}
|
||||
|
||||
return Utf16String(move(result)).to_utf8();
|
||||
return TRY_OR_THROW_OOM(vm, Utf16View { result }.to_utf8());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -718,7 +718,7 @@ ThrowCompletionOr<Vector<PatternPartition>> format_date_time_pattern(VM& vm, Dat
|
|||
if (formatted_value.length() > 2) {
|
||||
Utf16String utf16_formatted_value { formatted_value };
|
||||
if (utf16_formatted_value.length_in_code_units() > 2)
|
||||
formatted_value = utf16_formatted_value.substring_view(utf16_formatted_value.length_in_code_units() - 2).to_utf8();
|
||||
formatted_value = TRY_OR_THROW_OOM(vm, utf16_formatted_value.substring_view(utf16_formatted_value.length_in_code_units() - 2).to_utf8());
|
||||
}
|
||||
|
||||
break;
|
||||
|
|
|
@ -68,7 +68,8 @@ DeprecatedString const& PrimitiveString::deprecated_string() const
|
|||
{
|
||||
resolve_rope_if_needed();
|
||||
if (!m_has_utf8_string) {
|
||||
m_utf8_string = m_utf16_string.to_utf8();
|
||||
// FIXME: Propagate this error.
|
||||
m_utf8_string = MUST(m_utf16_string.to_utf8(vm()));
|
||||
m_has_utf8_string = true;
|
||||
}
|
||||
return m_utf8_string;
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <AK/StringView.h>
|
||||
#include <LibJS/Runtime/Utf16String.h>
|
||||
#include <LibJS/Runtime/VM.h>
|
||||
|
||||
namespace JS {
|
||||
namespace Detail {
|
||||
|
@ -96,9 +97,9 @@ Utf16View Utf16String::substring_view(size_t code_unit_offset) const
|
|||
return view().substring_view(code_unit_offset);
|
||||
}
|
||||
|
||||
DeprecatedString Utf16String::to_utf8() const
|
||||
ThrowCompletionOr<DeprecatedString> Utf16String::to_utf8(VM& vm) const
|
||||
{
|
||||
return view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes);
|
||||
return TRY_OR_THROW_OOM(vm, view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
}
|
||||
|
||||
u16 Utf16String::code_unit_at(size_t index) const
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <AK/Types.h>
|
||||
#include <AK/Utf16View.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <LibJS/Runtime/Completion.h>
|
||||
|
||||
namespace JS {
|
||||
namespace Detail {
|
||||
|
@ -49,7 +50,7 @@ public:
|
|||
Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const;
|
||||
Utf16View substring_view(size_t code_unit_offset) const;
|
||||
|
||||
DeprecatedString to_utf8() const;
|
||||
ThrowCompletionOr<DeprecatedString> to_utf8(VM&) const;
|
||||
u16 code_unit_at(size_t index) const;
|
||||
|
||||
size_t length_in_code_units() const;
|
||||
|
|
|
@ -385,7 +385,7 @@ public:
|
|||
{
|
||||
return m_view.visit(
|
||||
[](StringView view) { return view.to_deprecated_string(); },
|
||||
[](Utf16View view) { return view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes); },
|
||||
[](Utf16View view) { return view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes).release_value_but_fixme_should_propagate_errors(); },
|
||||
[](auto& view) {
|
||||
StringBuilder builder;
|
||||
for (auto it = view.begin(); it != view.end(); ++it)
|
||||
|
|
Loading…
Reference in a new issue