AK+LibJS: Implement String.from{CharCode,CodePoint} using UTF-16 strings

Most of String.prototype and RegExp.prototype is implemented with UTF-16,
so building these strings directly as UTF-16 avoids extra copying of the string data.
This commit is contained in:
Timothy Flynn 2021-08-02 17:02:17 -04:00 committed by Andreas Kling
parent b6ff7f4fcc
commit 70080feab2
3 changed files with 31 additions and 18 deletions

View file

@ -25,15 +25,8 @@ static Vector<u16> to_utf16_impl(UtfViewType const& view) requires(IsSame<UtfVie
{
Vector<u16> utf16_data;
for (auto code_point : view) {
if (code_point < first_supplementary_plane_code_point) {
utf16_data.append(static_cast<u16>(code_point));
} else {
code_point -= first_supplementary_plane_code_point;
utf16_data.append(static_cast<u16>(high_surrogate_min | (code_point >> 10)));
utf16_data.append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff)));
}
}
for (auto code_point : view)
code_point_to_utf16(utf16_data, code_point);
return utf16_data;
}
@ -53,6 +46,19 @@ Vector<u16> utf32_to_utf16(Utf32View const& utf32_view)
return to_utf16_impl(utf32_view);
}
// Appends the UTF-16 encoding of `code_point` to the end of `string`.
//
// A value below first_supplementary_plane_code_point is appended as a single code unit; any other
// value is appended as two code units (high surrogate first, then low surrogate).
// VERIFYs that is_unicode(code_point) holds before encoding anything.
void code_point_to_utf16(Vector<u16>& string, u32 code_point)
{
    VERIFY(is_unicode(code_point));

    // Values below the supplementary planes fit in one code unit as-is.
    if (code_point < first_supplementary_plane_code_point) {
        string.append(static_cast<u16>(code_point));
        return;
    }

    // Rebase onto the start of the supplementary planes, then split the offset:
    // everything above the low 10 bits goes into the high surrogate, the low 10 bits into the low surrogate.
    u32 const offset = code_point - first_supplementary_plane_code_point;
    auto const high_surrogate = static_cast<u16>(high_surrogate_min | (offset >> 10));
    auto const low_surrogate = static_cast<u16>(low_surrogate_min | (offset & 0x3ff));

    string.append(high_surrogate);
    string.append(low_surrogate);
}
bool Utf16View::is_high_surrogate(u16 code_unit)
{
return (code_unit >= high_surrogate_min) && (code_unit <= high_surrogate_max);

View file

@ -18,6 +18,7 @@ namespace AK {
Vector<u16> utf8_to_utf16(StringView const&);
Vector<u16> utf8_to_utf16(Utf8View const&);
Vector<u16> utf32_to_utf16(Utf32View const&);
void code_point_to_utf16(Vector<u16>&, u32);
class Utf16View;

View file

@ -5,6 +5,7 @@
*/
#include <AK/StringBuilder.h>
#include <AK/Utf16View.h>
#include <AK/Utf32View.h>
#include <LibJS/Runtime/AbstractOperations.h>
#include <LibJS/Runtime/Array.h>
@ -124,22 +125,25 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::raw)
// 22.1.2.1 String.fromCharCode ( ...codeUnits ), https://tc39.es/ecma262/#sec-string.fromcharcode
// Builds the result directly from UTF-16 code units: every argument is converted with to_u16()
// and appended unchanged. Returns an empty value if a conversion leaves an exception on the VM.
JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_char_code)
{
    Vector<u16> string;
    // Exactly one code unit is appended per argument, so this capacity is exact.
    string.ensure_capacity(vm.argument_count());

    for (size_t i = 0; i < vm.argument_count(); ++i) {
        // Convert each argument exactly once. Per the specification the numeric conversion may
        // invoke user code, so converting the same argument twice would be observable.
        auto code_unit = vm.argument(i).to_u16(global_object);
        if (vm.exception())
            return {};

        string.append(code_unit);
    }

    return js_string(vm, move(string));
}
// 22.1.2.2 String.fromCodePoint ( ...codePoints ), https://tc39.es/ecma262/#sec-string.fromcodepoint
JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point)
{
StringBuilder builder;
Vector<u16> string;
string.ensure_capacity(vm.argument_count()); // This will be an under-estimate if any code point is > 0xffff.
for (size_t i = 0; i < vm.argument_count(); ++i) {
auto next_code_point = vm.argument(i).to_number(global_object);
if (vm.exception())
@ -153,9 +157,11 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point)
vm.throw_exception<RangeError>(global_object, ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects());
return {};
}
builder.append_code_point(code_point);
AK::code_point_to_utf16(string, static_cast<u32>(code_point));
}
return js_string(vm, builder.build());
return js_string(vm, move(string));
}
}