[dart2wasm] Stop using Utf8Decoder._convertIntercepted

Issue: #54018.

Change-Id: Ic8ee663f45acc3ae0300cdd3f1cbb9132110c6f3
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/337481
Reviewed-by: Aske Simon Christensen <askesc@google.com>
Commit-Queue: Ömer Ağacan <omersa@google.com>
This commit is contained in:
Ömer Sinan Ağacan 2023-11-21 11:18:12 +00:00 committed by Commit Queue
parent dd8952f575
commit 7f5cba2e53
2 changed files with 105 additions and 118 deletions

View file

@ -39,46 +39,7 @@ class Utf8Decoder {
/// Decodes `codeUnits[start..end)` as UTF-8, always going through a `U8List`.
///
/// A `U8List` input is handed to `_Utf8Decoder._convertSingle` directly. Any
/// other `List<int>` is first copied into a fresh `U8List`; [end] defaults to
/// `codeUnits.length` on that path. While copying, a value outside `0..255`
/// is replaced by `0xFF` when [allowMalformed] is true, and otherwise raises a
/// [FormatException] that points at the offending index of [codeUnits].
///
/// In both cases the caller's list and [start] are forwarded as the last two
/// arguments of `_convertSingle`.
// NOTE(review): presumably those two arguments let decoding errors refer to
// the caller's input rather than to the temporary copy - confirm against
// `_convertSingle`.
@patch
static String? _convertIntercepted(
bool allowMalformed, List<int> codeUnits, int start, int? end) {
// We intercept the calls always to make sure the standard library UTF8
// decoder is only passed `U8List`, so that array accesses will be
// monomorphic and inlined.
if (codeUnits is U8List) {
return _Utf8Decoder(allowMalformed)._convertSingle(
unsafeCast<U8List>(codeUnits), start, end, codeUnits, start);
} else {
// TODO(omersa): Check if `codeUnits` is a JS array and call browser UTF8
// decoder here.
//
// If we're passed a `List<int>` other than `U8List` or a JS typed array,
// it means the performance is not too important. So we convert the input
// to `U8List` to avoid shipping another UTF8 decoder.
// NOTE(review): `start`/`end` are not range-checked against `codeUnits`
// before the copy below; presumably the caller or the element accesses
// catch bad ranges - confirm.
end ??= codeUnits.length;
final length = end - start;
final u8list = U8List(length);
final u8listData = u8list.data;
if (allowMalformed) {
// Lenient mode: out-of-range values become 0xFF instead of failing.
int u8listIdx = 0;
for (int codeUnitsIdx = start; codeUnitsIdx < end; codeUnitsIdx += 1) {
int byte = codeUnits[codeUnitsIdx];
if (byte < 0 || byte > 255) {
byte = 0xFF;
}
u8listData.write(u8listIdx++, byte);
}
} else {
// Strict mode: the first out-of-range value aborts the conversion.
int u8listIdx = 0;
for (int codeUnitsIdx = start; codeUnitsIdx < end; codeUnitsIdx += 1) {
final byte = codeUnits[codeUnitsIdx];
if (byte < 0 || byte > 255) {
throw FormatException(
'Invalid UTF-8 byte', codeUnits, codeUnitsIdx);
}
u8listData.write(u8listIdx++, byte);
}
}
// The copy holds exactly the requested slice, so decode it from 0 to
// `length`.
return _Utf8Decoder(allowMalformed)
._convertSingle(u8list, 0, length, codeUnits, start);
}
// NOTE(review): both branches above return, so as rendered this statement is
// unreachable; it looks like the post-change body of this method shown
// alongside the removed lines - confirm against the actual file.
return null;
}
}
@ -1713,14 +1674,47 @@ class _Utf8Decoder {
@patch
String convertSingle(List<int> codeUnits, int start, int? maybeEnd) {
// `Utf8Decoder._convertIntercepted` should intercept all calls to call the
// right decoder for the `codeUnits` type.
throw 'Utf8Decoder.convert was not intercepted';
}
int end = RangeError.checkValidRange(start, maybeEnd, codeUnits.length);
if (start == end) return "";
String _convertSingle(U8List bytes, int start, int? maybeEnd,
List<int> actualSource, int actualStart) {
final int end = RangeError.checkValidRange(start, maybeEnd, bytes.length);
final U8List bytes;
if (codeUnits is U8List) {
bytes = unsafeCast<U8List>(codeUnits);
} else {
// TODO(omersa): Check if `codeUnits` is a JS array and call browser UTF8
// decoder here.
//
// If we're passed a `List<int>` other than `U8List` or a JS typed array,
// it means the performance is not too important. Convert the input to
// `U8List` to avoid shipping another UTF-8 decoder.
final length = end - start;
bytes = U8List(length);
final u8listData = bytes.data;
if (allowMalformed) {
int u8listIdx = 0;
for (int codeUnitsIdx = start; codeUnitsIdx < end; codeUnitsIdx += 1) {
int byte = codeUnits[codeUnitsIdx];
if (byte < 0 || byte > 255) {
byte = 0xFF;
}
u8listData.write(u8listIdx++, byte);
}
} else {
int u8listIdx = 0;
for (int codeUnitsIdx = start; codeUnitsIdx < end; codeUnitsIdx += 1) {
final byte = codeUnits[codeUnitsIdx];
if (byte < 0 || byte > 255) {
throw FormatException(
'Invalid UTF-8 byte', codeUnits, codeUnitsIdx);
}
u8listData.write(u8listIdx++, byte);
}
}
start = 0;
end = length;
}
final actualStart = start;
// Skip initial BOM.
start = skipBomSingle(bytes, start, end);
@ -1760,7 +1754,7 @@ class _Utf8Decoder {
_charOrIndex = end;
}
final String message = errorDescription(_state);
throw FormatException(message, actualSource, actualStart + _charOrIndex);
throw FormatException(message, codeUnits, actualStart + _charOrIndex);
}
// Start over on slow path.

View file

@ -23,86 +23,16 @@ dynamic _parseJson(
/// Patch of [Utf8Decoder] that can route decoding of `JSUint8ArrayImpl`
/// inputs through a JavaScript `TextDecoder`.
///
/// Every "could not handle this" situation is reported by returning `null`
/// from [_convertIntercepted], which means the call was not intercepted.
@patch
class Utf8Decoder {
  /// Inputs shorter than this are never handed to `TextDecoder`, because
  /// using it carries a large, constant overhead.
  // TODO(omersa): This is copied from dart2js runtime, make sure the value is
  // right for dart2wasm.
  static const int _shortInputThreshold = 15;

  /// Fuses this converter with [next] by deferring to the superclass.
  @patch
  Converter<List<int>, T> fuse<T>(Converter<String, T> next) =>
      super.fuse(next);

  /// Interception hook for UTF-8 decoding of built-in lists.
  ///
  /// Only `JSUint8ArrayImpl` inputs whose requested slice is at least
  /// [_shortInputThreshold] long are attempted. Returns the decoded string,
  /// or `null` when the call was not intercepted.
  @patch
  static String? _convertIntercepted(
      bool allowMalformed, List<int> codeUnits, int start, int? end) {
    if (codeUnits is! JSUint8ArrayImpl) {
      return null; // This call was not intercepted.
    }
    final int sliceEnd = end ?? codeUnits.length;
    if (sliceEnd - start < _shortInputThreshold) {
      return null;
    }
    return _convertInterceptedUint8List(
        allowMalformed, codeUnits, start, sliceEnd);
  }

  /// Decodes `codeUnits[start..end)` with the `TextDecoder` matching
  /// [allowMalformed].
  ///
  /// Returns `null` when that decoder is unavailable or when decoding fails.
  /// The range is validated unless it spans the whole list.
  static String? _convertInterceptedUint8List(
      bool allowMalformed, JSUint8ArrayImpl codeUnits, int start, int end) {
    final JSAny? textDecoder = allowMalformed ? _decoderNonFatal : _decoder;
    if (textDecoder == null) {
      return null;
    }

    final bool coversWholeList = start == 0 && end == codeUnits.length;
    if (!coversWholeList) {
      RangeError.checkValidRange(start, end, codeUnits.length);
    }

    final decoderRef = externRefForJSAny(textDecoder);
    final bytesRef = coversWholeList
        ? codeUnits.toJSArrayExternRef()
        : codeUnits.toJSArrayExternRef(start, end - start);
    return _useTextDecoder(decoderRef, bytesRef);
  }

  /// Runs `decoder.decode(codeUnits)` on the JavaScript side.
  ///
  /// If the input is malformed the exception is caught and `null` is
  /// returned so that the caller falls back on the unintercepted decoder,
  /// which will either succeed or report the problem better than
  /// `TextDecoder` does.
  static String? _useTextDecoder(
      WasmExternRef? decoder, WasmExternRef? codeUnits) {
    try {
      final decoded = js.JS<WasmExternRef?>(
          '(decoder, codeUnits) => decoder.decode(codeUnits)',
          decoder,
          codeUnits);
      return JSStringImpl(decoded);
    } catch (_) {
      return null;
    }
  }

  // TextDecoder is not defined on some browsers and on the stand-alone d8 and
  // jsshell engines. The two fields below use lazy initializers so that the
  // feature detection runs once.
  //
  // Globals need to return boxed Dart values, so these are typed `JSAny?`
  // instead of `WasmExternRef?`.

  /// Strict (`fatal: true`) decoder, or `null` if it could not be created.
  static final JSAny? _decoder = () {
    try {
      final ref = js
          .JS<WasmExternRef>('() => new TextDecoder("utf-8", {fatal: true})');
      return ref.toJS;
    } catch (_) {
      return null;
    }
  }();

  /// Lenient (`fatal: false`) decoder, or `null` if it could not be created.
  static final JSAny? _decoderNonFatal = () {
    try {
      final ref = js
          .JS<WasmExternRef>('() => new TextDecoder("utf-8", {fatal: false})');
      return ref.toJS;
    } catch (_) {
      return null;
    }
  }();
}
//// Implementation ///////////////////////////////////////////////////////////
@ -1556,8 +1486,34 @@ class _Utf8Decoder {
/// Creates a decoder whose `_state` starts out as `beforeBom`.
///
/// [allowMalformed] is stored on the instance as given.
@patch
_Utf8Decoder(this.allowMalformed) : _state = beforeBom;
/// Minimum slice length for which `TextDecoder` is tried.
///
/// Shorter inputs always fall back to the Dart implementation, as there is a
/// large, constant overhead for using `TextDecoder`.
// TODO(omersa): This is copied from dart2js runtime, make sure the value is
// right for dart2wasm.
static const int _shortInputThreshold = 15;

/// Decodes `codeUnits[start..maybeEnd)` in one go.
///
/// The range is validated first and an empty range yields `""`. A
/// `JSUint8ArrayImpl` slice of at least [_shortInputThreshold] elements is
/// offered to the `TextDecoder` selected by `allowMalformed`; when that
/// decoder is missing or returns `null`, the work is done by
/// `convertGeneral` instead.
@patch
String convertSingle(List<int> codeUnits, int start, int? maybeEnd) {
  final end = RangeError.checkValidRange(start, maybeEnd, codeUnits.length);
  final sliceLength = end - start;
  if (sliceLength == 0) return "";

  if (codeUnits is JSUint8ArrayImpl && sliceLength >= _shortInputThreshold) {
    final JSAny? textDecoder = allowMalformed ? _decoderNonFatal : _decoder;
    if (textDecoder != null) {
      final bytesRef = codeUnits.toJSArrayExternRef(start, sliceLength);
      final decoded =
          _useTextDecoder(externRefForJSAny(textDecoder), bytesRef);
      if (decoded != null) return decoded;
    }
  }

  // Dart implementation: used for every input the TextDecoder path did not
  // handle.
  return convertGeneral(codeUnits, start, maybeEnd, true);
}
@ -1565,6 +1521,43 @@ class _Utf8Decoder {
// Chunked conversion: forwards to `convertGeneral` with its last argument
// set to `false` (`convertSingle` passes `true`).
String convertChunked(List<int> codeUnits, int start, int? maybeEnd) =>
    convertGeneral(codeUnits, start, maybeEnd, false);
/// Runs `decoder.decode(codeUnits)` on the JavaScript side.
///
/// If the input is malformed the exception is caught and `null` is returned
/// so that the caller falls back on the unintercepted decoder, which will
/// either succeed or report the problem better than `TextDecoder` does.
static String? _useTextDecoder(
    WasmExternRef? decoder, WasmExternRef? codeUnits) {
  try {
    final decoded = js.JS<WasmExternRef?>(
        '(decoder, codeUnits) => decoder.decode(codeUnits)',
        decoder,
        codeUnits);
    return JSStringImpl(decoded);
  } catch (_) {
    return null;
  }
}
// TextDecoder is not defined on some browsers and on the stand-alone d8 and
// jsshell engines. A lazy initializer is used so that the feature detection
// runs once.
//
// Globals need to return boxed Dart values, so the decoder fields are typed
// `JSAny?` instead of `WasmExternRef?`.

/// Strict (`fatal: true`) decoder, or `null` if it could not be created.
static final JSAny? _decoder = () {
  try {
    final ref =
        js.JS<WasmExternRef>('() => new TextDecoder("utf-8", {fatal: true})');
    return ref.toJS;
  } catch (_) {
    return null;
  }
}();
/// Lenient (`fatal: false`) decoder, or `null` if it could not be created.
///
/// Initialized lazily, so the attempt to construct it happens once.
static final JSAny? _decoderNonFatal = () {
  try {
    final ref =
        js.JS<WasmExternRef>('() => new TextDecoder("utf-8", {fatal: false})');
    return ref.toJS;
  } catch (_) {
    return null;
  }
}();
}
double _parseDouble(String source, int start, int end) =>