mirror of
https://github.com/dart-lang/sdk
synced 2024-10-14 17:59:39 +00:00
[dart2wasm] Optimize UTF8 decoder when decoding a native array
Intercept all `Utf8Decoder.convert` calls to check the input type and call the right UTF8 decoder function. This currently only optimizes `U8List` parsing, and only the fastest path where the input is an ASCII string. Follow-up CLs will optimize JS typed array parsing, by calling the browser's UTF8 decoder, as well as the slow paths for non-ASCII and malformed buffers. Results from an internal benchmark extracted from a real use case: Before: Parse(RunTime): 330,500 us. After: Parse(RunTime): 90,310 us. (-72%) dart2js -O4 results of the same benchmark: Parse(RunTime): 234,250 us. Change-Id: I470697a9eb516e1e031e9b865ddb29e8f25569bf Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/331187 Commit-Queue: Ömer Ağacan <omersa@google.com> Reviewed-by: Jackson Gardner <jacksongardner@google.com>
This commit is contained in:
parent
0a436b7005
commit
405a03909c
|
@ -4,6 +4,7 @@
|
|||
|
||||
import "dart:_internal" show patch, POWERS_OF_TEN, unsafeCast;
|
||||
import "dart:_string";
|
||||
import "dart:_typed_data";
|
||||
import "dart:typed_data" show Uint8List, Uint16List;
|
||||
|
||||
/// This patch library has no additional parts.
|
||||
|
@ -27,7 +28,7 @@ class Utf8Decoder {
|
|||
@patch
|
||||
Converter<List<int>, T> fuse<T>(Converter<String, T> next) {
|
||||
if (next is JsonDecoder) {
|
||||
return new _JsonUtf8Decoder(
|
||||
return _JsonUtf8Decoder(
|
||||
(next as JsonDecoder)._reviver, this._allowMalformed)
|
||||
as dynamic/*=Converter<List<int>, T>*/;
|
||||
}
|
||||
|
@ -35,11 +36,49 @@ class Utf8Decoder {
|
|||
return super.fuse<T>(next);
|
||||
}
|
||||
|
||||
// Allow intercepting of UTF-8 decoding when built-in lists are passed.
|
||||
@patch
|
||||
static String? _convertIntercepted(
|
||||
bool allowMalformed, List<int> codeUnits, int start, int? end) {
|
||||
return null; // This call was not intercepted.
|
||||
// We intercept the calls always to make sure the standard library UTF8
|
||||
// decoder is only passed `U8List`, so that array accesses will be
|
||||
// monomorphic and inlined.
|
||||
if (codeUnits is U8List) {
|
||||
return _Utf8Decoder(allowMalformed)._convertSingle(
|
||||
unsafeCast<U8List>(codeUnits), start, end, codeUnits, start);
|
||||
} else {
|
||||
// TODO(omersa): Check if `codeUnits` is a JS array and call browser UTF8
|
||||
// decoder here.
|
||||
//
|
||||
// If we're passed a `List<int>` other than `U8List` or a JS typed array,
|
||||
// it means the performance is not too important. So we convert the input
|
||||
// to `U8List` to avoid shipping another UTF8 decoder.
|
||||
end ??= codeUnits.length;
|
||||
final length = end - start;
|
||||
final u8list = U8List(length);
|
||||
final u8listData = u8list.data;
|
||||
if (allowMalformed) {
|
||||
int u8listIdx = 0;
|
||||
for (int codeUnitsIdx = start; codeUnitsIdx < end; codeUnitsIdx += 1) {
|
||||
int byte = codeUnits[codeUnitsIdx];
|
||||
if (byte < 0 || byte > 255) {
|
||||
byte = 0xFF;
|
||||
}
|
||||
u8listData.write(u8listIdx++, byte);
|
||||
}
|
||||
} else {
|
||||
int u8listIdx = 0;
|
||||
for (int codeUnitsIdx = start; codeUnitsIdx < end; codeUnitsIdx += 1) {
|
||||
final byte = codeUnits[codeUnitsIdx];
|
||||
if (byte < 0 || byte > 255) {
|
||||
throw FormatException(
|
||||
'Invalid UTF-8 byte', codeUnits, codeUnitsIdx);
|
||||
}
|
||||
u8listData.write(u8listIdx++, byte);
|
||||
}
|
||||
}
|
||||
return _Utf8Decoder(allowMalformed)
|
||||
._convertSingle(u8list, 0, length, codeUnits, start);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1364,7 +1403,7 @@ abstract class _ChunkedJsonParser<T> {
|
|||
message = "Unexpected character";
|
||||
if (position == chunkEnd) message = "Unexpected end of input";
|
||||
}
|
||||
throw new FormatException(message, chunk, position);
|
||||
throw FormatException(message, chunk, position);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1469,7 +1508,7 @@ class _JsonStringDecoderSink extends StringConversionSinkBase {
|
|||
* Chunked JSON parser that parses UTF-8 chunks.
|
||||
*/
|
||||
class _JsonUtf8Parser extends _ChunkedJsonParser<List<int>> {
|
||||
static final Uint8List emptyChunk = Uint8List(0);
|
||||
static final U8List emptyChunk = U8List(0);
|
||||
|
||||
final _Utf8Decoder decoder;
|
||||
List<int> chunk = emptyChunk;
|
||||
|
@ -1674,20 +1713,14 @@ class _Utf8Decoder {
|
|||
|
||||
@patch
|
||||
String convertSingle(List<int> codeUnits, int start, int? maybeEnd) {
|
||||
int end = RangeError.checkValidRange(start, maybeEnd, codeUnits.length);
|
||||
// `Utf8Decoder._convertIntercepted` should intercept all calls to call the
|
||||
// right decoder for the `codeUnits` type.
|
||||
throw 'Utf8Decoder.convert was not intercepted';
|
||||
}
|
||||
|
||||
// Have bytes as Uint8List.
|
||||
Uint8List bytes;
|
||||
int errorOffset;
|
||||
if (codeUnits is Uint8List) {
|
||||
bytes = unsafeCast<Uint8List>(codeUnits);
|
||||
errorOffset = 0;
|
||||
} else {
|
||||
bytes = _makeUint8List(codeUnits, start, end);
|
||||
errorOffset = start;
|
||||
end -= start;
|
||||
start = 0;
|
||||
}
|
||||
String _convertSingle(U8List bytes, int start, int? maybeEnd,
|
||||
List<int> actualSource, int actualStart) {
|
||||
final int end = RangeError.checkValidRange(start, maybeEnd, bytes.length);
|
||||
|
||||
// Skip initial BOM.
|
||||
start = skipBomSingle(bytes, start, end);
|
||||
|
@ -1696,14 +1729,14 @@ class _Utf8Decoder {
|
|||
if (start == end) return "";
|
||||
|
||||
// Scan input to determine size and appropriate decoder.
|
||||
int size = scan(bytes, start, end);
|
||||
int flags = _scanFlags;
|
||||
final int size = scan(bytes, start, end);
|
||||
final int flags = _scanFlags;
|
||||
|
||||
if (flags == 0) {
|
||||
// Pure ASCII.
|
||||
assert(size == end - start);
|
||||
OneByteString result = OneByteString.withLength(size);
|
||||
copyRangeFromUint8ListToOneByteString(bytes, result, start, 0, size);
|
||||
oneByteStringArray(result).copy(0, bytes.data, start, size);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -1726,7 +1759,7 @@ class _Utf8Decoder {
|
|||
_charOrIndex = end;
|
||||
}
|
||||
final String message = errorDescription(_state);
|
||||
throw FormatException(message, codeUnits, errorOffset + _charOrIndex);
|
||||
throw FormatException(message, actualSource, actualStart + _charOrIndex);
|
||||
}
|
||||
|
||||
// Start over on slow path.
|
||||
|
|
|
@ -31,6 +31,10 @@ void writeIntoOneByteString(OneByteString s, int index, int codePoint) =>
|
|||
void writeIntoTwoByteString(TwoByteString s, int index, int codePoint) =>
|
||||
s._setAt(index, codePoint);
|
||||
|
||||
/// Static function for `OneByteString._array` to avoid making `_array` public.
|
||||
@pragma('wasm:prefer-inline')
|
||||
WasmIntArray<WasmI8> oneByteStringArray(OneByteString s) => s._array;
|
||||
|
||||
/// The [fromStart] and [toStart] indices together with the [length] must
|
||||
/// specify ranges within the bounds of the list / string.
|
||||
void copyRangeFromUint8ListToOneByteString(
|
||||
|
|
Loading…
Reference in a new issue