From 455d85f528b15c16980fe5dd14823b035793b387 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96mer=20Sinan=20A=C4=9Facan?= Date: Tue, 11 Jun 2024 16:10:23 +0000 Subject: [PATCH] [dart2wasm] Port VM JSON parsing improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This ports https://dart-review.googlesource.com/c/sdk/+/365803 to dart2wasm. Benchmarks: https://golem.corp.goog/Revision?repository=dart&revision=110551&patch=19222 Change-Id: Id4a8e0f44abcde3552c50605d9b329443d43d1d5 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/370821 Commit-Queue: Ömer Ağacan Reviewed-by: Martin Kustermann --- sdk/lib/_internal/wasm/lib/convert_patch.dart | 143 +++++++++++++++--- sdk/lib/_internal/wasm/lib/string.dart | 10 ++ 2 files changed, 128 insertions(+), 25 deletions(-) diff --git a/sdk/lib/_internal/wasm/lib/convert_patch.dart b/sdk/lib/_internal/wasm/lib/convert_patch.dart index 42000698413..dee5c108fb6 100644 --- a/sdk/lib/_internal/wasm/lib/convert_patch.dart +++ b/sdk/lib/_internal/wasm/lib/convert_patch.dart @@ -103,7 +103,7 @@ class _JsonListener { void popContainer() { value = currentContainer; currentContainer = stack.removeLast(); - if (currentContainer is Map) key = stack.removeLast() as String; + if (currentContainer is Map) key = unsafeCast(stack.removeLast()); } void handleString(String value) { @@ -128,12 +128,12 @@ class _JsonListener { } void propertyName() { - key = value as String; + key = unsafeCast(value); value = null; } void propertyValue() { - var map = currentContainer as Map; + var map = unsafeCast(currentContainer); var reviver = this.reviver; if (reviver != null) { value = reviver(key, value); @@ -153,7 +153,7 @@ class _JsonListener { } void arrayElement() { - var list = currentContainer as List; + var list = unsafeCast(currentContainer); var reviver = this.reviver; if (reviver != null) { value = reviver(list.length, value); @@ -524,6 +524,13 @@ mixin _ChunkedJsonParser on 
_JsonParserWithListener { */ int getChar(int index); + /** + * Returns [true] if [getChar] is returning UTF16 code units. + * + * Otherwise it is expected that [getChar] is returning UTF8 bytes. + */ + bool get isUtf16Input; + /** * Copy ASCII characters from start to end of chunk into a list. * @@ -808,16 +815,30 @@ mixin _ChunkedJsonParser on _JsonParserWithListener { position = parsePartial(position); if (position == length) return; } + final OneByteString charAttributes = + unsafeCast(_characterAttributes); + int state = this.state; + outer: while (position < length) { - int char = getChar(position); - switch (char) { - case SPACE: - case CARRIAGE_RETURN: - case NEWLINE: - case TAB: - position++; + int char = 0; + do { + char = getChar(position); + if (isUtf16Input && char > 0xFF) { break; + } + if ((oneByteStringCodeUnitAtUnchecked(charAttributes, char) & + CHAR_WHITESPACE) == + 0) { + break; + } + position++; + if (position >= length) { + break outer; + } + } while (true); + + switch (char) { case QUOTE: if ((state & ALLOW_STRING_MASK) != 0) fail(position); state |= VALUE_READ_BITS; @@ -977,6 +998,37 @@ mixin _ChunkedJsonParser on _JsonParserWithListener { return length; } + static const int CHAR_SIMPLE_STRING_END = 1; + static const int CHAR_WHITESPACE = 2; + + /** + * [_characterAttributes] string was generated using the following code: + * + * ``` + * int $(String ch) => ch.codeUnitAt(0); + * final list = Uint8List(256); + * for (var i = 0; i < $(' '); i++) { + * list[i] |= CHAR_SIMPLE_STRING_END; + * } + * list[$('"')] |= CHAR_SIMPLE_STRING_END; + * list[$('\\')] |= CHAR_SIMPLE_STRING_END; + * list[$(' ')] |= CHAR_WHITESPACE; + * list[$('\r')] |= CHAR_WHITESPACE; + * list[$('\n')] |= CHAR_WHITESPACE; + * list[$('\t')] |= CHAR_WHITESPACE; + * for (var i = 0; i < 256; i += 64) { + * print("'${String.fromCharCodes([ + * for (var v in list.skip(i).take(64)) v + $(' '), + * ])}'"); + * } + * ``` + */ + static const String _characterAttributes = + 
'!!!!!!!!!##!!#!!!!!!!!!!!!!!!!!!" !                             ' + '                            !                                   ' + '                                                                ' + '                                                                '; + /** * Parses a string value. * @@ -984,28 +1036,43 @@ mixin _ChunkedJsonParser on _JsonParserWithListener { * Returned position right after the final quote. */ int parseString(int position) { + final OneByteString charAttributes = + unsafeCast(_characterAttributes); + // Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"' // Initial position is right after first '"'. int start = position; int end = chunkEnd; int bits = 0; - while (position < end) { - int char = getChar(position++); - bits |= char; // Includes final '"', but that never matters. - // BACKSLASH is larger than QUOTE and SPACE. - if (char > BACKSLASH) { - continue; + int char = 0; + if (position < end) { + do { + // Caveat: do not combine the following two lines together. It helps + // compiler to generate better code (it currently can't reorder operations + // to reduce register pressure). + char = getChar(position); + position++; + bits |= char; // Includes final '"', but that never matters. + if (isUtf16Input && char > 0xFF) { + continue; + } + if ((oneByteStringCodeUnitAtUnchecked(charAttributes, char) & + CHAR_SIMPLE_STRING_END) != + 0) { + break; + } + } while (position < end); + if (char == QUOTE) { + int sliceEnd = position - 1; + listener.handleString(getString(start, sliceEnd, bits)); + return sliceEnd + 1; } if (char == BACKSLASH) { - beginString(); int sliceEnd = position - 1; + beginString(); if (start < sliceEnd) addSliceToString(start, sliceEnd); return parseStringToBuffer(sliceEnd); } - if (char == QUOTE) { - listener.handleString(getString(start, position - 1, bits)); - return position; - } if (char < SPACE) { fail(position - 1, "Control character in string"); } @@ -1055,6 +1122,9 @@ mixin _ChunkedJsonParser on _JsonParserWithListener { * slices of non-escape characters using [addSliceToString]. 
*/ int parseStringToBuffer(int position) { + final OneByteString charAttributes = + unsafeCast(_characterAttributes); + int end = chunkEnd; int start = position; while (true) { @@ -1064,11 +1134,25 @@ mixin _ChunkedJsonParser on _JsonParserWithListener { } return chunkString(STR_PLAIN); } - int char = getChar(position++); - if (char > BACKSLASH) continue; + + int char = 0; + do { + char = getChar(position); + position++; + if (isUtf16Input && char > 0xFF) { + continue; + } + if ((oneByteStringCodeUnitAtUnchecked(charAttributes, char) & + CHAR_SIMPLE_STRING_END) != + 0) { + break; + } + } while (position < end); + if (char < SPACE) { fail(position - 1); // Control character in string. } + if (char == QUOTE) { int quotePosition = position - 1; if (quotePosition > start) { @@ -1077,13 +1161,16 @@ mixin _ChunkedJsonParser on _JsonParserWithListener { listener.handleString(endString()); return position; } + if (char != BACKSLASH) { continue; } + // Handle escape. if (position - 1 > start) { addSliceToString(start, position - 1); } + if (position == end) return chunkString(STR_ESCAPE); position = parseStringEscape(position); if (position == end) return position; @@ -1379,6 +1466,9 @@ class _JsonStringParser extends _JsonParserWithListener _JsonStringParser(_JsonListener listener) : super(listener); + @pragma('wasm:prefer-inline') + bool get isUtf16Input => true; + int getChar(int position) => chunk.codeUnitAt(position); String getString(int start, int end, int bits) { @@ -1500,13 +1590,16 @@ class _JsonUtf8Parser extends _JsonParserWithListener parse(start); } + @pragma('wasm:prefer-inline') + bool get isUtf16Input => false; + @pragma('wasm:prefer-inline') int getChar(int position) => chunk[position]; String getString(int start, int end, int bits) { const int maxAsciiChar = 0x7f; if (bits <= maxAsciiChar) { - return new String.fromCharCodes(chunk, start, end); + return createOneByteStringFromCharacters(chunk, start, end); } beginString(); if (start < end) 
addSliceToString(start, end); diff --git a/sdk/lib/_internal/wasm/lib/string.dart b/sdk/lib/_internal/wasm/lib/string.dart index a5b61afb653..def0cef018b 100644 --- a/sdk/lib/_internal/wasm/lib/string.dart +++ b/sdk/lib/_internal/wasm/lib/string.dart @@ -17,6 +17,7 @@ import 'dart:_js_helper' show JS, jsStringToDartString; import 'dart:_js_types' show JSStringImpl; import 'dart:_object_helper'; import 'dart:_string_helper'; +import 'dart:_typed_data'; import 'dart:_wasm'; import "dart:typed_data" show Uint8List, Uint16List; @@ -52,6 +53,15 @@ void copyRangeFromUint8ListToOneByteString( } } +@pragma("wasm:prefer-inline") +OneByteString createOneByteStringFromCharacters( + U8List bytes, int start, int end) { + final len = end - start; + final s = OneByteString.withLength(len); + s._array.copy(0, bytes.data, start, len); + return s; +} + extension OneByteStringUnsafeExtensions on String { @pragma('wasm:prefer-inline') int oneByteStringCodeUnitAtUnchecked(int index) =>