[dart2wasm] Initial regexp support.

Change-Id: Ia461c77979785bbc0510052a31f94bdd83babc01
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/250582
Reviewed-by: Aske Simon Christensen <askesc@google.com>
Commit-Queue: Joshua Litt <joshualitt@google.com>
This commit is contained in:
Joshua Litt 2022-08-18 19:49:34 +00:00 committed by Commit Bot
parent 7ddee84e78
commit 4810c8930b
7 changed files with 420 additions and 9 deletions

View file

@ -64,6 +64,15 @@ function dataViewFromDartByteData(byteData, byteLength) {
// A special symbol attached to functions that wrap Dart functions.
var jsWrappedDartFunctionSymbol = Symbol("JSWrappedDartFunction");
// Invokes `constructor` with `new`, passing the elements of `args` as the
// constructor arguments.
function callConstructorVarArgs(constructor, args) {
  // `bind` pre-applies the arguments. Its first parameter (the `this`
  // context) is `null` because it is unused: `new` explicitly creates a
  // new context.
  const boundConstructor = constructor.bind(null, ...args);
  return new boundConstructor();
}
// Imports for printing and event loop
var dart2wasm = {
printToConsole: function(string) {
@ -204,6 +213,9 @@ var dart2wasm = {
isJSObject: function(o) {
return o instanceof Object;
},
isJSRegExp: function(o) {
return o instanceof RegExp;
},
roundtrip: function (o) {
// This function exists as a hook for the native JS -> Wasm type
// conversion rules. The Dart runtime will overload variants of this
@ -229,12 +241,13 @@ var dart2wasm = {
callMethodVarArgs: function(object, name, args) {
return object[name].apply(object, args);
},
callConstructorVarArgs: function(constructor, args) {
// Apply bind to the constructor. We pass `null` as the first argument
// to `bind.apply` because this is `bind`'s unused context
// argument(`new` will explicitly create a new context).
var factoryFunction = constructor.bind.apply(constructor, [null, ...args]);
return new factoryFunction();
callConstructorVarArgs: callConstructorVarArgs,
safeCallConstructorVarArgs: function(constructor, args) {
try {
return callConstructorVarArgs(constructor, args);
} catch (e) {
return String(e);
}
},
getTimeZoneNameForSeconds: function(secondsSinceEpoch) {
var date = new Date(secondsSinceEpoch * 1000);
@ -298,6 +311,17 @@ var dart2wasm = {
}
return parseFloat(jsSource);
},
quoteStringForRegExp: function(string) {
// We specialize this method in the runtime to avoid the overhead of
// jumping back and forth between JS and Dart. This method is optimized
// to test before replacement, which should be much faster. This might
// be worth measuring in real world use cases though.
var jsString = stringFromDartString(string);
if (/[[\]{}()*+?.\\^$|]/.test(jsString)) {
jsString = jsString.replace(/[[\]{}()*+?.\\^$|]/g, '\\$&');
}
return stringToDartString(jsString);
},
};
function instantiate(filename, imports) {

View file

@ -32,6 +32,8 @@ import "dart:_internal"
import "dart:_internal" as _internal show Symbol;
import 'dart:_js_helper' show JSSyntaxRegExp, quoteStringForRegExp;
import "dart:collection"
show
HashMap,

View file

@ -6,16 +6,23 @@
library dart._js_helper;
import 'dart:_internal';
import 'dart:collection';
import 'dart:typed_data';
import 'dart:wasm';
part 'regexp_helper.dart';
/// [JSValue] is the root of the JS interop object hierarchy.
class JSValue {
final WasmAnyRef _ref;
JSValue(this._ref);
static JSValue? box(WasmAnyRef? ref) => ref == null ? null : JSValue(ref);
// Currently we always explicitly box JS refs in [JSValue] objects. In the
// future, we will want to leave these values unboxed when possible, even when
// they are nullable.
static JSValue? box(WasmAnyRef? ref) =>
isDartNull(ref) ? null : JSValue(ref!);
WasmAnyRef toAnyRef() => _ref;
String toString() => jsStringToDartString(_ref);
@ -39,9 +46,79 @@ extension ListOfObjectToJS on List<Object?> {
}
/// Conversions from an arbitrary Dart [Object] to its JS representation.
// NOTE(review): `toJS` is declared twice below. This looks like the old and
// new lines of a diff shown side by side; confirm that only the second
// declaration exists in the real file.
extension ObjectToJS on Object {
JSValue toJS() => JSValue(jsObjectFromDartObject(this));
/// Returns the result of [jsObjectFromDartObject] for this object.
WasmAnyRef toAnyRef() => jsObjectFromDartObject(this);
/// Wraps the result of [toAnyRef] in a [JSValue].
JSValue toJS() => JSValue(toAnyRef());
}
/// Whether [ref] should be treated as Dart `null`.
///
/// For now both `null` and `undefined` in JS map to `null` in Dart.
bool isDartNull(WasmAnyRef? ref) {
  if (ref == null) return true;
  return isJSUndefined(ref);
}
/// A [JSArray] is a wrapper for a native JSArray.
class JSArray extends JSValue {
  JSArray(WasmAnyRef ref) : super(ref);

  /// Wraps [ref] in a [JSArray], or returns `null` when [isDartNull] holds.
  static JSArray? box(WasmAnyRef? ref) {
    if (isDartNull(ref)) return null;
    return JSArray(ref!);
  }

  /// Calls the JS `pop` method with no arguments and boxes the result.
  JSValue? pop() {
    final popped =
        callMethodVarArgsRaw(_ref, 'pop'.toAnyRef(), [].toAnyRef());
    return JSValue.box(popped);
  }

  /// Reads the element at [index], boxed as a [JSValue].
  JSValue? operator [](int index) {
    final element = getPropertyRaw(_ref, intToJSNumber(index));
    return JSValue.box(element);
  }

  /// Stores [value] (or `null`) at [index].
  void operator []=(int index, JSValue? value) {
    setPropertyRaw(_ref, intToJSNumber(index), value?.toAnyRef());
  }

  /// The array's `length` property, floored to an [int].
  int get length {
    final jsLength = getPropertyRaw(_ref, 'length'.toAnyRef())!;
    return toDartNumber(jsLength).floor();
  }
}
/// A [JSObject] is a wrapper for any JS object literal.
class JSObject extends JSValue {
  JSObject(WasmAnyRef ref) : super(ref);

  /// Wraps [ref] in a [JSObject], or returns `null` when [isDartNull] holds.
  static JSObject? box(WasmAnyRef? ref) {
    if (isDartNull(ref)) return null;
    return JSObject(ref!);
  }

  /// Reads the property named [key], boxed as a [JSValue].
  JSValue? operator [](String key) {
    final property = getPropertyRaw(_ref, key.toAnyRef());
    return JSValue.box(property);
  }

  /// Stores [value] (or `null`) under the property named [key].
  void operator []=(String key, JSValue? value) {
    setPropertyRaw(_ref, key.toAnyRef(), value?.toAnyRef());
  }
}
/// Iterates over the elements of a [JSArray], converting each element with
/// [dartifyRaw] and casting it to [T].
class JSArrayIteratorAdapter<T> extends Iterator<T> {
  final JSArray array;

  /// Position of [current]; `-1` until [moveNext] is called for the first
  /// time.
  int index = -1;

  JSArrayIteratorAdapter(this.array);

  /// Advances to the next element and reports whether there is one.
  ///
  /// Once the end of the array has been reached, further calls do not
  /// advance [index] any more and keep returning `false`, as the [Iterator]
  /// contract requires, rather than throwing.
  @override
  bool moveNext() {
    // Read the length once: each access is a property lookup on the JS
    // object.
    final length = array.length;
    if (index < length) index++;
    return index < length;
  }

  @override
  T get current => dartifyRaw(array[index]?.toAnyRef()) as T;
}
/// Lazily exposes a [JSArray] through Dart's [Iterable] interface.
///
/// Iteration is delegated to [JSArrayIteratorAdapter] and [length] to the
/// underlying array.
class JSArrayIterableAdapter<T> extends EfficientLengthIterable<T> {
  final JSArray array;

  JSArrayIterableAdapter(this.array);

  @override
  Iterator<T> get iterator {
    return JSArrayIteratorAdapter<T>(array);
  }

  @override
  int get length {
    return array.length;
  }
}
/// Converts [i] to a JS number.
///
/// The value goes through `double` first to avoid converting to [BigInt] in
/// the case of int64.
WasmAnyRef intToJSNumber(int i) {
  final asDouble = i.toDouble();
  return toJSNumber(asDouble);
}
/// Reads the property named [constructor] from the JS global object.
///
/// Returns whatever [getPropertyRaw] yields for that name, which may be
/// `null`.
WasmAnyRef? getConstructorString(String constructor) {
  final global = globalThisRaw();
  return getPropertyRaw(global, constructor.toAnyRef());
}
@ -113,6 +190,9 @@ external bool isJSWrappedDartFunction(WasmAnyRef? o);
/// Whether [o] satisfies `instanceof Object` on the JS side.
@pragma("wasm:import", "dart2wasm.isJSObject")
external bool isJSObject(WasmAnyRef? o);
/// Whether [object] satisfies `instanceof RegExp` on the JS side.
@pragma("wasm:import", "dart2wasm.isJSRegExp")
external bool isJSRegExp(WasmAnyRef object);
// The JS runtime will run helpful conversion routines between refs and bool /
// double. In the longer term hopefully we can find a way to avoid the round
// trip.
@ -187,6 +267,10 @@ external WasmAnyRef globalThisRaw();
/// Calls the JS constructor [o] with `new`, passing the elements of [args]
/// as the constructor arguments, and returns the constructed object.
@pragma("wasm:import", "dart2wasm.callConstructorVarArgs")
external WasmAnyRef callConstructorVarArgsRaw(WasmAnyRef o, WasmAnyRef args);
/// Like [callConstructorVarArgsRaw], but does not let a JS exception escape.
///
/// When the constructor call throws, the JS side catches the exception and
/// returns it converted to a JS string, so callers must check what kind of
/// value they received.
@pragma("wasm:import", "dart2wasm.safeCallConstructorVarArgs")
external WasmAnyRef safeCallConstructorVarArgsRaw(
WasmAnyRef o, WasmAnyRef args);
/// Binding for the JS runtime import `dart2wasm.hasProperty`.
// NOTE(review): presumably reports whether [o] has a property called [name];
// the JS implementation is not visible here, so confirm against the runtime.
@pragma("wasm:import", "dart2wasm.hasProperty")
external bool hasPropertyRaw(WasmAnyRef o, WasmAnyRef name);
@ -202,7 +286,7 @@ external WasmAnyRef? callMethodVarArgsRaw(
WasmAnyRef o, WasmAnyRef method, WasmAnyRef? args);
@pragma("wasm:import", "dart2wasm.stringify")
external String stringifyRaw(WasmAnyRef? object);
external String stringify(WasmAnyRef? object);
// Currently, `allowInterop` returns a Function type. This is unfortunate for
// Dart2wasm because it means arbitrary Dart functions can flow to JS util
@ -409,6 +493,14 @@ F _wrapDartFunction<F extends Function>(F f, String trampolineName) {
return f;
}
/// Returns the JS constructor object for a given [String].
///
/// The lookup is a property read on the JS global object; the result is
/// asserted to be non-null.
WasmAnyRef getConstructorRaw(String name) {
  final constructor = getPropertyRaw(globalThisRaw(), name.toAnyRef());
  return constructor!;
}
/// Equivalent to `Object.keys(object)`.
JSArray objectKeys(JSValue object) {
  final objectConstructor = getConstructorRaw('Object');
  final keys = callMethodVarArgsRaw(
      objectConstructor, 'keys'.toAnyRef(), [object].toAnyRef());
  return JSArray(keys!);
}
/// Methods used by the wasm runtime.
/// Returns the length of [list] as a `double`.
///
/// Exported to the wasm runtime under the name `$listLength`.
@pragma("wasm:export", "\$listLength")
double _listLength(List list) {
  final length = list.length;
  return length.toDouble();
}

View file

@ -0,0 +1,270 @@
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
part of dart._js_helper;
// TODO(joshualitt): This is a fork of the DDC RegExp class. In the longer term,
// with careful factoring we may be able to share this code.
// TODO(joshualitt): We should be able to build this library off of static
// interop.
/// Returns a string for a RegExp pattern that matches [string]. This is done by
/// escaping all RegExp metacharacters.
///
/// The implementation is imported from the JS runtime as
/// `dart2wasm.quoteStringForRegExp`.
@pragma('wasm:import', 'dart2wasm.quoteStringForRegExp')
external String quoteStringForRegExp(String string);
/// Wrapper for the match object returned by [JSNativeRegExp.exec].
///
/// Besides the array elements inherited from [JSArray], it exposes the
/// `input`, `index` and `groups` properties of the JS object.
class JSNativeMatch extends JSArray {
  JSNativeMatch(WasmAnyRef ref) : super(ref);

  /// Wraps [ref] in a [JSNativeMatch], or returns `null` when [isDartNull]
  /// holds.
  static JSNativeMatch? box(WasmAnyRef? ref) {
    if (isDartNull(ref)) return null;
    return JSNativeMatch(ref!);
  }

  /// The `input` property, converted to a Dart string.
  String get input {
    final jsInput = getPropertyRaw(toAnyRef(), 'input'.toAnyRef())!;
    return jsStringToDartString(jsInput);
  }

  /// The `index` property, floored to an [int].
  int get index {
    final jsIndex = getPropertyRaw(toAnyRef(), 'index'.toAnyRef())!;
    return toDartNumber(jsIndex).floor();
  }

  /// The `groups` property, or `null` when [isDartNull] holds for it.
  JSObject? get groups {
    final jsGroups = getPropertyRaw(toAnyRef(), 'groups'.toAnyRef());
    return JSObject.box(jsGroups);
  }
}
/// Wrapper for a native JS `RegExp` object.
class JSNativeRegExp extends JSValue {
  JSNativeRegExp(WasmAnyRef ref) : super(ref);

  /// Reads the property called [name] and converts it with [toDartBool].
  bool _boolProperty(String name) {
    final jsValue = getPropertyRaw(toAnyRef(), name.toAnyRef())!;
    return toDartBool(jsValue);
  }

  /// Calls the JS `exec` method with [string]; `null` when there is no
  /// match.
  JSNativeMatch? exec(String string) {
    final result = callMethodVarArgsRaw(
        toAnyRef(), 'exec'.toAnyRef(), [string].toAnyRef());
    return JSNativeMatch.box(result);
  }

  /// Calls the JS `test` method with [string].
  bool test(String string) {
    final result = callMethodVarArgsRaw(
        toAnyRef(), 'test'.toAnyRef(), [string].toAnyRef())!;
    return toDartBool(result);
  }

  /// The `flags` property, converted to a Dart string.
  String get flags {
    final jsFlags = getPropertyRaw(toAnyRef(), 'flags'.toAnyRef())!;
    return jsStringToDartString(jsFlags);
  }

  bool get multiline => _boolProperty('multiline');

  bool get ignoreCase => _boolProperty('ignoreCase');

  bool get unicode => _boolProperty('unicode');

  bool get dotAll => _boolProperty('dotAll');

  /// Sets the `lastIndex` property to [start].
  set lastIndex(int start) {
    setPropertyRaw(toAnyRef(), 'lastIndex'.toAnyRef(), intToJSNumber(start));
  }
}
/// A [RegExp] implementation backed by a native JS `RegExp` object.
///
/// Up to three native regexps are kept per instance: the plain one created
/// by the constructor, plus lazily created "global" and "anchored" variants
/// used by [allMatches] and [matchAsPrefix].
class JSSyntaxRegExp implements RegExp {
  @override
  final String pattern;

  /// The native regexp, compiled without the `g` flag.
  final JSNativeRegExp _nativeRegExp;

  /// Cache for [_nativeGlobalVersion].
  JSNativeRegExp? _nativeGlobalRegExp;

  /// Cache for [_nativeAnchoredVersion].
  JSNativeRegExp? _nativeAnchoredRegExp;

  @override
  String toString() => 'RegExp/$pattern/${_nativeRegExp.flags}';

  /// Compiles [source] with the given flags.
  ///
  /// Throws a [FormatException] if the JS `RegExp` constructor rejects the
  /// pattern.
  JSSyntaxRegExp(String source,
      {bool multiLine = false,
      bool caseSensitive = true,
      bool unicode = false,
      bool dotAll = false})
      : pattern = source,
        _nativeRegExp = makeNative(
            source, multiLine, caseSensitive, unicode, dotAll, false);

  /// The same pattern and flags as [_nativeRegExp], plus the `g` flag.
  JSNativeRegExp get _nativeGlobalVersion {
    return _nativeGlobalRegExp ??= makeNative(
        pattern, isMultiLine, isCaseSensitive, isUnicode, isDotAll, true);
  }

  JSNativeRegExp get _nativeAnchoredVersion {
    // An "anchored version" of a regexp is created by adding "|()" to the
    // source. This means that the regexp always matches at the first position
    // that it tries, and you can see if the original regexp matched, or it
    // was the added zero-width match that matched, by looking at the last
    // capture. If it is a String, the match participated, otherwise it didn't.
    return _nativeAnchoredRegExp ??= makeNative(
        '$pattern|()', isMultiLine, isCaseSensitive, isUnicode, isDotAll, true);
  }

  @override
  bool get isMultiLine => _nativeRegExp.multiline;

  // The JS `ignoreCase` flag is the opposite of Dart's case sensitivity, so
  // it has to be negated. Returning it directly would make the lazily built
  // global and anchored regexps use the wrong `i` flag.
  @override
  bool get isCaseSensitive => !_nativeRegExp.ignoreCase;

  @override
  bool get isUnicode => _nativeRegExp.unicode;

  @override
  bool get isDotAll => _nativeRegExp.dotAll;

  /// Creates a native JS regexp for [source] with the requested flags.
  ///
  /// Each `true` argument (or `false` for [caseSensitive]) contributes one
  /// letter to the JS flag string. Throws a [FormatException] when the JS
  /// constructor throws.
  static JSNativeRegExp makeNative(String source, bool multiLine,
      bool caseSensitive, bool unicode, bool dotAll, bool global) {
    final m = multiLine ? 'm' : '';
    final i = caseSensitive ? '' : 'i';
    final u = unicode ? 'u' : '';
    final s = dotAll ? 's' : '';
    final g = global ? 'g' : '';

    final modifiers = '$m$i$u$s$g';
    // The call to create the regexp is wrapped in a try catch so we can
    // reformat the exception if need be.
    final result = safeCallConstructorVarArgsRaw(
        getConstructorRaw('RegExp'), [source, modifiers].toAnyRef());
    if (isJSRegExp(result)) return JSNativeRegExp(result);
    // The returned value is the stringified JavaScript exception. Turn it into
    // a Dart exception.
    final errorMessage = jsStringToDartString(result);
    throw FormatException('Illegal RegExp pattern ($errorMessage)', source);
  }

  @override
  RegExpMatch? firstMatch(String string) {
    final m = _nativeRegExp.exec(string);
    if (m == null) return null;
    return _MatchImplementation(this, m);
  }

  @override
  bool hasMatch(String string) {
    return _nativeRegExp.test(string);
  }

  @override
  String? stringMatch(String string) {
    final match = firstMatch(string);
    if (match != null) return match.group(0);
    return null;
  }

  /// Returns a lazy iterable over all matches in [string] from [start] on.
  ///
  /// Throws a [RangeError] if [start] is outside `0..string.length`.
  @override
  Iterable<RegExpMatch> allMatches(String string, [int start = 0]) {
    if (start < 0 || start > string.length) {
      throw RangeError.range(start, 0, string.length);
    }
    return _AllMatchesIterable(this, string, start);
  }

  /// Searches [string] from [start] using the global variant of the regexp.
  RegExpMatch? _execGlobal(String string, int start) {
    final regexp = _nativeGlobalVersion;
    regexp.lastIndex = start;
    final match = regexp.exec(string);
    if (match == null) return null;
    return _MatchImplementation(this, match);
  }

  /// Matches [string] at exactly [start] using the anchored variant.
  RegExpMatch? _execAnchored(String string, int start) {
    final regexp = _nativeAnchoredVersion;
    regexp.lastIndex = start;
    final match = regexp.exec(string);
    if (match == null) return null;
    // If the last capture group participated, the original regexp did not
    // match at the start position.
    if (match.pop() != null) return null;
    return _MatchImplementation(this, match);
  }

  /// Matches this regexp against [string] starting exactly at [start].
  ///
  /// Throws a [RangeError] if [start] is outside `0..string.length`.
  @override
  RegExpMatch? matchAsPrefix(String string, [int start = 0]) {
    if (start < 0 || start > string.length) {
      throw RangeError.range(start, 0, string.length);
    }
    return _execAnchored(string, start);
  }
}
/// A [RegExpMatch] that reads its data from a native JS match object.
class _MatchImplementation implements RegExpMatch {
  final Pattern pattern;

  // Contains a JS RegExp match object.
  // It is an Array of String values with extra 'index' and 'input' properties.
  // If there were named capture groups, there will also be an extra 'groups'
  // property containing an object with capture group names as keys and
  // matched strings as values.
  final JSNativeMatch _match;

  _MatchImplementation(this.pattern, this._match);

  String get input {
    return _match.input;
  }

  int get start {
    return _match.index;
  }

  /// The start offset plus the length of the whole match (group 0).
  int get end {
    final from = start;
    final wholeMatch = _match[0].toString();
    return from + wholeMatch.length;
  }

  String? group(int index) {
    final captured = _match[index];
    return captured?.toString();
  }

  String? operator [](int index) {
    return group(index);
  }

  int get groupCount {
    return _match.length - 1;
  }

  List<String?> groups(List<int> groups) {
    return [for (final i in groups) group(i)];
  }

  /// Returns the string captured by the group called [name].
  ///
  /// Throws an [ArgumentError] when the match has no `groups` object or the
  /// object has no property called [name].
  String? namedGroup(String name) {
    final groups = _match.groups;
    if (groups != null) {
      final result = groups[name];
      // A group that did not participate yields `null`, so fall back to a
      // property check to tell it apart from an unknown name.
      final isGroupName = result != null ||
          hasPropertyRaw(groups.toAnyRef(), name.toAnyRef());
      if (isGroupName) return result?.toString();
    }
    throw ArgumentError.value(name, "name", "Not a capture group name");
  }

  /// The keys of the match's `groups` object, or an empty iterable when
  /// there is none.
  Iterable<String> get groupNames {
    final groups = _match.groups;
    if (groups == null) return Iterable.empty();
    return JSArrayIterableAdapter<String>(objectKeys(groups));
  }
}
/// The iterable returned by [JSSyntaxRegExp.allMatches].
///
/// Every call to [iterator] starts a fresh scan of the string from the
/// stored start offset.
class _AllMatchesIterable extends IterableBase<RegExpMatch> {
  final JSSyntaxRegExp _re;
  final String _string;
  final int _start;

  _AllMatchesIterable(this._re, this._string, this._start);

  Iterator<RegExpMatch> get iterator {
    return _AllMatchesIterator(_re, _string, _start);
  }
}
/// Steps through successive matches of a [JSSyntaxRegExp] in a string.
class _AllMatchesIterator implements Iterator<RegExpMatch> {
  final JSSyntaxRegExp _regExp;

  /// The string being searched; set to `null` once iteration has ended.
  String? _string;

  /// The offset at which the next search starts.
  int _nextIndex;

  RegExpMatch? _current;

  _AllMatchesIterator(this._regExp, this._string, this._nextIndex);

  RegExpMatch get current => _current as RegExpMatch;

  static bool _isLeadSurrogate(int c) {
    return c >= 0xd800 && c <= 0xdbff;
  }

  static bool _isTrailSurrogate(int c) {
    return c >= 0xdc00 && c <= 0xdfff;
  }

  /// Finds the next match, if any, and makes it [current].
  ///
  /// Returns `false` once no further match exists; every later call also
  /// returns `false`.
  bool moveNext() {
    final string = _string;
    if (string == null) return false;
    if (_nextIndex <= string.length) {
      final match = _regExp._execGlobal(string, _nextIndex);
      if (match != null) {
        _current = match;
        var nextIndex = match.end;
        if (match.start == nextIndex) {
          // Zero-width match. Advance by one more, unless the regexp
          // is in unicode mode and it would put us within a surrogate
          // pair. In that case, advance past the code point as a whole.
          //
          // The surrogate check looks at the position of the match
          // (`nextIndex`), not at the position the search started from
          // (`_nextIndex`): the two differ whenever the match was found
          // further into the string.
          if (_regExp.isUnicode &&
              nextIndex + 1 < string.length &&
              _isLeadSurrogate(string.codeUnitAt(nextIndex)) &&
              _isTrailSurrogate(string.codeUnitAt(nextIndex + 1))) {
            nextIndex++;
          }
          nextIndex++;
        }
        _nextIndex = nextIndex;
        return true;
      }
    }
    _current = null;
    _string = null; // Marks iteration as ended.
    return false;
  }
}

View file

@ -0,0 +1,21 @@
// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
/// dart2wasm patch for [RegExp]: construction is delegated to
/// [JSSyntaxRegExp] and escaping to [quoteStringForRegExp].
@patch
class RegExp {
  @patch
  factory RegExp(String source,
      {bool multiLine = false,
      bool caseSensitive = true,
      bool unicode = false,
      bool dotAll = false}) {
    return JSSyntaxRegExp(source,
        multiLine: multiLine,
        caseSensitive: caseSensitive,
        unicode: unicode,
        dotAll: dotAll);
  }

  @patch
  static String escape(String text) {
    return quoteStringForRegExp(text);
  }
}

View file

@ -214,6 +214,7 @@
"_internal/vm/lib/null_patch.dart",
"_internal/vm/lib/map_patch.dart",
"_internal/wasm/lib/object_patch.dart",
"_internal/wasm/lib/regexp_patch.dart",
"_internal/wasm/lib/stack_trace_patch.dart",
"_internal/wasm/lib/stopwatch_patch.dart",
"_internal/wasm/lib/string_buffer_patch.dart",

View file

@ -203,6 +203,7 @@ wasm:
- _internal/vm/lib/null_patch.dart
- _internal/vm/lib/map_patch.dart
- _internal/wasm/lib/object_patch.dart
- _internal/wasm/lib/regexp_patch.dart
- _internal/wasm/lib/stack_trace_patch.dart
- _internal/wasm/lib/stopwatch_patch.dart
- _internal/wasm/lib/string_buffer_patch.dart