Revert "[vm] Finish adding support for ECMAScript 2018 features."

This reverts commit 5ebb640a67.

Reason for revert: <INSERT REASONING HERE>

Original change's description:
> [vm] Finish adding support for ECMAScript 2018 features.
> 
> This work pulls in v8 support for these features with
> appropriate changes for Dart and closes
> https://github.com/dart-lang/sdk/issues/34935.
> 
> This adds support for the following features:
> 
> * Interpreting patterns as Unicode patterns instead of
>   BMP patterns
> * the dotAll flag (`/s`) for changing the behavior
>   of '.' to also match line terminators
> * Escapes for character classes described by Unicode
>   property groups (e.g., \p{Greek} to match all Greek
>   characters, or \P{Greek} for all non-Greek characters).
> 
> The following TC39 proposals describe some of the added features:
> 
> * https://github.com/tc39/proposal-regexp-dotall-flag
> * https://github.com/tc39/proposal-regexp-unicode-property-escapes
> 
> These additional changes are included:
> 
> * Extends named capture group names to include the full
>   range of identifier characters supported by ECMAScript,
>   not just ASCII.
> * Changing the RegExp interface to return RegExpMatch
>   objects, not Match objects, so that downcasting is
>   not necessary to use named capture groups from Dart
> 
> **Note**: The changes to the RegExp interface are a
> breaking change for implementers of the RegExp interface.
> Current users of the RegExp interface (i.e., code using Dart
> RegExp objects) will not be affected.
> 
> Change-Id: I0709ed0a8d5db36680e32bbad585594857b9ace4
> Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/95651
> Commit-Queue: Stevie Strickland <sstrickl@google.com>
> Reviewed-by: Johnni Winther <johnniwinther@google.com>
> Reviewed-by: Lasse R.H. Nielsen <lrn@google.com>
> Reviewed-by: Martin Kustermann <kustermann@google.com>

TBR=lrn@google.com,kustermann@google.com,jmesserly@google.com,johnniwinther@google.com,sstrickl@google.com

# Not skipping CQ checks because original CL landed > 1 day ago.

Change-Id: I1eda0fee4fd9e94df095944049833a67b07277e2
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/100560
Reviewed-by: Keerti Parthasarathy <keertip@google.com>
Reviewed-by: Martin Kustermann <kustermann@google.com>
Commit-Queue: Keerti Parthasarathy <keertip@google.com>
This commit is contained in:
Keerti Parthasarathy 2019-04-25 14:29:51 +00:00 committed by commit-bot@chromium.org
parent 000a3eb3f7
commit 9238e25305
74 changed files with 635 additions and 5156 deletions

View file

@ -1,32 +1,3 @@
## 2.3.0-dev.XX.0
(Add new changes here, and they will be copied to the change section for the
next dev version)
### Core library changes
#### `dart:core`
* **Breaking change**: The `RegExp` interface has been extended with two new
constructor named parameters:
* `unicode:` (`bool`, default: `false`), for Unicode patterns, and
* `dotAll:` (`bool`, default: `false`), to change the matching behavior of
'.' to also match line terminating characters.
Appropriate properties for these named parameters have also been added so
their use can be detected after construction.
In addition, `RegExp` methods that originally returned `Match` objects
now return a more specific subtype, `RegExpMatch`, which adds two features:
* `Iterable<String> groupNames`, a property that contains the names of all
named capture groups, and
* `String namedGroup(String name)`: a method that retrieves the match for
the given named capture group
This change only affects implementers of the `RegExp` interface; current
code using Dart regular expressions will not be affected.
## 2.3.0
The focus in this release is on the new "UI-as-code" language features which

5
DEPS
View file

@ -91,7 +91,6 @@ vars = {
"http_retry_tag": "0.1.1",
"http_tag" : "0.12.0+2",
"http_throttle_tag" : "1.0.2",
"icu_rev" : "c56c671998902fcc4fc9ace88c83daa99f980793",
"idl_parser_rev": "5fb1ebf49d235b5a70c9f49047e83b0654031eb7",
"intl_tag": "0.15.7",
"jinja2_rev": "2222b31554f03e62600cd7e383376a7c187967a1",
@ -211,10 +210,6 @@ deps = {
Var("chromium_git") + "/chromium/src/third_party/ply.git" +
"@" + Var("ply_rev"),
Var("dart_root") + "/third_party/icu":
Var("chromium_git") + "/chromium/deps/icu.git" +
"@" + Var("icu_rev"),
Var("dart_root") + "/tools/idl_parser":
Var("chromium_git") + "/chromium/src/tools/idl_parser.git" +
"@" + Var("idl_parser_rev"),

View file

@ -15,9 +15,6 @@ if (is_android) {
android_sdk_root = default_android_sdk_root
android_sdk_version = default_android_sdk_version
android_sdk_build_tools_version = default_android_sdk_build_tools_version
# Unused by Dart. Required for GN files in the third_party package ICU.
enable_java_templates = false
}
# Host stuff -----------------------------------------------------------------

View file

@ -1,10 +0,0 @@
# Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
# for details. All rights reserved. Use of this source code is governed by a
# BSD-style license that can be found in the LICENSE file.
# A trivial rules file that allows for the Chromium third_party ICU to
# be built successfully.
import("//build/config/android/config.gni")
assert(is_android)

View file

@ -1,30 +0,0 @@
# Copyright (c) 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Originally from v8, included in our repository as the ICU third party
# import depends on it for building.
# This header file defines the "host_byteorder" variable.
# Note that this is currently used only for building v8.
# The chromium code generally assumes little-endianness.
declare_args() {
host_byteorder = "undefined"
}
# Detect host byteorder
# ppc64 can be either BE or LE
if (host_cpu == "ppc64") {
if (current_os == "aix") {
host_byteorder = "big"
} else {
# Only use the script when absolutely necessary
host_byteorder =
exec_script("//build/config/get_host_byteorder.py", [], "trim string")
}
} else if (host_cpu == "ppc" || host_cpu == "s390" || host_cpu == "s390x" ||
host_cpu == "mips" || host_cpu == "mips64") {
host_byteorder = "big"
} else {
host_byteorder = "little"
}

View file

@ -547,15 +547,9 @@ class bool {
class RegExp {
@patch
factory RegExp(String source,
{bool multiLine = false,
bool caseSensitive = true,
bool unicode = false,
bool dotAll = false}) =>
{bool multiLine = false, bool caseSensitive = true}) =>
JSSyntaxRegExp(source,
multiLine: multiLine,
caseSensitive: caseSensitive,
unicode: unicode,
dotAll: dotAll);
multiLine: multiLine, caseSensitive: caseSensitive);
@patch
static String escape(String text) => quoteStringForRegExp(text);

View file

@ -47,22 +47,18 @@ class JSSyntaxRegExp implements RegExp {
var _nativeGlobalRegExp;
var _nativeAnchoredRegExp;
String toString() =>
'RegExp/$pattern/' + JS('String', '#.flags', _nativeRegExp);
String toString() => "RegExp/$pattern/";
JSSyntaxRegExp(String source,
{bool multiLine = false,
bool caseSensitive = true,
bool unicode = false,
bool dotAll = false})
{bool multiLine = false, bool caseSensitive = true})
: this.pattern = source,
this._nativeRegExp = makeNative(
source, multiLine, caseSensitive, unicode, dotAll, false);
this._nativeRegExp =
makeNative(source, multiLine, caseSensitive, false);
get _nativeGlobalVersion {
if (_nativeGlobalRegExp != null) return _nativeGlobalRegExp;
return _nativeGlobalRegExp = makeNative(
pattern, _isMultiLine, _isCaseSensitive, _isUnicode, _isDotAll, true);
return _nativeGlobalRegExp =
makeNative(pattern, _isMultiLine, _isCaseSensitive, true);
}
get _nativeAnchoredVersion {
@ -72,21 +68,17 @@ class JSSyntaxRegExp implements RegExp {
// that it tries, and you can see if the original regexp matched, or it
// was the added zero-width match that matched, by looking at the last
// capture. If it is a String, the match participated, otherwise it didn't.
return _nativeAnchoredRegExp = makeNative("$pattern|()", _isMultiLine,
_isCaseSensitive, _isUnicode, _isDotAll, true);
return _nativeAnchoredRegExp =
makeNative("$pattern|()", _isMultiLine, _isCaseSensitive, true);
}
bool get _isMultiLine => JS("bool", "#.multiline", _nativeRegExp);
bool get _isCaseSensitive => JS("bool", "!#.ignoreCase", _nativeRegExp);
bool get _isUnicode => JS("bool", "#.unicode", _nativeRegExp);
bool get _isDotAll => JS("bool", "#.dotAll", _nativeRegExp);
static makeNative(@nullCheck String source, bool multiLine,
bool caseSensitive, bool unicode, bool dotAll, bool global) {
bool caseSensitive, bool global) {
String m = multiLine ? 'm' : '';
String i = caseSensitive ? '' : 'i';
String u = unicode ? 'u' : '';
String s = dotAll ? 's' : '';
String g = global ? 'g' : '';
// We're using the JavaScript's try catch instead of the Dart one
// to avoid dragging in Dart runtime support just because of using
@ -95,7 +87,7 @@ class JSSyntaxRegExp implements RegExp {
'',
'(function() {'
'try {'
'return new RegExp(#, # + # + # + # + #);'
'return new RegExp(#, # + # + #);'
'} catch (e) {'
'return e;'
'}'
@ -103,8 +95,6 @@ class JSSyntaxRegExp implements RegExp {
source,
m,
i,
u,
s,
g);
if (JS('bool', '# instanceof RegExp', regexp)) return regexp;
// The returned value is the JavaScript exception. Turn it into a
@ -113,7 +103,7 @@ class JSSyntaxRegExp implements RegExp {
throw FormatException("Illegal RegExp pattern: $source, $errorMessage");
}
RegExpMatch firstMatch(@nullCheck String string) {
Match firstMatch(@nullCheck String string) {
List m = JS('JSExtendableArray|Null', r'#.exec(#)', _nativeRegExp, string);
if (m == null) return null;
return _MatchImplementation(this, JSArray<String>.of(m));
@ -130,7 +120,7 @@ class JSSyntaxRegExp implements RegExp {
return null;
}
Iterable<RegExpMatch> allMatches(@nullCheck String string,
Iterable<Match> allMatches(@nullCheck String string,
[@nullCheck int start = 0]) {
if (start < 0 || start > string.length) {
throw RangeError.range(start, 0, string.length);
@ -138,7 +128,7 @@ class JSSyntaxRegExp implements RegExp {
return _AllMatchesIterable(this, string, start);
}
RegExpMatch _execGlobal(String string, int start) {
Match _execGlobal(String string, int start) {
Object regexp = _nativeGlobalVersion;
JS("void", "#.lastIndex = #", regexp, start);
List match = JS("JSExtendableArray|Null", "#.exec(#)", regexp, string);
@ -146,7 +136,7 @@ class JSSyntaxRegExp implements RegExp {
return _MatchImplementation(this, JSArray<String>.of(match));
}
RegExpMatch _execAnchored(String string, int start) {
Match _execAnchored(String string, int start) {
Object regexp = _nativeAnchoredVersion;
JS("void", "#.lastIndex = #", regexp, start);
List match = JS("JSExtendableArray|Null", "#.exec(#)", regexp, string);
@ -158,7 +148,7 @@ class JSSyntaxRegExp implements RegExp {
return _MatchImplementation(this, JSArray<String>.of(match));
}
RegExpMatch matchAsPrefix(String string, [int start = 0]) {
Match matchAsPrefix(String string, [int start = 0]) {
if (start < 0 || start > string.length) {
throw RangeError.range(start, 0, string.length);
}
@ -167,8 +157,6 @@ class JSSyntaxRegExp implements RegExp {
bool get isMultiLine => _isMultiLine;
bool get isCaseSensitive => _isCaseSensitive;
bool get isUnicode => _isUnicode;
bool get isDotAll => _isDotAll;
}
class _MatchImplementation implements RegExpMatch {
@ -219,34 +207,25 @@ class _MatchImplementation implements RegExpMatch {
}
}
class _AllMatchesIterable extends IterableBase<RegExpMatch> {
class _AllMatchesIterable extends IterableBase<Match> {
final JSSyntaxRegExp _re;
final String _string;
final int _start;
_AllMatchesIterable(this._re, this._string, this._start);
Iterator<RegExpMatch> get iterator =>
_AllMatchesIterator(_re, _string, _start);
Iterator<Match> get iterator => _AllMatchesIterator(_re, _string, _start);
}
class _AllMatchesIterator implements Iterator<RegExpMatch> {
class _AllMatchesIterator implements Iterator<Match> {
final JSSyntaxRegExp _regExp;
String _string;
int _nextIndex;
RegExpMatch _current;
Match _current;
_AllMatchesIterator(this._regExp, this._string, this._nextIndex);
RegExpMatch get current => _current;
static bool _isLeadSurrogate(int c) {
return c >= 0xd800 && c <= 0xdbff;
}
static bool _isTrailSurrogate(int c) {
return c >= 0xdc00 && c <= 0xdfff;
}
Match get current => _current;
bool moveNext() {
if (_string == null) return false;
@ -256,15 +235,6 @@ class _AllMatchesIterator implements Iterator<RegExpMatch> {
_current = match;
int nextIndex = match.end;
if (match.start == nextIndex) {
// Zero-width match. Advance by one more, unless the regexp
// is in unicode mode and it would put us within a surrogate
// pair. In that case, advance past the code point as a whole.
if (_regExp.isUnicode &&
_nextIndex + 1 < _string.length &&
_isLeadSurrogate(_string.codeUnitAt(_nextIndex)) &&
_isTrailSurrogate(_string.codeUnitAt(_nextIndex + 1))) {
nextIndex++;
}
nextIndex++;
}
_nextIndex = nextIndex;
@ -278,6 +248,6 @@ class _AllMatchesIterator implements Iterator<RegExpMatch> {
}
/** Find the first match of [regExp] in [string] at or after [start]. */
RegExpMatch firstMatchAfter(JSSyntaxRegExp regExp, String string, int start) {
Match firstMatchAfter(JSSyntaxRegExp regExp, String string, int start) {
return regExp._execGlobal(string, start);
}

View file

@ -14,7 +14,7 @@
namespace dart {
DEFINE_NATIVE_ENTRY(RegExp_factory, 0, 6) {
DEFINE_NATIVE_ENTRY(RegExp_factory, 0, 4) {
ASSERT(
TypeArguments::CheckedHandle(zone, arguments->NativeArgAt(0)).IsNull());
GET_NON_NULL_NATIVE_ARGUMENT(String, pattern, arguments->NativeArgAt(1));
@ -22,30 +22,17 @@ DEFINE_NATIVE_ENTRY(RegExp_factory, 0, 6) {
arguments->NativeArgAt(2));
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_case_sensitive,
arguments->NativeArgAt(3));
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_unicode,
arguments->NativeArgAt(4));
GET_NON_NULL_NATIVE_ARGUMENT(Instance, handle_dot_all,
arguments->NativeArgAt(5));
bool ignore_case = handle_case_sensitive.raw() != Bool::True().raw();
bool multi_line = handle_multi_line.raw() == Bool::True().raw();
bool unicode = handle_unicode.raw() == Bool::True().raw();
bool dot_all = handle_dot_all.raw() == Bool::True().raw();
RegExpFlags flags;
if (ignore_case) flags.SetIgnoreCase();
if (multi_line) flags.SetMultiLine();
if (unicode) flags.SetUnicode();
if (dot_all) flags.SetDotAll();
// Parse the pattern once in order to throw any format exceptions within
// the factory constructor. It is parsed again upon compilation.
RegExpCompileData compileData;
// Throws an exception on parsing failure.
RegExpParser::ParseRegExp(pattern, flags, &compileData);
RegExpParser::ParseRegExp(pattern, multi_line, &compileData);
// Create a RegExp object containing only the initial parameters.
return RegExpEngine::CreateRegExp(thread, pattern, flags);
return RegExpEngine::CreateRegExp(thread, pattern, multi_line, ignore_case);
}
DEFINE_NATIVE_ENTRY(RegExp_getPattern, 0, 1) {
@ -57,25 +44,13 @@ DEFINE_NATIVE_ENTRY(RegExp_getPattern, 0, 1) {
DEFINE_NATIVE_ENTRY(RegExp_getIsMultiLine, 0, 1) {
const RegExp& regexp = RegExp::CheckedHandle(zone, arguments->NativeArgAt(0));
ASSERT(!regexp.IsNull());
return Bool::Get(regexp.flags().IsMultiLine()).raw();
}
DEFINE_NATIVE_ENTRY(RegExp_getIsUnicode, 0, 1) {
const RegExp& regexp = RegExp::CheckedHandle(zone, arguments->NativeArgAt(0));
ASSERT(!regexp.IsNull());
return Bool::Get(regexp.flags().IsUnicode()).raw();
}
DEFINE_NATIVE_ENTRY(RegExp_getIsDotAll, 0, 1) {
const RegExp& regexp = RegExp::CheckedHandle(zone, arguments->NativeArgAt(0));
ASSERT(!regexp.IsNull());
return Bool::Get(regexp.flags().IsDotAll()).raw();
return Bool::Get(regexp.is_multi_line()).raw();
}
DEFINE_NATIVE_ENTRY(RegExp_getIsCaseSensitive, 0, 1) {
const RegExp& regexp = RegExp::CheckedHandle(zone, arguments->NativeArgAt(0));
ASSERT(!regexp.IsNull());
return Bool::Get(!regexp.flags().IgnoreCase()).raw();
return Bool::Get(!regexp.is_ignore_case()).raw();
}
DEFINE_NATIVE_ENTRY(RegExp_getGroupCount, 0, 1) {

View file

@ -8,12 +8,8 @@
class RegExp {
@patch
factory RegExp(String source,
{bool multiLine: false,
bool caseSensitive: true,
bool unicode: false,
bool dotAll: false}) {
_RegExpHashKey key =
new _RegExpHashKey(source, multiLine, caseSensitive, unicode, dotAll);
{bool multiLine: false, bool caseSensitive: true}) {
_RegExpHashKey key = new _RegExpHashKey(source, multiLine, caseSensitive);
_RegExpHashValue value = _cache[key];
if (value == null) {
@ -25,10 +21,7 @@ class RegExp {
value = new _RegExpHashValue(
new _RegExp(source,
multiLine: multiLine,
caseSensitive: caseSensitive,
unicode: unicode,
dotAll: dotAll),
multiLine: multiLine, caseSensitive: caseSensitive),
key);
_cache[key] = value;
} else {
@ -121,20 +114,15 @@ class _RegExpHashKey extends LinkedListEntry<_RegExpHashKey> {
final String pattern;
final bool multiLine;
final bool caseSensitive;
final bool unicode;
final bool dotAll;
_RegExpHashKey(this.pattern, this.multiLine, this.caseSensitive, this.unicode,
this.dotAll);
_RegExpHashKey(this.pattern, this.multiLine, this.caseSensitive);
int get hashCode => pattern.hashCode;
bool operator ==(that) {
return (that is _RegExpHashKey) &&
(this.pattern == that.pattern) &&
(this.multiLine == that.multiLine) &&
(this.caseSensitive == that.caseSensitive) &&
(this.unicode == that.unicode) &&
(this.dotAll == that.dotAll);
(this.caseSensitive == that.caseSensitive);
}
}
@ -212,11 +200,9 @@ class _RegExpMatch implements RegExpMatch {
class _RegExp implements RegExp {
factory _RegExp(String pattern,
{bool multiLine: false,
bool caseSensitive: true,
bool unicode: false,
bool dotAll: false}) native "RegExp_factory";
bool caseSensitive: true}) native "RegExp_factory";
RegExpMatch firstMatch(String str) {
Match firstMatch(String str) {
if (str is! String) throw new ArgumentError(str);
List match = _ExecuteMatch(str, 0);
if (match == null) {
@ -225,7 +211,7 @@ class _RegExp implements RegExp {
return new _RegExpMatch(this, str, match);
}
Iterable<RegExpMatch> allMatches(String string, [int start = 0]) {
Iterable<Match> allMatches(String string, [int start = 0]) {
if (string is! String) throw new ArgumentError(string);
if (start is! int) throw new ArgumentError(start);
if (0 > start || start > string.length) {
@ -234,7 +220,7 @@ class _RegExp implements RegExp {
return new _AllMatchesIterable(this, string, start);
}
RegExpMatch matchAsPrefix(String string, [int start = 0]) {
Match matchAsPrefix(String string, [int start = 0]) {
if (string is! String) throw new ArgumentError(string);
if (start is! int) throw new ArgumentError(start);
if (start < 0 || start > string.length) {
@ -266,10 +252,6 @@ class _RegExp implements RegExp {
bool get isCaseSensitive native "RegExp_getIsCaseSensitive";
bool get isUnicode native "RegExp_getIsUnicode";
bool get isDotAll native "RegExp_getIsDotAll";
int get _groupCount native "RegExp_getGroupCount";
// Returns a List [String, int, String, int, ...] where each
@ -345,34 +327,25 @@ class _RegExp implements RegExp {
native "RegExp_ExecuteMatchSticky";
}
class _AllMatchesIterable extends IterableBase<RegExpMatch> {
class _AllMatchesIterable extends IterableBase<Match> {
final _RegExp _re;
final String _str;
final int _start;
_AllMatchesIterable(this._re, this._str, this._start);
Iterator<RegExpMatch> get iterator =>
new _AllMatchesIterator(_re, _str, _start);
Iterator<Match> get iterator => new _AllMatchesIterator(_re, _str, _start);
}
class _AllMatchesIterator implements Iterator<RegExpMatch> {
class _AllMatchesIterator implements Iterator<Match> {
final String _str;
int _nextIndex;
_RegExp _re;
RegExpMatch _current;
Match _current;
_AllMatchesIterator(this._re, this._str, this._nextIndex);
RegExpMatch get current => _current;
static bool _isLeadSurrogate(int c) {
return c >= 0xd800 && c <= 0xdbff;
}
static bool _isTrailSurrogate(int c) {
return c >= 0xdc00 && c <= 0xdfff;
}
Match get current => _current;
bool moveNext() {
if (_re == null) return false; // Cleared after a failed match.
@ -382,15 +355,7 @@ class _AllMatchesIterator implements Iterator<RegExpMatch> {
_current = new _RegExpMatch(_re, _str, match);
_nextIndex = _current.end;
if (_nextIndex == _current.start) {
// Zero-width match. Advance by one more, unless the regexp
// is in unicode mode and it would put us within a surrogate
// pair. In that case, advance past the code point as a whole.
if (_re.isUnicode &&
_nextIndex + 1 < _str.length &&
_isLeadSurrogate(_str.codeUnitAt(_nextIndex)) &&
_isTrailSurrogate(_str.codeUnitAt(_nextIndex + 1))) {
_nextIndex++;
}
// Zero-width match. Advance by one more.
_nextIndex++;
}
return true;

View file

@ -1,273 +0,0 @@
// Copyright (c) 2019, the Dart project authors.
// Copyright 2010 the V8 project authors.
// Please see the AUTHORS file for details. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// The original file can be found at:
// https://github.com/v8/v8/blob/master/src/splay-tree-inl.h
#ifndef RUNTIME_PLATFORM_SPLAY_TREE_INL_H_
#define RUNTIME_PLATFORM_SPLAY_TREE_INL_H_
#include <vector>
#include "platform/splay-tree.h"
namespace dart {
// Destroys the tree by passing every node to a NodeDeleter through
// ForEachNode().
template <typename Config, class B, class Allocator>
SplayTree<Config, B, Allocator>::~SplayTree() {
  NodeDeleter node_deleter;
  ForEachNode(&node_deleter);
}
// Inserts |key| with Config::NoValue() as its value and binds |locator| to
// the node that holds |key|. Returns true when a new node was created and
// false when |key| was already present (the existing node is bound instead).
template <typename Config, class B, class Allocator>
bool SplayTree<Config, B, Allocator>::Insert(const Key& key, Locator* locator) {
  if (!is_empty()) {
    // Bring the last node on the search path for |key| to the root.
    Splay(key);
    const int order = Config::Compare(key, root_->key_);
    if (order == 0) {
      // Repeated insertion of the same key: expose the existing node.
      locator->bind(root_);
      return false;
    }
    Node* fresh = new (allocator_) Node(key, Config::NoValue());
    InsertInternal(order, fresh);
  } else {
    // Empty tree: the new node simply becomes the root.
    root_ = new (allocator_) Node(key, Config::NoValue());
  }
  locator->bind(root_);
  return true;
}
// Makes |node| the new root, splitting the current root's subtrees around it.
// |cmp| is the result of comparing the new key against the current root's
// key; root_ must be non-null on entry.
template <typename Config, class B, class Allocator>
void SplayTree<Config, B, Allocator>::InsertInternal(int cmp, Node* node) {
  Node* const old_root = root_;
  if (cmp <= 0) {
    // The old root goes to the right of the new node, which takes over the
    // old root's left subtree.
    node->right_ = old_root;
    node->left_ = old_root->left_;
    old_root->left_ = nullptr;
  } else {
    // The old root goes to the left of the new node, which takes over the
    // old root's right subtree.
    node->left_ = old_root;
    node->right_ = old_root->right_;
    old_root->right_ = nullptr;
  }
  root_ = node;
}
// Splays on |key| and reports whether the resulting root holds exactly |key|.
// Returns false without splaying when the tree is empty.
template <typename Config, class B, class Allocator>
bool SplayTree<Config, B, Allocator>::FindInternal(const Key& key) {
  if (!is_empty()) {
    Splay(key);
    return Config::Compare(key, root_->key_) == 0;
  }
  return false;
}
// Returns whether a node with |key| exists. Delegates to FindInternal(), so
// the lookup splays the tree as a side effect.
template <typename Config, class B, class Allocator>
bool SplayTree<Config, B, Allocator>::Contains(const Key& key) {
  const bool present = FindInternal(key);
  return present;
}
// Looks up |key|; on success binds |locator| to the matching node (now the
// root) and returns true. On failure |locator| is left untouched.
template <typename Config, class B, class Allocator>
bool SplayTree<Config, B, Allocator>::Find(const Key& key, Locator* locator) {
  if (!FindInternal(key)) return false;
  locator->bind(root_);
  return true;
}
// Binds |locator| to the node with the greatest key that is less than or
// equal to |key|. Returns false when the tree is empty or no such node exists.
template <typename Config, class B, class Allocator>
bool SplayTree<Config, B, Allocator>::FindGreatestLessThan(const Key& key,
                                                           Locator* locator) {
  if (is_empty()) return false;
  // After splaying, the root is either the node for |key| or the last node
  // on the search path.
  Splay(key);
  if (Config::Compare(root_->key_, key) <= 0) {
    locator->bind(root_);
    return true;
  }
  // The root is too large, so the answer is the greatest node of its left
  // subtree. Temporarily re-root there so FindGreatest() can be reused.
  Node* const saved_root = root_;
  root_ = saved_root->left_;
  const bool found = FindGreatest(locator);
  root_ = saved_root;
  return found;
}
// Binds |locator| to the node with the least key that is greater than or
// equal to |key|. Returns false when the tree is empty or no such node exists.
template <typename Config, class B, class Allocator>
bool SplayTree<Config, B, Allocator>::FindLeastGreaterThan(const Key& key,
                                                           Locator* locator) {
  if (is_empty()) return false;
  // After splaying, the root is either the node for |key| or the last node
  // on the search path.
  Splay(key);
  if (Config::Compare(root_->key_, key) >= 0) {
    locator->bind(root_);
    return true;
  }
  // The root is too small, so the answer is the least node of its right
  // subtree. Temporarily re-root there so FindLeast() can be reused.
  Node* const saved_root = root_;
  root_ = saved_root->right_;
  const bool found = FindLeast(locator);
  root_ = saved_root;
  return found;
}
// Binds |locator| to the rightmost node (greatest key) without splaying.
// Returns false when the tree is empty.
template <typename Config, class B, class Allocator>
bool SplayTree<Config, B, Allocator>::FindGreatest(Locator* locator) {
  if (is_empty()) return false;
  Node* rightmost = root_;
  for (Node* next = rightmost->right_; next != nullptr; next = next->right_) {
    rightmost = next;
  }
  locator->bind(rightmost);
  return true;
}
// Binds |locator| to the leftmost node (least key) without splaying.
// Returns false when the tree is empty.
template <typename Config, class B, class Allocator>
bool SplayTree<Config, B, Allocator>::FindLeast(Locator* locator) {
  if (is_empty()) return false;
  Node* leftmost = root_;
  for (Node* next = leftmost->left_; next != nullptr; next = next->left_) {
    leftmost = next;
  }
  locator->bind(leftmost);
  return true;
}
// Re-keys the node stored under |old_key| so that it is stored under
// |new_key|.
//
// Returns false when |old_key| is not present. Also returns false when a node
// with |new_key| already exists; in that case the node that was being moved
// has already been unlinked and is deleted, so the |old_key| mapping is lost.
// Returns true when the node was re-inserted under |new_key|.
template <typename Config, class B, class Allocator>
bool SplayTree<Config, B, Allocator>::Move(const Key& old_key,
                                           const Key& new_key) {
  if (!FindInternal(old_key)) return false;
  // FindInternal() splayed the node for |old_key| to the root.
  Node* node_to_move = root_;
  RemoveRootNode(old_key);
  if (is_empty()) {
    // |old_key| was the only node. Splay() is a no-op on an empty tree, so
    // there is no root to compare against or link to; the moved node simply
    // becomes the sole root again under its new key.
    node_to_move->key_ = new_key;
    node_to_move->left_ = nullptr;
    node_to_move->right_ = nullptr;
    root_ = node_to_move;
    return true;
  }
  Splay(new_key);
  int cmp = Config::Compare(new_key, root_->key_);
  if (cmp == 0) {
    // A node with the target key already exists.
    delete node_to_move;
    return false;
  }
  node_to_move->key_ = new_key;
  // InsertInternal() overwrites both child links of the moved node.
  InsertInternal(cmp, node_to_move);
  return true;
}
// Unlinks and deletes the node stored under |key|. Returns false when no such
// node exists.
template <typename Config, class B, class Allocator>
bool SplayTree<Config, B, Allocator>::Remove(const Key& key) {
  if (FindInternal(key)) {
    // FindInternal() left the matching node at the root.
    Node* const doomed = root_;
    RemoveRootNode(key);
    delete doomed;
    return true;
  }
  return false;
}
// Unlinks the current root from the tree without deleting it. |key| is used
// to splay the left subtree; the visible callers (Move, Remove) pass the key
// of the root being removed. root_ must be non-null on entry.
template <typename Config, class B, class Allocator>
void SplayTree<Config, B, Allocator>::RemoveRootNode(const Key& key) {
  Node* const left_subtree = root_->left_;
  if (left_subtree != nullptr) {
    Node* const right_subtree = root_->right_;
    // Promote the left subtree, then splay so that the new root ends up with
    // an empty right child that can receive the original right subtree.
    root_ = left_subtree;
    Splay(key);
    root_->right_ = right_subtree;
    return;
  }
  // No left child: the right child (possibly null) is the whole new tree.
  root_ = root_->right_;
}
// Restructures the tree so that the node holding |key| becomes the root, or,
// when no node holds |key|, the last node reached on the search path for it.
// Does nothing on an empty tree. The loop descends from the root, rotating
// and detaching nodes onto two side trees, and the final "Assemble" step
// reattaches them under the node where the descent stopped.
template <typename Config, class B, class Allocator>
void SplayTree<Config, B, Allocator>::Splay(const Key& key) {
if (is_empty()) return;
Node dummy_node(Config::kNoKey, Config::NoValue());
// Create a dummy node. The use of the dummy node is a bit
// counter-intuitive: The right child of the dummy node will hold
// the L tree of the algorithm. The left child of the dummy node
// will hold the R tree of the algorithm. Using a dummy node, left
// and right will always be nodes and we avoid special cases.
Node* dummy = &dummy_node;
// |left| and |right| track the nodes of the L and R trees to which the next
// detached node is linked; both start at the dummy.
Node* left = dummy;
Node* right = dummy;
Node* current = root_;
while (true) {
int cmp = Config::Compare(key, current->key_);
if (cmp < 0) {
// |key| sorts before |current|: descend left, or stop if there is no left
// child.
if (current->left_ == nullptr) break;
if (Config::Compare(key, current->left_->key_) < 0) {
// Rotate right.
Node* temp = current->left_;
current->left_ = temp->right_;
temp->right_ = current;
current = temp;
if (current->left_ == nullptr) break;
}
// Link right.
right->left_ = current;
right = current;
current = current->left_;
} else if (cmp > 0) {
// |key| sorts after |current|: descend right, or stop if there is no right
// child.
if (current->right_ == nullptr) break;
if (Config::Compare(key, current->right_->key_) > 0) {
// Rotate left.
Node* temp = current->right_;
current->right_ = temp->left_;
temp->left_ = current;
current = temp;
if (current->right_ == nullptr) break;
}
// Link left.
left->right_ = current;
left = current;
current = current->right_;
} else {
// Exact match: |current| is the node for |key|.
break;
}
}
// Assemble.
// Hand |current|'s subtrees to the side trees, then hang the side trees
// (held in the dummy's children) under |current| and make it the root.
left->right_ = current->left_;
right->left_ = current->right_;
current->left_ = dummy->right_;
current->right_ = dummy->left_;
root_ = current;
}
// Invokes callback->Call(key, value) for every node by wrapping |callback| in
// a NodeToPairAdaptor and delegating to ForEachNode().
template <typename Config, class B, class Allocator>
template <class Callback>
void SplayTree<Config, B, Allocator>::ForEach(Callback* callback) {
  NodeToPairAdaptor<Callback> pair_adaptor(callback);
  ForEachNode(&pair_adaptor);
}
// Invokes callback->Call(node) once for every node, visiting the tree level
// by level from the root using an explicit work list instead of recursion.
template <typename Config, class B, class Allocator>
template <class Callback>
void SplayTree<Config, B, Allocator>::ForEachNode(Callback* callback) {
  if (root_ == nullptr) return;
  // Work list of nodes still to visit; it grows as children are discovered.
  std::vector<Node*> pending;
  pending.push_back(root_);
  for (size_t index = 0; index < pending.size(); ++index) {
    Node* const visiting = pending[index];
    // Read and queue the children before invoking the callback: the callback
    // may delete the node (see NodeDeleter, used by the destructor).
    Node* const left_child = visiting->left();
    Node* const right_child = visiting->right();
    if (left_child != nullptr) pending.push_back(left_child);
    if (right_child != nullptr) pending.push_back(right_child);
    callback->Call(visiting);
  }
}
} // namespace dart
#endif // RUNTIME_PLATFORM_SPLAY_TREE_INL_H_

View file

@ -1,172 +0,0 @@
// Copyright (c) 2019, the Dart project authors.
// Copyright 2010 the V8 project authors.
// Please see the AUTHORS file for details. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// The original file can be found at:
// https://github.com/v8/v8/blob/master/src/splay-tree.h
#ifndef RUNTIME_PLATFORM_SPLAY_TREE_H_
#define RUNTIME_PLATFORM_SPLAY_TREE_H_
#include "platform/allocation.h"
namespace dart {
// A splay tree. The config type parameter encapsulates the different
// configurations of a concrete splay tree:
//
// typedef Key: the key type
// typedef Value: the value type
// static const Key kNoKey: the dummy key used when no key is set
// static Value kNoValue(): the dummy value used to initialize nodes
// static int (Compare)(Key& a, Key& b) -> {-1, 0, 1}: comparison function
//
// The tree is also parameterized by an allocation policy
// (Allocator). The policy is used for allocating lists in the C free
// store or the zone; see zone.h.
template <typename Config, class B, class Allocator>
class SplayTree : public B {
public:
typedef typename Config::Key Key;
typedef typename Config::Value Value;
class Locator;
explicit SplayTree(Allocator* allocator)
: root_(nullptr), allocator_(allocator) {}
~SplayTree();
Allocator* allocator() { return allocator_; }
// Checks if there is a mapping for the key.
bool Contains(const Key& key);
// Inserts the given key in this tree with the given value. Returns
// true if a node was inserted, otherwise false. If found the locator
// is enabled and provides access to the mapping for the key.
bool Insert(const Key& key, Locator* locator);
// Looks up the key in this tree and returns true if it was found,
// otherwise false. If the node is found the locator is enabled and
// provides access to the mapping for the key.
bool Find(const Key& key, Locator* locator);
// Finds the mapping with the greatest key less than or equal to the
// given key.
bool FindGreatestLessThan(const Key& key, Locator* locator);
// Find the mapping with the greatest key in this tree.
bool FindGreatest(Locator* locator);
// Finds the mapping with the least key greater than or equal to the
// given key.
bool FindLeastGreaterThan(const Key& key, Locator* locator);
// Find the mapping with the least key in this tree.
bool FindLeast(Locator* locator);
// Move the node from one key to another.
bool Move(const Key& old_key, const Key& new_key);
// Remove the node with the given key from the tree.
bool Remove(const Key& key);
// Remove all keys from the tree.
void Clear() { ResetRoot(); }
bool is_empty() { return root_ == nullptr; }
// Perform the splay operation for the given key. Moves the node with
// the given key to the top of the tree. If no node has the given
// key, the last node on the search path is moved to the top of the
// tree.
void Splay(const Key& key);
class Node : public B {
public:
Node(const Key& key, const Value& value)
: key_(key), value_(value), left_(nullptr), right_(nullptr) {}
Key key() { return key_; }
Value value() { return value_; }
Node* left() { return left_; }
Node* right() { return right_; }
private:
friend class SplayTree;
friend class Locator;
Key key_;
Value value_;
Node* left_;
Node* right_;
};
// A locator provides access to a node in the tree without actually
// exposing the node.
class Locator : public B {
public:
explicit Locator(Node* node) : node_(node) {}
Locator() : node_(nullptr) {}
const Key& key() { return node_->key_; }
Value& value() { return node_->value_; }
void set_value(const Value& value) { node_->value_ = value; }
inline void bind(Node* node) { node_ = node; }
private:
Node* node_;
};
template <class Callback>
void ForEach(Callback* callback);
protected:
// Resets tree root. Existing nodes become unreachable.
void ResetRoot() { root_ = nullptr; }
private:
// Search for a node with a given key. If found, root_ points
// to the node.
bool FindInternal(const Key& key);
// Inserts a node assuming that root_ is already set up.
void InsertInternal(int cmp, Node* node);
// Removes root_ node.
void RemoveRootNode(const Key& key);
// Adapts a callback that takes a (key, value) pair so that it can be invoked
// with a Node*: Call(node) forwards node->key() and node->value() to
// callback_->Call().
template <class Callback>
class NodeToPairAdaptor : public B {
public:
// Stores the callback pointer; nothing in this class deletes it.
explicit NodeToPairAdaptor(Callback* callback) : callback_(callback) {}
void Call(Node* node) { callback_->Call(node->key(), node->value()); }
private:
Callback* callback_;
DISALLOW_COPY_AND_ASSIGN(NodeToPairAdaptor);
};
// Callback object whose Call() deletes the node it is handed.
// NOTE(review): presumably passed to ForEachNode to free every node in the
// tree - confirm against the call sites.
class NodeDeleter : public B {
public:
NodeDeleter() = default;
// Destroys the given node with `delete`.
void Call(Node* node) { delete node; }
private:
DISALLOW_COPY_AND_ASSIGN(NodeDeleter);
};
template <class Callback>
void ForEachNode(Callback* callback);
Node* root_;
Allocator* allocator_;
DISALLOW_COPY_AND_ASSIGN(SplayTree);
};
} // namespace dart
#endif // RUNTIME_PLATFORM_SPLAY_TREE_H_

View file

@ -15,7 +15,6 @@ class String;
class Utf : AllStatic {
public:
static const int32_t kMaxCodePoint = 0x10FFFF;
static const int32_t kInvalidChar = 0xFFFFFFFF;
static bool IsLatin1(int32_t code_point) {
return (code_point >= 0) && (code_point <= 0xFF);
@ -30,7 +29,7 @@ class Utf : AllStatic {
}
// Returns true if the code point value is above Plane 17.
static bool IsOutOfRange(int32_t code_point) {
static bool IsOutOfRange(intptr_t code_point) {
return (code_point < 0) || (code_point > kMaxCodePoint);
}
};
@ -57,11 +56,11 @@ class Utf8 : AllStatic {
static intptr_t Length(const String& str);
static intptr_t Encode(int32_t ch, char* dst);
static intptr_t Encode(const String& src, char* dst, intptr_t len);
static intptr_t Decode(const uint8_t* utf8_array,
intptr_t array_len,
int32_t* ch);
static intptr_t Encode(const String& src, char* dst, intptr_t len);
static bool DecodeToLatin1(const uint8_t* utf8_array,
intptr_t array_len,
@ -153,10 +152,6 @@ class Utf16 : AllStatic {
static void Encode(int32_t codepoint, uint16_t* dst);
static const int32_t kMaxCodeUnit = 0xFFFF;
static const int32_t kLeadSurrogateStart = 0xD800;
static const int32_t kLeadSurrogateEnd = 0xDBFF;
static const int32_t kTrailSurrogateStart = 0xDC00;
static const int32_t kTrailSurrogateEnd = 0xDFFF;
private:
static const int32_t kLeadSurrogateOffset = (0xD800 - (0x10000 >> 10));
@ -192,11 +187,11 @@ class CaseMapping : AllStatic {
// The size of the stage 1 index.
// TODO(cshapiro): improve indexing so this value is unnecessary.
static const intptr_t kStage1Size = 261;
static const int kStage1Size = 261;
// The size of a stage 2 block in bytes.
static const intptr_t kBlockSizeLog2 = 8;
static const intptr_t kBlockSize = 1 << kBlockSizeLog2;
static const int kBlockSizeLog2 = 8;
static const int kBlockSize = 1 << kBlockSizeLog2;
static int32_t Convert(int32_t ch, int32_t mapping) {
if (Utf::IsLatin1(ch)) {
@ -227,25 +222,6 @@ class CaseMapping : AllStatic {
static const int32_t stage2_exception_[][2];
};
// Helpers for working with the Latin-1 (one-byte) character range.
class Latin1 {
 public:
  // Largest character value that fits in Latin-1.
  static const int32_t kMaxChar = 0xff;

  // Maps a UTF-16 code unit outside Latin-1 onto its Latin-1 case
  // equivalent when one exists; any other value is returned unchanged.
  static inline uint16_t TryConvertToLatin1(uint16_t c) {
    // U+039C and U+03BC are case-equivalent to U+00B5 (micro sign).
    if (c == 0x39c || c == 0x3bc) {
      return 0xb5;
    }
    // U+0178 is the uppercase form of U+00FF, which is itself in Latin-1.
    if (c == 0x178) {
      return 0xff;
    }
    return c;
  }
};
} // namespace dart
#endif // RUNTIME_PLATFORM_UNICODE_H_

View file

@ -203,14 +203,7 @@ class Utils {
return (static_cast<int64_t>(high) << 32) | (low & 0x0ffffffffLL);
}
static inline constexpr bool IsAlphaNumeric(uint32_t c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
IsDecimalDigit(c);
}
static inline constexpr bool IsDecimalDigit(uint32_t c) {
return ('0' <= c) && (c <= '9');
}
static bool IsDecimalDigit(char c) { return ('0' <= c) && (c <= '9'); }
static bool IsHexDigit(char c) {
return IsDecimalDigit(c) || (('A' <= c) && (c <= 'F')) ||

View file

@ -60,9 +60,8 @@ config("libdart_vm_config") {
library_for_all_configs("libdart_vm") {
target_type = "source_set"
extra_deps = [ "//third_party/icu" ]
if (is_fuchsia) {
extra_deps += [
extra_deps = [
# TODO(US-399): Remove time_service specific code when it is no longer
# necessary.
"//sdk/lib/sys/cpp",

View file

@ -98,12 +98,10 @@ namespace dart {
V(Double_toStringAsExponential, 2) \
V(Double_toStringAsPrecision, 2) \
V(Double_flipSignBit, 1) \
V(RegExp_factory, 6) \
V(RegExp_factory, 4) \
V(RegExp_getPattern, 1) \
V(RegExp_getIsMultiLine, 1) \
V(RegExp_getIsCaseSensitive, 1) \
V(RegExp_getIsUnicode, 1) \
V(RegExp_getIsDotAll, 1) \
V(RegExp_getGroupCount, 1) \
V(RegExp_getGroupNameMap, 1) \
V(RegExp_ExecuteMatch, 3) \

View file

@ -3630,8 +3630,7 @@ class RegExpSerializationCluster : public SerializationCluster {
RawRegExp* regexp = objects_[i];
AutoTraceObject(regexp);
WriteFromTo(regexp);
s->Write<int32_t>(regexp->ptr()->num_one_byte_registers_);
s->Write<int32_t>(regexp->ptr()->num_two_byte_registers_);
s->Write<int32_t>(regexp->ptr()->num_registers_);
s->Write<int8_t>(regexp->ptr()->type_flags_);
}
}
@ -3662,8 +3661,7 @@ class RegExpDeserializationCluster : public DeserializationCluster {
Deserializer::InitializeHeader(regexp, kRegExpCid,
RegExp::InstanceSize());
ReadFromTo(regexp);
regexp->ptr()->num_one_byte_registers_ = d->Read<int32_t>();
regexp->ptr()->num_two_byte_registers_ = d->Read<int32_t>();
regexp->ptr()->num_registers_ = d->Read<int32_t>();
regexp->ptr()->type_flags_ = d->Read<int8_t>();
}
}

View file

@ -1272,8 +1272,8 @@ void ConstantPropagator::VisitMathMinMax(MathMinMaxInstr* instr) {
}
}
void ConstantPropagator::VisitCaseInsensitiveCompare(
CaseInsensitiveCompareInstr* instr) {
void ConstantPropagator::VisitCaseInsensitiveCompareUC16(
CaseInsensitiveCompareUC16Instr* instr) {
SetValue(instr, non_constant_);
}

View file

@ -5161,6 +5161,10 @@ const char* MathUnaryInstr::KindToCString(MathUnaryKind kind) {
return "";
}
const RuntimeEntry& CaseInsensitiveCompareUC16Instr::TargetFunction() const {
return kCaseInsensitiveCompareUC16RuntimeEntry;
}
TruncDivModInstr::TruncDivModInstr(Value* lhs, Value* rhs, intptr_t deopt_id)
: TemplateDefinition(deopt_id) {
SetInputAt(0, lhs);

View file

@ -18,7 +18,6 @@
#include "vm/native_entry.h"
#include "vm/object.h"
#include "vm/parser.h"
#include "vm/runtime_entry.h"
#include "vm/static_type_exactness_state.h"
#include "vm/token_position.h"
@ -423,7 +422,7 @@ struct InstrAttrs {
M(Unbox, kNoGC) \
M(BoxInt64, _) \
M(UnboxInt64, kNoGC) \
M(CaseInsensitiveCompare, _) \
M(CaseInsensitiveCompareUC16, _) \
M(BinaryInt64Op, kNoGC) \
M(ShiftInt64Op, kNoGC) \
M(SpeculativeShiftInt64Op, kNoGC) \
@ -5955,18 +5954,18 @@ class MathUnaryInstr : public TemplateDefinition<1, NoThrow, Pure> {
// Calls into the runtime and performs a case-insensitive comparison of the
// UTF16 strings (i.e. TwoByteString or ExternalTwoByteString) located at
// str[lhs_index:lhs_index + length] and str[rhs_index:rhs_index + length].
// Depending on the runtime entry passed, we will treat the strings as either
// UCS2 (no surrogate handling) or UTF16 (surrogates handled appropriately).
class CaseInsensitiveCompareInstr
//
// TODO(zerny): Remove this once (if) functions inherited from unibrow
// are moved to dart code.
class CaseInsensitiveCompareUC16Instr
: public TemplateDefinition<4, NoThrow, Pure> {
public:
CaseInsensitiveCompareInstr(Value* str,
Value* lhs_index,
Value* rhs_index,
Value* length,
const RuntimeEntry& entry,
intptr_t cid)
: entry_(entry), cid_(cid) {
CaseInsensitiveCompareUC16Instr(Value* str,
Value* lhs_index,
Value* rhs_index,
Value* length,
intptr_t cid)
: cid_(cid) {
ASSERT(cid == kTwoByteStringCid || cid == kExternalTwoByteStringCid);
ASSERT(index_scale() == 2);
SetInputAt(0, str);
@ -5980,7 +5979,7 @@ class CaseInsensitiveCompareInstr
Value* rhs_index() const { return inputs_[2]; }
Value* length() const { return inputs_[3]; }
const RuntimeEntry& TargetFunction() const { return entry_; }
const RuntimeEntry& TargetFunction() const;
bool IsExternal() const { return cid_ == kExternalTwoByteStringCid; }
intptr_t class_id() const { return cid_; }
intptr_t index_scale() const { return Instance::ElementSizeFor(cid_); }
@ -5989,18 +5988,17 @@ class CaseInsensitiveCompareInstr
virtual Representation representation() const { return kTagged; }
DECLARE_INSTRUCTION(CaseInsensitiveCompare)
DECLARE_INSTRUCTION(CaseInsensitiveCompareUC16)
virtual CompileType ComputeType() const;
virtual bool AttributesEqual(Instruction* other) const {
return other->AsCaseInsensitiveCompare()->cid_ == cid_;
return other->AsCaseInsensitiveCompareUC16()->cid_ == cid_;
}
private:
const RuntimeEntry& entry_;
const intptr_t cid_;
DISALLOW_COPY_AND_ASSIGN(CaseInsensitiveCompareInstr);
DISALLOW_COPY_AND_ASSIGN(CaseInsensitiveCompareUC16Instr);
};
// Represents Math's static min and max functions.

View file

@ -5137,7 +5137,7 @@ void MathUnaryInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
}
}
LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
LocationSummary* CaseInsensitiveCompareUC16Instr::MakeLocationSummary(
Zone* zone,
bool opt) const {
const intptr_t kNumTemps = 0;
@ -5151,7 +5151,8 @@ LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
return summary;
}
void CaseInsensitiveCompareInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
void CaseInsensitiveCompareUC16Instr::EmitNativeCode(
FlowGraphCompiler* compiler) {
// Call the function.
__ CallRuntime(TargetFunction(), TargetFunction().argument_count());
}

View file

@ -4361,7 +4361,7 @@ void MathUnaryInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
}
}
LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
LocationSummary* CaseInsensitiveCompareUC16Instr::MakeLocationSummary(
Zone* zone,
bool opt) const {
const intptr_t kNumTemps = 0;
@ -4375,7 +4375,8 @@ LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
return summary;
}
void CaseInsensitiveCompareInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
void CaseInsensitiveCompareUC16Instr::EmitNativeCode(
FlowGraphCompiler* compiler) {
// Call the function.
__ CallRuntime(TargetFunction(), TargetFunction().argument_count());
}

View file

@ -55,7 +55,7 @@ DECLARE_FLAG(int, optimization_counter_threshold);
// - Optimized RegExps,
// - Precompilation.
#define FOR_EACH_UNREACHABLE_INSTRUCTION(M) \
M(CaseInsensitiveCompare) \
M(CaseInsensitiveCompareUC16) \
M(GenericCheckBound) \
M(IndirectGoto) \
M(Int64ToDouble) \

View file

@ -4459,7 +4459,7 @@ void MathUnaryInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
}
}
LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
LocationSummary* CaseInsensitiveCompareUC16Instr::MakeLocationSummary(
Zone* zone,
bool opt) const {
const intptr_t kNumTemps = 0;
@ -4473,7 +4473,8 @@ LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
return summary;
}
void CaseInsensitiveCompareInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
void CaseInsensitiveCompareUC16Instr::EmitNativeCode(
FlowGraphCompiler* compiler) {
// Save ESP. EDI is chosen because it is callee saved so we do not need to
// back it up before calling into the runtime.
static const Register kSavedSPReg = EDI;
@ -4488,7 +4489,7 @@ void CaseInsensitiveCompareInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
// Call the function.
__ CallRuntime(TargetFunction(), TargetFunction().argument_count());
// Restore ESP and pop the old value off the stack.
// Restore ESP.
__ movl(ESP, kSavedSPReg);
}

View file

@ -4511,7 +4511,7 @@ void MathUnaryInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
}
}
LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
LocationSummary* CaseInsensitiveCompareUC16Instr::MakeLocationSummary(
Zone* zone,
bool opt) const {
const intptr_t kNumTemps = 0;
@ -4525,7 +4525,8 @@ LocationSummary* CaseInsensitiveCompareInstr::MakeLocationSummary(
return summary;
}
void CaseInsensitiveCompareInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
void CaseInsensitiveCompareUC16Instr::EmitNativeCode(
FlowGraphCompiler* compiler) {
// Save RSP. R13 is chosen because it is callee saved so we do not need to
// back it up before calling into the runtime.
static const Register kSavedSPReg = R13;

View file

@ -1515,7 +1515,7 @@ CompileType MathMinMaxInstr::ComputeType() const {
return CompileType::FromCid(result_cid_);
}
CompileType CaseInsensitiveCompareInstr::ComputeType() const {
CompileType CaseInsensitiveCompareUC16Instr::ComputeType() const {
return CompileType::FromCid(kBoolCid);
}

View file

@ -166,10 +166,11 @@ void IrregexpCompilationPipeline::ParseFunction(
RegExp& regexp = RegExp::Handle(parsed_function->function().regexp());
const String& pattern = String::Handle(regexp.pattern());
const bool multiline = regexp.is_multi_line();
RegExpCompileData* compile_data = new (zone) RegExpCompileData();
// Parsing failures are handled in the RegExp factory constructor.
RegExpParser::ParseRegExp(pattern, regexp.flags(), compile_data);
RegExpParser::ParseRegExp(pattern, multiline, compile_data);
regexp.set_num_bracket_expressions(compile_data->capture_count);
regexp.set_capture_name_map(compile_data->capture_name_map);

View file

@ -21751,45 +21751,23 @@ RawRegExp* RegExp::New(Heap::Space space) {
NoSafepointScope no_safepoint;
result ^= raw;
result.set_type(kUninitialized);
result.set_flags(RegExpFlags());
result.set_num_registers(/*is_one_byte=*/false, -1);
result.set_num_registers(/*is_one_byte=*/true, -1);
result.set_flags(0);
result.set_num_registers(-1);
}
return result.raw();
}
const char* RegExpFlags::ToCString() const {
switch (value_ & ~kGlobal) {
case kIgnoreCase | kMultiLine | kDotAll | kUnicode:
return "imsu";
case kIgnoreCase | kMultiLine | kDotAll:
return "ims";
case kIgnoreCase | kMultiLine | kUnicode:
return "imu";
case kIgnoreCase | kUnicode | kDotAll:
return "ius";
case kMultiLine | kDotAll | kUnicode:
return "msu";
const char* RegExp::Flags() const {
switch (flags()) {
case kGlobal | kIgnoreCase | kMultiLine:
case kIgnoreCase | kMultiLine:
return "im";
case kIgnoreCase | kDotAll:
return "is";
case kIgnoreCase | kUnicode:
return "iu";
case kMultiLine | kDotAll:
return "ms";
case kMultiLine | kUnicode:
return "mu";
case kDotAll | kUnicode:
return "su";
case kGlobal | kIgnoreCase:
case kIgnoreCase:
return "i";
case kGlobal | kMultiLine:
case kMultiLine:
return "m";
case kDotAll:
return "s";
case kUnicode:
return "u";
default:
break;
}
@ -21811,7 +21789,9 @@ bool RegExp::CanonicalizeEquals(const Instance& other) const {
return false;
}
// Match the flags.
if (flags() != other_js.flags()) {
if ((is_global() != other_js.is_global()) ||
(is_ignore_case() != other_js.is_ignore_case()) ||
(is_multi_line() != other_js.is_multi_line())) {
return false;
}
return true;
@ -21820,7 +21800,7 @@ bool RegExp::CanonicalizeEquals(const Instance& other) const {
const char* RegExp::ToCString() const {
const String& str = String::Handle(pattern());
return OS::SCreate(Thread::Current()->zone(), "RegExp: pattern=%s flags=%s",
str.ToCString(), flags().ToCString());
str.ToCString(), Flags());
}
RawWeakProperty* WeakProperty::New(Heap::Space space) {

View file

@ -9105,55 +9105,6 @@ class StackTrace : public Instance {
friend class Debugger;
};
// Small value type wrapping the bit set of flags attached to a regular
// expression. All queries and comparisons are const so that they can be used
// on const objects and const references.
class RegExpFlags {
 public:
  // Flags are passed to a regex object as follows:
  // 'i': ignore case, 'g': do global matches, 'm': pattern is multi line,
  // 'u': pattern is full Unicode, not just BMP, 's': '.' in pattern matches
  // all characters including line terminators.
  enum Flags {
    kNone = 0,
    kGlobal = 1,
    kIgnoreCase = 2,
    kMultiLine = 4,
    kUnicode = 8,
    kDotAll = 16,
  };

  // Flag value used by the default constructor (no flags set).
  static const int kDefaultFlags = 0;

  RegExpFlags() : value_(kDefaultFlags) {}
  // Wraps a raw bit set; the value is stored as-is without validation.
  explicit RegExpFlags(int value) : value_(value) {}

  // Each query tests a single bit of the stored value.
  inline bool IsGlobal() const { return (value_ & kGlobal) != 0; }
  inline bool IgnoreCase() const { return (value_ & kIgnoreCase) != 0; }
  inline bool IsMultiLine() const { return (value_ & kMultiLine) != 0; }
  inline bool IsUnicode() const { return (value_ & kUnicode) != 0; }
  inline bool IsDotAll() const { return (value_ & kDotAll) != 0; }

  inline bool NeedsUnicodeCaseEquivalents() const {
    // Both unicode and ignore_case flags are set. We need to use ICU to find
    // the closure over case equivalents.
    return IsUnicode() && IgnoreCase();
  }

  // Each setter turns one bit on; there is no way to clear a flag again.
  void SetGlobal() { value_ |= kGlobal; }
  void SetIgnoreCase() { value_ |= kIgnoreCase; }
  void SetMultiLine() { value_ |= kMultiLine; }
  void SetUnicode() { value_ |= kUnicode; }
  void SetDotAll() { value_ |= kDotAll; }

  // Declared here, defined out of line.
  const char* ToCString() const;

  // Returns the raw bit set.
  int value() const { return value_; }

  // Two flag sets are equal exactly when their raw bit sets are equal.
  bool operator==(const RegExpFlags& other) const {
    return value_ == other.value_;
  }
  bool operator!=(const RegExpFlags& other) const {
    return value_ != other.value_;
  }

 private:
  int value_;
};
// Internal JavaScript regular expression object.
class RegExp : public Instance {
public:
@ -9167,11 +9118,20 @@ class RegExp : public Instance {
kComplex = 2,
};
// Flags are passed to a regex object as follows:
// 'i': ignore case, 'g': do global matches, 'm': pattern is multi line.
enum Flags {
kNone = 0,
kGlobal = 1,
kIgnoreCase = 2,
kMultiLine = 4,
};
enum {
kTypePos = 0,
kTypeSize = 2,
kFlagsPos = 2,
kFlagsSize = 5,
kFlagsSize = 4,
};
class TypeBits : public BitField<int8_t, RegExType, kTypePos, kTypeSize> {};
@ -9181,10 +9141,11 @@ class RegExp : public Instance {
bool is_simple() const { return (type() == kSimple); }
bool is_complex() const { return (type() == kComplex); }
intptr_t num_registers(bool is_one_byte) const {
return is_one_byte ? raw_ptr()->num_one_byte_registers_
: raw_ptr()->num_two_byte_registers_;
}
bool is_global() const { return (flags() & kGlobal); }
bool is_ignore_case() const { return (flags() & kIgnoreCase); }
bool is_multi_line() const { return (flags() & kMultiLine); }
intptr_t num_registers() const { return raw_ptr()->num_registers_; }
RawString* pattern() const { return raw_ptr()->pattern_; }
RawSmi* num_bracket_expressions() const {
@ -9248,48 +9209,15 @@ class RegExp : public Instance {
void set_num_bracket_expressions(intptr_t value) const;
void set_capture_name_map(const Array& array) const;
void set_is_global() const {
RegExpFlags f = flags();
f.SetGlobal();
set_flags(f);
}
void set_is_ignore_case() const {
RegExpFlags f = flags();
f.SetIgnoreCase();
set_flags(f);
}
void set_is_multi_line() const {
RegExpFlags f = flags();
f.SetMultiLine();
set_flags(f);
}
void set_is_unicode() const {
RegExpFlags f = flags();
f.SetUnicode();
set_flags(f);
}
void set_is_dot_all() const {
RegExpFlags f = flags();
f.SetDotAll();
set_flags(f);
}
void set_is_global() const { set_flags(flags() | kGlobal); }
void set_is_ignore_case() const { set_flags(flags() | kIgnoreCase); }
void set_is_multi_line() const { set_flags(flags() | kMultiLine); }
void set_is_simple() const { set_type(kSimple); }
void set_is_complex() const { set_type(kComplex); }
void set_num_registers(bool is_one_byte, intptr_t value) const {
if (is_one_byte) {
StoreNonPointer(&raw_ptr()->num_one_byte_registers_, value);
} else {
StoreNonPointer(&raw_ptr()->num_two_byte_registers_, value);
}
void set_num_registers(intptr_t value) const {
StoreNonPointer(&raw_ptr()->num_registers_, value);
}
RegExpFlags flags() const {
return RegExpFlags(FlagsBits::decode(raw_ptr()->type_flags_));
}
void set_flags(RegExpFlags flags) const {
StoreNonPointer(&raw_ptr()->type_flags_,
FlagsBits::update(flags.value(), raw_ptr()->type_flags_));
}
const char* Flags() const;
virtual bool CanonicalizeEquals(const Instance& other) const;
@ -9305,8 +9233,13 @@ class RegExp : public Instance {
StoreNonPointer(&raw_ptr()->type_flags_,
TypeBits::update(type, raw_ptr()->type_flags_));
}
void set_flags(intptr_t value) const {
StoreNonPointer(&raw_ptr()->type_flags_,
FlagsBits::update(value, raw_ptr()->type_flags_));
}
RegExType type() const { return TypeBits::decode(raw_ptr()->type_flags_); }
intptr_t flags() const { return FlagsBits::decode(raw_ptr()->type_flags_); }
FINAL_HEAP_OBJECT_IMPLEMENTATION(RegExp, Instance);
friend class Class;

View file

@ -1487,8 +1487,8 @@ void RegExp::PrintJSONImpl(JSONStream* stream, bool ref) const {
return;
}
jsobj.AddProperty("isCaseSensitive", !flags().IgnoreCase());
jsobj.AddProperty("isMultiLine", flags().IsMultiLine());
jsobj.AddProperty("isCaseSensitive", !is_ignore_case());
jsobj.AddProperty("isMultiLine", is_multi_line());
if (!FLAG_interpret_irregexp) {
Function& func = Function::Handle();

View file

@ -2428,17 +2428,11 @@ class RawRegExp : public RawInstance {
VISIT_TO(RawObject*, external_two_byte_sticky_function_)
RawObject** to_snapshot(Snapshot::Kind kind) { return to(); }
// The same pattern may use different amount of registers if compiled
// for a one-byte target than a two-byte target. For example, we do not
// need to allocate registers to check whether the current position is within
// a surrogate pair when matching a Unicode pattern against a one-byte string.
intptr_t num_one_byte_registers_;
intptr_t num_two_byte_registers_;
intptr_t num_registers_;
// A bitfield with two fields:
// type: Uninitialized, simple or complex.
// flags: Represents global/local, case insensitive, multiline, unicode,
// dotAll.
// flags: Represents global/local, case insensitive, multiline.
int8_t type_flags_;
};

View file

@ -2172,9 +2172,7 @@ RawRegExp* RegExp::ReadFrom(SnapshotReader* reader,
*reader->StringHandle() ^= reader->ReadObjectImpl(kAsInlinedObject);
regex.set_pattern(*reader->StringHandle());
regex.StoreNonPointer(&regex.raw_ptr()->num_one_byte_registers_,
reader->Read<int32_t>());
regex.StoreNonPointer(&regex.raw_ptr()->num_two_byte_registers_,
regex.StoreNonPointer(&regex.raw_ptr()->num_registers_,
reader->Read<int32_t>());
regex.StoreNonPointer(&regex.raw_ptr()->type_flags_, reader->Read<int8_t>());
@ -2204,8 +2202,7 @@ void RawRegExp::WriteTo(SnapshotWriter* writer,
// Write out all the other fields.
writer->Write<RawObject*>(ptr()->num_bracket_expressions_);
writer->WriteObjectImpl(ptr()->pattern_, kAsInlinedObject);
writer->Write<int32_t>(ptr()->num_one_byte_registers_);
writer->Write<int32_t>(ptr()->num_two_byte_registers_);
writer->Write<int32_t>(ptr()->num_registers_);
writer->Write<int8_t>(ptr()->type_flags_);
}

File diff suppressed because it is too large Load diff

View file

@ -5,14 +5,11 @@
#ifndef RUNTIME_VM_REGEXP_H_
#define RUNTIME_VM_REGEXP_H_
#include "platform/unicode.h"
#include "vm/compiler/assembler/assembler.h"
#include "vm/compiler/backend/flow_graph_compiler.h"
#include "vm/compiler/backend/il.h"
#include "vm/object.h"
#include "vm/regexp_assembler.h"
#include "vm/splay-tree.h"
namespace dart {
@ -28,42 +25,32 @@ class BoyerMooreLookahead;
class CharacterRange {
public:
CharacterRange() : from_(0), to_(0) {}
CharacterRange(int32_t from, int32_t to) : from_(from), to_(to) {}
CharacterRange(uint16_t from, uint16_t to) : from_(from), to_(to) {}
static void AddClassEscape(uint16_t type,
ZoneGrowableArray<CharacterRange>* ranges);
// Add class escapes with case equivalent closure for \w and \W if necessary.
static void AddClassEscape(uint16_t type,
ZoneGrowableArray<CharacterRange>* ranges,
bool add_unicode_case_equivalents);
static GrowableArray<const intptr_t> GetWordBounds();
static inline CharacterRange Singleton(int32_t value) {
static inline CharacterRange Singleton(uint16_t value) {
return CharacterRange(value, value);
}
static inline CharacterRange Range(int32_t from, int32_t to) {
static inline CharacterRange Range(uint16_t from, uint16_t to) {
ASSERT(from <= to);
return CharacterRange(from, to);
}
static inline CharacterRange Everything() {
return CharacterRange(0, Utf::kMaxCodePoint);
return CharacterRange(0, 0xFFFF);
}
static inline ZoneGrowableArray<CharacterRange>* List(Zone* zone,
CharacterRange range) {
auto list = new (zone) ZoneGrowableArray<CharacterRange>(1);
list->Add(range);
return list;
}
bool Contains(int32_t i) const { return from_ <= i && i <= to_; }
int32_t from() const { return from_; }
void set_from(int32_t value) { from_ = value; }
int32_t to() const { return to_; }
void set_to(int32_t value) { to_ = value; }
bool Contains(uint16_t i) const { return from_ <= i && i <= to_; }
uint16_t from() const { return from_; }
void set_from(uint16_t value) { from_ = value; }
uint16_t to() const { return to_; }
void set_to(uint16_t value) { to_ = value; }
bool is_valid() const { return from_ <= to_; }
bool IsEverything(int32_t max) const { return from_ == 0 && to_ >= max; }
bool IsEverything(uint16_t max) const { return from_ == 0 && to_ >= max; }
bool IsSingleton() const { return (from_ == to_); }
static void AddCaseEquivalents(ZoneGrowableArray<CharacterRange>* ranges,
bool is_one_byte,
Zone* zone);
void AddCaseEquivalents(ZoneGrowableArray<CharacterRange>* ranges,
bool is_one_byte,
Zone* zone);
static void Split(ZoneGrowableArray<CharacterRange>* base,
GrowableArray<const intptr_t> overlay,
ZoneGrowableArray<CharacterRange>** included,
@ -84,8 +71,8 @@ class CharacterRange {
static const intptr_t kPayloadMask = (1 << 24) - 1;
private:
int32_t from_;
int32_t to_;
uint16_t from_;
uint16_t to_;
DISALLOW_ALLOCATION();
};
@ -118,98 +105,6 @@ class OutSet : public ZoneAllocated {
friend class Trace;
};
// A mapping from integers, specified as ranges, to a set of integers.
// Used for mapping character ranges to choices.
class DispatchTable : public ValueObject {
public:
// Creates an empty table whose backing splay tree is constructed with the
// given zone.
explicit DispatchTable(Zone* zone) : tree_(zone) {}
// One table entry: the bounds from_/to_ of a range together with the OutSet
// associated with it.
class Entry {
public:
// Creates an entry with both bounds 0 and a null out set.
Entry() : from_(0), to_(0), out_set_(nullptr) {}
// Creates an entry for the given bounds; asserts that from <= to.
Entry(int32_t from, int32_t to, OutSet* out_set)
: from_(from), to_(to), out_set_(out_set) {
ASSERT(from <= to);
}
int32_t from() { return from_; }
int32_t to() { return to_; }
void set_to(int32_t value) { to_ = value; }
// Replaces out_set_ with the result of extending it by the given value.
// NOTE(review): out_set_ is dereferenced unchecked, so this presumably must
// not be called on a default-constructed entry - confirm.
void AddValue(int value, Zone* zone) {
out_set_ = out_set_->Extend(value, zone);
}
OutSet* out_set() { return out_set_; }
private:
int32_t from_;
int32_t to_;
OutSet* out_set_;
};
// Configuration handed to ZoneSplayTree: int32_t keys mapping to Entry
// values, ordered by Compare.
class Config {
public:
typedef int32_t Key;
typedef Entry Value;
// Declared here; the value is defined elsewhere.
static const int32_t kNoKey;
// Returns a default-constructed Entry.
static const Entry NoValue() { return Value(); }
// Three-way comparison: 0 if a == b, -1 if a < b, otherwise 1.
static inline int Compare(int32_t a, int32_t b) {
if (a == b)
return 0;
else if (a < b)
return -1;
else
return 1;
}
};
void AddRange(CharacterRange range, int32_t value, Zone* zone);
OutSet* Get(int32_t value);
void Dump();
// Forwards the callback to the underlying tree's ForEach.
template <typename Callback>
void ForEach(Callback* callback) {
return tree()->ForEach(callback);
}
private:
// There can't be a static empty set since it allocates its
// successors in a zone and caches them.
OutSet* empty() { return &empty_; }
OutSet empty_;
// Accessor for the splay tree that stores the entries.
ZoneSplayTree<Config>* tree() { return &tree_; }
ZoneSplayTree<Config> tree_;
};
// Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates.
class UnicodeRangeSplitter : public ValueObject {
public:
UnicodeRangeSplitter(Zone* zone, ZoneGrowableArray<CharacterRange>* base);
// NOTE(review): the (key, entry) signature suggests this is invoked once per
// table entry through DispatchTable::ForEach - confirm in the definition.
void Call(uint32_t from, DispatchTable::Entry entry);
// Accessors for the four result lists, one per category.
ZoneGrowableArray<CharacterRange>* bmp() { return bmp_; }
ZoneGrowableArray<CharacterRange>* lead_surrogates() {
return lead_surrogates_;
}
ZoneGrowableArray<CharacterRange>* trail_surrogates() {
return trail_surrogates_;
}
ZoneGrowableArray<CharacterRange>* non_bmp() const { return non_bmp_; }
private:
// Integer tags for the input ranges (kBase) and for each output category.
static const int kBase = 0;
// Separate ranges into the following categories:
static const int kBmpCodePoints = 1;
static const int kLeadSurrogates = 2;
static const int kTrailSurrogates = 3;
static const int kNonBmpCodePoints = 4;
Zone* zone_;
DispatchTable table_;
ZoneGrowableArray<CharacterRange>* bmp_;
ZoneGrowableArray<CharacterRange>* lead_surrogates_;
ZoneGrowableArray<CharacterRange>* trail_surrogates_;
ZoneGrowableArray<CharacterRange>* non_bmp_;
};
#define FOR_EACH_NODE_TYPE(VISIT) \
VISIT(End) \
VISIT(Action) \
@ -449,7 +344,9 @@ class RegExpNode : public ZoneAllocated {
// If we know that the input is one-byte then there are some nodes that can
// never match. This method returns a node that can be substituted for
// itself, or NULL if the node can never match.
virtual RegExpNode* FilterOneByte(intptr_t depth) { return this; }
virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case) {
return this;
}
// Helper for FilterOneByte.
RegExpNode* replacement() {
ASSERT(info()->replacement_calculated);
@ -546,7 +443,7 @@ class SeqRegExpNode : public RegExpNode {
: RegExpNode(on_success->zone()), on_success_(on_success) {}
RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; }
virtual RegExpNode* FilterOneByte(intptr_t depth);
virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case);
virtual void FillInBMInfo(intptr_t offset,
intptr_t budget,
BoyerMooreLookahead* bm,
@ -556,7 +453,7 @@ class SeqRegExpNode : public RegExpNode {
}
protected:
RegExpNode* FilterSuccessor(intptr_t depth);
RegExpNode* FilterSuccessor(intptr_t depth, bool ignore_case);
private:
RegExpNode* on_success_;
@ -664,19 +561,6 @@ class TextNode : public SeqRegExpNode {
read_backward_(read_backward) {
elms_->Add(TextElement::CharClass(that));
}
// Create TextNode for a single character class for the given ranges.
static TextNode* CreateForCharacterRanges(
ZoneGrowableArray<CharacterRange>* ranges,
bool read_backward,
RegExpNode* on_success,
RegExpFlags flags);
// Create TextNode for a surrogate pair with a range given for the
// lead and the trail surrogate each.
static TextNode* CreateForSurrogatePair(CharacterRange lead,
CharacterRange trail,
bool read_backward,
RegExpNode* on_success,
RegExpFlags flags);
virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual intptr_t EatsAtLeast(intptr_t still_to_find,
@ -697,7 +581,7 @@ class TextNode : public SeqRegExpNode {
BoyerMooreLookahead* bm,
bool not_at_start);
void CalculateOffsets();
virtual RegExpNode* FilterOneByte(intptr_t depth);
virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case);
private:
enum TextEmitPassType {
@ -775,13 +659,11 @@ class BackReferenceNode : public SeqRegExpNode {
public:
BackReferenceNode(intptr_t start_reg,
intptr_t end_reg,
RegExpFlags flags,
bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
start_reg_(start_reg),
end_reg_(end_reg),
flags_(flags),
read_backward_(read_backward) {}
virtual void Accept(NodeVisitor* visitor);
intptr_t start_register() { return start_reg_; }
@ -805,7 +687,6 @@ class BackReferenceNode : public SeqRegExpNode {
private:
intptr_t start_reg_;
intptr_t end_reg_;
RegExpFlags flags_;
bool read_backward_;
};
@ -880,9 +761,9 @@ class GuardedAlternative {
public:
explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) {}
void AddGuard(Guard* guard, Zone* zone);
RegExpNode* node() const { return node_; }
RegExpNode* node() { return node_; }
void set_node(RegExpNode* node) { node_ = node; }
ZoneGrowableArray<Guard*>* guards() const { return guards_; }
ZoneGrowableArray<Guard*>* guards() { return guards_; }
private:
RegExpNode* node_;
@ -930,12 +811,11 @@ class ChoiceNode : public RegExpNode {
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return true;
}
virtual RegExpNode* FilterOneByte(intptr_t depth);
virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case);
virtual bool read_backward() { return false; }
protected:
intptr_t GreedyLoopTextLengthForAlternative(
const GuardedAlternative* alternative);
intptr_t GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
ZoneGrowableArray<GuardedAlternative>* alternatives_;
private:
@ -1006,7 +886,7 @@ class NegativeLookaroundChoiceNode : public ChoiceNode {
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return !is_first;
}
virtual RegExpNode* FilterOneByte(intptr_t depth);
virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case);
};
class LoopChoiceNode : public ChoiceNode {
@ -1038,7 +918,7 @@ class LoopChoiceNode : public ChoiceNode {
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
virtual bool read_backward() { return read_backward_; }
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* FilterOneByte(intptr_t depth);
virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case);
private:
// AddAlternative is made private for loop nodes because alternatives
@ -1406,8 +1286,10 @@ class NodeVisitor : public ValueObject {
// +-------+ +------------+
class Analysis : public NodeVisitor {
public:
explicit Analysis(bool is_one_byte)
: is_one_byte_(is_one_byte), error_message_(NULL) {}
Analysis(bool ignore_case, bool is_one_byte)
: ignore_case_(ignore_case),
is_one_byte_(is_one_byte),
error_message_(NULL) {}
void EnsureAnalyzed(RegExpNode* node);
#define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that);
@ -1423,6 +1305,7 @@ class Analysis : public NodeVisitor {
void fail(const char* error_message) { error_message_ = error_message; }
private:
bool ignore_case_;
bool is_one_byte_;
const char* error_message_;
@ -1515,7 +1398,8 @@ class RegExpEngine : public AllStatic {
static RawRegExp* CreateRegExp(Thread* thread,
const String& pattern,
RegExpFlags flags);
bool multi_line,
bool ignore_case);
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
};

View file

@ -4,10 +4,6 @@
#include "vm/regexp_assembler.h"
#include "unicode/uchar.h"
#include "platform/unicode.h"
#include "vm/flags.h"
#include "vm/regexp.h"
#include "vm/unibrow-inl.h"
@ -20,10 +16,11 @@ void PrintUtf16(uint16_t c) {
OS::PrintErr(format, c);
}
RawBool* CaseInsensitiveCompareUCS2(RawString* str_raw,
RawSmi* lhs_index_raw,
RawSmi* rhs_index_raw,
RawSmi* length_raw) {
static RawBool* CaseInsensitiveCompareUC16(RawString* str_raw,
RawSmi* lhs_index_raw,
RawSmi* rhs_index_raw,
RawSmi* length_raw) {
const String& str = String::Handle(str_raw);
const Smi& lhs_index = Smi::Handle(lhs_index_raw);
const Smi& rhs_index = Smi::Handle(rhs_index_raw);
@ -51,50 +48,13 @@ RawBool* CaseInsensitiveCompareUCS2(RawString* str_raw,
return Bool::True().raw();
}
// Case-insensitively compares two ranges of the same string, interpreting the
// code units as UTF-16. Called from generated RegExp code.
//
// |str_raw| is the subject string, |lhs_index_raw| and |rhs_index_raw| are the
// start offsets of the two ranges and |length_raw| is their common length in
// code units. Returns Bool::True() when every position folds to the same
// value under u_foldCase(..., U_FOLD_CASE_DEFAULT), Bool::False() otherwise.
RawBool* CaseInsensitiveCompareUTF16(RawString* str_raw,
                                     RawSmi* lhs_index_raw,
                                     RawSmi* rhs_index_raw,
                                     RawSmi* length_raw) {
  const String& str = String::Handle(str_raw);
  const Smi& lhs_index = Smi::Handle(lhs_index_raw);
  const Smi& rhs_index = Smi::Handle(rhs_index_raw);
  const Smi& length = Smi::Handle(length_raw);

  // The Smi payloads do not change during the comparison, so read them once.
  const intptr_t lhs_start = lhs_index.Value();
  const intptr_t rhs_start = rhs_index.Value();
  const intptr_t count = length.Value();

  intptr_t offset = 0;
  while (offset < count) {
    int32_t lhs_char = str.CharAt(lhs_start + offset);
    int32_t rhs_char = str.CharAt(rhs_start + offset);
    // Number of code units consumed by this step: 2 only when both sides hold
    // a complete surrogate pair at this position.
    intptr_t step = 1;

    if (Utf16::IsLeadSurrogate(lhs_char)) {
      // Non-BMP characters have no case-equivalents inside the BMP, so a
      // match is only possible when the other side is non-BMP as well.
      if (!Utf16::IsLeadSurrogate(rhs_char)) {
        return Bool::False().raw();
      }
      const intptr_t next = offset + 1;
      if (next < count) {
        const uint16_t lhs_trail = str.CharAt(lhs_start + next);
        const uint16_t rhs_trail = str.CharAt(rhs_start + next);
        if (Utf16::IsTrailSurrogate(lhs_trail) &&
            Utf16::IsTrailSurrogate(rhs_trail)) {
          // Combine each pair into one code point so that folding sees the
          // whole character.
          lhs_char = Utf16::Decode(lhs_char, lhs_trail);
          rhs_char = Utf16::Decode(rhs_char, rhs_trail);
          step = 2;
        }
      }
    }

    const int32_t lhs_folded = u_foldCase(lhs_char, U_FOLD_CASE_DEFAULT);
    const int32_t rhs_folded = u_foldCase(rhs_char, U_FOLD_CASE_DEFAULT);
    if (lhs_folded != rhs_folded) {
      return Bool::False().raw();
    }
    offset += step;
  }
  return Bool::True().raw();
}
DEFINE_RAW_LEAF_RUNTIME_ENTRY(
CaseInsensitiveCompareUCS2,
CaseInsensitiveCompareUC16,
4,
false /* is_float */,
reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUCS2));
reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUC16));
DEFINE_RAW_LEAF_RUNTIME_ENTRY(
CaseInsensitiveCompareUTF16,
4,
false /* is_float */,
reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUTF16));
BlockLabel::BlockLabel()
: block_(NULL), is_bound_(false), is_linked_(false), pos_(-1) {
@ -112,18 +72,4 @@ RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
RegExpMacroAssembler::~RegExpMacroAssembler() {}
// Emits a check that the position at |cp_offset| is not in the middle of a
// surrogate pair. Built entirely from the assembler's own primitives, so it
// is shared by every concrete macro assembler.
//
// Control is sent to |on_failure| when the character at |cp_offset| lies in
// the trail surrogate range and the character before it lies in the lead
// surrogate range; in every other case execution continues at the locally
// bound |ok| label.
// NOTE(review): the label handed to LoadCurrentCharacter is presumably the
// target taken when the requested offset cannot be loaded, which would make
// an out-of-range load count as "ok" -- confirm against the implementations.
void RegExpMacroAssembler::CheckNotInSurrogatePair(intptr_t cp_offset,
BlockLabel* on_failure) {
BlockLabel ok;
// Check that current character is not a trail surrogate.
// Not a trail surrogate means we cannot be inside a pair: skip to |ok|.
LoadCurrentCharacter(cp_offset, &ok);
CheckCharacterNotInRange(Utf16::kTrailSurrogateStart,
Utf16::kTrailSurrogateEnd, &ok);
// Check that previous character is not a lead surrogate.
// A lead surrogate directly before a trail surrogate is a pair: fail.
LoadCurrentCharacter(cp_offset - 1, &ok);
CheckCharacterInRange(Utf16::kLeadSurrogateStart, Utf16::kLeadSurrogateEnd,
on_failure);
// Every non-failing path joins here.
BindBlock(&ok);
}
} // namespace dart

View file

@ -13,19 +13,6 @@ namespace dart {
// Utility function for the DotPrinter
void PrintUtf16(uint16_t c);
// Compares two-byte strings case insensitively as UCS2.
// Called from generated RegExp code.
RawBool* CaseInsensitiveCompareUCS2(RawString* str_raw,
RawSmi* lhs_index_raw,
RawSmi* rhs_index_raw,
RawSmi* length_raw);
// Compares two-byte strings case insensitively as UTF16.
// Called from generated RegExp code.
RawBool* CaseInsensitiveCompareUTF16(RawString* str_raw,
RawSmi* lhs_index_raw,
RawSmi* rhs_index_raw,
RawSmi* length_raw);
/// Convenience wrapper around a BlockEntryInstr pointer.
class BlockLabel : public ValueObject {
@ -140,7 +127,6 @@ class RegExpMacroAssembler : public ZoneAllocated {
BlockLabel* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg,
bool read_backward,
bool unicode,
BlockLabel* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
@ -229,33 +215,22 @@ class RegExpMacroAssembler : public ZoneAllocated {
virtual void ClearRegisters(intptr_t reg_from, intptr_t reg_to) = 0;
virtual void WriteStackPointerToRegister(intptr_t reg) = 0;
// Check that we are not in the middle of a surrogate pair.
void CheckNotInSurrogatePair(intptr_t cp_offset, BlockLabel* on_failure);
// Controls the generation of large inlined constants in the code.
void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
bool slow_safe() { return slow_safe_compiler_; }
// Describes whether, and in which way, the regular expression being compiled
// is global. The accessors that follow test this value: global() is true for
// everything except NOT_GLOBAL, global_with_zero_length_check() is true for
// GLOBAL and GLOBAL_UNICODE, and global_unicode() is true for GLOBAL_UNICODE
// only.
enum GlobalMode {
NOT_GLOBAL,
GLOBAL,
// Global, but global_with_zero_length_check() reports false for this mode.
GLOBAL_NO_ZERO_LENGTH_CHECK,
// Global with the zero-length check; the only mode for which
// global_unicode() reports true.
GLOBAL_UNICODE
};
enum GlobalMode { NOT_GLOBAL, GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK };
// Set whether the regular expression has the global flag. Exiting due to
// a failure in a global regexp may still mean success overall.
inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
inline bool global() { return global_mode_ != NOT_GLOBAL; }
inline bool global_with_zero_length_check() {
return global_mode_ == GLOBAL || global_mode_ == GLOBAL_UNICODE;
}
inline bool global_unicode() { return global_mode_ == GLOBAL_UNICODE; }
inline bool global_with_zero_length_check() { return global_mode_ == GLOBAL; }
Zone* zone() const { return zone_; }
private:
bool slow_safe_compiler_;
GlobalMode global_mode_;
bool global_mode_;
Zone* zone_;
};

View file

@ -349,14 +349,11 @@ void BytecodeRegExpMacroAssembler::CheckNotBackReference(
void BytecodeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
intptr_t start_reg,
bool read_backward,
bool unicode,
BlockLabel* on_not_equal) {
ASSERT(start_reg >= 0);
ASSERT(start_reg <= kMaxRegister);
Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD
: BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD)
: (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE
: BC_CHECK_NOT_BACK_REF_NO_CASE),
Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD
: BC_CHECK_NOT_BACK_REF_NO_CASE,
start_reg);
EmitOrLink(on_not_equal);
}
@ -437,10 +434,11 @@ static intptr_t Prepare(const RegExp& regexp,
}
#endif // !defined(PRODUCT)
const bool multiline = regexp.is_multi_line();
RegExpCompileData* compile_data = new (zone) RegExpCompileData();
// Parsing failures are handled in the RegExp factory constructor.
RegExpParser::ParseRegExp(pattern, regexp.flags(), compile_data);
RegExpParser::ParseRegExp(pattern, multiline, compile_data);
regexp.set_num_bracket_expressions(compile_data->capture_count);
regexp.set_capture_name_map(compile_data->capture_name_map);
@ -453,15 +451,15 @@ static intptr_t Prepare(const RegExp& regexp,
RegExpEngine::CompilationResult result = RegExpEngine::CompileBytecode(
compile_data, regexp, is_one_byte, sticky, zone);
ASSERT(result.bytecode != NULL);
ASSERT(regexp.num_registers(is_one_byte) == -1 ||
regexp.num_registers(is_one_byte) == result.num_registers);
regexp.set_num_registers(is_one_byte, result.num_registers);
ASSERT((regexp.num_registers() == -1) ||
(regexp.num_registers() == result.num_registers));
regexp.set_num_registers(result.num_registers);
regexp.set_bytecode(is_one_byte, sticky, *(result.bytecode));
}
ASSERT(regexp.num_registers(is_one_byte) != -1);
ASSERT(regexp.num_registers() != -1);
return regexp.num_registers(is_one_byte) +
return regexp.num_registers() +
(Smi::Value(regexp.num_bracket_expressions()) + 1) * 2;
}

View file

@ -83,7 +83,6 @@ class BytecodeRegExpMacroAssembler : public RegExpMacroAssembler {
BlockLabel* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg,
bool read_backward,
bool unicode,
BlockLabel* on_no_match);
virtual void IfRegisterLT(intptr_t register_index,
intptr_t comparand,

View file

@ -821,7 +821,6 @@ void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
intptr_t start_reg,
bool read_backward,
bool unicode,
BlockLabel* on_no_match) {
TAG();
ASSERT(start_reg + 1 <= registers_count_);
@ -968,17 +967,9 @@ void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
Value* length_value = Bind(LoadLocal(capture_length_));
Definition* is_match_def;
if (unicode) {
is_match_def = new (Z) CaseInsensitiveCompareInstr(
string_value, lhs_index_value, rhs_index_value, length_value,
kCaseInsensitiveCompareUTF16RuntimeEntry, specialization_cid_);
} else {
is_match_def = new (Z) CaseInsensitiveCompareInstr(
string_value, lhs_index_value, rhs_index_value, length_value,
kCaseInsensitiveCompareUCS2RuntimeEntry, specialization_cid_);
}
Definition* is_match_def = new (Z) CaseInsensitiveCompareUC16Instr(
string_value, lhs_index_value, rhs_index_value, length_value,
specialization_cid_);
BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
on_no_match);

View file

@ -67,7 +67,6 @@ class IRRegExpMacroAssembler : public RegExpMacroAssembler {
BlockLabel* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg,
bool read_backward,
bool unicode,
BlockLabel* on_no_match);
virtual void CheckNotCharacter(uint32_t c, BlockLabel* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,

View file

@ -108,8 +108,7 @@ class RegExpAssertion : public RegExpTree {
BOUNDARY,
NON_BOUNDARY
};
RegExpAssertion(AssertionType type, RegExpFlags flags)
: assertion_type_(type), flags_(flags) {}
explicit RegExpAssertion(AssertionType type) : assertion_type_(type) {}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success);
virtual RegExpAssertion* AsAssertion();
@ -122,7 +121,6 @@ class RegExpAssertion : public RegExpTree {
private:
AssertionType assertion_type_;
RegExpFlags flags_;
};
class CharacterSet : public ValueObject {
@ -152,40 +150,18 @@ class CharacterSet : public ValueObject {
class RegExpCharacterClass : public RegExpTree {
public:
enum Flag {
// The character class is negated and should match everything but the
// specified ranges.
NEGATED = 1 << 0,
// The character class contains part of a split surrogate and should not
// be unicode-desugared.
CONTAINS_SPLIT_SURROGATE = 1 << 1,
};
using CharacterClassFlags = intptr_t;
static inline CharacterClassFlags DefaultFlags() { return 0; }
RegExpCharacterClass(
ZoneGrowableArray<CharacterRange>* ranges,
RegExpFlags flags,
CharacterClassFlags character_class_flags = DefaultFlags())
: set_(ranges),
flags_(flags),
character_class_flags_(character_class_flags) {
// Convert the empty set of ranges to the negated Everything() range.
if (ranges->is_empty()) {
ranges->Add(CharacterRange::Everything());
character_class_flags_ ^= NEGATED;
}
}
RegExpCharacterClass(uint16_t type, RegExpFlags flags)
: set_(type), flags_(flags), character_class_flags_(0) {}
RegExpCharacterClass(ZoneGrowableArray<CharacterRange>* ranges,
bool is_negated)
: set_(ranges), is_negated_(is_negated) {}
explicit RegExpCharacterClass(uint16_t type)
: set_(type), is_negated_(false) {}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success);
virtual RegExpCharacterClass* AsCharacterClass();
virtual bool IsCharacterClass() const;
virtual bool IsTextElement() const { return true; }
virtual intptr_t min_match() const { return 1; }
// The character class may match two code units for unicode regexps.
virtual intptr_t max_match() const { return 2; }
virtual intptr_t max_match() const { return 1; }
virtual void AppendToText(RegExpText* text);
CharacterSet character_set() const { return set_; }
// TODO(lrn): Remove need for complex version if is_standard that
@ -204,22 +180,16 @@ class RegExpCharacterClass : public RegExpTree {
// * : All characters
uint16_t standard_type() const { return set_.standard_set_type(); }
ZoneGrowableArray<CharacterRange>* ranges() { return set_.ranges(); }
bool is_negated() const { return character_class_flags_ & NEGATED; }
RegExpFlags flags() const { return flags_; }
bool contains_split_surrogate() const {
return character_class_flags_ & CONTAINS_SPLIT_SURROGATE;
}
bool is_negated() const { return is_negated_; }
private:
CharacterSet set_;
RegExpFlags flags_;
CharacterClassFlags character_class_flags_;
bool is_negated_;
};
class RegExpAtom : public RegExpTree {
public:
RegExpAtom(ZoneGrowableArray<uint16_t>* data, RegExpFlags flags)
: data_(data), flags_(flags) {}
explicit RegExpAtom(ZoneGrowableArray<uint16_t>* data) : data_(data) {}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success);
virtual RegExpAtom* AsAtom();
@ -230,12 +200,9 @@ class RegExpAtom : public RegExpTree {
virtual void AppendToText(RegExpText* text);
ZoneGrowableArray<uint16_t>* data() const { return data_; }
intptr_t length() const { return data_->length(); }
RegExpFlags flags() const { return flags_; }
bool ignore_case() const { return flags_.IgnoreCase(); }
private:
ZoneGrowableArray<uint16_t>* data_;
const RegExpFlags flags_;
};
class RegExpText : public RegExpTree {
@ -403,10 +370,9 @@ class RegExpLookaround : public RegExpTree {
class RegExpBackReference : public RegExpTree {
public:
explicit RegExpBackReference(RegExpFlags flags)
: capture_(nullptr), name_(nullptr), flags_(flags) {}
RegExpBackReference(RegExpCapture* capture, RegExpFlags flags)
: capture_(capture), name_(nullptr), flags_(flags) {}
RegExpBackReference() : capture_(nullptr), name_(nullptr) {}
explicit RegExpBackReference(RegExpCapture* capture)
: capture_(capture), name_(nullptr) {}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success);
virtual RegExpBackReference* AsBackReference();
@ -425,7 +391,6 @@ class RegExpBackReference : public RegExpTree {
private:
RegExpCapture* capture_;
const ZoneGrowableArray<uint16_t>* name_;
RegExpFlags flags_;
};
class RegExpEmpty : public RegExpTree {

View file

@ -55,19 +55,17 @@ V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 39, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 41, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 42, 8) /*bc8 reg_idx24 addr32*/ \
V(CHECK_NOT_REGS_EQUAL, 43, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
V(CHECK_REGISTER_LT, 44, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_GE, 45, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_EQ_POS, 46, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_AT_START, 47, 8) /* bc8 pad24 addr32 */ \
V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */ \
V(CHECK_GREEDY, 49, 8) /* bc8 pad24 addr32 */ \
V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \
V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */
V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
V(CHECK_NOT_AT_START, 46, 8) /* bc8 offset24 addr32 */ \
V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \
V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \
V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */
// clang-format on

View file

@ -24,25 +24,27 @@ static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
intptr_t from,
intptr_t current,
intptr_t len,
const String& subject,
bool unicode);
const String& subject);
template <>
bool BackRefMatchesNoCase<uint16_t>(Canonicalize* interp_canonicalize,
intptr_t from,
intptr_t current,
intptr_t len,
const String& subject,
bool unicode) {
Bool& ret = Bool::Handle();
if (unicode) {
ret = CaseInsensitiveCompareUTF16(subject.raw(), Smi::New(from),
Smi::New(current), Smi::New(len));
} else {
ret = CaseInsensitiveCompareUCS2(subject.raw(), Smi::New(from),
Smi::New(current), Smi::New(len));
const String& subject) {
for (int i = 0; i < len; i++) {
int32_t old_char = subject.CharAt(from++);
int32_t new_char = subject.CharAt(current++);
if (old_char == new_char) continue;
int32_t old_string[1] = {old_char};
int32_t new_string[1] = {new_char};
interp_canonicalize->get(old_char, '\0', old_string);
interp_canonicalize->get(new_char, '\0', new_string);
if (old_string[0] != new_string[0]) {
return false;
}
}
return ret.value();
return true;
}
template <>
@ -50,9 +52,7 @@ bool BackRefMatchesNoCase<uint8_t>(Canonicalize* interp_canonicalize,
intptr_t from,
intptr_t current,
intptr_t len,
const String& subject,
bool unicode) {
// For Latin1 characters the unicode flag makes no difference.
const String& subject) {
for (int i = 0; i < len; i++) {
unsigned int old_char = subject.CharAt(from++);
unsigned int new_char = subject.CharAt(current++);
@ -513,11 +513,7 @@ static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base,
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break;
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
FALL_THROUGH;
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
const bool unicode =
(insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from < 0 || len <= 0) {
@ -529,7 +525,7 @@ static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base,
break;
} else {
if (BackRefMatchesNoCase<Char>(&canonicalize, from, current, len,
subject, unicode)) {
subject)) {
current += len;
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
} else {
@ -566,11 +562,7 @@ static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base,
pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
break;
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
FALL_THROUGH;
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
bool unicode = (insn & BYTECODE_MASK) ==
BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from < 0 || len <= 0) {
@ -582,7 +574,7 @@ static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base,
break;
} else {
if (BackRefMatchesNoCase<Char>(&canonicalize, from, current - len,
len, subject, unicode)) {
len, subject)) {
current -= len;
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
} else {

File diff suppressed because it is too large Load diff

View file

@ -14,18 +14,13 @@ namespace dart {
// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
class RegExpBuilder : public ZoneAllocated {
public:
explicit RegExpBuilder(RegExpFlags flags);
RegExpBuilder();
void AddCharacter(uint16_t character);
void AddUnicodeCharacter(uint32_t character);
void AddEscapedUnicodeCharacter(uint32_t character);
// "Adds" an empty expression. Does nothing except consume a
// following quantifier
void AddEmpty();
void AddCharacterClass(RegExpCharacterClass* cc);
void AddCharacterClassForDesugaring(uint32_t c);
void AddAtom(RegExpTree* tree);
void AddTerm(RegExpTree* tree);
void AddAssertion(RegExpTree* tree);
void NewAlternative(); // '|'
// Attempt to add a quantifier to the last atom added. The return value
@ -35,30 +30,17 @@ class RegExpBuilder : public ZoneAllocated {
intptr_t max,
RegExpQuantifier::QuantifierType type);
RegExpTree* ToRegExp();
RegExpFlags flags() const { return flags_; }
bool ignore_case() const { return flags_.IgnoreCase(); }
bool is_multi_line() const { return flags_.IsMultiLine(); }
bool is_dot_all() const { return flags_.IsDotAll(); }
private:
static const uint16_t kNoPendingSurrogate = 0;
void AddLeadSurrogate(uint16_t lead_surrogate);
void AddTrailSurrogate(uint16_t trail_surrogate);
void FlushPendingSurrogate();
void FlushCharacters();
void FlushText();
void FlushTerms();
bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);
bool NeedsDesugaringForIgnoreCase(uint32_t c);
Zone* zone() const { return zone_; }
bool is_unicode() const { return flags_.IsUnicode(); }
Zone* zone_;
bool pending_empty_;
RegExpFlags flags_;
ZoneGrowableArray<uint16_t>* characters_;
uint16_t pending_surrogate_;
GrowableArray<RegExpTree*> terms_;
GrowableArray<RegExpTree*> text_;
GrowableArray<RegExpTree*> alternatives_;
@ -74,15 +56,16 @@ using RegExpCaptureName = ZoneGrowableArray<uint16_t>;
class RegExpParser : public ValueObject {
public:
RegExpParser(const String& in, String* error, RegExpFlags regexp_flags);
RegExpParser(const String& in, String* error, bool multiline_mode);
static void ParseRegExp(const String& input,
RegExpFlags regexp_flags,
bool multiline,
RegExpCompileData* result);
RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction();
RegExpTree* ParseGroup();
RegExpTree* ParseCharacterClass();
// Parses a {...,...} quantifier and stores the range in the given
// out parameters.
@ -95,24 +78,6 @@ class RegExpParser : public ValueObject {
// Checks whether the following is a length-digit hexadecimal number,
// and sets the value if it is.
bool ParseHexEscape(intptr_t length, uint32_t* value);
bool ParseUnicodeEscape(uint32_t* value);
bool ParseUnlimitedLengthHexNumber(uint32_t max_value, uint32_t* value);
// Parses either {UNICODE_PROPERTY_NAME=UNICODE_PROPERTY_VALUE} or
// the shorthand {UNICODE_PROPERTY_NAME_OR_VALUE} and stores the
// result in the given out parameters. If the shorthand is used,
// nothing will be added to name_2.
bool ParsePropertyClassName(ZoneGrowableArray<char>* name_1,
ZoneGrowableArray<char>* name_2);
// Adds the specified unicode property to the provided character range.
bool AddPropertyClassRange(ZoneGrowableArray<CharacterRange>* add_to,
bool negate,
ZoneGrowableArray<char>* name_1,
ZoneGrowableArray<char>* name_2);
// Returns a regexp node that corresponds to one of these unicode
// property sequences: "Any", "ASCII", "Assigned".
RegExpTree* GetPropertySequence(ZoneGrowableArray<char>* name_1);
RegExpTree* ParseCharacterClass(const RegExpBuilder* builder);
uint32_t ParseOctalLiteral();
@ -122,10 +87,7 @@ class RegExpParser : public ValueObject {
// can be reparsed.
bool ParseBackReferenceIndex(intptr_t* index_out);
// Attempts to parse a possible escape within a character class.
bool ParseClassEscape(ZoneGrowableArray<CharacterRange>* ranges,
bool add_unicode_case_equivalents,
uint32_t* char_out);
CharacterRange ParseClassAtom(uint16_t* char_class);
void ReportError(const char* message);
void Advance();
void Advance(intptr_t dist);
@ -138,9 +100,6 @@ class RegExpParser : public ValueObject {
void set_contains_anchor() { contains_anchor_ = true; }
intptr_t captures_started() { return captures_started_; }
intptr_t position() { return next_pos_ - 1; }
bool is_unicode() const { return top_level_flags_.IsUnicode(); }
static bool IsSyntaxCharacterOrSlash(uint32_t c);
static const intptr_t kMaxCaptures = 1 << 16;
static const uint32_t kEndMarker = (1 << 21);
@ -161,10 +120,9 @@ class RegExpParser : public ValueObject {
RegExpLookaround::Type lookaround_type,
intptr_t disjunction_capture_index,
const RegExpCaptureName* capture_name,
RegExpFlags flags,
Zone* zone)
: previous_state_(previous_state),
builder_(new (zone) RegExpBuilder(flags)),
builder_(new (zone) RegExpBuilder()),
group_type_(group_type),
lookaround_type_(lookaround_type),
disjunction_capture_index_(disjunction_capture_index),
@ -240,7 +198,6 @@ class RegExpParser : public ValueObject {
bool has_more() { return has_more_; }
bool has_next() { return next_pos_ < in().Length(); }
uint32_t Next();
uint32_t ReadNext(bool update_position);
const String& in() { return in_; }
void ScanForCaptures();
@ -255,7 +212,7 @@ class RegExpParser : public ValueObject {
// The capture count is only valid after we have scanned for captures.
intptr_t capture_count_;
bool has_more_;
RegExpFlags top_level_flags_;
bool multiline_;
bool simple_;
bool contains_anchor_;
bool is_scanned_for_captures_;

View file

@ -16,7 +16,7 @@ static RawArray* Match(const String& pat, const String& str) {
Thread* thread = Thread::Current();
Zone* zone = thread->zone();
const RegExp& regexp =
RegExp::Handle(RegExpEngine::CreateRegExp(thread, pat, RegExpFlags()));
RegExp::Handle(RegExpEngine::CreateRegExp(thread, pat, false, false));
const Smi& idx = Smi::Handle(Smi::New(0));
return IRRegExpMacroAssembler::Execute(regexp, str, idx, /*sticky=*/false,
zone);

View file

@ -79,9 +79,7 @@ namespace dart {
V(double, LibcAsin, double) \
V(double, LibcAtan, double) \
V(double, LibcAtan2, double, double) \
V(RawBool*, CaseInsensitiveCompareUCS2, RawString*, RawSmi*, RawSmi*, \
RawSmi*) \
V(RawBool*, CaseInsensitiveCompareUTF16, RawString*, RawSmi*, RawSmi*, \
V(RawBool*, CaseInsensitiveCompareUC16, RawString*, RawSmi*, RawSmi*, \
RawSmi*) \
V(void, EnterSafepoint) \
V(void, ExitSafepoint)

View file

@ -1,32 +0,0 @@
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
#ifndef RUNTIME_VM_SPLAY_TREE_H_
#define RUNTIME_VM_SPLAY_TREE_H_
#include "platform/splay-tree.h"
#include "vm/zone.h"
namespace dart {
// Splay tree whose nodes, like the tree object itself, are allocated in a
// Zone. The Config type parameter selects the concrete splay tree
// configuration (see platform/splay-tree.h).
template <typename Config>
class ZoneSplayTree final : public SplayTree<Config, ZoneAllocated, Zone> {
  // Short name for the instantiated base class.
  using Base = SplayTree<Config, ZoneAllocated, Zone>;

 public:
  // |zone| must not be null; it is checked with ASSERT_NOTNULL before being
  // handed to the base class.
  explicit ZoneSplayTree(Zone* zone) : Base(ASSERT_NOTNULL(zone)) {}

  ~ZoneSplayTree() {
    // The Zone frees the nodes of a zone-allocated tree, so drop the root
    // here to spare the destructor a needless walk over every node.
    Base::ResetRoot();
  }
};
} // namespace dart
#endif // RUNTIME_VM_SPLAY_TREE_H_

View file

@ -83,7 +83,7 @@ static void GenerateCallToCallLeafRuntimeStub(Assembler* assembler,
__ LoadObject(R1, lhs_index);
__ LoadObject(R2, rhs_index);
__ LoadObject(R3, length);
__ CallRuntime(kCaseInsensitiveCompareUCS2RuntimeEntry, 4);
__ CallRuntime(kCaseInsensitiveCompareUC16RuntimeEntry, 4);
__ LeaveDartFrame();
__ ret(); // Return value is in R0.
}

View file

@ -82,7 +82,7 @@ static void GenerateCallToCallLeafRuntimeStub(Assembler* assembler,
__ LoadObject(R1, lhs_index);
__ LoadObject(R2, rhs_index);
__ LoadObject(R3, length);
__ CallRuntime(kCaseInsensitiveCompareUCS2RuntimeEntry, 4);
__ CallRuntime(kCaseInsensitiveCompareUC16RuntimeEntry, 4);
__ LeaveDartFrameAndReturn(); // Return value is in R0.
}

View file

@ -86,7 +86,7 @@ static void GenerateCallToCallLeafRuntimeStub(Assembler* assembler,
__ movl(Address(ESP, 2 * kWordSize), EAX); // Push argument 3.
__ LoadObject(EAX, length);
__ movl(Address(ESP, 3 * kWordSize), EAX); // Push argument 4.
__ CallRuntime(kCaseInsensitiveCompareUCS2RuntimeEntry, 4);
__ CallRuntime(kCaseInsensitiveCompareUC16RuntimeEntry, 4);
__ leave();
__ ret(); // Return value is in EAX.
}

View file

@ -83,7 +83,7 @@ static void GenerateCallToCallLeafRuntimeStub(Assembler* assembler,
__ LoadObject(CallingConventions::kArg2Reg, lhs_index);
__ LoadObject(CallingConventions::kArg3Reg, rhs_index);
__ LoadObject(CallingConventions::kArg4Reg, length);
__ CallRuntime(kCaseInsensitiveCompareUCS2RuntimeEntry, 4);
__ CallRuntime(kCaseInsensitiveCompareUC16RuntimeEntry, 4);
__ LeaveStubFrame();
__ ret(); // Return value is in RAX.
}

View file

@ -528,15 +528,9 @@ class RegExp {
@pragma('dart2js:noInline')
@patch
factory RegExp(String source,
{bool multiLine: false,
bool caseSensitive: true,
bool unicode: false,
bool dotAll: false}) =>
{bool multiLine: false, bool caseSensitive: true}) =>
new JSSyntaxRegExp(source,
multiLine: multiLine,
caseSensitive: caseSensitive,
unicode: unicode,
dotAll: dotAll);
multiLine: multiLine, caseSensitive: caseSensitive);
@patch
static String escape(String text) => quoteStringForRegExp(text);

View file

@ -42,22 +42,18 @@ class JSSyntaxRegExp implements RegExp {
var _nativeGlobalRegExp;
var _nativeAnchoredRegExp;
String toString() =>
'RegExp/$pattern/' + JS('String', '#.flags', _nativeRegExp);
String toString() => 'RegExp/$pattern/';
JSSyntaxRegExp(String source,
{bool multiLine: false,
bool caseSensitive: true,
bool unicode: false,
bool dotAll: false})
{bool multiLine: false, bool caseSensitive: true})
: this.pattern = source,
this._nativeRegExp = makeNative(
source, multiLine, caseSensitive, unicode, dotAll, false);
this._nativeRegExp =
makeNative(source, multiLine, caseSensitive, false);
get _nativeGlobalVersion {
if (_nativeGlobalRegExp != null) return _nativeGlobalRegExp;
return _nativeGlobalRegExp = makeNative(
pattern, _isMultiLine, _isCaseSensitive, _isUnicode, _isDotAll, true);
return _nativeGlobalRegExp =
makeNative(pattern, _isMultiLine, _isCaseSensitive, true);
}
get _nativeAnchoredVersion {
@ -67,22 +63,18 @@ class JSSyntaxRegExp implements RegExp {
// that it tries, and you can see if the original regexp matched, or it
// was the added zero-width match that matched, by looking at the last
// capture. If it is a String, the match participated, otherwise it didn't.
return _nativeAnchoredRegExp = makeNative('$pattern|()', _isMultiLine,
_isCaseSensitive, _isUnicode, _isDotAll, true);
return _nativeAnchoredRegExp =
makeNative('$pattern|()', _isMultiLine, _isCaseSensitive, true);
}
bool get _isMultiLine => JS('bool', '#.multiline', _nativeRegExp);
bool get _isCaseSensitive => JS('bool', '!#.ignoreCase', _nativeRegExp);
bool get _isUnicode => JS('bool', '#.unicode', _nativeRegExp);
bool get _isDotAll => JS('bool', '#.dotAll', _nativeRegExp);
static makeNative(String source, bool multiLine, bool caseSensitive,
bool unicode, bool dotAll, bool global) {
static makeNative(
String source, bool multiLine, bool caseSensitive, bool global) {
checkString(source);
String m = multiLine == true ? 'm' : '';
String i = caseSensitive == true ? '' : 'i';
String u = unicode ? 'u' : '';
String s = dotAll ? 's' : '';
String g = global ? 'g' : '';
// We're using the JavaScript's try catch instead of the Dart one to avoid
// dragging in Dart runtime support just because of using RegExp.
@ -95,12 +87,10 @@ class JSSyntaxRegExp implements RegExp {
} catch (e) {
return e;
}
})(#, # + # + # + # + #)''',
})(#, # + # + #)''',
source,
m,
i,
u,
s,
g);
if (JS('bool', '# instanceof RegExp', regexp)) return regexp;
// The returned value is the JavaScript exception. Turn it into a
@ -109,7 +99,7 @@ class JSSyntaxRegExp implements RegExp {
throw new FormatException('Illegal RegExp pattern ($errorMessage)', source);
}
RegExpMatch firstMatch(String string) {
Match firstMatch(String string) {
List m = JS('JSExtendableArray|Null', r'#.exec(#)', _nativeRegExp,
checkString(string));
if (m == null) return null;
@ -126,7 +116,7 @@ class JSSyntaxRegExp implements RegExp {
return null;
}
Iterable<RegExpMatch> allMatches(String string, [int start = 0]) {
Iterable<Match> allMatches(String string, [int start = 0]) {
checkString(string);
checkInt(start);
if (start < 0 || start > string.length) {
@ -135,7 +125,7 @@ class JSSyntaxRegExp implements RegExp {
return new _AllMatchesIterable(this, string, start);
}
RegExpMatch _execGlobal(String string, int start) {
Match _execGlobal(String string, int start) {
Object regexp = _nativeGlobalVersion;
JS('void', '#.lastIndex = #', regexp, start);
List match = JS('JSExtendableArray|Null', '#.exec(#)', regexp, string);
@ -143,7 +133,7 @@ class JSSyntaxRegExp implements RegExp {
return new _MatchImplementation(this, match);
}
RegExpMatch _execAnchored(String string, int start) {
Match _execAnchored(String string, int start) {
Object regexp = _nativeAnchoredVersion;
JS('void', '#.lastIndex = #', regexp, start);
List match = JS('JSExtendableArray|Null', '#.exec(#)', regexp, string);
@ -154,7 +144,7 @@ class JSSyntaxRegExp implements RegExp {
return new _MatchImplementation(this, match);
}
RegExpMatch matchAsPrefix(String string, [int start = 0]) {
Match matchAsPrefix(String string, [int start = 0]) {
if (start < 0 || start > string.length) {
throw new RangeError.range(start, 0, string.length);
}
@ -163,8 +153,6 @@ class JSSyntaxRegExp implements RegExp {
bool get isMultiLine => _isMultiLine;
bool get isCaseSensitive => _isCaseSensitive;
bool get isUnicode => _isUnicode;
bool get isDotAll => _isDotAll;
}
class _MatchImplementation implements RegExpMatch {
@ -231,34 +219,25 @@ class _MatchImplementation implements RegExpMatch {
}
}
class _AllMatchesIterable extends IterableBase<RegExpMatch> {
class _AllMatchesIterable extends IterableBase<Match> {
final JSSyntaxRegExp _re;
final String _string;
final int _start;
_AllMatchesIterable(this._re, this._string, this._start);
Iterator<RegExpMatch> get iterator =>
new _AllMatchesIterator(_re, _string, _start);
Iterator<Match> get iterator => new _AllMatchesIterator(_re, _string, _start);
}
class _AllMatchesIterator implements Iterator<RegExpMatch> {
class _AllMatchesIterator implements Iterator<Match> {
final JSSyntaxRegExp _regExp;
String _string;
int _nextIndex;
RegExpMatch _current;
Match _current;
_AllMatchesIterator(this._regExp, this._string, this._nextIndex);
RegExpMatch get current => _current;
static bool _isLeadSurrogate(int c) {
return c >= 0xd800 && c <= 0xdbff;
}
static bool _isTrailSurrogate(int c) {
return c >= 0xdc00 && c <= 0xdfff;
}
Match get current => _current;
bool moveNext() {
if (_string == null) return false;
@ -268,15 +247,6 @@ class _AllMatchesIterator implements Iterator<RegExpMatch> {
_current = match;
int nextIndex = match.end;
if (match.start == nextIndex) {
// Zero-width match. Advance by one more, unless the regexp
// is in unicode mode and it would put us within a surrogate
// pair. In that case, advance past the code point as a whole.
if (_regExp.isUnicode &&
_nextIndex + 1 < _string.length &&
_isLeadSurrogate(_string.codeUnitAt(_nextIndex)) &&
_isTrailSurrogate(_string.codeUnitAt(_nextIndex + 1))) {
nextIndex++;
}
nextIndex++;
}
_nextIndex = nextIndex;
@ -290,6 +260,6 @@ class _AllMatchesIterator implements Iterator<RegExpMatch> {
}
/// Find the first match of [regExp] in [string] at or after [start].
RegExpMatch firstMatchAfter(JSSyntaxRegExp regExp, String string, int start) {
Match firstMatchAfter(JSSyntaxRegExp regExp, String string, int start) {
return regExp._execGlobal(string, start);
}

View file

@ -16,7 +16,7 @@ part of dart.core;
* for the specification of JavaScript regular expressions.
*
* [firstMatch] is the main implementation method that applies a regular
* expression to a string and returns the first [RegExpMatch]. All
* expression to a string and returns the first [Match]. All
* other methods in [RegExp] can build on it.
*
* Use [allMatches] to look for all matches of a regular expression in
@ -27,7 +27,7 @@ part of dart.core;
* ```dart
* RegExp exp = new RegExp(r"(\w+)");
* String str = "Parse my string";
* Iterable<RegExpMatch> matches = exp.allMatches(str);
* Iterable<Match> matches = exp.allMatches(str);
* ```
*
* Note the use of a _raw string_ (a string prefixed with `r`)
@ -47,12 +47,6 @@ abstract class RegExp implements Pattern {
*
* If `caseSensitive` is disabled, then case is ignored.
*
* If `unicode` is enabled, then the pattern is treated as a Unicode
* pattern as described by the ECMAScript standard.
*
* If `dotAll` is enabled, then the `.` pattern will match _all_ characters,
* including line terminators.
*
* Example:
*
* ```dart
@ -66,10 +60,7 @@ abstract class RegExp implements Pattern {
* interpolation is required.
*/
external factory RegExp(String source,
{bool multiLine = false,
bool caseSensitive = true,
bool unicode = false,
bool dotAll = false});
{bool multiLine = false, bool caseSensitive = true});
/**
* Returns a regular expression that matches [text].
@ -88,14 +79,14 @@ abstract class RegExp implements Pattern {
* Searches for the first match of the regular expression
* in the string [input]. Returns `null` if there is no match.
*/
RegExpMatch firstMatch(String input);
Match firstMatch(String input);
/**
* Returns an iterable of the matches of the regular expression on [input].
*
* If [start] is provided, only start looking for matches at `start`.
*/
Iterable<RegExpMatch> allMatches(String input, [int start = 0]);
Iterable<Match> allMatches(String input, [int start = 0]);
/**
* Returns whether the regular expression has a match in the string [input].
@ -129,33 +120,6 @@ abstract class RegExp implements Pattern {
* versions of the same letter.
*/
bool get isCaseSensitive;
/**
* Whether this regular expression uses full Unicode matching.
*
* In Unicode mode, UTF-16 surrogate pairs in the original string will be
* treated as a single code point and will not match separately. Otherwise,
* the target string will be treated purely as a sequence of individual code
* units and surrogates will not be treated specially.
*
* In Unicode mode, the syntax of the RegExp pattern is more restricted, but
* some pattern features, like Unicode property escapes, are only available in
* this mode.
*/
bool get isUnicode;
/**
* Whether "." in this regular expression matches line terminators.
*
* Normally, the "." character matches a single character, unless that
* character is a line terminator. If this feature is active, then the "."
* character will match any single character including line terminators.
*
* This feature is distinct from [isMultiLine], as they affect the behavior
* of different pattern characters, and so they can be used together or
* separately.
*/
bool get isDotAll;
}
/**

View file

@ -1,117 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2017 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
// Exercises the `dotAll` flag of RegExp: first the flag accessors, then which
// single characters an anchored `.` accepts for every combination of the
// `unicode` and `dotAll` flags.
void main() {
  // Dispatches to assertTrue / assertFalse according to [expected].
  void expectBool(bool expected, bool actual) {
    if (expected) {
      assertTrue(actual);
    } else {
      assertFalse(actual);
    }
  }

  // Checks the four flag getters of [regExp]; the expectations are given in
  // the order caseSensitive, multiLine, unicode, dotAll.
  void expectFlags(RegExp regExp, bool caseSensitive, bool multiLine,
      bool unicode, bool dotAll) {
    expectBool(caseSensitive, regExp.isCaseSensitive);
    expectBool(multiLine, regExp.isMultiLine);
    expectBool(unicode, regExp.isUnicode);
    expectBool(dotAll, regExp.isDotAll);
  }

  // Checks what an anchored single `.` pattern accepts: a fixed set of inputs
  // that must always match, one supplementary-plane character, and the four
  // inputs whose outcome changes with the dotAll flag.
  void expectDot(RegExp regExp, bool supplementary, bool lineTerminators) {
    const alwaysMatched = [
      "a",
      "3",
      "π",
      "\u2027",
      "\u0085",
      "\v",
      "\f",
      "\u180E"
    ];
    for (final input in alwaysMatched) {
      assertTrue(regExp.hasMatch(input));
    }

    // Supplementary plane.
    expectBool(supplementary, regExp.hasMatch("\u{10300}"));

    const terminators = ["\n", "\r", "\u2028", "\u2029"];
    for (final input in terminators) {
      expectBool(lineTerminators, regExp.hasMatch(input));
    }
  }

  // The flags accessors.
  expectFlags(RegExp(r".", dotAll: true), true, false, false, true);
  expectFlags(
      RegExp(r".",
          caseSensitive: false, multiLine: true, unicode: true, dotAll: true),
      false,
      true,
      true,
      true);
  expectFlags(
      RegExp(r".", caseSensitive: false, multiLine: true, unicode: true),
      false,
      true,
      true,
      false);

  // Default '.' behavior.
  expectDot(RegExp(r"^.$"), false, false);

  // Default '.' behavior (unicode).
  expectDot(RegExp(r"^.$", unicode: true), true, false);

  // DotAll '.' behavior.
  expectDot(RegExp(r"^.$", dotAll: true), false, true);

  // DotAll '.' behavior (unicode).
  expectDot(RegExp(r"^.$", unicode: true, dotAll: true), true, true);
}

View file

@ -433,6 +433,8 @@ void main() {
assertThrows(() => new RegExp(r"(?<=.)?")); //# 01: ok
assertThrows(() => new RegExp(r"(?<=.)+")); //# 01: ok
assertThrows(() => new RegExp(r"(?<=.)*", unicode: true)); //# 01: ok
assertThrows(() => new RegExp(r"(?<=.){1,2}", unicode: true)); //# 01: ok
// No unicode flag (yet), so can't test these.
// See https://github.com/dart-lang/sdk/issues/36170.
// assertThrows("/(?<=.)*/u", SyntaxError);
// assertThrows("/(?<=.){1,2}/u", SyntaxError);
}

View file

@ -31,90 +31,35 @@ import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
void main() {
void testRE(RegExp re, String input, bool expectedResult) {
if (expectedResult) {
assertTrue(re.hasMatch(input));
} else {
assertFalse(re.hasMatch(input));
}
}
void execRE(RegExp re, String input, List<String> expectedResult) {
assertTrue(re.hasMatch(input));
shouldBe(re.firstMatch(input), expectedResult);
}
void execString(String pattern, String input, List<String> expectedResult,
{bool unicode = true, bool caseSensitive: false}) {
execRE(RegExp(pattern, unicode: unicode, caseSensitive: caseSensitive),
input, expectedResult);
}
void namedRE(RegExp re, String input, Map<String, String> expectedResults) {
assertTrue(re.hasMatch(input));
var match = re.firstMatch(input);
var match = re.firstMatch(input) as RegExpMatch;
for (var s in expectedResults.keys) {
assertEquals(match.namedGroup(s), expectedResults[s]);
}
}
void execStringGroups(
String pattern, String input, Map<String, String> expectedResults,
{bool unicode = true, bool caseSensitive: false}) {
namedRE(RegExp(pattern, unicode: unicode, caseSensitive: caseSensitive),
input, expectedResults);
}
void hasNames(RegExp re, String input, List<String> expectedResults) {
assertTrue(re.hasMatch(input));
var match = re.firstMatch(input);
var match = re.firstMatch(input) as RegExpMatch;
for (var s in match.groupNames) {
assertTrue(expectedResults.contains(s));
}
}
void matchesIndexEqual(String input, RegExp re1, RegExp re2) {
var m1 = re1.firstMatch(input);
var m2 = re2.firstMatch(input);
if (m2 == null) {
assertNull(m1);
} else {
assertTrue(m1 != null);
assertEquals(m1.groupCount, m2.groupCount);
for (int i = 0; i < m1.groupCount; i++) {
assertEquals(m1.group(i), m2.group(i));
}
}
}
// Malformed named captures.
// Empty name.
assertThrows(() => RegExp(r"(?<>a)", unicode: true));
// Unterminated name.
assertThrows(() => RegExp(r"(?<aa)", unicode: true));
// Name starting with digits.
assertThrows(() => RegExp(r"(?<42a>a)", unicode: true));
// Name starting with invalid char.
assertThrows(() => RegExp(r"(?<:a>a)", unicode: true));
// Name containing invalid char.
assertThrows(() => RegExp(r"(?<a:>a)", unicode: true));
// Duplicate name.
assertThrows(() => RegExp(r"(?<a>a)(?<a>a)", unicode: true));
// Duplicate name.
assertThrows(() => RegExp(r"(?<a>a)(?<b>b)(?<a>a)", unicode: true));
// Invalid reference.
assertThrows(() => RegExp(r"\k<a>", unicode: true));
// Unterminated reference.
assertThrows(() => RegExp(r"\k<a", unicode: true));
// Lone \k.
assertThrows(() => RegExp(r"\k", unicode: true));
// Lone \k.
assertThrows(() => RegExp(r"(?<a>.)\k", unicode: true));
// Unterminated reference.
assertThrows(() => RegExp(r"(?<a>.)\k<a", unicode: true));
// Invalid reference.
assertThrows(() => RegExp(r"(?<a>.)\k<b>", unicode: true));
// Invalid reference.
assertThrows(() => RegExp(r"(?<a>a)\k<ab>", unicode: true));
// Invalid reference.
assertThrows(() => RegExp(r"(?<ab>a)\k<a>", unicode: true));
// Invalid reference.
assertThrows(() => RegExp(r"\k<a>(?<ab>a)", unicode: true));
// Identity escape in capture.
assertThrows(() => RegExp(r"(?<a>\a)", unicode: true));
// Behavior in non-unicode mode.
assertThrows(() => RegExp(r"(?<>a)"));
assertThrows(() => RegExp(r"(?<aa)"));
@ -157,182 +102,6 @@ void main() {
assertThrows(() => RegExp(r"\k<a(?<a>.)"));
assertThrows(() => RegExp(r"\k(?<a>.)"));
// Basic named groups.
execString(r"(?<a>a)", "bab", ["a", "a"]);
execString(r"(?<a42>a)", "bab", ["a", "a"]);
execString(r"(?<_>a)", "bab", ["a", "a"]);
execString(r"(?<$>a)", "bab", ["a", "a"]);
execString(r".(?<$>a).", "bab", ["bab", "a"]);
execString(r".(?<a>a)(.)", "bab", ["bab", "a", "b"]);
execString(r".(?<a>a)(?<b>.)", "bab", ["bab", "a", "b"]);
execString(r".(?<a>\w\w)", "bab", ["bab", "ab"]);
execString(r"(?<a>\w\w\w)", "bab", ["bab", "bab"]);
execString(r"(?<a>\w\w)(?<b>\w)", "bab", ["bab", "ba", "b"]);
execString(r"(?<a>a)", "bab", ["a", "a"], unicode: false);
execString(r"(?<a42>a)", "bab", ["a", "a"], unicode: false);
execString(r"(?<_>a)", "bab", ["a", "a"], unicode: false);
execString(r"(?<$>a)", "bab", ["a", "a"], unicode: false);
execString(r".(?<$>a).", "bab", ["bab", "a"], unicode: false);
execString(r".(?<a>a)(.)", "bab", ["bab", "a", "b"], unicode: false);
execString(r".(?<a>a)(?<b>.)", "bab", ["bab", "a", "b"], unicode: false);
execString(r".(?<a>\w\w)", "bab", ["bab", "ab"], unicode: false);
execString(r"(?<a>\w\w\w)", "bab", ["bab", "bab"], unicode: false);
execString(r"(?<a>\w\w)(?<b>\w)", "bab", ["bab", "ba", "b"], unicode: false);
matchesIndexEqual(
"bab", RegExp(r"(?<a>a)", unicode: true), RegExp(r"(a)", unicode: true));
matchesIndexEqual("bab", RegExp(r"(?<a42>a)", unicode: true),
RegExp(r"(a)", unicode: true));
matchesIndexEqual(
"bab", RegExp(r"(?<_>a)", unicode: true), RegExp(r"(a)", unicode: true));
matchesIndexEqual(
"bab", RegExp(r"(?<$>a)", unicode: true), RegExp(r"(a)", unicode: true));
matchesIndexEqual("bab", RegExp(r".(?<$>a).", unicode: true),
RegExp(r".(a).", unicode: true));
matchesIndexEqual("bab", RegExp(r".(?<a>a)(.)", unicode: true),
RegExp(r".(a)(.)", unicode: true));
matchesIndexEqual("bab", RegExp(r".(?<a>a)(?<b>.)", unicode: true),
RegExp(r".(a)(.)", unicode: true));
matchesIndexEqual("bab", RegExp(r".(?<a>\w\w)", unicode: true),
RegExp(r".(\w\w)", unicode: true));
matchesIndexEqual("bab", RegExp(r"(?<a>\w\w\w)", unicode: true),
RegExp(r"(\w\w\w)", unicode: true));
matchesIndexEqual("bab", RegExp(r"(?<a>\w\w)(?<b>\w)", unicode: true),
RegExp(r"(\w\w)(\w)", unicode: true));
execString(r"(?<b>b).\1", "bab", ["bab", "b"]);
execString(r"(.)(?<a>a)\1\2", "baba", ["baba", "b", "a"]);
execString(r"(.)(?<a>a)(?<b>\1)(\2)", "baba", ["baba", "b", "a", "b", "a"]);
execString(r"(?<lt><)a", "<a", ["<a", "<"]);
execString(r"(?<gt>>)a", ">a", [">a", ">"]);
// Named references.
var pattern = r"(?<b>.).\k<b>";
execString(pattern, "bab", ["bab", "b"]);
assertFalse(RegExp(pattern, unicode: true).hasMatch("baa"));
// Nested groups.
pattern = r"(?<a>.(?<b>.(?<c>.)))";
execString(pattern, "bab", ["bab", "bab", "ab", "b"]);
execStringGroups(pattern, "bab", {"a": "bab", "b": "ab", "c": "b"});
// Reference inside group.
pattern = r"(?<a>\k<a>\w)..";
execString(pattern, "bab", ["bab", "b"]);
execStringGroups(pattern, "bab", {"a": "b"});
// Reference before group.
pattern = r"\k<a>(?<a>b)\w\k<a>";
execString(pattern, "bab", ["bab", "b"], unicode: false);
execString(pattern, "bab", ["bab", "b"]);
execStringGroups(pattern, "bab", {"a": "b"});
pattern = r"(?<b>b)\k<a>(?<a>a)\k<b>";
execString(pattern, "bab", ["bab", "b", "a"], unicode: false);
execString(pattern, "bab", ["bab", "b", "a"]);
execStringGroups(pattern, "bab", {"a": "a", "b": "b"});
// Reference named groups.
var match = RegExp(r"(?<a>a)(?<b>b)\k<a>", unicode: true).firstMatch("aba");
assertEquals("a", match.namedGroup("a"));
assertEquals("b", match.namedGroup("b"));
assertFalse(match.groupNames.contains("c"));
match =
RegExp(r"(?<a>a)(?<b>b)\k<a>|(?<c>c)", unicode: true).firstMatch("aba");
assertNull(match.namedGroup("c"));
// Unicode names.
execStringGroups(r"(?<π>a)", "bab", {"π": "a"});
execStringGroups(r"(?<\u{03C0}>a)", "bab", {"π": "a"});
execStringGroups(r"(?<π>a)", "bab", {"\u03C0": "a"});
execStringGroups(r"(?<\u{03C0}>a)", "bab", {"\u03C0": "a"});
execStringGroups(r"(?<$>a)", "bab", {"\$": "a"});
execStringGroups(r"(?<_>a)", "bab", {"_": "a"});
execStringGroups(r"(?<$𐒤>a)", "bab", {"\$𐒤": "a"});
execStringGroups(r"(?<_\u200C>a)", "bab", {"_\u200C": "a"});
execStringGroups(r"(?<_\u200D>a)", "bab", {"_\u200D": "a"});
execStringGroups(r"(?<ಠ_ಠ>a)", "bab", {"ಠ_ಠ": "a"});
// ID_Continue but not ID_Start.
assertThrows(() => RegExp(r"/(?<❤>a)", unicode: true));
assertThrows(() => RegExp(r"/(?<𐒤>a)", unicode: true));
execStringGroups(r"(?<π>a)", "bab", {"π": "a"}, unicode: false);
execStringGroups(r"(?<$>a)", "bab", {"\$": "a"}, unicode: false);
execStringGroups(r"(?<_>a)", "bab", {"_": "a"}, unicode: false);
assertThrows(() => RegExp(r"(?<$𐒤>a)"));
execStringGroups(r"(?<ಠ_ಠ>a)", "bab", {"ಠ_ಠ": "a"}, unicode: false);
// ID_Continue but not ID_Start.
assertThrows(() => RegExp(r"/(?<❤>a)"));
assertThrows(() => RegExp(r"/(?<𐒤>a)"));
// Interaction with lookbehind assertions.
pattern = r"(?<=(?<a>\w){3})f";
execString(pattern, "abcdef", ["f", "c"]);
execStringGroups(pattern, "abcdef", {"a": "c"});
execStringGroups(r"(?<=(?<a>\w){4})f", "abcdef", {"a": "b"});
execStringGroups(r"(?<=(?<a>\w)+)f", "abcdef", {"a": "a"});
assertFalse(RegExp(r"(?<=(?<a>\w){6})f", unicode: true).hasMatch("abcdef"));
execString(r"((?<=\w{3}))f", "abcdef", ["f", ""]);
execString(r"(?<a>(?<=\w{3}))f", "abcdef", ["f", ""]);
execString(r"(?<!(?<a>\d){3})f", "abcdef", ["f", null]);
assertFalse(RegExp(r"(?<!(?<a>\D){3})f", unicode: true).hasMatch("abcdef"));
execString(r"(?<!(?<a>\D){3})f|f", "abcdef", ["f", null]);
execString(r"(?<a>(?<!\D{3}))f|f", "abcdef", ["f", null]);
// Matches contain the names of named captures
match = RegExp(r"(?<fst>.)|(?<snd>.)", unicode: true).firstMatch("abcd");
Expect.setEquals(["fst", "snd"], match.groupNames);
// Backslash as ID_Start and ID_Continue (v8:5868).
assertThrows(() => RegExp("(?<\\>.)")); // '\' misclassified as ID_Start.
assertThrows(() => RegExp("(?<a\\>.)")); // '\' misclassified as ID_Continue.
// Backreference before the group (exercises the capture mini-parser).
assertThrows(() => RegExp(r"/\1(?:.)", unicode: true));
assertThrows(() => RegExp(r"/\1(?<=a).", unicode: true));
assertThrows(() => RegExp(r"/\1(?<!a).", unicode: true));
execString(r"\1(?<a>.)", "abcd", ["a", "a"]);
// Unicode escapes in capture names. (Testing both unicode interpreted by
// Dart string handling and also escaped unicode making it to RegExp parser.)
// \u Lead \u Trail
assertTrue(RegExp("(?<a\uD801\uDCA4>.)", unicode: true).hasMatch("a"));
assertTrue(RegExp(r"(?<a\uD801\uDCA4>.)", unicode: true).hasMatch("a"));
assertThrows(() => RegExp("(?<a\uD801>.)", unicode: true)); // \u Lead
assertThrows(() => RegExp(r"(?<a\uD801>.)", unicode: true)); // \u Lead
assertThrows(() => RegExp("(?<a\uDCA4>.)", unicode: true)); // \u Trail
assertThrows(() => RegExp(r"(?<a\uDCA4>.)", unicode: true)); // \u Trail
// \u NonSurrogate
assertTrue(RegExp("(?<\u0041>.)", unicode: true).hasMatch("a"));
assertTrue(RegExp(r"(?<\u0041>.)", unicode: true).hasMatch("a"));
// \u{ Surrogate, ID_Continue }
assertTrue(RegExp("(?<a\u{104A4}>.)", unicode: true).hasMatch("a"));
assertTrue(RegExp(r"(?<a\u{104A4}>.)", unicode: true).hasMatch("a"));
// \u{ Out-of-bounds } -- only need to test RegExp parser for this.
assertThrows(() => RegExp(r"(?<a\\u{110000}>.)", unicode: true));
// Also checking non-unicode patterns, where surrogate pairs will not
// be combined (so only \u0041 will have any success).
assertThrows(() => RegExp("(?<a\uD801\uDCA4>.)"));
assertThrows(() => RegExp(r"(?<a\uD801\uDCA4>.)"));
assertThrows(() => RegExp("(?<a\uD801>.)"));
assertThrows(() => RegExp(r"(?<a\uD801>.)"));
assertThrows(() => RegExp("(?<a\uDCA4>.)"));
assertThrows(() => RegExp(r"(?<a\uDCA4>.)"));
assertTrue(RegExp("(?<\u0041>.)").hasMatch("a"));
assertTrue(RegExp(r"(?<\u0041>.)").hasMatch("a"));
assertThrows(() => RegExp("(?<a\u{104A4}>.)"));
assertThrows(() => RegExp(r"(?<a\u{104A4}>.)"));
assertThrows(() => RegExp("(?<a\u{10FFFF}>.)"));
assertThrows(() => RegExp(r"(?<a\u{10FFFF}>.)"));
assertThrows(() => RegExp(r"(?<a\\u{110000}>.)"));
// TODO(sstrickl): Add more tests when unicode flag support is in.
// https://github.com/dart-lang/sdk/issues/36170
}

View file

@ -1,160 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2011 the V8 project authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
// ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
// Runs [re] against [subject] and passes the first match, together with
// [expectation], to `shouldBe` from v8_regexp_utils.dart.
void execl(List<String> expectation, RegExp re, String subject) {
  final firstMatch = re.firstMatch(subject);
  shouldBe(firstMatch, expectation);
}
// Compiles [pattern] with the unicode flag enabled, runs it against [subject]
// and passes the first match, together with [expectation], to `shouldBe`.
void execs(List<String> expectation, String pattern, String subject) =>
    shouldBe(RegExp(pattern, unicode: true).firstMatch(subject), expectation);
// Checks unicode-mode character ranges, character classes and lookbehind
// against subjects containing paired and lone surrogates.
void main() {
  // Unicode-mode [pattern] run on [subject]; [groups] is handed to `execs` as
  // the expectation (full match first, then captures).
  void expectExec(String pattern, String subject, List<String> groups) =>
      execs(groups, pattern, subject);

  // Unicode-mode [pattern] run on [subject] with a `null` expectation.
  void expectNoMatch(String pattern, String subject) =>
      execs(null, pattern, subject);

  // Character ranges.
  expectExec(r"[A-D]", "A", ["A"]);
  expectExec(r"[A-D]+", "ZABCDEF", ["ABCD"]);

  expectExec(r"[\u1234-\u{12345}]", "\u{12345}", ["\u{12345}"]);
  expectNoMatch(r"[^\u1234-\u{12345}]", "\u{12345}");

  expectExec(r"[\u1234-\u{12345}]", "\u{1234}", ["\u{1234}"]);
  expectNoMatch(r"[^\u1234-\u{12345}]", "\u{1234}");

  expectNoMatch(r"[\u1234-\u{12345}]", "\u{1233}");
  expectExec(r"[^\u1234-\u{12345}]", "\u{1233}", ["\u{1233}"]);

  expectExec(r"[^\u1234-\u{12345}]", "\u{12346}", ["\u{12346}"]);
  expectNoMatch(r"[\u1234-\u{12345}]", "\u{12346}");

  expectExec(r"[\u{12340}-\u{12345}]", "\u{12342}", ["\u{12342}"]);
  expectExec(r"[\ud808\udf40-\ud808\udf45]", "\u{12342}", ["\u{12342}"]);
  expectNoMatch(r"[^\u{12340}-\u{12345}]", "\u{12342}");
  expectNoMatch(r"[^\ud808\udf40-\ud808\udf45]", "\u{12342}");

  expectExec(r"[\u{ff80}-\u{12345}]", "\u{ffff}", ["\u{ffff}"]);
  expectExec(r"[\u{ff80}-\ud808\udf45]", "\u{ffff}", ["\u{ffff}"]);
  expectNoMatch(r"[^\u{ff80}-\u{12345}]", "\u{ffff}");
  expectNoMatch(r"[^\u{ff80}-\ud808\udf45]", "\u{ffff}");

  // Lone surrogate
  expectExec(r"[^\u{ff80}-\u{12345}]", "\uff99\u{dc00}A", ["\udc00"]);
  expectExec(r"[\u0100-\u{10ffff}]", "A\udc01", ["\udc01"]);
  expectExec(r"[\udc01-\udc03]", "\ud801\udc02\udc03", ["\udc03"]);
  expectExec(r"[\ud801-\ud803]", "\ud802\udc01\ud801", ["\ud801"]);

  // Paired surrogate.
  expectNoMatch(r"[^\u{ff80}-\u{12345}]", "\u{d800}\u{dc00}");
  expectExec(r"[\u{ff80}-\u{12345}]", "\u{d800}\u{dc00}", ["\ud800\udc00"]);
  expectExec(r"foo\ud803\ude6dbar", "foo\u{10e6d}bar", ["foo\u{10e6d}bar"]);

  // Lone surrogates
  expectExec(r"\ud801+", "\ud801\udc01\ud801\ud801", ["\ud801\ud801"]);
  expectExec(r"\udc01+", "\ud801\ud801\udc01\udc01\udc01", ["\udc01\udc01"]);

  expectExec(r"\W\WA", "\ud801\udc01A\udc02\udc03A", ["\udc02\udc03A"]);
  expectExec(r"\ud801.", "\ud801\udc01\ud801\ud802", ["\ud801\ud802"]);
  expectExec(r"[\ud800-\udfff][\ud800-\udfff]A",
      "\ud801\udc01A\udc02\udc03A", ["\udc02\udc03A"]);

  // Character classes
  expectNoMatch(r"\w", "\ud801\udc01");
  // Same class without the unicode flag, for comparison.
  execl(["\ud801"], RegExp(r"[^\w]"), "\ud801\udc01");
  expectExec(r"[^\w]", "\ud801\udc01", ["\ud801\udc01"]);
  execl(["\ud801"], RegExp(r"\W"), "\ud801\udc01");
  expectExec(r"\W", "\ud801\udc01", ["\ud801\udc01"]);

  expectExec(r".X", "\ud800XaX", ["\ud800X"]);
  expectExec(r".(?<!\ud800)X", "\ud800XaX", ["aX"]);
  expectExec(r".(?<![\ud800-\ud900])X", "\ud800XaX", ["aX"]);

  expectNoMatch(r"[]", "\u1234");
  expectExec(r"[^]abc", "0abc", ["0abc"]);
  expectExec(r"[^]abc", "\u1234abc", ["\u1234abc"]);
  expectExec(r"[^]abc", "\u{12345}abc", ["\u{12345}abc"]);

  expectNoMatch(r"[\u{0}-\u{1F444}]", "\ud83d\udfff");

  // Backward matches of lone surrogates.
  expectExec(r"(?<=([\ud800-\ud900]A))B",
      "\ud801\udc00AB\udc00AB\ud802\ud803AB", ["B", "\ud803A"]);
  expectExec(r"(?<=([\ud800-\u{10300}]A))B",
      "\ud801\udc00AB\udc00AB\ud802\ud803AB", ["B", "\udc00A"]);
  expectExec(r"(?<=([\udc00-\udd00]A))B",
      "\ud801\udc00AB\udc11AB\ud802\ud803AB", ["B", "\udc11A"]);
  expectExec(r"(?<=(\ud800\w))X", "\ud800\udc00AX\udc11BX\ud800\ud800CX",
      ["X", "\ud800C"]);
  expectExec(r"(?<=(\ud800.))\w", "\ud800\udc00AX\udc11BX\ud800\ud800CX",
      ["C", "\ud800\ud800"]);
  expectExec(r"(?<=(\udc01\w))X", "\ud800\udc01AX\udc11BX\udc01\udc01CX",
      ["X", "\udc01C"]);
  expectExec(r"(?<=(\udc01.)).", "\ud800\udc01AX\udc11BX\udc01\udc01CX",
      ["C", "\udc01\udc01"]);

  // A lead surrogate, a trail surrogate and an ordinary character.
  const lead = "\ud800";
  const trail = "\udc00";
  const plain = "X";

  // Test string contains only match.
  void expectWhole(bool expected, String source, String subject) {
    assertEquals(
        expected, RegExp("^$source\$", unicode: true).hasMatch(subject));
  }

  // Test string starts with match.
  void expectPrefix(bool expected, String source, String subject) {
    assertEquals(expected, RegExp("^$source", unicode: true).hasMatch(subject));
  }

  expectWhole(true, plain, plain);
  expectWhole(true, lead, lead);
  expectWhole(true, trail, trail);
  expectWhole(true, "$lead$trail", "$lead$trail");
  expectWhole(true, "$trail$lead", "$trail$lead");
  expectWhole(false, trail, "$lead$trail");
  expectWhole(false, lead, "$lead$trail");
  expectWhole(true, ".(?<=$lead)", lead);
  expectWhole(true, ".(?<=$trail)", trail);
  expectWhole(true, ".(?<=$lead$trail)", "$lead$trail");
  expectWhole(true, ".(?<=$lead$trail)", "$lead$trail");

  expectPrefix(true, ".(?<=$trail)", "$trail$lead");
  expectPrefix(false, ".(?<=$lead)", "$lead$trail");
  expectPrefix(false, ".(?<=$trail)", "$lead$trail");
  expectPrefix(true, "..(?<=$trail)", "$trail$trail$lead");
  expectPrefix(true, "..(?<=$trail)", "$plain$trail$lead");
  expectPrefix(true, "...(?<=$lead)", "$plain$trail$lead");
  expectPrefix(false, "...(?<=$trail)", "$plain$lead$trail");
  expectPrefix(true, "..(?<=$lead$trail)", "$plain$lead$trail");
  expectPrefix(
      true, "..(?<=$lead$trail(?<=$lead$trail))", "$plain$lead$trail");
  expectPrefix(false, "..(?<=$lead($trail))", "$plain$lead$trail");
  expectPrefix(false, ".*$lead", "$plain$lead$trail");
  expectPrefix(true, ".*$lead", "$plain$lead$lead$trail");
  expectPrefix(false, ".*$lead", "$plain$lead$trail$lead$trail");
  expectPrefix(false, ".*$trail", "$plain$lead$trail$lead$trail");
  expectPrefix(true, ".*$trail", "$plain$lead$trail$trail$lead$trail");
}

View file

@ -1,307 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2014 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
// Shared check for the escape tests: [r] must match "foo" and "boo" and must
// not match "moo".
void testRegExpHelper(RegExp r) {
  for (final accepted in const ["foo", "boo"]) {
    assertTrue(r.hasMatch(accepted));
  }
  assertFalse(r.hasMatch("moo"));
}
// Checks `\uXXXX` and `\u{...}` escapes in an alternation, spelled three ways:
// raw strings, doubled backslashes, and escapes resolved by Dart itself.
void TestUnicodeEscapes() {
  // Runs testRegExpHelper on [fourDigit] without and with the unicode flag,
  // then on the two braced spellings with the unicode flag.
  void checkSpellings(
      String fourDigit, String braced, String bracedMixedWidth) {
    testRegExpHelper(RegExp(fourDigit));
    for (final source in [fourDigit, braced, bracedMixedWidth]) {
      testRegExpHelper(RegExp(source, unicode: true));
    }
  }

  checkSpellings(r"(\u0066|\u0062)oo", r"(\u{0066}|\u{0062})oo",
      r"(\u{66}|\u{000062})oo");

  // Note that we need \\ inside a string, otherwise it's interpreted as a
  // unicode escape inside a string.
  checkSpellings("(\\u0066|\\u0062)oo", "(\\u{0066}|\\u{0062})oo",
      "(\\u{66}|\\u{000062})oo");

  // Though, unicode escapes via strings should work too.
  checkSpellings(
      "(\u0066|\u0062)oo", "(\u{0066}|\u{0062})oo", "(\u{66}|\u{000062})oo");
}
/// Runs [testRegExpHelper] over character-class ranges whose endpoints are
/// written with four-digit and braced unicode escapes.
void TestUnicodeEscapesInCharacterClasses() {
  // Each row holds one spelling family: four-digit, braced, braced with
  // uneven padding.
  const spellings = [
    // Raw strings: the regexp parser sees the escapes.
    [r"[\u0062-\u0066]oo", r"[\u{0062}-\u{0066}]oo", r"[\u{62}-\u{000066}]oo"],
    // Note that we need \\ inside a non-raw string, otherwise the escape is
    // interpreted by the string literal.
    [
      "[\\u0062-\\u0066]oo",
      "[\\u{0062}-\\u{0066}]oo",
      "[\\u{62}-\\u{000066}]oo"
    ],
    // Though, unicode escapes resolved by the string literal should work too.
    ["[\u0062-\u0066]oo", "[\u{0062}-\u{0066}]oo", "[\u{62}-\u{000066}]oo"],
  ];
  for (final family in spellings) {
    // Only the four-digit spelling is exercised without the unicode flag.
    testRegExpHelper(RegExp(family[0]));
    for (final source in family) {
      testRegExpHelper(RegExp(source, unicode: true));
    }
  }
}
/// Checks how `\u{...}` is treated when the unicode flag is not set.
void TestBraceEscapesWithoutUnicodeFlag() {
  // Compiles each source without the unicode flag and asserts, in table
  // order, whether every subject matches.
  void verify(List<String> sources, Map<String, bool> expectations) {
    for (final source in sources) {
      final re = RegExp(source);
      expectations.forEach((subject, shouldMatch) {
        if (shouldMatch) {
          assertTrue(re.hasMatch(subject));
        } else {
          assertFalse(re.hasMatch(subject));
        }
      });
    }
  }

  // \u followed by illegal escape will be parsed as u. {x} will be the
  // character count.
  verify([
    r"f\u{2}bar",
    "f\\u{2}bar"
  ], {
    "fbar": false,
    "fubar": false,
    "fuubar": true,
    "fuuubar": false,
  });
  verify([
    r"f\u{1,2}bar",
    "f\\u{1,2}bar"
  ], {
    "fbar": false,
    "fubar": true,
    "fuubar": true,
    "fuuubar": false,
  });
  // Inside a character class the braces and the digit are plain members.
  verify([
    r"[\u{2}]",
    "[\\u{2}]"
  ], {
    "u": true,
    "{": true,
    "2": true,
    "}": true,
    "q": false,
    "(": false,
    ")": false,
  });
}
/// Checks that invalid escapes are identity escapes without the unicode flag
/// and are rejected with it.
void TestInvalidEscapes() {
  // Without the u flag, invalid unicode escapes and other invalid escapes are
  // treated as identity escapes.
  for (final source in [
    r"first\u\x\z\8\9second",
    "first\\u\\x\\z\\8\\9second"
  ]) {
    assertTrue(RegExp(source).hasMatch("firstuxz89second"));
  }
  for (final source in [r"[\u\x\z\8\9]", "[\\u\\x\\z\\8\\9]"]) {
    final re = RegExp(source);
    for (final member in const ["u", "x", "z", "8", "9"]) {
      assertTrue(re.hasMatch(member));
    }
    for (final outsider in const ["q", "7"]) {
      assertFalse(re.hasMatch(outsider));
    }
  }

  // However, with the u flag, these are treated as invalid escapes.
  const rejectedRaw = [
    r"\u",
    r"\u12",
    r"\ufoo",
    r"\x",
    r"\xfoo",
    r"\z",
    r"\8",
    r"\9",
  ];
  const rejectedEscaped = [
    "\\u",
    "\\u12",
    "\\ufoo",
    "\\x",
    "\\xfoo",
    "\\z",
    "\\8",
    "\\9",
  ];
  for (final source in rejectedRaw) {
    assertThrows(() => RegExp(source, unicode: true));
  }
  for (final source in rejectedEscaped) {
    assertThrows(() => RegExp(source, unicode: true));
  }
}
/// Checks the upper bound on the value inside `\u{...}`.
void TestTooBigHexEscape() {
  const largestAllowed = [r"\u{10ffff}", "\\u{10ffff}"];
  const tooLarge = [r"\u{110000}", "\\u{110000}"];

  // The hex number inside \u{} has a maximum value.
  for (final source in largestAllowed) {
    RegExp(source, unicode: true);
  }
  for (final source in tooLarge) {
    assertThrows(() => RegExp(source, unicode: true));
  }
  // Without the u flag, they're of course fine ({x} is the count).
  for (final source in tooLarge) {
    RegExp(source);
  }
}
/// Checks that an escaped `[` matches a literal bracket in both modes.
void TestSyntaxEscapes() {
  // Syntax escapes work the same with or without the u flag. Passing
  // `unicode: false` is the same as omitting the argument.
  for (final unicode in const [false, true]) {
    for (final source in [r"foo\[bar", "foo\\[bar"]) {
      final re = RegExp(source, unicode: unicode);
      assertTrue(re.hasMatch("foo[bar"));
      assertFalse(re.hasMatch("foo]bar"));
    }
  }
}
/// Checks that a surrogate pair written as two `\uXXXX` escapes matches the
/// corresponding supplementary character in unicode mode.
void TestUnicodeSurrogates() {
  // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
  const subject = "foo\u{10e6d}bar";
  for (final source in [r"foo\ud803\ude6dbar", "foo\\ud803\\ude6dbar"]) {
    assertTrue(RegExp(source, unicode: true).hasMatch(subject));
  }
}
/// Runs the escape tests defined above, then checks non-BMP characters in
/// atoms, alternatives, captures, quantifiers and character classes.
void main() {
  TestUnicodeEscapes();
  TestUnicodeEscapesInCharacterClasses();
  TestBraceEscapesWithoutUnicodeFlag();
  TestInvalidEscapes();
  TestTooBigHexEscape();
  TestSyntaxEscapes();
  TestUnicodeSurrogates();

  // Non-BMP patterns.
  // Single character atom. Every subject is checked against both the literal
  // character (non-raw string) and the \u{...} escape (raw string).
  assertTrue(RegExp("\u{12345}", unicode: true).hasMatch("\u{12345}"));
  assertTrue(RegExp(r"\u{12345}", unicode: true).hasMatch("\u{12345}"));
  assertTrue(RegExp("\u{12345}", unicode: true).hasMatch("\ud808\udf45"));
  assertTrue(RegExp(r"\u{12345}", unicode: true).hasMatch("\ud808\udf45"));
  assertFalse(RegExp("\u{12345}", unicode: true).hasMatch("\udf45"));
  assertFalse(RegExp(r"\u{12345}", unicode: true).hasMatch("\udf45"));

  // Multi-character atom.
  assertTrue(RegExp(r"\u{12345}\u{23456}", unicode: true)
      .hasMatch("a\u{12345}\u{23456}b"));
  assertTrue(RegExp(r"\u{12345}\u{23456}", unicode: true)
      .hasMatch("b\u{12345}\u{23456}c"));
  assertFalse(RegExp(r"\u{12345}\u{23456}", unicode: true)
      .hasMatch("a\udf45\u{23456}b"));
  assertFalse(RegExp(r"\u{12345}\u{23456}", unicode: true)
      .hasMatch("b\udf45\u{23456}c"));

  // Disjunction.
  assertTrue(RegExp(r"\u{12345}(?:\u{23456})", unicode: true)
      .hasMatch("a\u{12345}\u{23456}b"));
  assertTrue(RegExp(r"\u{12345}(?:\u{23456})", unicode: true)
      .hasMatch("b\u{12345}\u{23456}c"));
  assertFalse(RegExp(r"\u{12345}(?:\u{23456})", unicode: true)
      .hasMatch("a\udf45\u{23456}b"));
  assertFalse(RegExp(r"\u{12345}(?:\u{23456})", unicode: true)
      .hasMatch("b\udf45\u{23456}c"));

  // Alternative.
  assertTrue(
      RegExp(r"\u{12345}|\u{23456}", unicode: true).hasMatch("a\u{12345}b"));
  assertTrue(
      RegExp(r"\u{12345}|\u{23456}", unicode: true).hasMatch("b\u{23456}c"));
  assertFalse(
      RegExp(r"\u{12345}|\u{23456}", unicode: true).hasMatch("a\udf45\ud84db"));
  assertFalse(
      RegExp(r"\u{12345}|\u{23456}", unicode: true).hasMatch("b\udf45\ud808c"));

  // Capture.
  assertTrue(RegExp("(\u{12345}|\u{23456}).\\1", unicode: true)
      .hasMatch("\u{12345}b\u{12345}"));
  assertTrue(RegExp(r"(\u{12345}|\u{23456}).\1", unicode: true)
      .hasMatch("\u{12345}b\u{12345}"));
  assertFalse(RegExp("(\u{12345}|\u{23456}).\\1", unicode: true)
      .hasMatch("\u{12345}b\u{23456}"));
  assertFalse(RegExp(r"(\u{12345}|\u{23456}).\1", unicode: true)
      .hasMatch("\u{12345}b\u{23456}"));

  // Quantifier.
  assertTrue(RegExp("\u{12345}{3}", unicode: true)
      .hasMatch("\u{12345}\u{12345}\u{12345}"));
  assertTrue(RegExp(r"\u{12345}{3}", unicode: true)
      .hasMatch("\u{12345}\u{12345}\u{12345}"));
  // Without the unicode flag the quantifier applies to the trail surrogate
  // code unit only.
  assertTrue(RegExp("\u{12345}{3}").hasMatch("\u{12345}\udf45\udf45"));
  assertFalse(RegExp(r"\ud808\udf45{3}", unicode: true)
      .hasMatch("\u{12345}\udf45\udf45"));
  assertTrue(RegExp(r"\ud808\udf45{3}", unicode: true)
      .hasMatch("\u{12345}\u{12345}\u{12345}"));
  assertFalse(
      RegExp("\u{12345}{3}", unicode: true).hasMatch("\u{12345}\udf45\udf45"));
  assertFalse(
      RegExp(r"\u{12345}{3}", unicode: true).hasMatch("\u{12345}\udf45\udf45"));

  // Literal surrogates.
  shouldBe(
      RegExp("\ud800\udc00+", unicode: true).firstMatch("\u{10000}\u{10000}"),
      ["\u{10000}\u{10000}"]);
  shouldBe(
      RegExp("\\ud800\\udc00+", unicode: true).firstMatch("\u{10000}\u{10000}"),
      ["\u{10000}\u{10000}"]);
  shouldBe(
      RegExp("[\\ud800\\udc03-\\ud900\\udc01\]+", unicode: true)
          .firstMatch("\u{10003}\u{50001}"),
      ["\u{10003}\u{50001}"]);
  shouldBe(
      RegExp("[\ud800\udc03-\u{50001}\]+", unicode: true)
          .firstMatch("\u{10003}\u{50001}"),
      ["\u{10003}\u{50001}"]);

  // Unicode escape sequences to represent a non-BMP character cannot have
  // mixed notation, and must follow the rules for RegExpUnicodeEscapeSequence.
  assertThrows(() => RegExp("[\\ud800\udc03-\ud900\\udc01\]+", unicode: true));
  assertNull(
      RegExp("\\ud800\udc00+", unicode: true).firstMatch("\u{10000}\u{10000}"));
  assertNull(
      RegExp("\ud800\\udc00+", unicode: true).firstMatch("\u{10000}\u{10000}"));
  assertNull(RegExp("[\\ud800\udc00]", unicode: true).firstMatch("\u{10000}"));
  // Braced escape for the lead surrogate followed by a literal trail
  // surrogate.
  assertNull(
      RegExp("[\\u{d800}\udc00]", unicode: true).firstMatch("\u{10000}"));
  assertNull(RegExp("[\ud800\\udc00]", unicode: true).firstMatch("\u{10000}"));
  // Literal lead surrogate followed by a braced escape for the trail
  // surrogate.
  assertNull(
      RegExp("[\ud800\\u{dc00}]", unicode: true).firstMatch("\u{10000}"));
  assertNull(RegExp(r"\u{d800}\u{dc00}+", unicode: true)
      .firstMatch("\ud800\udc00\udc00"));
  assertNull(RegExp(r"\ud800\u{dc00}+", unicode: true)
      .firstMatch("\ud800\udc00\udc00"));
  assertNull(RegExp(r"\u{d800}\udc00+", unicode: true)
      .firstMatch("\ud800\udc00\udc00"));
}

View file

@ -1,201 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Checks binary Unicode property escapes: unsupported names must be
/// rejected, and each supported property is checked under its long name
/// (expected match) and its alias or long name again (expected non-match).
void main() {
  // Asserts that [re] matches [s].
  void t(RegExp re, String s) {
    assertTrue(re.hasMatch(s));
  }

  // Asserts that [re] does not match [s].
  void f(RegExp re, String s) {
    assertFalse(re.hasMatch(s));
  }

  // Names that must be rejected: non-binary properties, properties given an
  // explicit value, and binary properties that are expected to be unsupported.
  const rejected = [
    "\\p{Hiragana}",
    "\\p{Bidi_Class}",
    "\\p{Bidi_C=False}",
    "\\P{Bidi_Control=Y}",
    "\\p{AHex=Yes}",
    "\\p{Composition_Exclusion}",
    "\\p{CE}",
    "\\p{Full_Composition_Exclusion}",
    "\\p{Comp_Ex}",
    "\\p{Grapheme_Link}",
    "\\p{Gr_Link}",
    "\\p{Hyphen}",
    "\\p{NFD_Inert}",
    "\\p{NFKD_Inert}",
    "\\p{NFC_Inert}",
    "\\p{NFKC_Inert}",
    "\\p{Segment_Starter}",
  ];
  for (final source in rejected) {
    assertThrows(() => RegExp(source, unicode: true));
  }

  t(RegExp(r"\p{Alphabetic}", unicode: true), "æ");
  f(RegExp(r"\p{Alpha}", unicode: true), "1");
  t(RegExp(r"\p{ASCII_Hex_Digit}", unicode: true), "f");
  f(RegExp(r"\p{AHex}", unicode: true), "g");
  t(RegExp(r"\p{Bidi_Control}", unicode: true), "\u200e");
  f(RegExp(r"\p{Bidi_C}", unicode: true), "g");
  t(RegExp(r"\p{Bidi_Mirrored}", unicode: true), "(");
  f(RegExp(r"\p{Bidi_M}", unicode: true), "-");
  t(RegExp(r"\p{Case_Ignorable}", unicode: true), "\u02b0");
  f(RegExp(r"\p{CI}", unicode: true), "a");
  t(RegExp(r"\p{Changes_When_Casefolded}", unicode: true), "B");
  f(RegExp(r"\p{CWCF}", unicode: true), "1");
  t(RegExp(r"\p{Changes_When_Casemapped}", unicode: true), "b");
  f(RegExp(r"\p{CWCM}", unicode: true), "1");
  t(RegExp(r"\p{Changes_When_Lowercased}", unicode: true), "B");
  f(RegExp(r"\p{CWL}", unicode: true), "1");
  t(RegExp(r"\p{Changes_When_Titlecased}", unicode: true), "b");
  f(RegExp(r"\p{CWT}", unicode: true), "1");
  t(RegExp(r"\p{Changes_When_Uppercased}", unicode: true), "b");
  f(RegExp(r"\p{CWU}", unicode: true), "1");
  t(RegExp(r"\p{Dash}", unicode: true), "-");
  f(RegExp(r"\p{Dash}", unicode: true), "1");
  t(RegExp(r"\p{Default_Ignorable_Code_Point}", unicode: true), "\u00ad");
  f(RegExp(r"\p{DI}", unicode: true), "1");
  t(RegExp(r"\p{Deprecated}", unicode: true), "\u17a3");
  f(RegExp(r"\p{Dep}", unicode: true), "1");
  t(RegExp(r"\p{Diacritic}", unicode: true), "\u0301");
  f(RegExp(r"\p{Dia}", unicode: true), "1");
  t(RegExp(r"\p{Emoji}", unicode: true), "\u2603");
  f(RegExp(r"\p{Emoji}", unicode: true), "x");
  t(RegExp(r"\p{Emoji_Component}", unicode: true), "\u{1F1E6}");
  f(RegExp(r"\p{Emoji_Component}", unicode: true), "x");
  t(RegExp(r"\p{Emoji_Modifier_Base}", unicode: true), "\u{1F6CC}");
  f(RegExp(r"\p{Emoji_Modifier_Base}", unicode: true), "x");
  t(RegExp(r"\p{Emoji_Modifier}", unicode: true), "\u{1F3FE}");
  f(RegExp(r"\p{Emoji_Modifier}", unicode: true), "x");
  t(RegExp(r"\p{Emoji_Presentation}", unicode: true), "\u{1F308}");
  f(RegExp(r"\p{Emoji_Presentation}", unicode: true), "x");
  t(RegExp(r"\p{Extender}", unicode: true), "\u3005");
  f(RegExp(r"\p{Ext}", unicode: true), "x");
  t(RegExp(r"\p{Grapheme_Base}", unicode: true), " ");
  f(RegExp(r"\p{Gr_Base}", unicode: true), "\u0010");
  t(RegExp(r"\p{Grapheme_Extend}", unicode: true), "\u0300");
  f(RegExp(r"\p{Gr_Ext}", unicode: true), "x");
  t(RegExp(r"\p{Hex_Digit}", unicode: true), "a");
  f(RegExp(r"\p{Hex}", unicode: true), "g");
  t(RegExp(r"\p{ID_Continue}", unicode: true), "1");
  f(RegExp(r"\p{IDC}", unicode: true), ".");
  t(RegExp(r"\p{ID_Start}", unicode: true), "a");
  f(RegExp(r"\p{IDS}", unicode: true), "1");
  // U+6F22 is a CJK unified ideograph; written as an escape so the subject
  // cannot be lost to an encoding problem.
  t(RegExp(r"\p{Ideographic}", unicode: true), "\u6f22");
  f(RegExp(r"\p{Ideo}", unicode: true), "H");
  t(RegExp(r"\p{IDS_Binary_Operator}", unicode: true), "\u2FF0");
  f(RegExp(r"\p{IDSB}", unicode: true), "a");
  t(RegExp(r"\p{IDS_Trinary_Operator}", unicode: true), "\u2FF2");
  f(RegExp(r"\p{IDST}", unicode: true), "a");
  t(RegExp(r"\p{Join_Control}", unicode: true), "\u200c");
  f(RegExp(r"\p{Join_C}", unicode: true), "a");
  t(RegExp(r"\p{Logical_Order_Exception}", unicode: true), "\u0e40");
  f(RegExp(r"\p{LOE}", unicode: true), "a");
  t(RegExp(r"\p{Lowercase}", unicode: true), "a");
  f(RegExp(r"\p{Lower}", unicode: true), "A");
  t(RegExp(r"\p{Math}", unicode: true), "=");
  f(RegExp(r"\p{Math}", unicode: true), "A");
  t(RegExp(r"\p{Noncharacter_Code_Point}", unicode: true), "\uFDD0");
  f(RegExp(r"\p{NChar}", unicode: true), "A");
  t(RegExp(r"\p{Pattern_Syntax}", unicode: true), "\u0021");
  f(RegExp(r"\p{Pat_Syn}", unicode: true), "A");
  t(RegExp(r"\p{Pattern_White_Space}", unicode: true), "\u0009");
  f(RegExp(r"\p{Pat_WS}", unicode: true), "A");
  t(RegExp(r"\p{Quotation_Mark}", unicode: true), "'");
  f(RegExp(r"\p{QMark}", unicode: true), "A");
  t(RegExp(r"\p{Radical}", unicode: true), "\u2FAD");
  f(RegExp(r"\p{Radical}", unicode: true), "A");
  t(RegExp(r"\p{Regional_Indicator}", unicode: true), "\u{1F1E6}");
  f(RegExp(r"\p{Regional_Indicator}", unicode: true), "A");
  t(RegExp(r"\p{Sentence_Terminal}", unicode: true), "!");
  f(RegExp(r"\p{STerm}", unicode: true), "A");
  t(RegExp(r"\p{Soft_Dotted}", unicode: true), "i");
  f(RegExp(r"\p{SD}", unicode: true), "A");
  t(RegExp(r"\p{Terminal_Punctuation}", unicode: true), ".");
  f(RegExp(r"\p{Term}", unicode: true), "A");
  t(RegExp(r"\p{Unified_Ideograph}", unicode: true), "\u4e00");
  f(RegExp(r"\p{UIdeo}", unicode: true), "A");
  t(RegExp(r"\p{Uppercase}", unicode: true), "A");
  f(RegExp(r"\p{Upper}", unicode: true), "a");
  t(RegExp(r"\p{Variation_Selector}", unicode: true), "\uFE00");
  f(RegExp(r"\p{VS}", unicode: true), "A");
  t(RegExp(r"\p{White_Space}", unicode: true), " ");
  f(RegExp(r"\p{WSpace}", unicode: true), "A");
  t(RegExp(r"\p{XID_Continue}", unicode: true), "1");
  f(RegExp(r"\p{XIDC}", unicode: true), " ");
  t(RegExp(r"\p{XID_Start}", unicode: true), "A");
  f(RegExp(r"\p{XIDS}", unicode: true), " ");
}

View file

@ -1,56 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Checks `\p{...}` and `\P{...}` escapes used inside character classes.
void main() {
  // Every pattern in this test is compiled in unicode mode.
  RegExp u(String source) => RegExp(source, unicode: true);

  // Malformed property escapes, and a property escape used as a range
  // endpoint, are rejected.
  const rejected = [
    "[\\p]",
    "[\\p{garbage}]",
    "[\\p{}]",
    "[\\p{]",
    "[\\p}]",
    "^[\\p{Lu}-\\p{Ll}]+\$",
  ];
  for (final source in rejected) {
    assertThrows(() => u(source));
  }

  // Positive classes.
  assertTrue(u(r"^[\p{Lu}\p{Ll}]+$").hasMatch("ABCabc"));
  assertTrue(u(r"^[\p{Lu}-]+$").hasMatch("ABC-"));
  assertFalse(u(r"^[\P{Lu}\p{Ll}]+$").hasMatch("ABCabc"));
  assertTrue(u(r"^[\P{Lu}\p{Ll}]+$").hasMatch("abc"));
  assertTrue(u(r"^[\P{Lu}]+$").hasMatch("abc123"));
  assertFalse(u(r"^[\P{Lu}]+$").hasMatch("XYZ"));
  assertTrue(u(r"[\p{Math}]").hasMatch("+"));
  assertTrue(u(r"[\P{Bidi_M}]").hasMatch(" "));
  assertTrue(u(r"[\p{Hex}]").hasMatch("A"));

  // Negated classes.
  assertTrue(u(r"^[^\P{Lu}]+$").hasMatch("XYZ"));
  assertFalse(u(r"^[^\p{Lu}\p{Ll}]+$").hasMatch("abc"));
  assertFalse(u(r"^[^\p{Lu}\p{Ll}]+$").hasMatch("ABC"));
  assertTrue(u(r"^[^\p{Lu}\p{Ll}]+$").hasMatch("123"));
  assertTrue(u(r"^[^\p{Lu}\P{Ll}]+$").hasMatch("abc"));
}

View file

@ -1,55 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Checks which `name=value` property escapes compile in unicode mode.
void main() {
  // Asserts that compiling each source in unicode mode throws.
  void rejectAll(List<String> sources) {
    for (final source in sources) {
      assertThrows(() => RegExp(source, unicode: true));
    }
  }

  rejectAll(const [
    "\\p{Bidi_Class=L}+",
    "\\p{bc=Left_To_Right}+",
    "\\p{bc=AL}+",
    "\\p{bc=Arabic_Letter}+",
    "\\p{Line_Break=Glue}",
    "\\p{lb=AL}",
    "\\p{Block=}",
    "\\p{=}",
    "\\p{=L}",
    "\\p{=Hiragana}",
    "\\p{Block=CJK=}",
    "\\p{Age=V8_0}",
  ]);

  // General_Category and its short alias are accepted with a value.
  for (final source in const ["\\p{General_Category=Letter}", "\\p{gc=L}"]) {
    assertDoesNotThrow(() => RegExp(source, unicode: true));
  }

  // The mask variant of General_Category is not accepted.
  rejectAll(const [
    "\\p{General_Category_Mask=Letter}",
    "\\p{gcm=L}",
  ]);
}

View file

@ -1,71 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Checks that property names and values are matched exactly: loosely
/// spelled, `In`-prefixed and otherwise unlisted names must throw.
void main() {
  // Asserts that compiling each source in unicode mode throws.
  void rejectAll(List<String> sources) {
    for (final source in sources) {
      assertThrows(() => RegExp(source, unicode: true));
    }
  }

  // Asserts that each source compiles in unicode mode.
  void acceptAll(List<String> sources) {
    for (final source in sources) {
      assertDoesNotThrow(() => RegExp(source, unicode: true));
    }
  }

  rejectAll(const [
    "\\p{In CJK}",
    "\\p{InCJKUnifiedIdeographs}",
    "\\p{InCJK}",
    "\\p{InCJK_Unified_Ideographs}",
    "\\p{InCyrillic_Sup}",
    "\\p{InCyrillic_Supplement}",
    "\\p{InCyrillic_Supplementary}",
    "\\p{InCyrillicSupplementary}",
    "\\p{InCyrillic_supplementary}",
  ]);
  acceptAll(const [
    "\\p{C}",
    "\\p{Other}",
    "\\p{Cc}",
    "\\p{Control}",
    "\\p{cntrl}",
    "\\p{M}",
    "\\p{Mark}",
    "\\p{Combining_Mark}",
  ]);
  // A space in place of the underscore is not accepted.
  rejectAll(const ["\\p{Combining Mark}"]);
  acceptAll(const ["\\p{Script=Copt}"]);
  rejectAll(const [
    "\\p{Coptic}",
    "\\p{Qaac}",
    "\\p{Egyp}",
  ]);
  acceptAll(const ["\\p{Script=Egyptian_Hieroglyphs}"]);
  rejectAll(const [
    "\\p{EgyptianHieroglyphs}",
    "\\p{BidiClass=LeftToRight}",
    "\\p{BidiC=LeftToRight}",
    "\\p{bidi_c=Left_To_Right}",
    "\\p{Block=CJK}",
    "\\p{Block = CJK}",
    "\\p{Block=cjk}",
    "\\p{BLK=CJK}",
  ]);
}

View file

@ -1,114 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Checks General_Category property escapes, including case-insensitive
/// matching and supplementary-plane characters.
void main() {
  // Compiles [source] in unicode mode.
  RegExp u(String source) => RegExp(source, unicode: true);

  // Compiles [source] in unicode mode with case-insensitive matching.
  RegExp ui(String source) =>
      RegExp(source, caseSensitive: false, unicode: true);

  // Malformed escapes are rejected for both \p and \P.
  const malformed = [
    "\\p",
    "\\p{garbage}",
    "\\p{}",
    "\\p{",
    "\\p}",
    "\\pL",
    "\\P",
    "\\P{garbage}",
    "\\P{}",
    "\\P{",
    "\\P}",
    "\\PL",
  ];
  for (final source in malformed) {
    assertThrows(() => u(source));
  }

  // Lowercase_Letter.
  assertTrue(u(r"\p{Ll}").hasMatch("a"));
  assertFalse(u(r"\P{Ll}").hasMatch("a"));
  assertTrue(u(r"\P{Ll}").hasMatch("A"));
  assertFalse(u(r"\p{Ll}").hasMatch("A"));
  assertTrue(u(r"\p{Ll}").hasMatch("\u{1D7BE}"));
  assertFalse(u(r"\P{Ll}").hasMatch("\u{1D7BE}"));
  assertFalse(u(r"\p{Ll}").hasMatch("\u{1D5E3}"));
  assertTrue(u(r"\P{Ll}").hasMatch("\u{1D5E3}"));

  // Lowercase_Letter, ignoring case.
  assertTrue(ui(r"\p{Ll}").hasMatch("a"));
  assertTrue(ui(r"\p{Ll}").hasMatch("\u{118D4}"));
  assertTrue(ui(r"\p{Ll}").hasMatch("A"));
  assertTrue(ui(r"\p{Ll}").hasMatch("\u{118B4}"));
  assertTrue(ui(r"\P{Ll}").hasMatch("a"));
  assertTrue(ui(r"\P{Ll}").hasMatch("\u{118D4}"));
  assertTrue(ui(r"\P{Ll}").hasMatch("A"));
  assertTrue(ui(r"\P{Ll}").hasMatch("\u{118B4}"));

  // Uppercase_Letter.
  assertTrue(u(r"\p{Lu}").hasMatch("A"));
  assertFalse(u(r"\P{Lu}").hasMatch("A"));
  assertTrue(u(r"\P{Lu}").hasMatch("a"));
  assertFalse(u(r"\p{Lu}").hasMatch("a"));
  assertTrue(u(r"\p{Lu}").hasMatch("\u{1D5E3}"));
  assertFalse(u(r"\P{Lu}").hasMatch("\u{1D5E3}"));
  assertFalse(u(r"\p{Lu}").hasMatch("\u{1D7BE}"));
  assertTrue(u(r"\P{Lu}").hasMatch("\u{1D7BE}"));

  // Uppercase_Letter, ignoring case.
  assertTrue(ui(r"\p{Lu}").hasMatch("a"));
  assertTrue(ui(r"\p{Lu}").hasMatch("\u{118D4}"));
  assertTrue(ui(r"\p{Lu}").hasMatch("A"));
  assertTrue(ui(r"\p{Lu}").hasMatch("\u{118B4}"));
  assertTrue(ui(r"\P{Lu}").hasMatch("a"));
  assertTrue(ui(r"\P{Lu}").hasMatch("\u{118D4}"));
  assertTrue(ui(r"\P{Lu}").hasMatch("A"));
  assertTrue(ui(r"\P{Lu}").hasMatch("\u{118B4}"));

  // Math_Symbol.
  assertTrue(u(r"\p{Sm}").hasMatch("+"));
  assertFalse(u(r"\P{Sm}").hasMatch("+"));
  assertTrue(u(r"\p{Sm}").hasMatch("\u{1D6C1}"));
  assertFalse(u(r"\P{Sm}").hasMatch("\u{1D6C1}"));

  // Letter.
  assertFalse(u(r"\p{L}").hasMatch("\uA6EE"));
  assertTrue(u(r"\P{L}").hasMatch("\uA6EE"));

  // Long value names and explicit General_Category / gc prefixes.
  assertTrue(u(r"\p{Lowercase_Letter}").hasMatch("a"));
  assertTrue(u(r"\p{Math_Symbol}").hasMatch("+"));
  assertTrue(u(r"\p{gc=Ll}").hasMatch("a"));
  assertTrue(u(r"\p{General_Category=Math_Symbol}").hasMatch("+"));
  assertTrue(u(r"\p{General_Category=L}").hasMatch("X"));
}

View file

@ -1,68 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Checks that the [RegExp] constructor throws, in unicode mode, for every
/// property escape listed below.
void main() {
  // Every pattern is written as a raw string on purpose. In an ordinary Dart
  // string literal "\p" is not a recognised escape and collapses to a plain
  // "p", so the compiled pattern would be `p{Block=ASCII}+` and would be
  // rejected because of the stray `{` rather than because of the property
  // name under test.
  assertThrows(() => RegExp(r"\p{Block=ASCII}+", unicode: true));
  assertThrows(() => RegExp(r"\p{Block=ASCII}+", unicode: true));
  assertThrows(() => RegExp(r"\p{Block=Basic_Latin}+", unicode: true));
  assertThrows(() => RegExp(r"\p{Block=Basic_Latin}+", unicode: true));
  assertThrows(() => RegExp(r"\p{blk=CJK}+", unicode: true));
  assertThrows(
      () => RegExp(r"\p{blk=CJK_Unified_Ideographs}+", unicode: true));
  assertThrows(() => RegExp(r"\p{blk=CJK}+", unicode: true));
  assertThrows(
      () => RegExp(r"\p{blk=CJK_Unified_Ideographs}+", unicode: true));
  assertThrows(() => RegExp(r"\p{Block=ASCII}+", unicode: true));
  assertThrows(() => RegExp(r"\p{Block=ASCII}+", unicode: true));
  assertThrows(() => RegExp(r"\p{Block=Basic_Latin}+", unicode: true));
  assertThrows(() => RegExp(r"\p{Block=Basic_Latin}+", unicode: true));
  assertThrows(() => RegExp(r"\p{NFKD_Quick_Check=Y}+", unicode: true));
  assertThrows(() => RegExp(r"\p{NFKD_QC=Yes}+", unicode: true));
  assertThrows(() => RegExp(r"\p{Numeric_Type=Decimal}+", unicode: true));
  assertThrows(() => RegExp(r"\p{nt=De}+", unicode: true));
  assertThrows(() => RegExp(r"\p{Bidi_Class=Arabic_Letter}+", unicode: true));
  assertThrows(() => RegExp(r"\p{Bidi_Class=AN}+", unicode: true));
  assertThrows(() => RegExp(r"\p{ccc=OV}+", unicode: true));
  assertThrows(() => RegExp(r"\p{Sentence_Break=Format}+", unicode: true));
  // These were already correctly escaped ("\\p..."); the raw form below
  // denotes exactly the same pattern text.
  assertThrows(() => RegExp(r"\p{In}", unicode: true));
  assertThrows(() => RegExp(r"\pI", unicode: true));
  assertThrows(() => RegExp(r"\p{I}", unicode: true));
  assertThrows(() => RegExp(r"\p{CJK}", unicode: true));
  assertThrows(() => RegExp(r"\p{}", unicode: true));
}

View file

@ -1,77 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Exercises `\p{Script=...}` and `\p{sc=...}` property escapes in unicode
/// mode against sample text.
void main() {
  // Asserts that the unicode-mode pattern [source] matches within [input].
  void found(String source, String input) {
    final pattern = RegExp(source, unicode: true);
    assertTrue(pattern.hasMatch(input));
  }

  // Asserts that the unicode-mode pattern [source] matches nowhere in [input].
  void absent(String source, String input) {
    final pattern = RegExp(source, unicode: true);
    assertFalse(pattern.hasMatch(input));
  }

  found(r"\p{Script=Common}+", ".");
  absent(r"\p{Script=Common}+", "supercalifragilisticexpialidocious");

  found(r"\p{Script=Han}+", "话说天下大势,分久必合,合久必分");
  found(r"\p{Script=Hani}+", "吾庄后有一桃园,花开正盛");
  absent(r"\p{Script=Han}+", "おはようございます");
  absent(r"\p{Script=Hani}+", "Something is rotten in the state of Denmark");

  found(r"\p{Script=Latin}+", "Wie froh bin ich, daß ich weg bin!");
  found(r"\p{Script=Latn}+",
      "It was a bright day in April, and the clocks were striking thirteen");
  absent(r"\p{Script=Latin}+", "奔腾千里荡尘埃,渡水登山紫雾开");
  absent(r"\p{Script=Latn}+", "いただきます");

  found(r"\p{sc=Hiragana}", "いただきます");
  found(r"\p{sc=Hira}", "ありがとうございました");
  absent(r"\p{sc=Hiragana}",
      "Als Gregor Samsa eines Morgens aus unruhigen Träumen erwachte");
  absent(r"\p{sc=Hira}", "Call me Ishmael");

  found(r"\p{sc=Phoenician}", "\u{10900}\u{1091a}");
  found(r"\p{sc=Phnx}", "\u{1091f}\u{10916}");
  absent(r"\p{sc=Phoenician}", "Arthur est un perroquet");
  absent(r"\p{sc=Phnx}", "设心狠毒非良士,操卓原来一路人");

  found(r"\p{sc=Grek}", "ἄνδρα μοι ἔννεπε, μοῦσα, πολύτροπον, ὃς μάλα πολλὰ");
  found(r"\p{sc=Greek}", "μῆνιν ἄειδε θεὰ Πηληϊάδεω Ἀχιλῆος");
  absent(r"\p{sc=Greek}", "高贤未服英雄志,屈节偏生杰士疑");
  absent(r"\p{sc=Greek}",
      "Mr. Jones, of the Manor Farm, had locked the hen-houses for the night");
}

View file

@ -1,110 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Exercises the `ASCII`, `Any` and `Assigned` property escapes in unicode
/// mode, then checks that a list of `Other_*` property names is rejected.
void main() {
  // Asserts that the unicode-mode pattern [source] matches within [input].
  void found(String source, String input) {
    assertTrue(RegExp(source, unicode: true).hasMatch(input));
  }

  // Asserts that the unicode-mode pattern [source] matches nowhere in [input].
  void absent(String source, String input) {
    assertFalse(RegExp(source, unicode: true).hasMatch(input));
  }

  // Asserts that the first `\p{Any}` match in [input] is exactly [expected].
  void firstAny(String input, String expected) {
    final any = RegExp(r"\p{Any}", unicode: true);
    shouldBe(any.firstMatch(input), [expected]);
  }

  found(r"\p{ASCII}+", "abc123");
  absent(r"\p{ASCII}+", "ⓐⓑⓒ①②③");
  absent(r"\p{ASCII}+", "🄰🄱🄲①②③");
  absent(r"\P{ASCII}+", "abcd123");
  found(r"\P{ASCII}+", "ⓐⓑⓒ①②③");
  found(r"\P{ASCII}+", "🄰🄱🄲①②③");

  absent(r"[^\p{ASCII}]+", "abc123");
  absent(r"[\p{ASCII}]+", "ⓐⓑⓒ①②③");
  absent(r"[\p{ASCII}]+", "🄰🄱🄲①②③");
  found(r"[^\P{ASCII}]+", "abcd123");
  found(r"[\P{ASCII}]+", "ⓐⓑⓒ①②③");
  absent(r"[^\P{ASCII}]+", "🄰🄱🄲①②③");

  found(r"\p{Any}+", "🄰🄱🄲①②③");

  // Lone surrogates are matched one code unit at a time; a lead/trail pair is
  // matched as a single unit.
  firstAny("\ud800\ud801", "\ud800");
  firstAny("\udc00\udc01", "\udc00");
  firstAny("\ud800\udc01", "\ud800\udc01");
  firstAny("\udc01", "\udc01");

  absent(r"\P{Any}+", "123");
  absent(r"[\P{Any}]+", "123");
  found(r"[\P{Any}\d]+", "123");
  found(r"[^\P{Any}]+", "123");

  found(r"\p{Assigned}+", "123");
  found(r"\p{Assigned}+", "🄰🄱🄲");
  absent(r"\p{Assigned}+", "\ufdd0");
  absent(r"\p{Assigned}+", "\u{fffff}");

  absent(r"\P{Assigned}+", "123");
  absent(r"\P{Assigned}+", "🄰🄱🄲");
  found(r"\P{Assigned}+", "\ufdd0");
  found(r"\P{Assigned}+", "\u{fffff}");
  absent(r"\P{Assigned}", "");

  found(r"[^\P{Assigned}]+", "123");
  absent(r"[\P{Assigned}]+", "🄰🄱🄲");
  absent(r"[^\P{Assigned}]+", "\ufdd0");
  found(r"[\P{Assigned}]+", "\u{fffff}");
  absent(r"[\P{Assigned}]", "");

  absent(r"[^\u1234\p{ASCII}]+", "\u1234");
  found(r"[x\P{ASCII}]+", "x");
  found(r"[\u1234\p{ASCII}]+", "\u1234");

  // Contributory binary properties are not supported.
  const contributory = <String>[
    "\\p{Other_Alphabetic}",
    "\\P{OAlpha}",
    "\\p{Other_Default_Ignorable_Code_Point}",
    "\\P{ODI}",
    "\\p{Other_Grapheme_Extend}",
    "\\P{OGr_Ext}",
    "\\p{Other_ID_Continue}",
    "\\P{OIDC}",
    "\\p{Other_ID_Start}",
    "\\P{OIDS}",
    "\\p{Other_Lowercase}",
    "\\P{OLower}",
    "\\p{Other_Math}",
    "\\P{OMath}",
    "\\p{Other_Uppercase}",
    "\\P{OUpper}",
  ];
  for (final source in contributory) {
    assertThrows(() => RegExp(source, unicode: true));
  }
}

View file

@ -1,81 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Expands the shorthand used by the test cases in this file.
///
/// Every `L` becomes the code unit U+D800, every `l` becomes U+D801, every
/// `T` becomes U+DC00, and every `.` becomes the character class `[^]`.
String replace(String string) {
  // Applied in order; no replacement text contains a later placeholder, so
  // the substitutions cannot interfere with one another.
  const substitutions = <List<String>>[
    ["L", "\ud800"],
    ["l", "\ud801"],
    ["T", "\udc00"],
    [".", "[^]"],
  ];
  var expanded = string;
  for (final pair in substitutions) {
    expanded = expanded.replaceAll(pair[0], pair[1]);
  }
  return expanded;
}
/// Runs a single case: expands the placeholders in all three arguments with
/// [replace], compiles [regexp_source] in unicode mode, and compares the
/// first match in [subject] against [expectation] using `shouldBe`.
///
/// A null [expectation] is passed to `shouldBe` unchanged.
void test(List<String> expectation, String regexp_source, String subject) {
  final expected =
      expectation == null ? null : expectation.map(replace).toList();
  final pattern = RegExp(replace(regexp_source), unicode: true);
  final match = pattern.firstMatch(replace(subject));
  shouldBe(match, expected);
}
/// Back-reference cases in unicode mode. Subjects and patterns use the
/// placeholder letters that `test` expands through `replace`.
void main() {
  // Back reference does not end in the middle of a surrogate pair.
  test(null, "(L)\\1", "LLT");
  test(["LLTLl", "L", "l"], "(L).*\\1(.)", "LLTLl");
  test(null, "(aL).*\\1", "aLaLT");
  test(["aLaLTaLl", "aL", "l"], "(aL).*\\1(.)", "aLaLTaLl");

  const longSubject = "TabcLxLTabcLxTabcLTyTabcLz";
  test([longSubject, "TabcL", "z"], "([^x]+).*\\1(.)", longSubject);

  // Back reference does not start in the middle of a surrogate pair.
  test(["TLTabTc", "T", "c"], "(T).*\\1(.)", "TLTabTc");

  // Lookbehinds.
  test(null, "(?<=\\1(T)x)", "LTTx");
  test(["", "b", "T"], "(?<=(.)\\2.*(T)x)", "bTaLTTx");
  test(null, "(?<=\\1.*(L)x)", "LTLx");
  test(["", "b", "L"], "(?<=(.)\\2.*(L)x)", "bLaLTLx");

  // The same pattern is run over a series of subjects where no match is
  // expected, followed by one subject where it is.
  const repeatedRun = "([^x]+)x*\\1";
  const unmatched = <String>[
    "LxLT",
    "TxLT",
    "LTxL",
    "LTxT",
    "xLxLT",
    "xTxLT",
    "xLTxL",
    "xLTxT",
    "xxxLxxLTxx",
    "xxxTxxLTxx",
    "xxxLTxxLxx",
    "xxxLTxxTxx",
  ];
  for (final subject in unmatched) {
    test(null, repeatedRun, subject);
  }
  test(["LTTxxLTT", "LTT"], repeatedRun, "xxxLTTxxLTTxx");
}

View file

@ -1,135 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Case-insensitive matching, comparing non-unicode and unicode mode.
void main() {
  // Case-insensitive pattern compiled without the unicode flag.
  RegExp plain(String source) => RegExp(source, caseSensitive: false);

  // Case-insensitive pattern compiled with the unicode flag.
  RegExp uni(String source) =>
      RegExp(source, caseSensitive: false, unicode: true);

  // Non-unicode use toUpperCase mappings.
  assertFalse(plain(r"[\u00e5]").hasMatch("\u212b"));
  assertFalse(plain(r"[\u212b]").hasMatch("\u00e5\u1234"));
  assertFalse(plain(r"[\u212b]").hasMatch("\u00e5"));

  assertTrue("\u212b".toLowerCase() == "\u00e5");
  assertTrue("\u00c5".toLowerCase() == "\u00e5");
  assertTrue("\u00e5".toUpperCase() == "\u00c5");

  // Unicode uses case folding mappings.
  assertTrue(uni(r"\u00e5").hasMatch("\u212b"));
  assertTrue(uni(r"\u00e5").hasMatch("\u00c5"));
  assertTrue(uni(r"\u00e5").hasMatch("\u00e5"));
  assertTrue(uni(r"\u00e5").hasMatch("\u212b"));
  assertTrue(uni(r"\u00c5").hasMatch("\u00e5"));
  assertTrue(uni(r"\u00c5").hasMatch("\u212b"));
  assertTrue(uni(r"\u00c5").hasMatch("\u00c5"));
  assertTrue(uni(r"\u212b").hasMatch("\u00c5"));
  assertTrue(uni(r"\u212b").hasMatch("\u00e5"));
  assertTrue(uni(r"\u212b").hasMatch("\u212b"));

  // Non-BMP.
  assertFalse(plain(r"\u{10400}").hasMatch("\u{10428}"));
  assertTrue(uni(r"\u{10400}").hasMatch("\u{10428}"));
  assertTrue(uni(r"\ud801\udc00").hasMatch("\u{10428}"));
  assertTrue(uni(r"[\u{10428}]").hasMatch("\u{10400}"));
  assertTrue(uni(r"[\ud801\udc28]").hasMatch("\u{10400}"));
  shouldBe(uni(r"[\uff40-\u{10428}]+").firstMatch("\uff21\u{10400}abc"),
      ["\uff21\u{10400}"]);
  shouldBe(
      uni(r"[^\uff40-\u{10428}]+").firstMatch("\uff21\u{10400}abc\uff23"),
      ["abc"]);
  shouldBe(uni(r"[\u24d5-\uff33]+").firstMatch("\uff54\uff53\u24bb\u24ba"),
      ["\uff53\u24bb"]);

  // Full mappings are ignored.
  assertFalse(uni(r"\u00df").hasMatch("SS"));
  assertFalse(uni(r"\u1f8d").hasMatch("\u1f05\u03b9"));

  // Simple mappings work.
  assertTrue(uni(r"\u1f8d").hasMatch("\u1f85"));

  // Common mappings work.
  assertTrue(uni(r"\u1f6b").hasMatch("\u1f63"));

  // Back references.
  shouldBe(uni(r"(.)\1\1").firstMatch("\u00e5\u212b\u00c5"),
      ["\u00e5\u212b\u00c5", "\u00e5"]);
  shouldBe(uni(r"(.)\1").firstMatch("\u{118aa}\u{118ca}"),
      ["\u{118aa}\u{118ca}", "\u{118aa}"]);

  // Misc.
  assertTrue(uni(r"\u00e5\u00e5\u00e5").hasMatch("\u212b\u00e5\u00c5"));
  assertTrue(uni(r"AB\u{10400}").hasMatch("ab\u{10428}"));

  // Non-Latin1 maps to Latin1.
  shouldBe(uni(r"^\u017F").firstMatch("s"), ["s"]);
  shouldBe(uni(r"^\u017F").firstMatch("s\u1234"), ["s"]);
  shouldBe(uni(r"^a[\u017F]").firstMatch("as"), ["as"]);
  shouldBe(uni(r"^a[\u017F]").firstMatch("as\u1234"), ["as"]);
}

View file

@ -1,155 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Checks how `matchAsPrefix` treats paired and unpaired surrogates, with and
/// without the unicode flag, for anchored and unanchored patterns.
void main() {
  // Two complete surrogate pairs (four code units).
  const pairs = "\ud800\udc00\ud801\udc01";
  // One complete pair followed by two lead surrogates.
  const loneTail = "\ud800\udc00\ud801\ud802";

  // Testing handling of paired and non-paired surrogates in unicode mode
  final unicodeDot = RegExp(r".", unicode: true);
  var hit = unicodeDot.matchAsPrefix(pairs);
  shouldBe(hit, ["\ud800\udc00"]);
  assertEquals(hit.end, 2);
  // Starting inside a pair still yields the whole pair.
  hit = unicodeDot.matchAsPrefix(pairs, 1);
  shouldBe(hit, ["\ud800\udc00"]);
  assertEquals(hit.end, 2);
  hit = unicodeDot.matchAsPrefix(pairs, 2);
  shouldBe(hit, ["\ud801\udc01"]);
  assertEquals(hit.end, 4);
  hit = unicodeDot.matchAsPrefix(pairs, 3);
  shouldBe(hit, ["\ud801\udc01"]);
  assertEquals(hit.end, 4);
  assertNull(unicodeDot.matchAsPrefix(pairs, 4));
  hit = unicodeDot.matchAsPrefix(loneTail, 3);
  shouldBe(hit, ["\ud802"]);
  assertNull(unicodeDot.matchAsPrefix(loneTail, 4));

  // Testing handling of paired and non-paired surrogates in non-unicode mode
  final plainDot = RegExp(r".");
  hit = plainDot.matchAsPrefix(pairs);
  shouldBe(hit, ["\ud800"]);
  assertEquals(hit.end, 1);
  hit = plainDot.matchAsPrefix(pairs, 1);
  shouldBe(hit, ["\udc00"]);
  assertEquals(hit.end, 2);
  hit = plainDot.matchAsPrefix(pairs, 2);
  shouldBe(hit, ["\ud801"]);
  assertEquals(hit.end, 3);
  hit = plainDot.matchAsPrefix(pairs, 3);
  shouldBe(hit, ["\udc01"]);
  assertEquals(hit.end, 4);
  assertNull(plainDot.matchAsPrefix(pairs, 4));

  // Testing same with start anchor, unicode mode.
  final unicodeAnchored = RegExp("^.", unicode: true);
  hit = unicodeAnchored.matchAsPrefix(pairs);
  shouldBe(hit, ["\ud800\udc00"]);
  assertEquals(2, hit.end);
  hit = unicodeAnchored.matchAsPrefix(pairs, 1);
  shouldBe(hit, ["\ud800\udc00"]);
  assertEquals(2, hit.end);
  for (final start in const [2, 3, 4]) {
    assertNull(unicodeAnchored.matchAsPrefix(pairs, start));
  }

  // Testing same with start anchor, non-unicode mode.
  final plainAnchored = RegExp("^.");
  hit = plainAnchored.matchAsPrefix(pairs);
  shouldBe(hit, ["\ud800"]);
  assertEquals(1, hit.end);
  for (final start in const [1, 2, 3, 4]) {
    assertNull(plainAnchored.matchAsPrefix(pairs, start));
  }

  // Now with both anchored and not as alternatives (with the anchored
  // version as a captured group), unicode mode.
  final unicodeEither = RegExp(r"(?:(^.)|.)", unicode: true);
  hit = unicodeEither.matchAsPrefix(pairs);
  shouldBe(hit, ["\ud800\udc00", "\ud800\udc00"]);
  assertEquals(hit.end, 2);
  hit = unicodeEither.matchAsPrefix(pairs, 1);
  shouldBe(hit, ["\ud800\udc00", "\ud800\udc00"]);
  assertEquals(hit.end, 2);
  hit = unicodeEither.matchAsPrefix(pairs, 2);
  shouldBe(hit, ["\ud801\udc01", null]);
  assertEquals(hit.end, 4);
  hit = unicodeEither.matchAsPrefix(pairs, 3);
  shouldBe(hit, ["\ud801\udc01", null]);
  assertEquals(hit.end, 4);
  assertNull(unicodeEither.matchAsPrefix(pairs, 4));
  hit = unicodeEither.matchAsPrefix(loneTail, 3);
  shouldBe(hit, ["\ud802", null]);
  assertNull(unicodeEither.matchAsPrefix(loneTail, 4));

  // Now with both anchored and not as alternatives (with the anchored
  // version as a captured group), non-unicode mode.
  final plainEither = RegExp(r"(?:(^.)|.)");
  hit = plainEither.matchAsPrefix(pairs);
  shouldBe(hit, ["\ud800", "\ud800"]);
  assertEquals(hit.end, 1);
  hit = plainEither.matchAsPrefix(pairs, 1);
  shouldBe(hit, ["\udc00", null]);
  assertEquals(hit.end, 2);
  hit = plainEither.matchAsPrefix(pairs, 2);
  shouldBe(hit, ["\ud801", null]);
  assertEquals(hit.end, 3);
  hit = plainEither.matchAsPrefix(pairs, 3);
  shouldBe(hit, ["\udc01", null]);
  assertEquals(hit.end, 4);
  assertNull(plainEither.matchAsPrefix(pairs, 4));
}

View file

@ -1,74 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Syntax that must be rejected when a pattern is compiled in unicode mode,
/// plus a few escapes that must still be accepted.
void main() {
  // Asserts that compiling [source] in unicode mode throws.
  void rejects(String source) {
    assertThrows(() => RegExp(source, unicode: true));
  }

  // test262/data/test/language/literals/regexp/u-dec-esc
  rejects(r"\1");
  // test262/language/literals/regexp/u-invalid-char-range-a
  rejects(r"[\w-a]");
  // test262/language/literals/regexp/u-invalid-char-range-b
  rejects(r"[a-\w]");
  // test262/language/literals/regexp/u-invalid-char-esc
  rejects(r"\c");
  rejects(r"\c0");
  // test262/built-ins/RegExp/unicode_restricted_quantifiable_assertion
  rejects(r"(?=.)*");
  rejects(r"(?=.){1,2}");
  // test262/built-ins/RegExp/unicode_restricted_octal_escape
  rejects(r"[\1]");
  rejects(r"\00");
  rejects(r"\09");
  // test262/built-ins/RegExp/unicode_restricted_identity_escape_alpha
  rejects(r"[\c]");
  // test262/built-ins/RegExp/unicode_restricted_identity_escape_c
  rejects(r"[\c0]");
  // test262/built-ins/RegExp/unicode_restricted_incomple_quantifier
  rejects(r"a{");
  rejects(r"a{1,");
  rejects(r"{");
  rejects(r"}");
  // test262/data/test/built-ins/RegExp/unicode_restricted_brackets
  rejects(r"]");

  // test262/built-ins/RegExp/unicode_identity_escape
  assertDoesNotThrow(() => RegExp(r"\/", unicode: true));

  // escaped \0 (as NUL) is allowed inside a character class.
  final nulClass = RegExp(r"[\0]", unicode: true);
  shouldBe(nulClass.firstMatch("\u0000"), ["\u0000"]);
  // unless it is followed by another digit.
  rejects(r"[\00]");
  rejects(r"[\01]");
  rejects(r"[\09]");
  final nulInSet = RegExp(r"[1\0a]+", unicode: true);
  shouldBe(
      nulInSet.firstMatch("b\u{0}1\u{0}a\u{0}2"), ["\u{0}1\u{0}a\u{0}"]);

  // escaped \- is allowed inside a character class.
  final dashClass = RegExp(r"[a\-z]", unicode: true);
  shouldBe(dashClass.firstMatch("12-34"), ["-"]);
}

View file

@ -1,39 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2013 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Matches a single non-ASCII character that follows ten million `a`s, using
/// a capturing group in unicode mode.
void main() {
  final haystack = ("a" * 10000000) + "\u1234";
  final pattern = RegExp(r"(\u1234)", unicode: true);
  // Both the whole match and the capture group are the trailing character.
  shouldBe(pattern.firstMatch(haystack), ["\u1234", "\u1234"]);
}

View file

@ -1,93 +0,0 @@
// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2016 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import 'package:expect/expect.dart';
import 'v8_regexp_utils.dart';
/// Checks how the empty-capture pattern `()` produces zero-length matches over
/// a subject built from surrogate pairs, with and without the `unicode` flag,
/// through matchAsPrefix, allMatches, replaceAll and split.
void main() {
  const lead = "\ud800";
  const trail = "\udc00";
  const x = "x";
  const c = 1000;
  // Subjects shared by the checks below: two surrogate pairs, and `c` pairs.
  final twoPairs = lead + trail + lead + trail;
  final manyPairs = (lead + trail) * c;

  // Materializes [matches] once, then checks the number of matches and that
  // each one passes shouldBe with ["", ""]. allMatches yields a lazy iterable,
  // so indexing it with elementAt while re-reading length in a loop condition
  // would restart the matching on every iteration.
  void expectEmptyMatches(Iterable<Match> matches, int expectedCount) {
    final found = matches.toList();
    assertEquals(expectedCount, found.length);
    for (final match in found) {
      shouldBe(match, ["", ""]);
    }
  }

  var r = RegExp(r"()"); // Not unicode.
  // Zero-length matches do not advance lastIndex.
  var m = r.matchAsPrefix(twoPairs);
  shouldBe(m, ["", ""]);
  assertEquals(0, m.end);
  m = r.matchAsPrefix(twoPairs, 1);
  shouldBe(m, ["", ""]);
  assertEquals(1, m.end);

  var u = RegExp(r"()", unicode: true);
  // Zero-length matches do not advance lastIndex (but do respect paired
  // surrogates).
  m = u.matchAsPrefix(twoPairs);
  shouldBe(m, ["", ""]);
  assertEquals(0, m.end);
  // Starting at index 1 (between the two halves of the first pair) is
  // expected to report a match ending at 0 in unicode mode.
  m = u.matchAsPrefix(twoPairs, 1);
  shouldBe(m, ["", ""]);
  assertEquals(0, m.end);

  // However, with repeating matches, we do advance from match to match.
  expectEmptyMatches(r.allMatches(twoPairs), 5);
  // With unicode flag, we advance code point by code point.
  expectEmptyMatches(u.allMatches(twoPairs), 3);
  // Test with a lot of copies.
  expectEmptyMatches(u.allMatches(manyPairs), c + 1);

  // Same with replaceAll().
  assertEquals(x + lead + x + trail + x + lead + x + trail + x,
      twoPairs.replaceAll(r, "x"));
  assertEquals(
      x + lead + trail + x + lead + trail + x, twoPairs.replaceAll(u, "x"));
  assertEquals((x + lead + trail) * c + x, manyPairs.replaceAll(u, "x"));

  // Also test String#split.
  Expect.deepEquals(
      ["\u{12345}"], "\u{12345}".split(RegExp(r"(?:)", unicode: true)));
}

View file

@ -22,10 +22,6 @@ void assertThrows(fn, [num testid = null]) {
Expect.throws(fn, null, "Test $testid");
}
/// Invokes [fn] with no arguments.
///
/// Nothing is caught here, so anything [fn] throws propagates to the caller.
/// [testid] is accepted but not used by this function.
void assertDoesNotThrow(fn, [num testid = null]) => fn();
/// Delegates to `Expect.isNull` for [actual], passing "Test $testid" as the
/// accompanying message.
void assertNull(actual, [num testid = null]) =>
    Expect.isNull(actual, "Test $testid");