// dart-sdk/runtime/vm/regexp_assembler.h

// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
#ifndef RUNTIME_VM_REGEXP_ASSEMBLER_H_
#define RUNTIME_VM_REGEXP_ASSEMBLER_H_
#include "vm/object.h"
#if !defined(DART_PRECOMPILED_RUNTIME)
#include "vm/compiler/assembler/assembler.h"
#include "vm/compiler/backend/il.h"
#endif // !defined(DART_PRECOMPILED_RUNTIME)
namespace dart {
// Utility function for the DotPrinter.
// Only declared here; takes a single 16-bit value `c` (presumably one UTF-16
// code unit, going by the function name -- confirm against the definition).
void PrintUtf16(uint16_t c);
extern "C" {
Reland "[vm] Finish adding support for ECMAScript 2018 features." This work pulls in v8 support for these features with appropriate changes for Dart and closes https://github.com/dart-lang/sdk/issues/34935. This adds support for the following features: * Interpreting patterns as Unicode patterns instead of BMP patterns * the dotAll flag (`/s`) for changing the behavior of '.' to also match line terminators * Escapes for character classes described by Unicode property groups (e.g., \p{Greek} to match all Greek characters, or \P{Greek} for all non-Greek characters). The following TC39 proposals describe some of the added features: * https://github.com/tc39/proposal-regexp-dotall-flag * https://github.com/tc39/proposal-regexp-unicode-property-escapes These additional changes are included: * Extends named capture group names to include the full range of identifier characters supported by ECMAScript, not just ASCII. * Changing the RegExp interface to return RegExpMatch objects, not Match objects, so that downcasting is not necessary to use named capture groups from Dart **Note**: The changes to the RegExp interface are a breaking change for implementers of the RegExp interface. Current users of the RegExp interface (i.e., code using Dart RegExp objects) will not be affected. Change-Id: Ie62e6082a0e2fedc1680ef2576ce0c6db80fc19a Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/100641 Reviewed-by: Martin Kustermann <kustermann@google.com> Commit-Queue: Stevie Strickland <sstrickl@google.com>
2019-04-29 09:11:48 +00:00
// Compares two-byte strings case insensitively as UCS2.
// Called from generated RegExp code.
uword /*BoolPtr*/ CaseInsensitiveCompareUCS2(uword /*StringPtr*/ str_raw,
uword /*SmiPtr*/ lhs_index_raw,
uword /*SmiPtr*/ rhs_index_raw,
uword /*SmiPtr*/ length_raw);
Reland "[vm] Finish adding support for ECMAScript 2018 features." This work pulls in v8 support for these features with appropriate changes for Dart and closes https://github.com/dart-lang/sdk/issues/34935. This adds support for the following features: * Interpreting patterns as Unicode patterns instead of BMP patterns * the dotAll flag (`/s`) for changing the behavior of '.' to also match line terminators * Escapes for character classes described by Unicode property groups (e.g., \p{Greek} to match all Greek characters, or \P{Greek} for all non-Greek characters). The following TC39 proposals describe some of the added features: * https://github.com/tc39/proposal-regexp-dotall-flag * https://github.com/tc39/proposal-regexp-unicode-property-escapes These additional changes are included: * Extends named capture group names to include the full range of identifier characters supported by ECMAScript, not just ASCII. * Changing the RegExp interface to return RegExpMatch objects, not Match objects, so that downcasting is not necessary to use named capture groups from Dart **Note**: The changes to the RegExp interface are a breaking change for implementers of the RegExp interface. Current users of the RegExp interface (i.e., code using Dart RegExp objects) will not be affected. Change-Id: Ie62e6082a0e2fedc1680ef2576ce0c6db80fc19a Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/100641 Reviewed-by: Martin Kustermann <kustermann@google.com> Commit-Queue: Stevie Strickland <sstrickl@google.com>
2019-04-29 09:11:48 +00:00
// Compares two-byte strings case insensitively as UTF16.
// Called from generated RegExp code.
uword /*BoolPtr*/ CaseInsensitiveCompareUTF16(uword /*StringPtr*/ str_raw,
uword /*SmiPtr*/ lhs_index_raw,
uword /*SmiPtr*/ rhs_index_raw,
uword /*SmiPtr*/ length_raw);
}
/// Jump-target label shared by the regexp assemblers.
///
/// A label tracks a position (`pos_`) together with two flags: "bound" (the
/// target position is known) and "linked" (the label has been referenced but
/// is not bound yet). Outside the precompiled runtime it additionally carries
/// a JoinEntryInstr pointer, which is used by the IR assembler.
class BlockLabel : public ValueObject {
 public:
  BlockLabel();

  // Destroying a label that is still linked (referenced but never bound) is
  // treated as a programming error.
  ~BlockLabel() { ASSERT(!is_linked()); }

  intptr_t pos() const { return pos_; }

  bool is_bound() const { return is_bound_; }

  // Linked means "referenced and not yet bound".
  bool is_linked() const { return is_linked_ && !is_bound_; }

#if !defined(DART_PRECOMPILED_RUNTIME)
  JoinEntryInstr* block() const { return block_; }
#endif  // !defined(DART_PRECOMPILED_RUNTIME)

  // Returns the label to its initial unbound, unlinked state.
  void Unuse() {
    is_linked_ = false;
    is_bound_ = false;
    pos_ = -1;
  }

  // Binds the label to `pos`. When a block is attached, its block id is set
  // to the same value.
  void BindTo(intptr_t pos) {
    pos_ = pos;
#if !defined(DART_PRECOMPILED_RUNTIME)
    if (block_ != nullptr) {
      block_->set_block_id(pos);
    }
#endif  // !defined(DART_PRECOMPILED_RUNTIME)
    is_linked_ = false;
    is_bound_ = true;
    ASSERT(is_bound());
  }

  // Used by bytecode assembler to form a linked list out of
  // forward jumps to an unbound label.
  void LinkTo(intptr_t pos) {
#if !defined(DART_PRECOMPILED_RUNTIME)
    ASSERT(block_ == nullptr);
#endif  // !defined(DART_PRECOMPILED_RUNTIME)
    ASSERT(!is_bound_);
    is_linked_ = true;
    pos_ = pos;
  }

  // Used by IR builder to mark block label as used. Has no effect on a label
  // that is already bound.
  void SetLinked() {
#if !defined(DART_PRECOMPILED_RUNTIME)
    ASSERT(block_ != nullptr);
#endif  // !defined(DART_PRECOMPILED_RUNTIME)
    if (is_bound_) {
      return;
    }
    is_linked_ = true;
  }

 private:
  bool is_bound_ = false;
  bool is_linked_ = false;
  intptr_t pos_ = -1;
#if !defined(DART_PRECOMPILED_RUNTIME)
  JoinEntryInstr* block_ = nullptr;
#endif  // !defined(DART_PRECOMPILED_RUNTIME)
};
class RegExpMacroAssembler : public ZoneAllocated {
public:
// The implementation must be able to handle at least:
static const intptr_t kMaxRegister = (1 << 16) - 1;
static const intptr_t kMaxCPOffset = (1 << 15) - 1;
static const intptr_t kMinCPOffset = -(1 << 15);
static const intptr_t kTableSizeBits = 7;
static const intptr_t kTableSize = 1 << kTableSizeBits;
static const intptr_t kTableMask = kTableSize - 1;
enum {
kParamRegExpIndex = 0,
kParamStringIndex,
kParamStartOffsetIndex,
kParamCount
};
enum IrregexpImplementation { kBytecodeImplementation, kIRImplementation };
explicit RegExpMacroAssembler(Zone* zone);
virtual ~RegExpMacroAssembler();
// The maximal number of pushes between stack checks. Users must supply
// kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck)
// at least once for every stack_limit() pushes that are executed.
virtual intptr_t stack_limit_slack() = 0;
virtual bool CanReadUnaligned() = 0;
virtual void AdvanceCurrentPosition(intptr_t by) = 0; // Signed cp change.
virtual void AdvanceRegister(intptr_t reg, intptr_t by) = 0; // r[reg] += by.
// Continues execution from the position pushed on the top of the backtrack
// stack by an earlier PushBacktrack(BlockLabel*).
virtual void Backtrack() = 0;
virtual void BindBlock(BlockLabel* label) = 0;
virtual void CheckAtStart(BlockLabel* on_at_start) = 0;
// Dispatch after looking the current character up in a 2-bits-per-entry
// map. The destinations vector has up to 4 labels.
virtual void CheckCharacter(unsigned c, BlockLabel* on_equal) = 0;
// Bitwise and the current character with the given constant and then
// check for a match with c.
virtual void CheckCharacterAfterAnd(unsigned c,
unsigned and_with,
BlockLabel* on_equal) = 0;
virtual void CheckCharacterGT(uint16_t limit, BlockLabel* on_greater) = 0;
virtual void CheckCharacterLT(uint16_t limit, BlockLabel* on_less) = 0;
virtual void CheckGreedyLoop(BlockLabel* on_tos_equals_current_position) = 0;
[VM] Adding regexp lookbehind assertion support. See https://github.com/tc39/proposal-regexp-lookbehind for a high-level description of the feature and examples. This is one of the features requested in https://github.com/dart-lang/sdk/issues/34935. This work takes the feature as present in the v8 engine and appropriately merges it into our irregexp fork. Notable changes to the irregexp codebase to introduce this feature: ----- We can no longer assume that all matching proceeds forwards, since lookbehind matching proceeds backwards. Similarly, we cannot assume that we can only be at the start of a string if we started matching from that point. The direction of matching must also be taken into consideration when doing bounds checking, which previously assumed the engine would never attempt to look before the start of a string. ----- We may now parse backreferences to captures before the capture they reference, since we parse regular expressions left to right, but lookbehinds perform captures as they evaluate the string from right to left. Since RegExpBackReference objects contain a pointer to their corresponding capture, this means that we may need to create RegExpCapture objects prior to the parsing of the corresponding captured subexpression. Thus, RegExpCapture objects are now only initialized with their index, and the body is set later when the subexpression is encountered and parsed. This means any method that operates on the body of a RegExpCapture can no longer be const, which also affects the rest of the RegExpTree class hierarchy. This also means that we don't have a valid max_match length for backreferences based off the capture body, and must assume they can end up being any length. ----- Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540 Commit-Queue: Stevie Strickland <sstrickl@google.com> Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
virtual void CheckNotAtStart(intptr_t cp_offset,
BlockLabel* on_not_at_start) = 0;
virtual void CheckNotBackReference(intptr_t start_reg,
[VM] Adding regexp lookbehind assertion support. See https://github.com/tc39/proposal-regexp-lookbehind for a high-level description of the feature and examples. This is one of the features requested in https://github.com/dart-lang/sdk/issues/34935. This work takes the feature as present in the v8 engine and appropriately merges it into our irregexp fork. Notable changes to the irregexp codebase to introduce this feature: ----- We can no longer assume that all matching proceeds forwards, since lookbehind matching proceeds backwards. Similarly, we cannot assume that we can only be at the start of a string if we started matching from that point. The direction of matching must also be taken into consideration when doing bounds checking, which previously assumed the engine would never attempt to look before the start of a string. ----- We may now parse backreferences to captures before the capture they reference, since we parse regular expressions left to right, but lookbehinds perform captures as they evaluate the string from right to left. Since RegExpBackReference objects contain a pointer to their corresponding capture, this means that we may need to create RegExpCapture objects prior to the parsing of the corresponding captured subexpression. Thus, RegExpCapture objects are now only initialized with their index, and the body is set later when the subexpression is encountered and parsed. This means any method that operates on the body of a RegExpCapture can no longer be const, which also affects the rest of the RegExpTree class hierarchy. This also means that we don't have a valid max_match length for backreferences based off the capture body, and must assume they can end up being any length. ----- Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540 Commit-Queue: Stevie Strickland <sstrickl@google.com> Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
bool read_backward,
BlockLabel* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg,
[VM] Adding regexp lookbehind assertion support. See https://github.com/tc39/proposal-regexp-lookbehind for a high-level description of the feature and examples. This is one of the features requested in https://github.com/dart-lang/sdk/issues/34935. This work takes the feature as present in the v8 engine and appropriately merges it into our irregexp fork. Notable changes to the irregexp codebase to introduce this feature: ----- We can no longer assume that all matching proceeds forwards, since lookbehind matching proceeds backwards. Similarly, we cannot assume that we can only be at the start of a string if we started matching from that point. The direction of matching must also be taken into consideration when doing bounds checking, which previously assumed the engine would never attempt to look before the start of a string. ----- We may now parse backreferences to captures before the capture they reference, since we parse regular expressions left to right, but lookbehinds perform captures as they evaluate the string from right to left. Since RegExpBackReference objects contain a pointer to their corresponding capture, this means that we may need to create RegExpCapture objects prior to the parsing of the corresponding captured subexpression. Thus, RegExpCapture objects are now only initialized with their index, and the body is set later when the subexpression is encountered and parsed. This means any method that operates on the body of a RegExpCapture can no longer be const, which also affects the rest of the RegExpTree class hierarchy. This also means that we don't have a valid max_match length for backreferences based off the capture body, and must assume they can end up being any length. ----- Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540 Commit-Queue: Stevie Strickland <sstrickl@google.com> Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
bool read_backward,
Reland "[vm] Finish adding support for ECMAScript 2018 features." This work pulls in v8 support for these features with appropriate changes for Dart and closes https://github.com/dart-lang/sdk/issues/34935. This adds support for the following features: * Interpreting patterns as Unicode patterns instead of BMP patterns * the dotAll flag (`/s`) for changing the behavior of '.' to also match line terminators * Escapes for character classes described by Unicode property groups (e.g., \p{Greek} to match all Greek characters, or \P{Greek} for all non-Greek characters). The following TC39 proposals describe some of the added features: * https://github.com/tc39/proposal-regexp-dotall-flag * https://github.com/tc39/proposal-regexp-unicode-property-escapes These additional changes are included: * Extends named capture group names to include the full range of identifier characters supported by ECMAScript, not just ASCII. * Changing the RegExp interface to return RegExpMatch objects, not Match objects, so that downcasting is not necessary to use named capture groups from Dart **Note**: The changes to the RegExp interface are a breaking change for implementers of the RegExp interface. Current users of the RegExp interface (i.e., code using Dart RegExp objects) will not be affected. Change-Id: Ie62e6082a0e2fedc1680ef2576ce0c6db80fc19a Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/100641 Reviewed-by: Martin Kustermann <kustermann@google.com> Commit-Queue: Stevie Strickland <sstrickl@google.com>
2019-04-29 09:11:48 +00:00
bool unicode,
BlockLabel* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off
// the stack and go to that.
virtual void CheckNotCharacter(unsigned c, BlockLabel* on_not_equal) = 0;
virtual void CheckNotCharacterAfterAnd(unsigned c,
unsigned and_with,
BlockLabel* on_not_equal) = 0;
// Subtract a constant from the current character, then and with the given
// constant and then check for a match with c.
virtual void CheckNotCharacterAfterMinusAnd(uint16_t c,
uint16_t minus,
uint16_t and_with,
BlockLabel* on_not_equal) = 0;
virtual void CheckCharacterInRange(uint16_t from,
uint16_t to, // Both inclusive.
BlockLabel* on_in_range) = 0;
virtual void CheckCharacterNotInRange(uint16_t from,
uint16_t to, // Both inclusive.
BlockLabel* on_not_in_range) = 0;
// The current character (modulus the kTableSize) is looked up in the byte
// array, and if the found byte is non-zero, we jump to the on_bit_set label.
virtual void CheckBitInTable(const TypedData& table,
BlockLabel* on_bit_set) = 0;
VM(RegExp): Allow OSR optimization of RegExp :matcher functions. Previously these functions would only contain a single CheckStackOverflowInstr in a backtracking block and that CheckStackOverflowInstr would have a zero loop_depth - which means it would not be considered eligable for OSR. This change: * adds CheckStackOverflowInstr with non-zero loop_depth in two other places (Boyer-Moore lookahead skip loop and greedy loop) where loops arise in the generated IL; * sets non-zero loop depth on the CheckStackOverflowInstr in the backtracking block; * adds a flag on CheckStackOverflowInstr that allows optimizing compiler to optimize away those checks that were inserted solely to serve as OSR entries. * ensures that IR generated by IRRegExpMacroAssembler is OSR compatible: * GraphEntryInstr has correct osr_id; * GraphEntry and normal entry have different block ids (B0 and B1 - instead of B0 and B0); * unreachable blocks are pruned and GraphEntry is rewired to point to OSR entry; * IRRegExpMacroAssembler::GrowStack should not assume that stack_array_cell and :stack are always in sync, because :stack can come from OSR or deoptimization why stack_array_cell is a constant associated with a particular Code object. * refactors the way the RegExp stack was growing: instead of having a special instruction just emit a call to a Dart function; * refactors the way block pruning for OSR is done by consolidating duplicated code in a single function. We allow the optimizing compiler to remove preemption checks from non-backtracking loops in the regexp code because those loops unlike backtracking have guaranteed O(input_length) time complexity. Performance Implications ------------------------ This change improves performance of regexps in cases where regexp spends a lot of time in the first invocation (either due to backtracking or due to long non matching prefix) by allowing VM to optimize the :matcher while :matcher is running. 
For example on regex-redux[1] benchmark it improves Dart performance by 3x (from ~18s to ~6s on my Mac Book Pro). CL history ---------- This relands commit d87cc52c3ee791e4dff9136c5c80353deb0f36a3. Original code review: https://codereview.chromium.org/2950783003/ [1] https://benchmarksgame.alioth.debian.org/u64q/program.php?test=regexredux&lang=dart&id=2 R=erikcorry@google.com Review-Url: https://codereview.chromium.org/2951053003 .
2017-06-23 10:51:52 +00:00
// Checks for preemption and serves as an OSR entry.
virtual void CheckPreemption(bool is_backtrack) {}
// Checks whether the given offset from the current position is before
// the end of the string. May overwrite the current character.
virtual void CheckPosition(intptr_t cp_offset, BlockLabel* on_outside_input) {
LoadCurrentCharacter(cp_offset, on_outside_input, true);
}
// Check whether a standard/default character class matches the current
// character. Returns false if the type of special character class does
// not have custom support.
// May clobber the current loaded character.
virtual bool CheckSpecialCharacterClass(uint16_t type,
BlockLabel* on_no_match) {
return false;
}
virtual void Fail() = 0;
// Check whether a register is >= a given constant and go to a label if it
// is. Backtracks instead if the label is NULL.
virtual void IfRegisterGE(intptr_t reg,
intptr_t comparand,
BlockLabel* if_ge) = 0;
// Check whether a register is < a given constant and go to a label if it is.
// Backtracks instead if the label is NULL.
virtual void IfRegisterLT(intptr_t reg,
intptr_t comparand,
BlockLabel* if_lt) = 0;
// Check whether a register is == to the current position and go to a
// label if it is.
virtual void IfRegisterEqPos(intptr_t reg, BlockLabel* if_eq) = 0;
virtual IrregexpImplementation Implementation() = 0;
// The assembler is closed, iff there is no current instruction assigned.
virtual bool IsClosed() const = 0;
// Jump to the target label without setting it as the current instruction.
virtual void GoTo(BlockLabel* to) = 0;
virtual void LoadCurrentCharacter(intptr_t cp_offset,
BlockLabel* on_end_of_input,
bool check_bounds = true,
intptr_t characters = 1) = 0;
virtual void PopCurrentPosition() = 0;
virtual void PopRegister(intptr_t register_index) = 0;
// Prints string within the generated code. Used for debugging.
virtual void Print(const char* str) = 0;
// Prints all emitted blocks.
virtual void PrintBlocks() = 0;
// Pushes the label on the backtrack stack, so that a following Backtrack
// will go to this label. Always checks the backtrack stack limit.
virtual void PushBacktrack(BlockLabel* label) = 0;
virtual void PushCurrentPosition() = 0;
virtual void PushRegister(intptr_t register_index) = 0;
virtual void ReadCurrentPositionFromRegister(intptr_t reg) = 0;
virtual void ReadStackPointerFromRegister(intptr_t reg) = 0;
virtual void SetCurrentPositionFromEnd(intptr_t by) = 0;
virtual void SetRegister(intptr_t register_index, intptr_t to) = 0;
// Return whether the matching (with a global regexp) will be restarted.
virtual bool Succeed() = 0;
virtual void WriteCurrentPositionToRegister(intptr_t reg,
intptr_t cp_offset) = 0;
virtual void ClearRegisters(intptr_t reg_from, intptr_t reg_to) = 0;
virtual void WriteStackPointerToRegister(intptr_t reg) = 0;
Reland "[vm] Finish adding support for ECMAScript 2018 features." This work pulls in v8 support for these features with appropriate changes for Dart and closes https://github.com/dart-lang/sdk/issues/34935. This adds support for the following features: * Interpreting patterns as Unicode patterns instead of BMP patterns * the dotAll flag (`/s`) for changing the behavior of '.' to also match line terminators * Escapes for character classes described by Unicode property groups (e.g., \p{Greek} to match all Greek characters, or \P{Greek} for all non-Greek characters). The following TC39 proposals describe some of the added features: * https://github.com/tc39/proposal-regexp-dotall-flag * https://github.com/tc39/proposal-regexp-unicode-property-escapes These additional changes are included: * Extends named capture group names to include the full range of identifier characters supported by ECMAScript, not just ASCII. * Changing the RegExp interface to return RegExpMatch objects, not Match objects, so that downcasting is not necessary to use named capture groups from Dart **Note**: The changes to the RegExp interface are a breaking change for implementers of the RegExp interface. Current users of the RegExp interface (i.e., code using Dart RegExp objects) will not be affected. Change-Id: Ie62e6082a0e2fedc1680ef2576ce0c6db80fc19a Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/100641 Reviewed-by: Martin Kustermann <kustermann@google.com> Commit-Queue: Stevie Strickland <sstrickl@google.com>
2019-04-29 09:11:48 +00:00
// Check that we are not in the middle of a surrogate pair.
void CheckNotInSurrogatePair(intptr_t cp_offset, BlockLabel* on_failure);
// Controls the generation of large inlined constants in the code.
void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
bool slow_safe() { return slow_safe_compiler_; }
Reland "[vm] Finish adding support for ECMAScript 2018 features." This work pulls in v8 support for these features with appropriate changes for Dart and closes https://github.com/dart-lang/sdk/issues/34935. This adds support for the following features: * Interpreting patterns as Unicode patterns instead of BMP patterns * the dotAll flag (`/s`) for changing the behavior of '.' to also match line terminators * Escapes for character classes described by Unicode property groups (e.g., \p{Greek} to match all Greek characters, or \P{Greek} for all non-Greek characters). The following TC39 proposals describe some of the added features: * https://github.com/tc39/proposal-regexp-dotall-flag * https://github.com/tc39/proposal-regexp-unicode-property-escapes These additional changes are included: * Extends named capture group names to include the full range of identifier characters supported by ECMAScript, not just ASCII. * Changing the RegExp interface to return RegExpMatch objects, not Match objects, so that downcasting is not necessary to use named capture groups from Dart **Note**: The changes to the RegExp interface are a breaking change for implementers of the RegExp interface. Current users of the RegExp interface (i.e., code using Dart RegExp objects) will not be affected. Change-Id: Ie62e6082a0e2fedc1680ef2576ce0c6db80fc19a Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/100641 Reviewed-by: Martin Kustermann <kustermann@google.com> Commit-Queue: Stevie Strickland <sstrickl@google.com>
2019-04-29 09:11:48 +00:00
enum GlobalMode {
NOT_GLOBAL,
GLOBAL,
GLOBAL_NO_ZERO_LENGTH_CHECK,
GLOBAL_UNICODE
};
// Set whether the regular expression has the global flag. Exiting due to
// a failure in a global regexp may still mean success overall.
inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
inline bool global() { return global_mode_ != NOT_GLOBAL; }
Reland "[vm] Finish adding support for ECMAScript 2018 features." This work pulls in v8 support for these features with appropriate changes for Dart and closes https://github.com/dart-lang/sdk/issues/34935. This adds support for the following features: * Interpreting patterns as Unicode patterns instead of BMP patterns * the dotAll flag (`/s`) for changing the behavior of '.' to also match line terminators * Escapes for character classes described by Unicode property groups (e.g., \p{Greek} to match all Greek characters, or \P{Greek} for all non-Greek characters). The following TC39 proposals describe some of the added features: * https://github.com/tc39/proposal-regexp-dotall-flag * https://github.com/tc39/proposal-regexp-unicode-property-escapes These additional changes are included: * Extends named capture group names to include the full range of identifier characters supported by ECMAScript, not just ASCII. * Changing the RegExp interface to return RegExpMatch objects, not Match objects, so that downcasting is not necessary to use named capture groups from Dart **Note**: The changes to the RegExp interface are a breaking change for implementers of the RegExp interface. Current users of the RegExp interface (i.e., code using Dart RegExp objects) will not be affected. Change-Id: Ie62e6082a0e2fedc1680ef2576ce0c6db80fc19a Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/100641 Reviewed-by: Martin Kustermann <kustermann@google.com> Commit-Queue: Stevie Strickland <sstrickl@google.com>
2019-04-29 09:11:48 +00:00
inline bool global_with_zero_length_check() {
return global_mode_ == GLOBAL || global_mode_ == GLOBAL_UNICODE;
}
inline bool global_unicode() { return global_mode_ == GLOBAL_UNICODE; }
Zone* zone() const { return zone_; }
private:
bool slow_safe_compiler_;
Reland "[vm] Finish adding support for ECMAScript 2018 features." This work pulls in v8 support for these features with appropriate changes for Dart and closes https://github.com/dart-lang/sdk/issues/34935. This adds support for the following features: * Interpreting patterns as Unicode patterns instead of BMP patterns * the dotAll flag (`/s`) for changing the behavior of '.' to also match line terminators * Escapes for character classes described by Unicode property groups (e.g., \p{Greek} to match all Greek characters, or \P{Greek} for all non-Greek characters). The following TC39 proposals describe some of the added features: * https://github.com/tc39/proposal-regexp-dotall-flag * https://github.com/tc39/proposal-regexp-unicode-property-escapes These additional changes are included: * Extends named capture group names to include the full range of identifier characters supported by ECMAScript, not just ASCII. * Changing the RegExp interface to return RegExpMatch objects, not Match objects, so that downcasting is not necessary to use named capture groups from Dart **Note**: The changes to the RegExp interface are a breaking change for implementers of the RegExp interface. Current users of the RegExp interface (i.e., code using Dart RegExp objects) will not be affected. Change-Id: Ie62e6082a0e2fedc1680ef2576ce0c6db80fc19a Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/100641 Reviewed-by: Martin Kustermann <kustermann@google.com> Commit-Queue: Stevie Strickland <sstrickl@google.com>
2019-04-29 09:11:48 +00:00
GlobalMode global_mode_;
Zone* zone_;
};
} // namespace dart
#endif // RUNTIME_VM_REGEXP_ASSEMBLER_H_