dart-sdk/runtime/vm/regexp_assembler_ir.h

453 lines
18 KiB
C
Raw Normal View History

// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
#ifndef RUNTIME_VM_REGEXP_ASSEMBLER_IR_H_
#define RUNTIME_VM_REGEXP_ASSEMBLER_IR_H_
#include "vm/compiler/assembler/assembler.h"
#include "vm/compiler/backend/il.h"
#include "vm/object.h"
#include "vm/regexp_assembler.h"
namespace dart {
class IRRegExpMacroAssembler : public RegExpMacroAssembler {
public:
// Type of input string to generate code for.
enum Mode { ASCII = 1, UC16 = 2 };
// Result of calling generated native RegExp code.
// RETRY: Something significant changed during execution, and the matching
// should be retried from scratch.
// EXCEPTION: Something failed during execution. If no exception has been
// thrown, it's an internal out-of-memory, and the caller should
// throw the exception.
// FAILURE: Matching failed.
// SUCCESS: Matching succeeded, and the output array has been filled with
// capture positions.
enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
IRRegExpMacroAssembler(intptr_t specialization_cid,
intptr_t capture_count,
const ParsedFunction* parsed_function,
const ZoneGrowableArray<const ICData*>& ic_data_array,
VM(RegExp): Allow OSR optimization of RegExp :matcher functions. Previously these functions would only contain a single CheckStackOverflowInstr in a backtracking block and that CheckStackOverflowInstr would have a zero loop_depth - which means it would not be considered eligable for OSR. This change: * adds CheckStackOverflowInstr with non-zero loop_depth in two other places (Boyer-Moore lookahead skip loop and greedy loop) where loops arise in the generated IL; * sets non-zero loop depth on the CheckStackOverflowInstr in the backtracking block; * adds a flag on CheckStackOverflowInstr that allows optimizing compiler to optimize away those checks that were inserted solely to serve as OSR entries. * ensures that IR generated by IRRegExpMacroAssembler is OSR compatible: * GraphEntryInstr has correct osr_id; * GraphEntry and normal entry have different block ids (B0 and B1 - instead of B0 and B0); * unreachable blocks are pruned and GraphEntry is rewired to point to OSR entry; * IRRegExpMacroAssembler::GrowStack should not assume that stack_array_cell and :stack are always in sync, because :stack can come from OSR or deoptimization why stack_array_cell is a constant associated with a particular Code object. * refactors the way the RegExp stack was growing: instead of having a special instruction just emit a call to a Dart function; * refactors the way block pruning for OSR is done by consolidating duplicated code in a single function. We allow the optimizing compiler to remove preemption checks from non-backtracking loops in the regexp code because those loops unlike backtracking have guaranteed O(input_length) time complexity. Performance Implications ------------------------ This change improves performance of regexps in cases where regexp spends a lot of time in the first invocation (either due to backtracking or due to long non matching prefix) by allowing VM to optimize the :matcher while :matcher is running. For example on regex-redux[1] benchmark it improves Dart performance by 3x (from ~18s to ~6s on my Mac Book Pro). CL history ---------- This relands commit d87cc52c3ee791e4dff9136c5c80353deb0f36a3. Original code review: https://codereview.chromium.org/2950783003/ [1] https://benchmarksgame.alioth.debian.org/u64q/program.php?test=regexredux&lang=dart&id=2 R=erikcorry@google.com Review-Url: https://codereview.chromium.org/2951053003 .
2017-06-23 10:51:52 +00:00
intptr_t osr_id,
Zone* zone);
virtual ~IRRegExpMacroAssembler();
virtual bool CanReadUnaligned();
static RawArray* Execute(const RegExp& regexp,
const String& input,
const Smi& start_offset,
bool sticky,
Zone* zone);
virtual bool IsClosed() const { return (current_instruction_ == NULL); }
virtual intptr_t stack_limit_slack();
virtual void AdvanceCurrentPosition(intptr_t by);
virtual void AdvanceRegister(intptr_t reg, intptr_t by);
virtual void Backtrack();
virtual void BindBlock(BlockLabel* label);
virtual void CheckAtStart(BlockLabel* on_at_start);
virtual void CheckCharacter(uint32_t c, BlockLabel* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
uint32_t mask,
BlockLabel* on_equal);
virtual void CheckCharacterGT(uint16_t limit, BlockLabel* on_greater);
virtual void CheckCharacterLT(uint16_t limit, BlockLabel* on_less);
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(BlockLabel* on_tos_equals_current_position);
[VM] Adding regexp lookbehind assertion support. See https://github.com/tc39/proposal-regexp-lookbehind for a high-level description of the feature and examples. This is one of the features requested in https://github.com/dart-lang/sdk/issues/34935. This work takes the feature as present in the v8 engine and appropriately merges it into our irregexp fork. Notable changes to the irregexp codebase to introduce this feature: ----- We can no longer assume that all matching proceeds forwards, since lookbehind matching proceeds backwards. Similarly, we cannot assume that we can only be at the start of a string if we started matching from that point. The direction of matching must also be taken into consideration when doing bounds checking, which previously assumed the engine would never attempt to look before the start of a string. ----- We may now parse backreferences to captures before the capture they reference, since we parse regular expressions left to right, but lookbehinds perform captures as they evaluate the string from right to left. Since RegExpBackReference objects contain a pointer to their corresponding capture, this means that we may need to create RegExpCapture objects prior to the parsing of the corresponding captured subexpression. Thus, RegExpCapture objects are now only initialized with their index, and the body is set later when the subexpression is encountered and parsed. This means any method that operates on the body of a RegExpCapture can no longer be const, which also affects the rest of the RegExpTree class hierarchy. This also means that we don't have a valid max_match length for backreferences based off the capture body, and must assume they can end up being any length. ----- Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540 Commit-Queue: Stevie Strickland <sstrickl@google.com> Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
virtual void CheckNotAtStart(intptr_t cp_offset, BlockLabel* on_not_at_start);
virtual void CheckNotBackReference(intptr_t start_reg,
[VM] Adding regexp lookbehind assertion support. See https://github.com/tc39/proposal-regexp-lookbehind for a high-level description of the feature and examples. This is one of the features requested in https://github.com/dart-lang/sdk/issues/34935. This work takes the feature as present in the v8 engine and appropriately merges it into our irregexp fork. Notable changes to the irregexp codebase to introduce this feature: ----- We can no longer assume that all matching proceeds forwards, since lookbehind matching proceeds backwards. Similarly, we cannot assume that we can only be at the start of a string if we started matching from that point. The direction of matching must also be taken into consideration when doing bounds checking, which previously assumed the engine would never attempt to look before the start of a string. ----- We may now parse backreferences to captures before the capture they reference, since we parse regular expressions left to right, but lookbehinds perform captures as they evaluate the string from right to left. Since RegExpBackReference objects contain a pointer to their corresponding capture, this means that we may need to create RegExpCapture objects prior to the parsing of the corresponding captured subexpression. Thus, RegExpCapture objects are now only initialized with their index, and the body is set later when the subexpression is encountered and parsed. This means any method that operates on the body of a RegExpCapture can no longer be const, which also affects the rest of the RegExpTree class hierarchy. This also means that we don't have a valid max_match length for backreferences based off the capture body, and must assume they can end up being any length. ----- Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540 Commit-Queue: Stevie Strickland <sstrickl@google.com> Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
bool read_backward,
BlockLabel* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg,
[VM] Adding regexp lookbehind assertion support. See https://github.com/tc39/proposal-regexp-lookbehind for a high-level description of the feature and examples. This is one of the features requested in https://github.com/dart-lang/sdk/issues/34935. This work takes the feature as present in the v8 engine and appropriately merges it into our irregexp fork. Notable changes to the irregexp codebase to introduce this feature: ----- We can no longer assume that all matching proceeds forwards, since lookbehind matching proceeds backwards. Similarly, we cannot assume that we can only be at the start of a string if we started matching from that point. The direction of matching must also be taken into consideration when doing bounds checking, which previously assumed the engine would never attempt to look before the start of a string. ----- We may now parse backreferences to captures before the capture they reference, since we parse regular expressions left to right, but lookbehinds perform captures as they evaluate the string from right to left. Since RegExpBackReference objects contain a pointer to their corresponding capture, this means that we may need to create RegExpCapture objects prior to the parsing of the corresponding captured subexpression. Thus, RegExpCapture objects are now only initialized with their index, and the body is set later when the subexpression is encountered and parsed. This means any method that operates on the body of a RegExpCapture can no longer be const, which also affects the rest of the RegExpTree class hierarchy. This also means that we don't have a valid max_match length for backreferences based off the capture body, and must assume they can end up being any length. ----- Change-Id: Iffe0e71b17b1a0c6fea77235e8aee5c093005811 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/94540 Commit-Queue: Stevie Strickland <sstrickl@google.com> Reviewed-by: Martin Kustermann <kustermann@google.com>
2019-03-14 14:26:47 +00:00
bool read_backward,
Reland "[vm] Finish adding support for ECMAScript 2018 features." This work pulls in v8 support for these features with appropriate changes for Dart and closes https://github.com/dart-lang/sdk/issues/34935. This adds support for the following features: * Interpreting patterns as Unicode patterns instead of BMP patterns * the dotAll flag (`/s`) for changing the behavior of '.' to also match line terminators * Escapes for character classes described by Unicode property groups (e.g., \p{Greek} to match all Greek characters, or \P{Greek} for all non-Greek characters). The following TC39 proposals describe some of the added features: * https://github.com/tc39/proposal-regexp-dotall-flag * https://github.com/tc39/proposal-regexp-unicode-property-escapes These additional changes are included: * Extends named capture group names to include the full range of identifier characters supported by ECMAScript, not just ASCII. * Changing the RegExp interface to return RegExpMatch objects, not Match objects, so that downcasting is not necessary to use named capture groups from Dart **Note**: The changes to the RegExp interface are a breaking change for implementers of the RegExp interface. Current users of the RegExp interface (i.e., code using Dart RegExp objects) will not be affected. Change-Id: Ie62e6082a0e2fedc1680ef2576ce0c6db80fc19a Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/100641 Reviewed-by: Martin Kustermann <kustermann@google.com> Commit-Queue: Stevie Strickland <sstrickl@google.com>
2019-04-29 09:11:48 +00:00
bool unicode,
BlockLabel* on_no_match);
virtual void CheckNotCharacter(uint32_t c, BlockLabel* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
uint32_t mask,
BlockLabel* on_not_equal);
virtual void CheckNotCharacterAfterMinusAnd(uint16_t c,
uint16_t minus,
uint16_t mask,
BlockLabel* on_not_equal);
virtual void CheckCharacterInRange(uint16_t from,
uint16_t to,
BlockLabel* on_in_range);
virtual void CheckCharacterNotInRange(uint16_t from,
uint16_t to,
BlockLabel* on_not_in_range);
virtual void CheckBitInTable(const TypedData& table, BlockLabel* on_bit_set);
// Checks whether the given offset from the current position is before
// the end of the string.
virtual void CheckPosition(intptr_t cp_offset, BlockLabel* on_outside_input);
virtual bool CheckSpecialCharacterClass(uint16_t type,
BlockLabel* on_no_match);
virtual void Fail();
virtual void IfRegisterGE(intptr_t reg,
intptr_t comparand,
BlockLabel* if_ge);
virtual void IfRegisterLT(intptr_t reg,
intptr_t comparand,
BlockLabel* if_lt);
virtual void IfRegisterEqPos(intptr_t reg, BlockLabel* if_eq);
virtual IrregexpImplementation Implementation();
virtual void GoTo(BlockLabel* to);
virtual void LoadCurrentCharacter(intptr_t cp_offset,
BlockLabel* on_end_of_input,
bool check_bounds = true,
intptr_t characters = 1);
virtual void PopCurrentPosition();
virtual void PopRegister(intptr_t register_index);
virtual void Print(const char* str);
virtual void PushBacktrack(BlockLabel* label);
virtual void PushCurrentPosition();
virtual void PushRegister(intptr_t register_index);
virtual void ReadCurrentPositionFromRegister(intptr_t reg);
virtual void ReadStackPointerFromRegister(intptr_t reg);
virtual void SetCurrentPositionFromEnd(intptr_t by);
virtual void SetRegister(intptr_t register_index, intptr_t to);
virtual bool Succeed();
virtual void WriteCurrentPositionToRegister(intptr_t reg, intptr_t cp_offset);
virtual void ClearRegisters(intptr_t reg_from, intptr_t reg_to);
virtual void WriteStackPointerToRegister(intptr_t reg);
virtual void PrintBlocks();
IndirectGotoInstr* backtrack_goto() const { return backtrack_goto_; }
GraphEntryInstr* graph_entry() const { return entry_block_; }
intptr_t num_stack_locals() const { return local_id_.Count(); }
intptr_t num_blocks() const { return block_id_.Count(); }
// Generate a dispatch block implementing backtracking. Must be done after
// graph construction.
void GenerateBacktrackBlock();
// Allocate the actual registers array once its size is known. Must be done
// after graph construction.
void FinalizeRegistersArray();
private:
intptr_t GetNextDeoptId() const {
return thread_->compiler_state().GetNextDeoptId();
}
// Generate the contents of preset blocks. The entry block is the entry point
// of the generated code.
void GenerateEntryBlock();
// Copies capture indices into the result area and returns true.
void GenerateSuccessBlock();
// Returns false.
void GenerateExitBlock();
enum ComparisonKind {
kEQ,
kNE,
kLT,
kGT,
kLTE,
kGTE,
};
struct InstanceCallDescriptor {
// Standard (i.e. most non-Smi) functions.
explicit InstanceCallDescriptor(const String& name)
: name(name), token_kind(Token::kILLEGAL), checked_argument_count(1) {}
InstanceCallDescriptor(const String& name,
Token::Kind token_kind,
intptr_t checked_argument_count)
: name(name),
token_kind(token_kind),
checked_argument_count(checked_argument_count) {}
// Special cases for Smi and indexing functions.
static InstanceCallDescriptor FromToken(Token::Kind token_kind) {
switch (token_kind) {
case Token::kEQ:
return InstanceCallDescriptor(Symbols::EqualOperator(), token_kind,
2);
case Token::kADD:
return InstanceCallDescriptor(Symbols::Plus(), token_kind, 2);
case Token::kSUB:
return InstanceCallDescriptor(Symbols::Minus(), token_kind, 2);
case Token::kBIT_OR:
return InstanceCallDescriptor(Symbols::BitOr(), token_kind, 2);
case Token::kBIT_AND:
return InstanceCallDescriptor(Symbols::BitAnd(), token_kind, 2);
case Token::kLT:
return InstanceCallDescriptor(Symbols::LAngleBracket(), token_kind,
2);
case Token::kLTE:
return InstanceCallDescriptor(Symbols::LessEqualOperator(),
token_kind, 2);
case Token::kGT:
return InstanceCallDescriptor(Symbols::RAngleBracket(), token_kind,
2);
case Token::kGTE:
return InstanceCallDescriptor(Symbols::GreaterEqualOperator(),
token_kind, 2);
case Token::kNEGATE:
return InstanceCallDescriptor(Symbols::UnaryMinus(), token_kind, 1);
case Token::kINDEX:
return InstanceCallDescriptor(Symbols::IndexToken(), token_kind, 2);
case Token::kASSIGN_INDEX:
return InstanceCallDescriptor(Symbols::AssignIndexToken(), token_kind,
2);
default:
UNREACHABLE();
}
UNREACHABLE();
return InstanceCallDescriptor(Symbols::Empty());
}
const String& name;
Token::Kind token_kind;
intptr_t checked_argument_count;
};
LocalVariable* Local(const String& name);
LocalVariable* Parameter(const String& name, intptr_t index) const;
ConstantInstr* Int64Constant(int64_t value) const;
ConstantInstr* Uint64Constant(uint64_t value) const;
ConstantInstr* BoolConstant(bool value) const;
ConstantInstr* StringConstant(const char* value) const;
// The word character map static member of the RegExp class.
// Byte map of one byte characters with a 0xff if the character is a word
// character (digit, letter or underscore) and 0x00 otherwise.
// Used by generated RegExp code.
ConstantInstr* WordCharacterMapConstant() const;
ComparisonInstr* Comparison(ComparisonKind kind,
PushArgumentInstr* lhs,
PushArgumentInstr* rhs);
ComparisonInstr* Comparison(ComparisonKind kind,
Definition* lhs,
Definition* rhs);
InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc,
PushArgumentInstr* arg1) const;
InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc,
PushArgumentInstr* arg1,
PushArgumentInstr* arg2) const;
InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc,
PushArgumentInstr* arg1,
PushArgumentInstr* arg2,
PushArgumentInstr* arg3) const;
InstanceCallInstr* InstanceCall(
const InstanceCallDescriptor& desc,
ZoneGrowableArray<PushArgumentInstr*>* arguments) const;
StaticCallInstr* StaticCall(const Function& function,
ICData::RebindRule rebind_rule) const;
StaticCallInstr* StaticCall(const Function& function,
PushArgumentInstr* arg1,
ICData::RebindRule rebind_rule) const;
StaticCallInstr* StaticCall(const Function& function,
PushArgumentInstr* arg1,
PushArgumentInstr* arg2,
ICData::RebindRule rebind_rule) const;
StaticCallInstr* StaticCall(const Function& function,
ZoneGrowableArray<PushArgumentInstr*>* arguments,
ICData::RebindRule rebind_rule) const;
// Creates a new block consisting simply of a goto to dst.
TargetEntryInstr* TargetWithJoinGoto(JoinEntryInstr* dst);
IndirectEntryInstr* IndirectWithJoinGoto(JoinEntryInstr* dst);
// Adds, respectively subtracts lhs and rhs and returns the result.
Definition* Add(PushArgumentInstr* lhs, PushArgumentInstr* rhs);
Definition* Sub(PushArgumentInstr* lhs, PushArgumentInstr* rhs);
LoadLocalInstr* LoadLocal(LocalVariable* local) const;
void StoreLocal(LocalVariable* local, Value* value);
PushArgumentInstr* PushArgument(Value* value);
PushArgumentInstr* PushLocal(LocalVariable* local);
PushArgumentInstr* PushRegisterIndex(intptr_t reg);
Value* LoadRegister(intptr_t reg);
void StoreRegister(intptr_t reg, intptr_t value);
void StoreRegister(PushArgumentInstr* registers,
PushArgumentInstr* index,
PushArgumentInstr* value);
// Load a number of characters at the given offset from the
// current position, into the current-character register.
void LoadCurrentCharacterUnchecked(intptr_t cp_offset,
intptr_t character_count);
// Returns the character within the passed string at the specified index.
Value* CharacterAt(LocalVariable* index);
// Load a number of characters starting from index in the pattern string.
Value* LoadCodeUnitsAt(LocalVariable* index, intptr_t character_count);
VM(RegExp): Allow OSR optimization of RegExp :matcher functions. Previously these functions would only contain a single CheckStackOverflowInstr in a backtracking block and that CheckStackOverflowInstr would have a zero loop_depth - which means it would not be considered eligable for OSR. This change: * adds CheckStackOverflowInstr with non-zero loop_depth in two other places (Boyer-Moore lookahead skip loop and greedy loop) where loops arise in the generated IL; * sets non-zero loop depth on the CheckStackOverflowInstr in the backtracking block; * adds a flag on CheckStackOverflowInstr that allows optimizing compiler to optimize away those checks that were inserted solely to serve as OSR entries. * ensures that IR generated by IRRegExpMacroAssembler is OSR compatible: * GraphEntryInstr has correct osr_id; * GraphEntry and normal entry have different block ids (B0 and B1 - instead of B0 and B0); * unreachable blocks are pruned and GraphEntry is rewired to point to OSR entry; * IRRegExpMacroAssembler::GrowStack should not assume that stack_array_cell and :stack are always in sync, because :stack can come from OSR or deoptimization why stack_array_cell is a constant associated with a particular Code object. * refactors the way the RegExp stack was growing: instead of having a special instruction just emit a call to a Dart function; * refactors the way block pruning for OSR is done by consolidating duplicated code in a single function. We allow the optimizing compiler to remove preemption checks from non-backtracking loops in the regexp code because those loops unlike backtracking have guaranteed O(input_length) time complexity. Performance Implications ------------------------ This change improves performance of regexps in cases where regexp spends a lot of time in the first invocation (either due to backtracking or due to long non matching prefix) by allowing VM to optimize the :matcher while :matcher is running. For example on regex-redux[1] benchmark it improves Dart performance by 3x (from ~18s to ~6s on my Mac Book Pro). CL history ---------- This relands commit d87cc52c3ee791e4dff9136c5c80353deb0f36a3. Original code review: https://codereview.chromium.org/2950783003/ [1] https://benchmarksgame.alioth.debian.org/u64q/program.php?test=regexredux&lang=dart&id=2 R=erikcorry@google.com Review-Url: https://codereview.chromium.org/2951053003 .
2017-06-23 10:51:52 +00:00
// Check whether preemption has been requested. Also serves as an OSR entry.
void CheckPreemption(bool is_backtrack);
// Byte size of chars in the string to match (decided by the Mode argument)
inline intptr_t char_size() { return static_cast<int>(mode_); }
// Equivalent to a conditional branch to the label, unless the label
// is NULL, in which case it is a conditional Backtrack.
void BranchOrBacktrack(ComparisonInstr* comparison,
BlockLabel* true_successor);
// Set up all local variables and parameters.
void InitializeLocals();
// Allocates a new local, and returns the appropriate id for placing it
// on the stack.
intptr_t GetNextLocalIndex();
// We never have any copied parameters.
intptr_t num_copied_params() const { return 0; }
// Return the position register at the specified index, creating it if
// necessary. Note that the number of such registers can exceed the amount
// required by the number of output captures.
LocalVariable* position_register(intptr_t index);
void set_current_instruction(Instruction* instruction);
// The following functions are responsible for appending instructions
// to the current instruction in various ways. The most simple one
// is AppendInstruction, which simply appends an instruction and performs
// bookkeeping.
void AppendInstruction(Instruction* instruction);
// Similar to AppendInstruction, but closes the current block by
// setting current_instruction_ to NULL.
void CloseBlockWith(Instruction* instruction);
// Appends definition and allocates a temp index for the result.
Value* Bind(Definition* definition);
// Loads and binds a local variable.
Value* BindLoadLocal(const LocalVariable& local);
// Appends the definition.
void Do(Definition* definition);
// Closes the current block with a jump to the specified block.
void GoTo(JoinEntryInstr* to);
// Accessors for our local stack_.
void PushStack(Definition* definition);
Definition* PopStack();
Definition* PeekStack();
void CheckStackLimit();
void GrowStack();
// Prints the specified argument. Used for debugging.
void Print(PushArgumentInstr* argument);
// A utility class tracking ids of various objects such as blocks, temps, etc.
class IdAllocator : public ValueObject {
public:
VM(RegExp): Allow OSR optimization of RegExp :matcher functions. Previously these functions would only contain a single CheckStackOverflowInstr in a backtracking block and that CheckStackOverflowInstr would have a zero loop_depth - which means it would not be considered eligable for OSR. This change: * adds CheckStackOverflowInstr with non-zero loop_depth in two other places (Boyer-Moore lookahead skip loop and greedy loop) where loops arise in the generated IL; * sets non-zero loop depth on the CheckStackOverflowInstr in the backtracking block; * adds a flag on CheckStackOverflowInstr that allows optimizing compiler to optimize away those checks that were inserted solely to serve as OSR entries. * ensures that IR generated by IRRegExpMacroAssembler is OSR compatible: * GraphEntryInstr has correct osr_id; * GraphEntry and normal entry have different block ids (B0 and B1 - instead of B0 and B0); * unreachable blocks are pruned and GraphEntry is rewired to point to OSR entry; * IRRegExpMacroAssembler::GrowStack should not assume that stack_array_cell and :stack are always in sync, because :stack can come from OSR or deoptimization why stack_array_cell is a constant associated with a particular Code object. * refactors the way the RegExp stack was growing: instead of having a special instruction just emit a call to a Dart function; * refactors the way block pruning for OSR is done by consolidating duplicated code in a single function. We allow the optimizing compiler to remove preemption checks from non-backtracking loops in the regexp code because those loops unlike backtracking have guaranteed O(input_length) time complexity. Performance Implications ------------------------ This change improves performance of regexps in cases where regexp spends a lot of time in the first invocation (either due to backtracking or due to long non matching prefix) by allowing VM to optimize the :matcher while :matcher is running. For example on regex-redux[1] benchmark it improves Dart performance by 3x (from ~18s to ~6s on my Mac Book Pro). CL history ---------- This relands commit d87cc52c3ee791e4dff9136c5c80353deb0f36a3. Original code review: https://codereview.chromium.org/2950783003/ [1] https://benchmarksgame.alioth.debian.org/u64q/program.php?test=regexredux&lang=dart&id=2 R=erikcorry@google.com Review-Url: https://codereview.chromium.org/2951053003 .
2017-06-23 10:51:52 +00:00
explicit IdAllocator(intptr_t first_id = 0) : next_id(first_id) {}
intptr_t Count() const { return next_id; }
intptr_t Alloc(intptr_t count = 1) {
ASSERT(count >= 0);
intptr_t current_id = next_id;
next_id += count;
return current_id;
}
void Dealloc(intptr_t count = 1) {
ASSERT(count <= next_id);
next_id -= count;
}
private:
intptr_t next_id;
};
Thread* thread_;
// Which mode to generate code for (ASCII or UC16).
Mode mode_;
// Which specific string class to generate code for.
intptr_t specialization_cid_;
// Block entries used internally.
GraphEntryInstr* entry_block_;
JoinEntryInstr* start_block_;
JoinEntryInstr* success_block_;
JoinEntryInstr* exit_block_;
// Shared backtracking block.
JoinEntryInstr* backtrack_block_;
// Single indirect goto instruction which performs all backtracking.
IndirectGotoInstr* backtrack_goto_;
const ParsedFunction* parsed_function_;
const ZoneGrowableArray<const ICData*>& ic_data_array_;
// All created blocks are contained within this set. Used for printing
// the generated code.
GrowableArray<BlockEntryInstr*> blocks_;
// The current instruction to link to when new code is emitted.
Instruction* current_instruction_;
// A list, acting as the runtime stack for both backtrack locations and
// stored positions within the string.
LocalVariable* stack_;
LocalVariable* stack_pointer_;
// Stores the current character within the string.
LocalVariable* current_character_;
// Stores the current location within the string as a negative offset
// from the end of the string.
LocalVariable* current_position_;
// The string being processed, passed as a function parameter.
LocalVariable* string_param_;
// Stores the length of string_param_.
LocalVariable* string_param_length_;
// The start index within the string, passed as a function parameter.
LocalVariable* start_index_param_;
// An assortment of utility variables.
LocalVariable* capture_length_;
LocalVariable* match_start_index_;
LocalVariable* capture_start_index_;
LocalVariable* match_end_index_;
LocalVariable* char_in_capture_;
LocalVariable* char_in_match_;
LocalVariable* index_temp_;
LocalVariable* result_;
// Stored positions containing group bounds. Generated as needed.
LocalVariable* registers_;
intptr_t registers_count_;
const intptr_t saved_registers_count_;
// The actual array objects used for the stack and registers.
Array& stack_array_cell_;
TypedData& registers_array_;
IdAllocator block_id_;
IdAllocator temp_id_;
IdAllocator arg_id_;
IdAllocator local_id_;
IdAllocator indirect_id_;
};
} // namespace dart
#endif // RUNTIME_VM_REGEXP_ASSEMBLER_IR_H_